In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# One label per recording position inside a set (what the "a-b" pairs encode
# is not shown in this notebook — TODO confirm).
set_label = [
    '50-25',
    '75-37',
    '100-50',
    '125-67',
    '150-75',
]

# Recording identifiers for each set; the loading cell inserts each entry into
# the 'record_2024_<entry>' directory name.
set_A_05 = [
    '0121_0157',
    '0121_0204',
    '0121_0211',
    '0121_0219',
    '0121_0227',
]
set_B_02 = [
    '0104_2223',
    '0104_2309',
    '0104_2328',
    '0104_2343',
    '0105_0013',
]
set_C_10 = [
    '0116_0117',
    '0116_0125',
    '0116_0132',
    '0116_0140',
    '0116_0149',
]
set_D_06 = [
    '0104_1949',
    '0104_2002',
    '0104_2016',
    '0104_2032',
    '0104_2056',
]

# Set processed by the rest of the notebook.
subset = set_A_05

# Per-set notes left by the author, format "<set>:<percentage> -> <value>".
# NOTE(review): presumably the value is the threshold used for that set in the
# model cell — confirm.
# A:9.26% -> 1.0
# B:8.04% -> 1.1
# C:4.88% -> 3.2
# D:9.15% -> 0.8

In [2]:
# df = pd.read_csv(s1s2_path)
# X = np.array([df['scan_enp'], df['bev_enp']]).T
# y = np.array(df['s1_gt'])

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

def evaluate_regression(y_test, y_pred):
    """Compute the standard regression error metrics for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_test``.

    Returns
    -------
    tuple
        ``(mse, rmse, mae, r2)`` in that order.
    """
    mse = mean_squared_error(y_test, y_pred)
    # Derive RMSE from the MSE rather than passing ``squared=False``: that
    # keyword was deprecated in scikit-learn 1.4 and removed in 1.6. For the
    # single-output targets used in this notebook both forms give the same value.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return mse, rmse, mae, r2

In [4]:
# Per-frame records parsed from the last line of every semantic-lidar .txt file.
idx, scan_entropy_list, bev_entropy_list, pf_scalar_list, pf_vector_list, s1_gt = [], [], [], [], [], []
# Ids of frames whose file holds at most one line and therefore yields no record.
empty_file = []
# Position of the current recording inside `subset`; used as the frame-id prefix.
sub_cnt = 0
for time_dirc in subset:
    sem_pt_path = '/home/newDisk/tool/carla_dataset_tool/raw_data/record_2024_'+time_dirc+'/vehicle.tesla.model3.master/velodyne_semantic/'
    file_list = sorted(f for f in os.listdir(sem_pt_path) if f.endswith('.txt'))

    for file_name in file_list:
        # Frame id = recording index followed by the five characters before '.txt'.
        frame_id = str(sub_cnt) + file_name[-9:-4]

        with open(os.path.join(sem_pt_path, file_name), 'r') as f:
            lines = f.readlines()

        # Check the length *before* touching lines[-1]: a completely empty file
        # would otherwise raise IndexError instead of being recorded as empty.
        if len(lines) <= 1:
            empty_file.append(frame_id)
            continue

        # The last line carries five whitespace-separated values, in the order
        # s1_gt, scan entropy, bev entropy, pf scalar, pf vector.
        # split() without an argument also drops the trailing newline.
        scores = lines[-1].split()
        # Store the ground truth as a float like every other field, so the
        # target array built from it is numeric rather than an array of strings.
        s1_gt.append(np.float64(scores[0]))
        scan_entropy_list.append(np.float64(scores[1]))
        bev_entropy_list.append(np.float64(scores[2]))
        pf_scalar_list.append(np.float64(scores[3]))
        pf_vector_list.append(np.float64(scores[4]))
        idx.append(frame_id)

    sub_cnt += 1


In [5]:
# Number of frames that produced a score record (frames listed in empty_file are not included).
len(scan_entropy_list)

4279

In [6]:
# Assemble the per-frame table from the lists filled by the loading cell.
df = pd.DataFrame({'idx':idx, 'scan_enp':scan_entropy_list, 'bev_enp':bev_entropy_list, 'pf_scalar':pf_scalar_list, 'pf_vector':pf_vector_list, 's1_gt':s1_gt})

# Keep the feature matrix purely numeric. Stacking the string frame ids into X
# (and slicing them off again after the split) forces an object-dtype array and
# relies on implicit conversion inside scikit-learn.
X = df[['scan_enp', 'bev_enp']].to_numpy(dtype=np.float64)
# to_numeric makes the target numeric even when s1_gt was collected as strings.
y = pd.to_numeric(df['s1_gt']).to_numpy(dtype=np.float64)
frame_ids = df['idx'].to_numpy()

# train_test_split shuffles every array it is given with the same permutation,
# so the ids stay aligned with their rows and the split is unchanged.
# test_size=0.8 leaves 20% of the frames for fitting the regressor.
X_train, X_test, y_train, y_test, train_idx, test_idx = train_test_split(
    X, y, frame_ids, test_size=0.8, random_state=42)

# sns.scatterplot(x=bev_entropy_list, y=s1_gt)

In [18]:
# Threshold used to binarise predictions and ground truth below
# (identifier spelling kept unchanged so existing references keep working).
mertic = 1
print(len(X_train), len(X_test), mertic)

# NOTE(review): max_features=15 is larger than the two feature columns this
# notebook puts into X — confirm that value is intended.
gbr = GradientBoostingRegressor(loss='huber',learning_rate=0.2,n_estimators=100, min_samples_leaf=10, max_depth=10, max_features=15, random_state=42)
gbr.fit(X_train,y_train)


def _report_metrics(title, y_true, y_hat):
    """Print mse/rmse/mae/r2 under `title` and return them as a tuple."""
    scores = evaluate_regression(y_true, y_hat)
    print(title)
    for label, value in zip(('mse', 'rmse', 'mae', 'r2'), scores):
        print(label + ': ', value)
    return scores


y_pred_train = gbr.predict(X_train)
gbr_mse, gbr_rmse, gbr_mae, gbr_r2 = _report_metrics('[GBR x and y] test on train dataset', y_train, y_pred_train)

y_pred_test = gbr.predict(X_test)
gbr_mse, gbr_rmse, gbr_mae, gbr_r2 = _report_metrics('[GBR x and y] test on test dataset', y_test, y_pred_test)

# Calculate the accuracy with the threshold: a value is class 1 when it is
# strictly above `mertic`, otherwise class 0. A single comparison is used
# instead of two successive in-place assignments, because the second
# assignment would re-label the freshly written zeros whenever the threshold
# is negative.
y_pred2 = (y_pred_test > mertic).astype(int)
y_test2 = (np.asarray(y_test, dtype=np.float64) > mertic).astype(int)
accuracy = np.sum(y_pred2 == y_test2) / len(y_test2)

# Fraction of test frames predicted at or below the threshold.
print(len(y_pred2[y_pred2 == 0]) / len(y_pred2))


print('accuracy: ', accuracy)

# Ids of the test frames whose prediction is above the threshold.
idx_test = test_idx[y_pred2 == 1]

print(len(idx_test), len(y_test))


855 3424 1
[GBR x and y] test on train dataset
mse:  1.8389116952757107
rmse:  1.3560647828461998
mae:  0.7951560663120584
r2:  0.9233866268994515
[GBR x and y] test on test dataset
mse:  6.461238502541671
rmse:  2.5418966349050605
mae:  1.677312435597827
r2:  0.6926183006157052
0.09258177570093458
accuracy:  0.7453271028037384
3107 3424


In [19]:
# Frames written to the image sets: every regression-training frame plus the
# test frames selected in idx_test.
train_list = train_idx.tolist() + idx_test.tolist()

print(len(train_list))

# Every `test_spilt`-th frame goes to the test list and every `val_spilt`-th
# frame to the validation list (identifier spelling kept unchanged).
test_spilt = 12
val_spilt = 15

# Frame ids must not contain any element of empty_file. A set lookup replaces
# repeated list.remove calls, and the ids are sorted so the split positions
# below are deterministic.
empty_ids = set(empty_file)
candidate_ids = sorted(frame for frame in train_list if frame not in empty_ids)

# NOTE(review): the output folder is fixed to 'A' while `subset` is selected in
# the first cell — confirm the two are kept in sync when switching sets.
train_path = './imagesets/A/train.txt'
test_path = './imagesets/A/test.txt'
val_path = './imagesets/A/val.txt'

# Single-pass partition. The test list takes precedence: a position that is a
# multiple of both strides goes to test only. Everything not picked for test or
# validation stays in the training list.
train_list, test_list, val_list = [], [], []
for i, frame in enumerate(candidate_ids):
    if i % test_spilt == 0:
        test_list.append(frame)
    elif i % val_spilt == 0:
        val_list.append(frame)
    else:
        train_list.append(frame)

for path, frames in ((train_path, train_list), (test_path, test_list), (val_path, val_list)):
    # Create the target folder first so a fresh checkout does not fail on open().
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        f.writelines("%s\n" % item for item in frames)

3962


In [20]:
train_path = './imagesets/A/train.txt'
test_path = './imagesets/A/test.txt'
val_path = './imagesets/A/val.txt'

save_path = '/home/newDisk/tool/carla_dataset_tool/dataset/'


def _load_id_set(list_path):
    """Return the set of lines stored in `list_path`, with newline characters removed."""
    with open(list_path, 'r') as handle:
        return {entry.replace('\n', '') for entry in handle.readlines()}


# read imageset(train_path, test_path, val_path)
train_set = _load_id_set(train_path)
test_set = _load_id_set(test_path)
val_set = _load_id_set(val_path)

In [31]:
# import os
# import shutil
# import argparse

# proj_path = '/home/newDisk/tool/carla_dataset_tool/'
# dir_set = []

# for i in range(len(current_set)):
#     subset = current_set[i]
#     dir = '/home/newDisk/tool/carla_dataset_tool/raw_data/record_2024_'+subset
#     dir_set.append(dir)


# # get inner frame id
    






In [32]:

# # copy lidar
# target_path = {'train': proj_path+'dataset/training/velodyne/', 'test': proj_path+'dataset/testing/velodyne/'}
# for key in target_path:
#     if not os.path.exists(target_path[key]):
#         os.makedirs(target_path[key])
# source_path = []
# for i in dir_set:
#     source_path.append(i+'/vehicle.tesla.model3.master/velodyne/')

# for train_frame in train_set:
#     file_name = '00000' + train_frame[-5:]+'.bin'
#     dir = source_path[int(train_frame[0])]
#     file_path = dir+file_name
#     shutil.copy(os.path.join(dir, file_name), os.path.join(target_path['train'], train_frame+'.bin'))

# for test_frame in test_set:
#     file_name = '00000' + test_frame[-5:]+'.bin'
#     dir = source_path[int(test_frame[0])]
#     file_path = dir+file_name
#     shutil.copy(os.path.join(dir, file_name), os.path.join(target_path['test'], test_frame+'.bin'))

# for val_frame in val_set:
#     file_name = '00000' + val_frame[-5:]+'.bin'
#     dir = source_path[int(val_frame[0])]
#     file_path = dir+file_name
#     shutil.copy(os.path.join(dir, file_name), os.path.join(target_path['train'], val_frame+'.bin'))

In [33]:
# # copy image
# target_path = {'train': proj_path+'dataset/training/image_2/', 'test': proj_path+'dataset/testing/image_2/'}
# for key in target_path:
#     if not os.path.exists(target_path[key]):
#         os.makedirs(target_path[key])
# source_path = []
# for i in dir_set:
#     source_path.append(i+'/vehicle.tesla.model3.master/image_2/')
# for train_frame in train_set:
#     file_name = '00000' + train_frame[-5:]+'.png'
#     dir = source_path[int(train_frame[0])]
#     file_path = dir+file_name
#     shutil.copy(os.path.join(dir, file_name), os.path.join(target_path['train'], train_frame+'.png'))
# for test_frame in test_set:
#     file_name = '00000' + test_frame[-5:]+'.png'
#     dir = source_path[int(test_frame[0])]
#     file_path = dir+file_name
#     shutil.copy(os.path.join(dir, file_name), os.path.join(target_path['test'], test_frame+'.png'))
# for val_frame in val_set:
#     file_name = '00000' + val_frame[-5:]+'.png'
#     dir = source_path[int(val_frame[0])]
#     file_path = dir+file_name
#     shutil.copy(os.path.join(dir, file_name), os.path.join(target_path['train'], val_frame+'.png'))
    