# Notebook to extract the classification ground truths from FiftyOne and save them in CSV files.
These CSV files are then used by the YOLO multitask model.

In [1]:
# Optional: install or upgrade FiftyOne before running this notebook.
# pip install --upgrade fiftyone

In [3]:
import fiftyone as fo
import pandas as pd

## Load the datasets

In [4]:
# Load the FiftyOne dataset registered under this name.
DATASET_NAME = "esmart_wip"
dataset = fo.load_dataset(DATASET_NAME)

In [5]:
# Select the train and validation samples through their split tags.
train_dataset_51, val_dataset_51 = (
    dataset.match_tags(split_tag)
    for split_tag in ("TRAIN_RC_AUTO_TIME", "VAL_RC_AUTO_TIME")
)

In [6]:
# Open the FiftyOne App on the full dataset to browse its samples interactively.
fo.launch_app(dataset)

Connected to FiftyOne on port 5151 at localhost.
If you are not connecting to a remote session, you may need to start a new session and specify a port


Dataset:          esmart_wip
Media type:       image
Num samples:      6540
Selected samples: 0
Selected labels:  0
Session URL:      http://localhost:5151/

## Get the labels 

In [7]:
# Collect one (file name, road condition label) pair per train sample.
list_data_train = []

for sample in train_dataset_51:
    # Keep only the base name of the image; a directory prefix is added in a later cell.
    file_name = sample.filepath.split("/")[-1]
    try:
        # The first classification of the gt_road_condition field is used as the label.
        road_cond_class = sample.gt_road_condition.classifications[0].label
        # Other tasks can be extracted the same way:
        # visibility_class = sample.gt_visibility.classifications[0].label
        # road_type_class = sample.gt_roadtype.classifications[0].label
    except (AttributeError, IndexError, TypeError):
        # Only the errors raised by a missing/None field (AttributeError),
        # an empty classification list (IndexError) or a None list (TypeError)
        # are treated as "no label"; anything else (e.g. KeyboardInterrupt)
        # is no longer silently swallowed.
        print("Image doesn't contain a gt_road_condition")
        continue

    list_data_train.append((file_name, road_cond_class))
    # With the extra tasks:
    # list_data_train.append((file_name, road_cond_class, visibility_class, road_type_class))

Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition


In [8]:
# Tabulate the (file name, road condition) pairs collected for the train split.
gt_class_train_df = pd.DataFrame(
    data=list_data_train,
    columns=["img", "road_cond"],
)
# With the extra tasks, the columns would be:
# columns=["img", "road_cond", "visibility", "road_type"]
gt_class_train_df

Unnamed: 0,img,road_cond
0,20210629_173500_2034.jpg,dry
1,20210629_173500_2064.jpg,dry
2,20210629_173500_2100.jpg,dry
3,20210629_173500_2214.jpg,dry
4,20210629_173500_2256.jpg,dry
...,...,...
4435,Log-20220326-183856 Data Log_3800.jpg,wet
4436,Log-20220326-183856 Data Log_3872.jpg,wet
4437,Log-20220326-183856 Data Log_3932.jpg,wet
4438,Log-20220326-183856 Data Log_4184.jpg,wet


In [9]:
# Collect one (file name, road condition label) pair per validation sample.
list_data_val = []

for sample in val_dataset_51:
    # Keep only the base name of the image; a directory prefix is added in a later cell.
    file_name = sample.filepath.split("/")[-1]
    try:
        # The first classification of the gt_road_condition field is used as the label.
        road_cond_class = sample.gt_road_condition.classifications[0].label
        # Other tasks can be extracted the same way:
        # visibility_class = sample.gt_visibility.classifications[0].label
        # road_type_class = sample.gt_roadtype.classifications[0].label
    except (AttributeError, IndexError, TypeError):
        # Only the errors raised by a missing/None field (AttributeError),
        # an empty classification list (IndexError) or a None list (TypeError)
        # are treated as "no label"; anything else (e.g. KeyboardInterrupt)
        # is no longer silently swallowed.
        print("Image doesn't contain a gt_road_condition")
        continue

    list_data_val.append((file_name, road_cond_class))
    # With the extra tasks:
    # list_data_val.append((file_name, road_cond_class, visibility_class, road_type_class))

Image doesn't contain a gt_road_condition
Image doesn't contain a gt_road_condition


In [10]:
# Tabulate the (file name, road condition) pairs collected for the validation split.
gt_class_val_df = pd.DataFrame(
    data=list_data_val,
    columns=["img", "road_cond"],
)
# With the extra tasks, the columns would be:
# columns=["img", "road_cond", "visibility", "road_type"]
gt_class_val_df

Unnamed: 0,img,road_cond
0,20210629_173500_13326.jpg,dry
1,20210629_173500_13614.jpg,dry
2,20210629_173500_14178.jpg,dry
3,20210629_173500_15654.jpg,dry
4,20210629_174553_24183.jpg,dry
...,...,...
1149,Log-20220324-064020_89284.jpg,wet
1150,Log-20220121-134942 Data Log_29089.jpg,dry
1151,Log-20220121-134942 Data Log_29393.jpg,dry
1152,Log-20220121-134942 Data Log_29441.jpg,dry


## Save the labels in CSV files (in the yolo directory)

In [11]:
# Output directory (absolute, machine-specific). The trailing slash matters:
# file names are appended to PATH by plain string formatting, here and in later cells.
PATH = '/home/selim/Desktop/yolov5_multitask/data/multitasks/'

train_csv_path = f'{PATH}gt_class_train_time_split.csv'
val_csv_path = f'{PATH}gt_class_val_time_split.csv'

# Write the label tables without the pandas index column.
gt_class_train_df.to_csv(train_csv_path, index=False)
gt_class_val_df.to_csv(val_csv_path, index=False)

## Prepend the dataset directory to each image name (for YOLO)

In [12]:
# Prefix every train image name with the dataset folder
# (presumably relative to the YOLO repository root - confirm).
# Vectorised string concatenation replaces the per-row apply/lambda and the redundant str().
gt_class_train_df['img_path'] = 'data/multitasks/esmart_wip/' + gt_class_train_df['img']
# Unique values per column; compare with len(gt_class_train_df) to spot duplicated image names.
gt_class_train_df.nunique()

img          4435
road_cond       3
img_path     4435
dtype: int64

In [13]:
# Prefix every validation image name with the dataset folder
# (presumably relative to the YOLO repository root - confirm).
# Vectorised string concatenation replaces the per-row apply/lambda and the redundant str().
gt_class_val_df['img_path'] = 'data/multitasks/esmart_wip/' + gt_class_val_df['img']
# Unique values per column; compare with len(gt_class_val_df) to spot duplicated image names.
gt_class_val_df.nunique()

img          1154
road_cond       3
img_path     1154
dtype: int64

In [14]:
# Preview the first rows to check the img_path column.
gt_class_val_df.head()

Unnamed: 0,img,road_cond,img_path
0,20210629_173500_13326.jpg,dry,data/multitasks/esmart_wip/20210629_173500_133...
1,20210629_173500_13614.jpg,dry,data/multitasks/esmart_wip/20210629_173500_136...
2,20210629_173500_14178.jpg,dry,data/multitasks/esmart_wip/20210629_173500_141...
3,20210629_173500_15654.jpg,dry,data/multitasks/esmart_wip/20210629_173500_156...
4,20210629_174553_24183.jpg,dry,data/multitasks/esmart_wip/20210629_174553_241...


In [15]:
# Write the train image paths as a plain list: one path per row, no header, no index.
# NOTE: to_csv's default quoting would wrap a path containing a comma in quotes.
train_list_file = f'{PATH}esmart_wip/train_time_split.txt'
gt_class_train_df['img_path'].to_csv(train_list_file, header=False, index=False)

In [16]:
# Write the validation image paths as a plain list: one path per row, no header, no index.
# NOTE: to_csv's default quoting would wrap a path containing a comma in quotes.
val_list_file = f'{PATH}esmart_wip/val_time_split.txt'
gt_class_val_df['img_path'].to_csv(val_list_file, header=False, index=False)