# Notebook to extract the classification ground truths from FiftyOne and save them to CSV files.
These CSV files are then used by the YOLO multitask model.

In [1]:
# pip install --upgrade fiftyone

In [1]:
import fiftyone as fo
import pandas as pd
from sklearn.utils import shuffle

## Load the datasets

In [None]:
# Load the FiftyOne dataset registered under the name "esmart_wip".
# NOTE(review): the saved output of the next cell reports the dataset name as
# "esmart_context" — confirm which dataset is actually intended.
dataset = fo.load_dataset("esmart_wip")

In [21]:
# Restrict to samples that have a gt_road_condition value. `dataset` is rebound
# to the filtered view (shown as an Exists view stage in the cell output).
dataset = dataset.exists("gt_road_condition")
dataset

Dataset:     esmart_context
Media type:  image
Num samples: 15892
Sample fields:
    id:                fiftyone.core.fields.ObjectIdField
    filepath:          fiftyone.core.fields.StringField
    tags:              fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:          fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:      fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    location:          fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.GeoLocation)
    gt_roadtype:       fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classifications)
    gt_road_condition: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classifications)
    gt_visibility:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classifications)
View stages:
    1. Exists(field='gt_road_condition', bool=True)

In [None]:
# Open the FiftyOne App on port 5252 to browse the filtered samples (optional, visual check only).
fo.launch_app(dataset, port=5252)

In [22]:
# Build the train / validation views from the sample tags of the time-based split.
train_dataset_51 = dataset.match_tags("TRAIN_RC_AUTO_TIME")
val_dataset_51 = dataset.match_tags("VAL_RC_AUTO_TIME")

## Get the labels 

In [23]:
# Collect one (file name, road-condition label) tuple per training sample.
# To also export visibility / road type, read sample.gt_visibility and
# sample.gt_roadtype the same way and extend the tuple (and the DataFrame columns).
list_data_train = []

for sample in train_dataset_51:
    try:
        # Only the first classification stored in the field is exported.
        road_cond_class = sample.gt_road_condition.classifications[0].label
    except (AttributeError, IndexError, TypeError):
        # The label is missing (e.g. the field is None or its classification
        # list is empty). Skip the sample and report which one it was; any
        # other exception is no longer silently swallowed.
        print(f"Image doesn't contain a gt_road_condition: {sample.filepath}")
        continue
    # Keep only the last "/"-separated component of the path, i.e. the file name.
    file_name = sample.filepath.split("/")[-1]
    data = (file_name, road_cond_class)
    list_data_train.append(data)

In [24]:
# Training labels as a two-column frame: image file name and road-condition class.
# (Add 'visibility' and 'road_type' columns here if the extraction loop exports them.)
gt_class_train_df = pd.DataFrame(
    data=list_data_train,
    columns=['img', 'road_cond'],
)
gt_class_train_df

Unnamed: 0,img,road_cond
0,Log-20220412-190445 Untitled_684.jpg,dry
1,Log-20220412-190445 Untitled_715.jpg,dry
2,Log-20220412-190445 Untitled_746.jpg,dry
3,Log-20220412-190445 Untitled_777.jpg,dry
4,Log-20220412-190445 Untitled_808.jpg,dry
...,...,...
11177,Log-20220724- Selim-Data Log_1245.jpg,wet
11178,Log-20220724- Selim-Data Log_1276.jpg,wet
11179,Log-20220724- Selim-Data Log_1307.jpg,wet
11180,Log-20220724- Selim-Data Log_1338.jpg,wet


In [25]:
# Shuffling a Pandas dataframe with sklearn
# Shuffle the training rows with a fixed seed (reproducible order) and renumber
# the index from 0. Chained instead of using inplace=True so the cell produces
# the final frame in a single assignment.
gt_class_train_df = shuffle(gt_class_train_df, random_state=1).reset_index(drop=True)

In [26]:
# Display the shuffled training frame to check the new row order.
gt_class_train_df

Unnamed: 0,img,road_cond
0,Log-20220123-124034 Data Log_31932.jpg,dry
1,Log-20220126-081950 Data Log_22683.jpg,wet
2,Log-20220219-131608 Data Log_21454.jpg,snowy
3,Log-20220219-131608 Data Log_21547.jpg,snowy
4,Log-20220419-101657 Data Log_22012.jpg,wet
...,...,...
11177,20210629_173500_2560.jpg,dry
11178,Log-20220324-064020_57512.jpg,wet
11179,Log-20220412-192914 Untitled_11132.jpg,dry
11180,Log-20220426-064124 Data Log_20784.jpg,dry


In [27]:
# Collect one (file name, road-condition label) tuple per validation sample.
# To also export visibility / road type, read sample.gt_visibility and
# sample.gt_roadtype the same way and extend the tuple (and the DataFrame columns).
list_data_val = []

for sample in val_dataset_51:
    try:
        # Only the first classification stored in the field is exported.
        road_cond_class = sample.gt_road_condition.classifications[0].label
    except (AttributeError, IndexError, TypeError):
        # The label is missing (e.g. the field is None or its classification
        # list is empty). Skip the sample and report which one it was; any
        # other exception is no longer silently swallowed.
        print(f"Image doesn't contain a gt_road_condition: {sample.filepath}")
        continue
    # Keep only the last "/"-separated component of the path, i.e. the file name.
    file_name = sample.filepath.split("/")[-1]
    data = (file_name, road_cond_class)
    list_data_val.append(data)

In [28]:
# Validation labels as a two-column frame: image file name and road-condition class.
gt_class_val_df = pd.DataFrame(
    data=list_data_val,
    columns=['img', 'road_cond'],
)

In [29]:
# Shuffling a Pandas dataframe with sklearn
# Shuffle the validation rows with a fixed seed (reproducible order) and renumber
# the index from 0. Chained instead of using inplace=True so the cell produces
# the final frame in a single assignment.
gt_class_val_df = shuffle(gt_class_val_df, random_state=1).reset_index(drop=True)
gt_class_val_df

Unnamed: 0,img,road_cond
0,Log-20220412-192914 Untitled_25361.jpg,dry
1,Log-20220419-101657 Data Log_33668.jpg,wet
2,Log-20220419-063722 Data Log_58885.jpg,snowy
3,Log-20220419-063722 Data Log_49275.jpg,snowy
4,Log-20220414-064154 Data Log_82524.jpg,wet
...,...,...
3268,Log-20220222-083904 Data Log_47836.jpg,snowy
3269,Log-20220414-064154 Data Log_94335.jpg,wet
3270,Log-20220419-063722 Data Log_50391.jpg,snowy
3271,Log-20220412-192914 Untitled_24741.jpg,dry


## Save the labels in CSV files (in the yolo directory)

In [13]:
# Output directory inside the YOLO multitask checkout. The trailing slash matters:
# file names are appended to PATH by plain string formatting.
PATH = '/home/selim/Desktop/yolov5_multitask/data/multitasks/'

# Write one label CSV per split, without the index column.
for split_df, csv_path in (
    (gt_class_train_df, f'{PATH}gt_class_train_time_split.csv'),
    (gt_class_val_df, f'{PATH}gt_class_val_time_split.csv'),
):
    split_df.to_csv(csv_path, index=False)

## Add the path to the img name (for yolo)

In [14]:
# Prefix each training image name with the dataset folder, relative to the YOLO
# repository root, then check how many distinct values each column holds.
gt_class_train_df['img_path'] = gt_class_train_df['img'].map(
    lambda img_name: 'data/multitasks/esmart_wip/' + img_name
)
gt_class_train_df.nunique()

img          4544
road_cond       3
img_path     4544
dtype: int64

In [15]:
# Prefix each validation image name with the dataset folder, relative to the YOLO
# repository root, then check how many distinct values each column holds.
gt_class_val_df['img_path'] = gt_class_val_df['img'].map(
    lambda img_name: 'data/multitasks/esmart_wip/' + img_name
)
gt_class_val_df.nunique()

img          1127
road_cond       3
img_path     1127
dtype: int64

In [16]:
# Preview the first validation rows, including the new img_path column.
gt_class_val_df.head()

Unnamed: 0,img,road_cond,img_path
0,20210724_133321_35184.jpg,dry,data/multitasks/esmart_wip/20210724_133321_351...
1,Log-20220219-131608 Data Log_36241.jpg,snowy,data/multitasks/esmart_wip/Log-20220219-131608...
2,20210724_141043_41646.jpg,dry,data/multitasks/esmart_wip/20210724_141043_416...
3,Log-20220318-064528 Data Log_32875.jpg,wet,data/multitasks/esmart_wip/Log-20220318-064528...
4,20211129_205422_18759.0.jpg,dry,data/multitasks/esmart_wip/20211129_205422_187...


In [17]:
# Write the training image paths, one per line (no header, no index), as the image list for YOLO.
# Relies on PATH from the CSV-saving cell.
# NOTE(review): to_csv applies CSV quoting, so a path containing a comma or a
# double quote would be written quoted — presumably none do; confirm.
gt_class_train_df['img_path'].to_csv(f'{PATH}esmart_wip/train_time_split.txt', header=False, index=False)

In [18]:
# Write the validation image paths, one per line (no header, no index), as the image list for YOLO.
# Relies on PATH from the CSV-saving cell.
# NOTE(review): to_csv applies CSV quoting, so a path containing a comma or a
# double quote would be written quoted — presumably none do; confirm.
gt_class_val_df['img_path'].to_csv(f'{PATH}esmart_wip/val_time_split.txt', header=False, index=False)