#### Notes

This code takes a main folder with sub-subfolders where the per-frame labels are stored. The folder tree looks, for example, like this:

Sweden:\
-Channel_1\
--Video_1\
---labels\
----label_1.txt\
----label_2.txt\
----...\
--Video_2\
---labels\
----label_1.txt\
...

* Some of the `labels` directories are empty.

In [30]:
import os
import pandas as pd
#import parquet 
from collections import defaultdict
import os.path
from fastparquet import write

In [31]:
def result_processing(path_to_results_dir:str, path_to_res_file:str, country:str):
    """Collect the per-frame label files of every channel/video into one parquet file.

    The function walks ``<path_to_results_dir>/<channel>/<video>/labels/`` and
    reads every non-hidden file found there. Each non-blank line must contain
    six whitespace-separated fields, which are stored (as strings, exactly as
    read) in the columns ``class_label, x_center, y_center, width, height,
    confidence_level``. Three identifying columns are put in front:
    ``Channel`` (channel folder name), ``Video_id`` (video folder name) and
    ``Frame`` (label file name up to its first ``.``).

    Parameters
    ----------
    path_to_results_dir : str
        Root folder containing one sub-folder per channel.
    path_to_res_file : str
        Prefix of the output file. The file name is appended by plain string
        concatenation, so a directory must end with a path separator.
    country : str
        Used in the output file name: ``results_<country>.parquet``.

    Returns
    -------
    str
        Path of the parquet file. It is returned even when no label rows were
        found, in which case nothing is written.

    Raises
    ------
    ValueError
        If a non-blank line does not contain exactly six fields.

    Notes
    -----
    * Hidden entries (names starting with ``.``), entries that are not
      directories, videos without a ``labels`` folder and empty ``labels``
      folders are skipped.
    * Rows are written once per channel instead of once per label file, which
      avoids one parquet append per frame.
    * If the output file already exists, rows are appended to it. Delete the
      file before re-running to avoid duplicated rows.
    """
    label_columns = ['class_label', 'x_center', 'y_center',
                     'width', 'height', 'confidence_level']
    all_columns = ['Channel', 'Video_id', 'Frame'] + label_columns

    # File to store the results
    file_path = path_to_res_file + f'results_{country}.parquet'

    def visible_entries(directory):
        # Non-hidden directory entries, sorted so the row order is reproducible.
        return sorted(name for name in os.listdir(directory)
                      if not name.startswith('.'))

    for channel_d in visible_entries(path_to_results_dir):
        channel_path = os.path.join(path_to_results_dir, channel_d)
        if not os.path.isdir(channel_path):
            continue  # stray file next to the channel folders

        channel_rows = []
        for video_d in visible_entries(channel_path):
            labels_dir_path = os.path.join(channel_path, video_d, 'labels')
            if not os.path.isdir(labels_dir_path):
                continue  # not a video folder, or no labels were produced for it

            # Label information is stored in txt files, one per frame
            for label_f in visible_entries(labels_dir_path):
                label_path = os.path.join(labels_dir_path, label_f)
                frame = label_f.split('.')[0]
                with open(label_path, 'r') as f:
                    for line in f:
                        fields = line.split()
                        if not fields:
                            continue  # blank line
                        if len(fields) != len(label_columns):
                            raise ValueError(
                                f'{label_path}: expected {len(label_columns)} '
                                f'fields per line, got {len(fields)}')
                        channel_rows.append([channel_d, video_d, frame] + fields)

        if channel_rows:
            df = pd.DataFrame(channel_rows, columns=all_columns)
            # Create the file on the first write, append afterwards
            write(file_path, df, append=os.path.isfile(file_path))

    return file_path

In [39]:
def transf_parque(parque_file_path:str, class_0_name:str, class_1_name:str):
    """Export a results parquet file to CSV with readable class names.

    Reads ``parque_file_path`` with the fastparquet engine, replaces the
    string labels ``'0'`` and ``'1'`` in the ``class_label`` column with
    ``class_0_name`` and ``class_1_name``, and writes the result (without the
    index) next to the input file, using the same name with a ``.csv``
    extension.

    Parameters
    ----------
    parque_file_path : str
        Path of the parquet file to convert.
    class_0_name : str
        Name written in place of class label ``'0'``.
    class_1_name : str
        Name written in place of class label ``'1'``.
    """
    res = pd.read_parquet(parque_file_path, engine='fastparquet')
    # Labels are matched as strings; other values are left untouched.
    mapping = {'1': class_1_name, '0': class_0_name}
    res = res.replace({'class_label': mapping})
    # Swap only the file extension. Splitting the whole path on '.par' would
    # cut it at the first match, e.g. inside a directory name.
    csv_path = os.path.splitext(parque_file_path)[0] + '.csv'
    res.to_csv(csv_path, index = False)


In [34]:
# Root folder with one sub-folder per channel; passed to result_processing
# in the (currently commented-out) call below.
main_folder = '../../../yolov5/Sweden_analysis'
# Output location. Keep the trailing '/': result_processing builds the output
# path by plain string concatenation with this value.
end_results_folder = '../../data_collection/data/'
# Used in the output file name: results_<country>.parquet
country = 'sweden'

In [35]:
#res = result_processing(main_folder, end_results_folder, country)


KeyboardInterrupt



In [None]:
# Convert parquet to csv
#transf_parque(res, 's_svd','s_soc')

#### Read the results

In [36]:
# Load the Norway results parquet file with the fastparquet engine.
# NOTE: `res` is a DataFrame here, whereas the commented-out cells above use
# `res` for the parquet file path returned by result_processing.
res = pd.read_parquet('../../data_collection/data/results_norway.parquet', engine='fastparquet')

In [37]:
# Display the loaded frame (pandas truncates long frames in the rich output).
res

Unnamed: 0,Channel,Video_id,Frame,class_label,x_center,y_center,width,height,confidence_level
0,norsktannpleierforening3461,MX0TmghIvOw,MX0TmghIvOw_2500,1,0.607813,0.396296,0.0208333,0.0333333,0.305916
1,norsktannpleierforening3461,MX0TmghIvOw,MX0TmghIvOw_2500,1,0.572917,0.0902778,0.0333333,0.0324074,0.410396
2,norsktannpleierforening3461,MX0TmghIvOw,MX0TmghIvOw_1250,1,0.297396,0.119444,0.0458333,0.0555556,0.379419
3,norsktannpleierforening3461,MX0TmghIvOw,MX0TmghIvOw_2000,1,0.993229,0.734722,0.0135417,0.0712963,0.336906
4,teknawebmaster,gdRX-XDFlQk,gdRX-XDFlQk_0,1,0.5,0.5,1,1,0.291873
...,...,...,...,...,...,...,...,...,...
8358,Sykepleierforbundet,0v8RT1P_Opg,0v8RT1P_Opg_5750,0,0.29349,0.440278,0.0755208,0.182407,0.601827
8359,Sykepleierforbundet,0v8RT1P_Opg,0v8RT1P_Opg_6000,0,0.285937,0.469444,0.0791667,0.192593,0.376239
8360,Sykepleierforbundet,0v8RT1P_Opg,0v8RT1P_Opg_5250,0,0.278646,0.463426,0.0770833,0.189815,0.325324
8361,Sykepleierforbundet,RMGimLdU2OE,RMGimLdU2OE_0,1,0.5,0.5,1,1,0.29574


In [38]:
# Summarise column dtypes and non-null counts of the parquet-loaded frame.
res.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8363 entries, 0 to 8362
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Channel           8363 non-null   object
 1   Video_id          8363 non-null   object
 2   Frame             8363 non-null   object
 3   class_label       8363 non-null   object
 4   x_center          8363 non-null   object
 5   y_center          8363 non-null   object
 6   width             8363 non-null   object
 7   height            8363 non-null   object
 8   confidence_level  8363 non-null   object
dtypes: object(9)
memory usage: 588.1+ KB


In [40]:
# Export the Norway parquet file to CSV, replacing class label '0' with
# 'n_ab' and '1' with 'n_hr'.
transf_parque('../../data_collection/data/results_norway.parquet', 'n_ab','n_hr')

In [46]:
# Read the results in csv.
# NOTE(review): this loads the Italy file, while the conversion cell above
# exports the Norway file; results_italy.csv is not produced by any cell shown
# here - presumably exported in an earlier session, confirm.
res_csv = pd.read_csv('../../data_collection/data/results_italy.csv')

In [47]:
# Keep only the detections whose confidence level is above 0.85.
# NOTE(review): 0.85 is a hard-coded threshold; consider a named constant.
res_csv[res_csv['confidence_level'] > 0.85]

Unnamed: 0,Channel,Video_id,Frame,class_label,x_center,y_center,width,height,confidence_level
3,Uilcanetwork,_3tU-hR_VGA,_3tU-hR_VGA_580,i_cm,0.760547,0.493056,0.478906,0.955556,0.888343
24,Uilcanetwork,mSet1wzLQNE,mSet1wzLQNE_5500,i_fr,0.920313,0.893056,0.159375,0.206481,0.878179
216,Uilcanetwork,NdLOABKG4H4,NdLOABKG4H4_2700,i_fr,0.559133,0.452083,0.066745,0.129167,0.939578
220,Uilcanetwork,NdLOABKG4H4,NdLOABKG4H4_7200,i_fr,0.369438,0.770833,0.357143,0.450000,0.850157
221,Uilcanetwork,NdLOABKG4H4,NdLOABKG4H4_3000,i_fr,0.234778,0.385417,0.040984,0.095833,0.906282
...,...,...,...,...,...,...,...,...,...
29725,cgilemiliaromagna,ScTfL_NkTbk,ScTfL_NkTbk_2750,i_cm,0.476562,0.426136,0.162500,0.295455,0.904601
29729,cgilemiliaromagna,ScTfL_NkTbk,ScTfL_NkTbk_1500,i_cm,0.032031,0.651989,0.060937,0.150568,0.901605
29743,cgilemiliaromagna,f6QTJBFmCZc,f6QTJBFmCZc_500,i_cm,0.932552,0.647685,0.064062,0.117593,0.903503
29744,cgilemiliaromagna,f6QTJBFmCZc,f6QTJBFmCZc_500,i_cm,0.299479,0.465278,0.076042,0.106481,0.924071


In [44]:
# Summarise column dtypes and non-null counts of the CSV-loaded frame.
# NOTE(review): this cell's execution count (In [44]) is lower than that of
# the cell that loads res_csv (In [46]); re-run it after loading so the
# summary matches the file that was read.
res_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8363 entries, 0 to 8362
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Channel           8363 non-null   object 
 1   Video_id          8363 non-null   object 
 2   Frame             8363 non-null   object 
 3   class_label       8363 non-null   object 
 4   x_center          8363 non-null   float64
 5   y_center          8363 non-null   float64
 6   width             8363 non-null   float64
 7   height            8363 non-null   float64
 8   confidence_level  8363 non-null   float64
dtypes: float64(5), object(4)
memory usage: 588.1+ KB
