In [1]:
import pandas as pd

### Read CSV files

In [2]:
# Load the per-file vehicle-count annotations (path is relative to the working directory).
annotations = pd.read_csv("annotations.csv")

In [3]:
# Replace every missing value with 0 so that absent counts read as zero.
# NOTE(review): fillna(0) is applied to all columns, so a missing file_name would
# also become 0 and break the string parsing done in later cells - confirm none are missing.
annotations = annotations.fillna(0)
annotations.head()

Unnamed: 0.1,Unnamed: 0,file_name,car,truck,bicycle,motorbike,bus,total
0,0,data/task_street_traffic-helsinki-269-8191-202...,2.0,0.0,0.0,0.0,0.0,2.0
1,0,data/task_20201223t000009_52333-2021_06_05_22_...,0.0,0.0,0.0,0.0,0.0,0.0
2,0,data/task_street_traffic-barcelona-163-4981-20...,5.0,3.0,0.0,1.0,0.0,9.0
3,0,data/task_street_traffic-london-168-5151-2021_...,0.0,0.0,0.0,0.0,0.0,0.0
4,0,data/task_street_traffic-helsinki-269-8175-202...,1.0,0.0,0.0,0.0,0.0,1.0


In [4]:
# Row/column count of the annotations table after filling missing values.
annotations.shape

(437, 8)

In [5]:
def get_video_name(name):
    """Derive the ``<clip>.mp4`` video name from an annotation file path.

    Keeps the text that follows the ``data/task_`` prefix, cuts it at the
    first ``-2021`` (apparently the start of a date suffix in the sample
    paths) and at any existing ``.mp4``, then appends ``.mp4``.

    Args:
        name: Annotation path containing ``data/task_``.

    Returns:
        The clip identifier with a single ``.mp4`` extension.

    Raises:
        IndexError: If ``name`` does not contain ``data/task_``.
    """
    clip = name.split("data/task_")[1]
    # Strip everything from the first occurrence of each trailing marker.
    for marker in ("-2021", ".mp4"):
        clip = clip.split(marker, 1)[0]
    return f"{clip}.mp4"

In [6]:
def get_file_name(name):
    """Derive the bare clip identifier (no extension) from an annotation file path.

    Keeps the text that follows the ``data/task_`` prefix and cuts it at the
    first ``-2021`` and at any ``.mp4``.

    Args:
        name: Annotation path containing ``data/task_``.

    Returns:
        The clip identifier without the ``.mp4`` extension.

    Raises:
        IndexError: If ``name`` does not contain ``data/task_``.
    """
    after_prefix = name.split("data/task_")[1]
    before_suffix = after_prefix.split("-2021")[0]
    return before_suffix.split(".mp4")[0]

In [7]:
# Both derived columns are computed from the original paths before file_name is
# overwritten, so this cell cannot be re-run on its own output (the
# "data/task_" prefix is gone after the first run).
annotations = annotations.assign(
    video_name=annotations["file_name"].apply(get_video_name),
    file_name=annotations["file_name"].apply(get_file_name),
)

In [8]:
import glob

# Collect every review sheet under csv/ into one table.
# Sorted so the row order (and therefore which duplicate survives the later
# keep-first de-duplication) does not depend on filesystem listing order.
file_list = sorted(glob.glob("csv/*.csv"))
if not file_list:
    raise FileNotFoundError("no CSV files matched 'csv/*.csv'")

# Concatenate once instead of appending inside a loop: DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated frame on every iteration.
marks = pd.concat(
    [pd.read_csv(file, header=0) for file in file_list],
    ignore_index=True,
)

# Keep only rows that name a video and have a bbox_checked value.
marks = marks.dropna(subset=["bbox_checked", "video_name"])

In [9]:
# Lower-case the video names with the vectorised string accessor so they can be
# matched against the names derived from the annotation paths.
marks["video_name"] = marks["video_name"].str.lower()
# Keep the join key plus the review columns.
marks = marks[["video_name", "bbox_checked", "needed_corrections", "audio_checked", "offscreen_sounds", "night", "snow", "rain"]]

In [11]:
# Attach the review columns to each annotated video (left join keeps every
# annotation row), then keep only the first row per video_name.
data = (
    annotations
    .merge(marks, on="video_name", how="left")
    .drop_duplicates(subset=["video_name"], keep="first")
)

In [12]:
# Inspect the merged table.
data

Unnamed: 0.1,Unnamed: 0,file_name,car,truck,bicycle,motorbike,bus,total,video_name,bbox_checked,needed_corrections,audio_checked,offscreen_sounds,night,snow,rain
0,0,street_traffic-helsinki-269-8191,2.0,0.0,0.0,0.0,0.0,2.0,street_traffic-helsinki-269-8191.mp4,1.0,,,,,,
1,0,20201223t000009_52333,0.0,0.0,0.0,0.0,0.0,0.0,20201223t000009_52333.mp4,1.0,1.0,,,,,
2,0,street_traffic-barcelona-163-4981,5.0,3.0,0.0,1.0,0.0,9.0,street_traffic-barcelona-163-4981.mp4,1.0,1.0,,,,,
3,0,street_traffic-london-168-5151,0.0,0.0,0.0,0.0,0.0,0.0,street_traffic-london-168-5151.mp4,1.0,,,,,,
4,0,street_traffic-helsinki-269-8175,1.0,0.0,0.0,0.0,0.0,1.0,street_traffic-helsinki-269-8175.mp4,1.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
466,0,20201223t000009_51195,1.0,0.0,0.0,0.0,0.0,1.0,20201223t000009_51195.mp4,1.0,1.0,,,,,
467,0,street_traffic-helsinki-165-5072,11.0,0.0,0.0,0.0,2.0,13.0,street_traffic-helsinki-165-5072.mp4,1.0,,,,,,
468,0,street_traffic-barcelona-268-8155,6.0,0.0,0.0,0.0,0.0,6.0,street_traffic-barcelona-268-8155.mp4,1.0,,,,1.0,,
469,0,street_traffic-lyon-1110-40780,8.0,0.0,0.0,0.0,0.0,8.0,street_traffic-lyon-1110-40780.mp4,1.0,1.0,,,,,


In [13]:
# List the merged columns before selecting the ones to keep.
data.columns

Index(['Unnamed: 0', 'file_name', 'car', 'truck', 'bicycle', 'motorbike',
       'bus', 'total', 'video_name', 'bbox_checked', 'needed_corrections',
       'audio_checked', 'offscreen_sounds', 'night', 'snow', 'rain'],
      dtype='object')

In [14]:
# Keep the clip key, the per-class counts and the night/snow/rain columns.
label_columns = [
    "file_name",
    "car", "truck", "bicycle", "motorbike", "bus", "total",
    "night", "snow", "rain",
]
data = data[label_columns]

In [19]:
# Flag every row in this table as annotated. assign() returns a new frame, so
# nothing is written into a column-subset of another frame (the pattern that
# triggers SettingWithCopyWarning) and no label slice starting at 0 is needed.
data = data.assign(has_annotation=1)

In [20]:
# Preview the table with the new has_annotation column.
data.head()

Unnamed: 0,file_name,car,truck,bicycle,motorbike,bus,total,night,snow,rain,has_annotation
0,street_traffic-helsinki-269-8191,2.0,0.0,0.0,0.0,0.0,2.0,,,,1
1,20201223t000009_52333,0.0,0.0,0.0,0.0,0.0,0.0,,,,1
2,street_traffic-barcelona-163-4981,5.0,3.0,0.0,1.0,0.0,9.0,,,,1
3,street_traffic-london-168-5151,0.0,0.0,0.0,0.0,0.0,0.0,,,,1
4,street_traffic-helsinki-269-8175,1.0,0.0,0.0,0.0,0.0,1.0,,,,1


### Read NumPy (.npy) files

In [21]:
import numpy as np

In [22]:
# Load the saved array from labels.npy (path is relative to the working directory).
# Presumably two columns, (file_name, timestamp) - the next cell labels it that way; confirm.
index_npy = np.load("labels.npy")

In [23]:
# Wrap the raw array in a two-column frame and convert the timestamps to floats.
index_df = (
    pd.DataFrame(index_npy, columns=["file_name", "timestamp"])
    .astype({"timestamp": float})
)
index_df.head()

Unnamed: 0,file_name,timestamp
0,street_traffic-barcelona-161-4901,0.5
1,street_traffic-barcelona-161-4901,1.0
2,street_traffic-barcelona-161-4901,1.5
3,street_traffic-barcelona-161-4901,2.0
4,street_traffic-barcelona-161-4901,2.5


In [24]:
# Reduce to one row per file_name, keeping the first row seen for each.
is_first_row = ~index_df.duplicated(subset=["file_name"], keep="first")
index_df = index_df[is_first_row]
index_df

Unnamed: 0,file_name,timestamp
0,street_traffic-barcelona-161-4901,0.5
19,street_traffic-barcelona-161-4902,0.5
38,street_traffic-barcelona-161-4903,0.5
57,street_traffic-barcelona-161-4904,0.5
76,street_traffic-barcelona-161-4905,0.5
...,...,...
25308,street_traffic-vienna-274-8374,0.5
25327,street_traffic-vienna-274-8375,0.5
25346,street_traffic-vienna-274-8376,0.5
25365,street_traffic-vienna-274-8377,0.5


In [25]:
# Left join keeps every file from the index; files without a match in `data`
# get NaN in the count/flag columns.
labels = index_df.merge(data, how="left", on="file_name")
labels.head()

Unnamed: 0,file_name,timestamp,car,truck,bicycle,motorbike,bus,total,night,snow,rain,has_annotation
0,street_traffic-barcelona-161-4901,0.5,2.0,0.0,0.0,0.0,2.0,4.0,1.0,,,1.0
1,street_traffic-barcelona-161-4902,0.5,6.0,0.0,1.0,0.0,0.0,7.0,1.0,,,1.0
2,street_traffic-barcelona-161-4903,0.5,13.0,0.0,0.0,1.0,1.0,15.0,1.0,,,1.0
3,street_traffic-barcelona-161-4904,0.5,5.0,0.0,1.0,0.0,0.0,6.0,1.0,,,1.0
4,street_traffic-barcelona-161-4905,0.5,9.0,0.0,0.0,0.0,0.0,9.0,1.0,,,1.0


In [26]:
# Row/column count of the merged label table.
labels.shape

(1337, 12)

In [27]:
# Fill every remaining NaN with 0; this includes has_annotation, which is NaN for
# files that had no match in the left join above.
labels = labels.fillna(0)

In [28]:
# Write the final label table to disk.
# NOTE(review): to_csv writes the index by default, which shows up as an
# "Unnamed: 0" column when the file is read back - confirm readers expect that.
labels.to_csv("video_labels.csv")