In [111]:
import pandas as pd

### Read files

In [112]:
# Load the annotation table from annotations.csv (path relative to the working directory).
annotations = pd.read_csv("annotations.csv")

In [113]:
# Replace missing values with 0 in every column, then preview the first rows.
annotations = annotations.fillna(0)
annotations.head()

Unnamed: 0.1,Unnamed: 0,file_name,car,truck,bicycle,motorbike,bus,total
0,0,data/task_street_traffic-helsinki-269-8191-202...,2.0,0.0,0.0,0.0,0.0,2.0
1,0,data/task_20201223t000009_52333-2021_06_05_22_...,0.0,0.0,0.0,0.0,0.0,0.0
2,0,data/task_street_traffic-barcelona-163-4981-20...,5.0,3.0,0.0,1.0,0.0,9.0
3,0,data/task_street_traffic-london-168-5151-2021_...,0.0,0.0,0.0,0.0,0.0,0.0
4,0,data/task_street_traffic-helsinki-269-8175-202...,1.0,0.0,0.0,0.0,0.0,1.0


In [114]:
def get_video_name(name):
    """Turn an annotation ``file_name`` into the matching video file name.

    Keeps the text after ``"data/task_"``, cuts it at the first ``"-2021"``
    and at the first ``".mp4"``, then appends ``".mp4"``.

    Raises:
        IndexError: if ``name`` does not contain ``"data/task_"``.
    """
    after_prefix = name.split("data/task_")[1]
    stem = after_prefix.split("-2021")[0].split(".mp4")[0]
    return f"{stem}.mp4"

In [115]:
# Derive the video_name key that is later used to join annotations with marks.
annotations["video_name"] = annotations["file_name"].apply(get_video_name)

In [116]:
import glob
# Read every CSV under csv/ and stack them into a single frame of marks.
# glob returns paths in arbitrary order; sorting keeps the row order (and so
# which duplicate the later keep="first" de-duplication retains) reproducible.
file_list = sorted(glob.glob("csv/*.csv"))
if not file_list:
    raise FileNotFoundError("no CSV files matched 'csv/*.csv'")
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every iteration, so read all files first and concatenate once.
marks = pd.concat(
    [pd.read_csv(csv_path, header=0) for csv_path in file_list],
    ignore_index=True,
)
# Drop rows that lack a bbox_checked value or a video_name.
marks = marks.dropna(subset=["bbox_checked", "video_name"])

In [117]:
# Lower-case the join key and keep only the listed mark columns.
mark_columns = [
    "video_name",
    "bbox_checked",
    "needed_corrections",
    "audio_checked",
    "offscreen_sounds",
    "night",
    "snow",
    "rain",
]
marks = marks.assign(
    video_name=marks["video_name"].apply(lambda raw_name: raw_name.lower())
)[mark_columns]

In [118]:
# Left-join the marks onto the annotations by video_name, then keep only the
# first row for each video_name.
data = pd.merge(
    annotations,
    marks,
    how="left",
    on="video_name",
).drop_duplicates(subset=["video_name"], keep="first")

In [119]:
# Display the merged frame (one row per video_name after de-duplication).
data

Unnamed: 0.1,Unnamed: 0,file_name,car,truck,bicycle,motorbike,bus,total,video_name,bbox_checked,needed_corrections,audio_checked,offscreen_sounds,night,snow,rain
0,0,data/task_street_traffic-helsinki-269-8191-202...,2.0,0.0,0.0,0.0,0.0,2.0,street_traffic-helsinki-269-8191.mp4,1.0,,,,,,
1,0,data/task_20201223t000009_52333-2021_06_05_22_...,0.0,0.0,0.0,0.0,0.0,0.0,20201223t000009_52333.mp4,1.0,1.0,,,,,
2,0,data/task_street_traffic-barcelona-163-4981-20...,5.0,3.0,0.0,1.0,0.0,9.0,street_traffic-barcelona-163-4981.mp4,1.0,1.0,,,,,
3,0,data/task_street_traffic-london-168-5151-2021_...,0.0,0.0,0.0,0.0,0.0,0.0,street_traffic-london-168-5151.mp4,1.0,,,,,,
4,0,data/task_street_traffic-helsinki-269-8175-202...,1.0,0.0,0.0,0.0,0.0,1.0,street_traffic-helsinki-269-8175.mp4,1.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
466,0,data/task_20201223t000009_51195-2021_06_05_21_...,1.0,0.0,0.0,0.0,0.0,1.0,20201223t000009_51195.mp4,1.0,1.0,,,,,
467,0,data/task_street_traffic-helsinki-165-5072-202...,11.0,0.0,0.0,0.0,2.0,13.0,street_traffic-helsinki-165-5072.mp4,1.0,,,,,,
468,0,data/task_street_traffic-barcelona-268-8155-20...,6.0,0.0,0.0,0.0,0.0,6.0,street_traffic-barcelona-268-8155.mp4,1.0,,,,1.0,,
469,0,data/task_street_traffic-lyon-1110-40780-2021_...,8.0,0.0,0.0,0.0,0.0,8.0,street_traffic-lyon-1110-40780.mp4,1.0,1.0,,,,,


### File sources

In [120]:
def file_type(name):
    """Classify a file name as ``"TAU"`` or ``"Bosch"``.

    The second ``"_"``-separated token decides: ``"street"`` means ``"TAU"``,
    anything else means ``"Bosch"``.

    Raises:
        IndexError: if ``name`` contains no underscore.
    """
    second_token = name.split("_")[1]
    return "TAU" if second_token == "street" else "Bosch"

In [121]:
# Tag every file with its source ("TAU" or "Bosch") based on its file_name.
data["source"] = data["file_name"].apply(file_type)

In [122]:
# Number of files per source, smallest group first.
sources = (
    data[["file_name", "source"]]
    .groupby("source")
    .count()
    .rename(columns={"file_name": "count"})
    .reset_index()
    .sort_values(by=["count"])
)
sources

Unnamed: 0,source,count
0,Bosch,76
1,TAU,345


In [123]:
import plotly.express as px

# Pie chart of the file counts per source.
fig = px.pie(sources, values='count', names='source')
fig.show()

### File difficulty

In [124]:
def set_level(num):
    """Map a count to a difficulty label.

    Returns ``"easy"`` below 4, ``"medium"`` below 10 and ``"hard"`` for
    everything else (including values that compare False against both
    bounds, such as NaN).
    """
    for upper_bound, label in ((4, "easy"), (10, "medium")):
        if num < upper_bound:
            return label
    return "hard"

In [125]:
# Assign each file a difficulty label derived from its "total" column.
data["level"] = data["total"].apply(set_level)

In [126]:
# Files per difficulty level (ascending by count) plus each level's share.
levels = (
    data[["file_name", "level"]]
    .groupby("level")
    .count()
    .rename(columns={"file_name": "count"})
    .reset_index()
    .sort_values(by=["count"])
    .assign(pct=lambda frame: frame["count"] / frame["count"].sum())
)
levels

Unnamed: 0,level,count,pct
1,hard,104,0.247031
2,medium,150,0.356295
0,easy,167,0.396675


In [127]:
import plotly.express as px

# Bar chart of the file counts per difficulty level.
fig = px.bar(levels, x='level', y='count')
fig.show()

In [128]:
# Same level counts shown as a pie chart.
fig = px.pie(levels, values='count', names='level')
fig.show()

### Total vehicles

In [129]:
# Sum each vehicle column over all files and reshape the result into a
# two-column (type, count) frame.
vehicle_totals = data[["car", "truck", "motorbike", "bicycle", "bus"]].sum()
vehicles = (
    pd.DataFrame(vehicle_totals)
    .reset_index()
    .rename(columns={"index": "type", 0: "count"})
)
vehicles

Unnamed: 0,type,count
0,car,2304.0
1,truck,88.0
2,motorbike,220.0
3,bicycle,55.0
4,bus,105.0


In [130]:
# Bar chart of the summed counts per vehicle type.
fig = px.bar(vehicles, x="type", y="count")
fig.show()

In [131]:
# Same vehicle totals shown as a pie chart.
fig = px.pie(vehicles, names="type", values="count")
fig.show()

### File difficulty with different sources

In [132]:
# Files per (source, level) combination, smallest group first.
sources_detail = (
    data[["file_name", "source", "level"]]
    .groupby(by=["source", "level"])
    .count()
    .rename(columns={"file_name": "count"})
    .reset_index()
    .sort_values(by=["count"])
)
sources_detail

Unnamed: 0,source,level,count
1,Bosch,medium,16
0,Bosch,easy,60
3,TAU,hard,104
2,TAU,easy,107
4,TAU,medium,134


In [133]:
# Bar chart of file counts per source, coloured by difficulty level.
fig = px.bar(sources_detail, x="source", y="count", color="level")
fig.show()

### Night

In [134]:
# Replace every remaining missing value with 0. NOTE: a missing night/snow/rain
# flag therefore counts as 0 in the groupings and comparisons that follow.
data = data.fillna(0)

In [135]:
# Files per value of the night flag; the flag column is renamed to "time".
nights = (
    data[["file_name", "night"]]
    .groupby("night")
    .count()
    .reset_index()
    .rename(columns={"file_name": "count", "night": "time"})
)
nights

Unnamed: 0,time,count
0,0.0,331
1,1.0,90


In [136]:
# Relabel the numeric night flag as text. The replacement is restricted to the
# "time" column: a frame-wide replace(0, ...) / replace(1, ...) would also
# overwrite any "count" value that happens to equal 0 or 1.
nights["time"] = nights["time"].replace({0: "day", 1: "night"})

In [137]:
# Pie chart of the file counts per "time" value.
fig = px.pie(nights, names="time", values="count")
fig.show()

In [138]:
# Files per (night flag, level) combination; the flag column is renamed to "time".
nights_details = (
    data[["file_name", "night", "level"]]
    .groupby(by=["night", "level"])
    .count()
    .reset_index()
    .rename(columns={"file_name": "count", "night": "time"})
)
nights_details

Unnamed: 0,time,level,count
0,0.0,easy,152
1,0.0,hard,69
2,0.0,medium,110
3,1.0,easy,15
4,1.0,hard,35
5,1.0,medium,40


In [139]:
# Relabel the numeric night flag as text. Only the "time" column is touched:
# a frame-wide replace(0, ...) / replace(1, ...) would also turn a group whose
# "count" is exactly 0 or 1 into the string "day" / "night".
nights_details["time"] = nights_details["time"].replace({0: "day", 1: "night"})

In [140]:
# Bar chart of file counts per "time" value, coloured by difficulty level.
fig = px.bar(nights_details, x="time", y="count", color="level")
fig.show()

### Weather

In [141]:
def weather_condition(row):
    """Return the weather label for one row.

    ``"snow"`` takes precedence over ``"rain"``; a row where neither flag
    equals 1 is labelled ``"normal"``.
    """
    for condition in ("snow", "rain"):
        if row[condition] == 1:
            return condition
    return "normal"

In [142]:
# Label each row "snow", "rain" or "normal" from its snow and rain flags.
data["weather"] = data.apply(weather_condition, axis=1)

In [143]:
# Files per weather label.
weather = (
    data[["file_name", "weather"]]
    .groupby("weather")
    .count()
    .reset_index()
    .rename(columns={"file_name": "count"})
)
weather

Unnamed: 0,weather,count
0,normal,386
1,rain,2
2,snow,33


In [144]:
# Pie chart of the file counts per weather label.
fig = px.pie(weather, names="weather", values="count")
fig.show()

In [145]:
# Files per (weather, level) combination, restricted to rows whose weather
# label is not "normal".
adverse_rows = data.loc[
    data["weather"] != "normal",
    ["file_name", "weather", "level"],
]
weather_details = (
    adverse_rows
    .groupby(by=["weather", "level"])
    .count()
    .reset_index()
    .rename(columns={"file_name": "count"})
)
weather_details

Unnamed: 0,weather,level,count
0,rain,easy,2
1,snow,easy,17
2,snow,hard,4
3,snow,medium,12


In [146]:
# Bar chart of non-"normal" weather file counts, coloured by difficulty level.
fig = px.bar(weather_details, x="weather", y="count", color="level")
fig.show()