In [1]:
import pandas as pd
from tqdm import tqdm
from random import shuffle
from sklearn.tree import DecisionTreeClassifier

In [2]:
df = pd.read_csv('../data/processed/data_prepared.csv')

# data preprocessing
cat_features = df.columns[df.columns.str.contains('id')]
for cat_col in cat_features:
    df[cat_col] = df[cat_col].astype(str)
df

Unnamed: 0,vehicle_id,circumstances_Alarm signal,circumstances_Backward motion,circumstances_Beginning of traffic,circumstances_Breaking,circumstances_Detour,circumstances_Direct traffic,circumstances_Fifth,circumstances_First,circumstances_Fog lights,...,road_conditions_Other,road_conditions_Snow,road_conditions_Wet,fd_decision_FG,fd_decision_NG,hour,day_of_month,month,year,n_vehicles_left_in_accident
0,26245.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,1,2,2020,2
1,26244.0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,1,1,2,2020,2
2,26244.0,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,1,1,2,2020,2
3,26244.0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,1,2,2020,2
4,26324.0,0,0,0,0,0,0,0,1,0,...,0,0,1,0,1,18,1,2,2020,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18439,566531.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,19,7,2021,2
18440,566529.0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,19,7,2021,2
18441,566529.0,0,0,1,0,0,0,0,0,0,...,0,0,0,1,0,1,19,7,2021,2
18442,566529.0,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,1,19,7,2021,2


## Model trials

In [3]:
print(df.shape)
print(df.reset_index().vehicle_id.nunique())

(18444, 69)
7497


#### Feature transformation 

In [4]:
# Circumstances features transformation: from multiple rows into one row representation
circumstances_cols = df.columns[df.columns.str.contains('circumstances')]
vehicle_circum_data=[]
for vehicle in tqdm(df.vehicle_id.unique()):
    data_dict = df[(df.vehicle_id == vehicle)][circumstances_cols].drop_duplicates().sum(axis=0).reset_index()
    vehicle_circum_data.append(data_dict.iloc[:, 1].values)

df_circum = pd.DataFrame(vehicle_circum_data, columns = circumstances_cols)
df_circum['vehicle_id'] = df.vehicle_id.unique()
df_circum

100%|██████████████████████████████████████| 7497/7497 [00:14<00:00, 508.77it/s]


Unnamed: 0,circumstances_Alarm signal,circumstances_Backward motion,circumstances_Beginning of traffic,circumstances_Breaking,circumstances_Detour,circumstances_Direct traffic,circumstances_Fifth,circumstances_First,circumstances_Fog lights,circumstances_Fourth,...,circumstances_Reverse,circumstances_Right blinker,circumstances_Roadside,circumstances_Second,circumstances_Sixth,circumstances_Steady condition,circumstances_Third,circumstances_Turn to right,circumstances_Yellow,vehicle_id
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,26245.0
1,0,0,0,0,0,1,0,1,0,0,...,0,0,000,0,0,0,0,0,0,26244.0
2,0,0,0,0,0,1,0,1,0,0,...,0,0,000,0,0,0,0,0,0,26324.0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,26323.0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,26341.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7492,0,0,0,0,0,1,0,0,0,0,...,0,0,000,0,0,0,1,0,0,5651.0
7493,0,0,0,0,0,1,0,0,0,0,...,0,0,0000,1,0,0,0,0,0,5669.0
7494,0,0,0,0,0,1,0,1,0,0,...,0,0,00,0,0,0,0,0,0,5668.0
7495,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,566531.0


In [5]:
# Road Signs features transformation: from multiple rows into one row representation
road_signs_cols = df.columns[df.columns.str.contains('road_sign')]
vehicle_road_sign_data=[]
for vehicle in tqdm(df.vehicle_id.unique()):
    data_dict = df[(df.vehicle_id == vehicle)][road_signs_cols].drop_duplicates().sum(axis=0).reset_index()
    vehicle_road_sign_data.append(data_dict.iloc[:, 1].values)

df_road_signs = pd.DataFrame(vehicle_road_sign_data, columns = road_signs_cols)
df_road_signs['vehicle_id'] = df.vehicle_id.unique()
df_road_signs

100%|██████████████████████████████████████| 7497/7497 [00:10<00:00, 688.93it/s]


Unnamed: 0,road_signs_1.2 Երկաթուղային գծանց` առանց ուղեփակոցի,road_signs_1.22 Հետիոտնային անցում,road_signs_1.3.2 Բազմագիծ երկաթուղի,road_signs_1.4.3 Մոտեցում երկաթուղային գծանցին,road_signs_1.8 Լուսացուցային կարգավորում,road_signs_2.1 Գլխավոր ճանապարհ,road_signs_2.4 Զիջեք ճանապարհը,road_signs_4.1.1 Երթևեկությունն ուղիղ,road_signs_5.15.1 Երթևեկության ուղղությունները գոտիներով,road_signs_5.19.2 Հետիոտնային անցում,road_signs_8.13 Գլխավոր ճանապարհի ուղղություն,road_signs_8.22.1 Խոչընդոտ,vehicle_id
0,0,0,0,0,0,0,0,0,0,0,0,0,26245.0
1,0,0,0,0,0,0,0,0,0,0,0,0,26244.0
2,0,0,0,0,0,0,0,0,0,0,0,0,26324.0
3,0,0,0,0,0,0,0,0,0,0,0,0,26323.0
4,0,0,0,0,0,0,0,0,0,0,0,0,26341.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7492,0,0,0,0,0,0,0,0,0,0,0,0,5651.0
7493,0,0,0,0,0,0,0,0,0,0,0,0,5669.0
7494,0,0,0,0,0,0,0,0,0,0,0,0,5668.0
7495,0,0,0,0,0,0,0,0,0,0,0,0,566531.0


In [6]:
# Road Surface features transformation: from multiple rows into one row representation
road_surface_cols = df.columns[df.columns.str.contains('road_surface')]
vehicle_road_surface_data=[]
for vehicle in tqdm(df.vehicle_id.unique()):
    data_dict = df[(df.vehicle_id == vehicle)][road_surface_cols].drop_duplicates().sum(axis=0).reset_index()
    vehicle_road_surface_data.append(data_dict.iloc[:, 1].values)

df_road_surface = pd.DataFrame(vehicle_road_surface_data, columns = road_surface_cols)
df_road_surface['vehicle_id'] = df.vehicle_id.unique()
df_road_surface

100%|██████████████████████████████████████| 7497/7497 [00:09<00:00, 774.64it/s]


Unnamed: 0,road_surface_Asphalt-concrete,road_surface_Cement-concrete,road_surface_Cobblestone,road_surface_Gravel base,road_surface_Other,vehicle_id
0,1,0,0,0,0,26245.0
1,1,0,0,0,0,26244.0
2,1,0,0,0,0,26324.0
3,1,0,0,0,0,26323.0
4,1,0,0,0,0,26341.0
...,...,...,...,...,...,...
7492,1,0,0,0,0,5651.0
7493,1,0,0,0,0,5669.0
7494,1,0,0,0,0,5668.0
7495,1,0,0,0,0,566531.0


In [7]:
# Visibility features transformation: from multiple rows into one row representation
visibility_cols = df.columns[df.columns.str.contains('visibility')]
vehicle_visibility_data=[]
for vehicle in tqdm(df.vehicle_id.unique()):
    data_dict = df[(df.vehicle_id == vehicle)][visibility_cols].drop_duplicates().sum(axis=0).reset_index()
    vehicle_visibility_data.append(data_dict.iloc[:, 1].values)

df_visibility = pd.DataFrame(vehicle_visibility_data, columns = visibility_cols)
df_visibility['vehicle_id'] = df.vehicle_id.unique()
df_visibility

100%|██████████████████████████████████████| 7497/7497 [00:09<00:00, 756.18it/s]


Unnamed: 0,visibility_Clear day,visibility_Clear night,visibility_Foggy day,visibility_Foggy night,visibility_Rainy,visibility_Snow,visibility_Sunny,vehicle_id
0,0,1,0,0,0,0,0,26245.0
1,0,1,0,0,0,0,0,26244.0
2,1,0,0,0,0,0,0,26324.0
3,1,0,0,0,0,0,0,26323.0
4,1,0,0,0,0,0,0,26341.0
...,...,...,...,...,...,...,...,...
7492,1,0,0,0,0,0,0,5651.0
7493,1,0,0,0,0,0,0,5669.0
7494,1,0,0,0,0,0,0,5668.0
7495,1,0,0,0,0,0,0,566531.0


In [8]:
# Road conditions features transformation: from multiple rows into one row representation
road_conditions_cols = df.columns[df.columns.str.contains('road_condition')]
vehicle_road_conditions_data=[]
for vehicle in tqdm(df.vehicle_id.unique()):
    data_dict = df[(df.vehicle_id == vehicle)][road_conditions_cols].drop_duplicates().sum(axis=0).reset_index()
    vehicle_road_conditions_data.append(data_dict.iloc[:, 1].values)

df_road_conditions = pd.DataFrame(vehicle_road_conditions_data, columns = road_conditions_cols)
df_road_conditions['vehicle_id'] = df.vehicle_id.unique()
df_road_conditions

100%|██████████████████████████████████████| 7497/7497 [00:09<00:00, 766.27it/s]


Unnamed: 0,road_conditions_Covered with flattened snow,road_conditions_Dry,road_conditions_Ice,road_conditions_Other,road_conditions_Snow,road_conditions_Wet,vehicle_id
0,0,1,0,0,0,0,26245.0
1,0,1,0,0,0,0,26244.0
2,0,0,0,0,0,1,26324.0
3,0,0,0,0,0,1,26323.0
4,0,1,0,0,0,0,26341.0
...,...,...,...,...,...,...,...
7492,0,1,0,0,0,0,5651.0
7493,0,1,0,0,0,0,5669.0
7494,0,1,0,0,0,0,5668.0
7495,0,1,0,0,0,0,566531.0


In [9]:
df_no_dup = df[['vehicle_id'] + list(set(df.columns) - 
                                     set(
                                         list(df_circum.columns) + 
                                         list(df_road_signs.columns) + 
                                         list(df_road_surface.columns) + 
                                         list(df_visibility.columns) + 
                                         list(df_road_conditions.columns)
                                     ))].drop_duplicates()
df_no_dup.vehicle_id.value_counts()

5.0         18
7.0         18
17.0        16
3.0         15
11.0        15
            ..
98334.0      1
98295.0      1
98296.0      1
98189.0      1
566529.0     1
Name: vehicle_id, Length: 7497, dtype: int64

In [10]:
a=df_no_dup.copy()

In [11]:
a[a.vehicle_id=='1396.0']

Unnamed: 0,vehicle_id,month,fd_decision_FG,n_vehicles_left_in_accident,day_of_month,hour,fd_decision_NG,year
10285,1396.0,9,1,2,13,20,0,2020
11187,1396.0,8,1,2,30,14,0,2020
11192,1396.0,9,1,7,27,13,0,2020


In [12]:
a[a.vehicle_id=='7.0']

Unnamed: 0,vehicle_id,month,fd_decision_FG,n_vehicles_left_in_accident,day_of_month,hour,fd_decision_NG,year
461,7.0,2,1,2,9,21,0,2020
548,7.0,2,1,2,11,12,0,2020
1606,7.0,3,1,2,1,23,0,2020
1615,7.0,3,1,2,2,15,0,2020
2715,7.0,3,0,2,3,2,1,2020
3548,7.0,2,1,2,8,8,0,2020
4342,7.0,5,0,6,23,8,1,2020
4434,7.0,5,0,3,23,12,1,2020
4460,7.0,5,0,2,23,11,1,2020
4640,7.0,5,1,3,30,9,0,2020


In [13]:
df_prep = df_no_dup.merge(df_circum, 
                          on = 'vehicle_id', how='left')\
                   .merge(df_road_signs, 
                          on = 'vehicle_id', how='left')\
                   .merge(df_road_surface, 
                          on = 'vehicle_id', how='left')\
                   .merge(df_visibility, 
                          on = 'vehicle_id', how='left')\
                   .merge(df_road_conditions, 
                          on = 'vehicle_id', how='left').set_index('vehicle_id')

In [14]:
l_unique_vehicles = list(df_prep.index.unique())
shuffle(l_unique_vehicles)

df_train = df_prep[df_prep.index.isin(l_unique_vehicles[:int(len(l_unique_vehicles)*0.7)])]
df_val = df_prep[df_prep.index.isin(l_unique_vehicles[int(len(l_unique_vehicles)*0.7):int(len(l_unique_vehicles)*0.8)])]
df_test = df_prep[df_prep.index.isin(l_unique_vehicles[int(len(l_unique_vehicles)*0.8):])]

x_train, y_train = df_train.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_train['fd_decision_NG']
x_val, y_val = df_val.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_val['fd_decision_NG']
x_test, y_test = df_test.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_test['fd_decision_NG']

print(f"N Total vehicles: {df_prep.index.nunique()}")
print(f"N vehicles in train: {df_train.index.nunique()}")
print(f"N vehicles in test: {df_test.index.nunique()}")
print(f"N vehicles in val: {df_val.index.nunique()}")

print(f"Checkings!!!!")
print(df_prep.index.nunique() == df_train.index.nunique() + df_test.index.nunique() + df_val.index.nunique())
print(set(df_train.index).intersection(set(df_val.index)) == set())
print(set(df_train.index).intersection(set(df_test.index)) == set())
print(set(df_test.index).intersection(set(df_val.index)) == set())

N Total vehicles: 7497
N vehicles in train: 5247
N vehicles in test: 1500
N vehicles in val: 750
Checkings!!!!
True
True
True
True


In [15]:
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x_train, y_train)
print("Accuracy on training set: {:.3f}".format(tree.score(x_train, y_train)))
print("Accuracy on test set: {:.3f}".format(tree.score(x_test, y_test)))

Accuracy on training set: 0.964
Accuracy on test set: 0.460


### Take those vehicles with only one row

In [39]:
df_veh_count = df_prep.index.value_counts().reset_index()
veh_count1 = [f'{i}.0' for i in df_veh_count[df_veh_count.vehicle_id==1].index.values]
df_prep_count1 = df_prep[df_prep.index.isin(veh_count1)]

In [40]:
l_unique_vehicles = list(df_prep_count1.index.unique())
shuffle(l_unique_vehicles)

df_train = df_prep_count1[df_prep_count1.index.isin(l_unique_vehicles[:int(len(l_unique_vehicles)*0.7)])]
df_val = df_prep_count1[df_prep_count1.index.isin(l_unique_vehicles[int(len(l_unique_vehicles)*0.7):int(len(l_unique_vehicles)*0.8)])]
df_test = df_prep_count1[df_prep_count1.index.isin(l_unique_vehicles[int(len(l_unique_vehicles)*0.8):])]

x_train, y_train = df_train.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_train['fd_decision_NG']
x_val, y_val = df_val.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_val['fd_decision_NG']
x_test, y_test = df_test.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_test['fd_decision_NG']

print(f"N Total vehicles: {df_prep_count1.index.nunique()}")
print(f"N vehicles in train: {df_train.index.nunique()}")
print(f"N vehicles in test: {df_test.index.nunique()}")
print(f"N vehicles in val: {df_val.index.nunique()}")

print(f"Checkings!!!!")
print(df_prep_count1.index.nunique() == df_train.index.nunique() + df_test.index.nunique() + df_val.index.nunique())
print(set(df_train.index).intersection(set(df_val.index)) == set())
print(set(df_train.index).intersection(set(df_test.index)) == set())
print(set(df_test.index).intersection(set(df_val.index)) == set())

N Total vehicles: 1511
N vehicles in train: 1057
N vehicles in test: 303
N vehicles in val: 151
Checkings!!!!
True
True
True
True


In [18]:
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x_train, y_train)
print("Accuracy on training set: {:.3f}".format(tree.score(x_train, y_train)))
print("Accuracy on test set: {:.3f}".format(tree.score(x_test, y_test)))

Accuracy on training set: 0.981
Accuracy on test set: 0.461


In [42]:
from sklearn.tree import export_graphviz
export_graphviz(tree, out_file="tree.dot", class_names=["fg", "ng"],
                feature_names=x_train.columns, impurity=False, filled=True)

In [46]:
import graphviz

with open("tree.dot") as f:
    dot_graph = f.read()
display(graphviz.Source(dot_graph))

ExecutableNotFound: failed to execute PosixPath('dot'), make sure the Graphviz executables are on your systems' PATH

<graphviz.sources.Source at 0x16bc7e2c0>

In [47]:
! brew install graphviz

Running `brew update --auto-update`...
[34m==>[0m [1mAuto-updated Homebrew![0m
Updated 1 tap (homebrew/core).
[34m==>[0m [1mNew Formulae[0m
akku                       hotbuild                   proxsuite
aribb24                    hysteria                   python-lsp-server
bindgen                    ibazel                     retdec
brpc                       joker                      rnr
btrfs-progs                jscpd                      ruff
buf                        kubefirst                  sapling
busted                     kubevious                  seven-kingdoms
cdsclient                  kustomizer                 skaffold@1.39
clitest                    libdivide                  skeema
clusterawsadm              libemf2svg                 snakefmt
cmctl                      libgrapheme                socket_vmnet
cntb                       libisofs                   souffle
cocogitto                  libunibreak                spectral-cli
code-cli           

In [19]:
df_prep_count1

Unnamed: 0_level_0,month,fd_decision_FG,n_vehicles_left_in_accident,day_of_month,hour,fd_decision_NG,year,circumstances_Alarm signal,circumstances_Backward motion,circumstances_Beginning of traffic,...,visibility_Foggy night,visibility_Rainy,visibility_Snow,visibility_Sunny,road_conditions_Covered with flattened snow,road_conditions_Dry,road_conditions_Ice,road_conditions_Other,road_conditions_Snow,road_conditions_Wet
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
827.0,2,0,4,11,11,1,2020,0,0,0,...,0,0,0,0,0,1,1,0,0,0
825.0,2,1,4,11,11,0,2020,0,0,0,...,0,0,0,0,0,1,1,0,0,0
853.0,2,0,2,11,8,1,2020,0,0,0,...,0,0,0,0,0,1,0,0,0,0
879.0,2,1,2,7,7,0,2020,0,0,0,...,0,0,0,0,0,1,0,1,0,1
929.0,2,0,3,17,18,1,2020,0,0,0,...,0,0,0,1,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5634.0,7,0,2,16,21,1,2021,0,0,0,...,0,0,0,0,0,1,0,0,0,0
5652.0,7,1,2,13,21,0,2021,0,0,0,...,0,0,0,0,0,1,0,0,0,0
5651.0,7,0,2,13,21,1,2021,0,0,0,...,0,0,0,0,0,1,0,0,0,0
5669.0,7,0,2,13,21,1,2021,0,0,0,...,0,0,0,0,0,1,0,0,0,0


### Modelling without any feature transform

In [26]:
df_ = df.set_index('vehicle_id')

l_unique_vehicles = list(df_.index.unique())
shuffle(l_unique_vehicles)

df_train = df_[df_.index.isin(l_unique_vehicles[:int(len(l_unique_vehicles)*0.7)])]
df_val = df_[df_.index.isin(l_unique_vehicles[int(len(l_unique_vehicles)*0.7):int(len(l_unique_vehicles)*0.8)])]
df_test = df_[df_.index.isin(l_unique_vehicles[int(len(l_unique_vehicles)*0.8):])]

x_train, y_train = df_train.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_train['fd_decision_NG']
x_val, y_val = df_val.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_val['fd_decision_NG']
x_test, y_test = df_test.drop(['fd_decision_FG', 'fd_decision_NG'], axis=1), df_test['fd_decision_NG']

print(f"N Total vehicles: {df_.index.nunique()}")
print(f"N vehicles in train: {df_train.index.nunique()}")
print(f"N vehicles in test: {df_test.index.nunique()}")
print(f"N vehicles in val: {df_val.index.nunique()}")

print(f"Checkings!!!!")
print(df_.index.nunique() == df_train.index.nunique() + df_test.index.nunique() + df_val.index.nunique())
print(set(df_train.index).intersection(set(df_val.index)) == set())
print(set(df_train.index).intersection(set(df_test.index)) == set())
print(set(df_test.index).intersection(set(df_val.index)) == set())

N Total vehicles: 7497
N vehicles in train: 5247
N vehicles in test: 1500
N vehicles in val: 750
Checkings!!!!
True
True
True
True


In [27]:
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x_train, y_train)
print("Accuracy on training set: {:.3f}".format(tree.score(x_train, y_train)))
print("Accuracy on test set: {:.3f}".format(tree.score(x_test, y_test)))

Accuracy on training set: 0.892
Accuracy on test set: 0.311


### EYE CHECK

In [28]:
df_prep.iloc[0:2,:7]

Unnamed: 0_level_0,month,fd_decision_FG,n_vehicles_left_in_accident,day_of_month,hour,fd_decision_NG,year
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
26245.0,2,0,2,1,1,1,2020
26244.0,2,1,2,1,1,0,2020


In [29]:
df_prep[circumstances_cols].iloc[0:2,:10]

Unnamed: 0_level_0,circumstances_Alarm signal,circumstances_Backward motion,circumstances_Beginning of traffic,circumstances_Breaking,circumstances_Detour,circumstances_Direct traffic,circumstances_Fifth,circumstances_First,circumstances_Fog lights,circumstances_Fourth
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
26245.0,0,0,0,0,0,0,0,0,0,0
26244.0,0,0,0,0,0,1,0,1,0,0


In [30]:
df_prep[circumstances_cols].iloc[0:2,10:20]

Unnamed: 0_level_0,circumstances_Green,circumstances_Left blinker,circumstances_Long,circumstances_Marker lights,circumstances_Not working,circumstances_Oncoming traffic,circumstances_Overtaking,circumstances_Parked,circumstances_Parking,circumstances_Rearrangement to left
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
26245.0,0,0,0,0,0,0,0,0,0,0
26244.0,1,0,0,0,0,0,0,0,0,0


In [31]:
df_prep[circumstances_cols].iloc[0:2,20:]

Unnamed: 0_level_0,circumstances_Rearrangement to right,circumstances_Red,circumstances_Reverse,circumstances_Right blinker,circumstances_Roadside,circumstances_Second,circumstances_Sixth,circumstances_Steady condition,circumstances_Third,circumstances_Turn to right,circumstances_Yellow
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
26245.0,0,0,0,0,0,0,0,0,0,0,0
26244.0,0,0,0,0,0,0,0,0,0,0,0


In [32]:
df_prep[visibility_cols].iloc[0:2,:]

Unnamed: 0_level_0,visibility_Clear day,visibility_Clear night,visibility_Foggy day,visibility_Foggy night,visibility_Rainy,visibility_Snow,visibility_Sunny
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
26245.0,0,1,0,0,0,0,0
26244.0,0,1,0,0,0,0,0


In [35]:
df_prep[road_signs_cols].iloc[0:2,:]

Unnamed: 0_level_0,road_signs_1.2 Երկաթուղային գծանց` առանց ուղեփակոցի,road_signs_1.22 Հետիոտնային անցում,road_signs_1.3.2 Բազմագիծ երկաթուղի,road_signs_1.4.3 Մոտեցում երկաթուղային գծանցին,road_signs_1.8 Լուսացուցային կարգավորում,road_signs_2.1 Գլխավոր ճանապարհ,road_signs_2.4 Զիջեք ճանապարհը,road_signs_4.1.1 Երթևեկությունն ուղիղ,road_signs_5.15.1 Երթևեկության ուղղությունները գոտիներով,road_signs_5.19.2 Հետիոտնային անցում,road_signs_8.13 Գլխավոր ճանապարհի ուղղություն,road_signs_8.22.1 Խոչընդոտ
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
26245.0,0,0,0,0,0,0,0,0,0,0,0,0
26244.0,0,0,0,0,0,0,0,0,0,0,0,0


In [37]:
df_prep[road_surface_cols].iloc[0:2,:]

Unnamed: 0_level_0,road_surface_Asphalt-concrete,road_surface_Cement-concrete,road_surface_Cobblestone,road_surface_Gravel base,road_surface_Other
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
26245.0,1,0,0,0,0
26244.0,1,0,0,0,0


In [38]:
df_prep[road_conditions_cols].iloc[0:2,:]

Unnamed: 0_level_0,road_conditions_Covered with flattened snow,road_conditions_Dry,road_conditions_Ice,road_conditions_Other,road_conditions_Snow,road_conditions_Wet
vehicle_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
26245.0,0,1,0,0,0,0
26244.0,0,1,0,0,0,0
