# Sub-Entities' tables
This notebook will create the `.csv` files for the tables:
* Location
* Road
* Pcf
* Vehicle

None of these tables depends on any other table, so they should be the first ones created.

In [1]:
import pandas as pd
import numpy as np

In [5]:
# Load the cleaned collisions export. Both identifier columns are pinned to
# text so pandas does not infer a numeric dtype for them.
collision = pd.read_csv(
    "../collisions_clean.csv",
    dtype={"officer_id": str, "case_id": str},
)

In [9]:
# Show the dtype pandas assigned to every column of `collision`.
print(collision.dtypes)

case_id                      object
collision_date               object
collision_severity            int64
collision_time               object
county_city_location          int64
hit_and_run                  object
jurisdiction                float64
lighting                     object
location_type                object
officer_id                   object
pcf_violation               float64
pcf_violation_category      float64
pcf_violation_subsection     object
population                  float64
primary_collision_factor     object
process_date                 object
ramp_intersection           float64
road_condition_1             object
road_condition_2             object
road_surface                 object
tow_away                    float64
type_of_collision            object
weather_1                    object
weather_2                    object
dtype: object


In [12]:
# Load the cleaned parties export, pinning three columns to text.
# NOTE(review): `case_id` is left to type inference here, whereas the
# collisions load reads it as text -- confirm before joining on it.
party = pd.read_csv(
    "../parties_clean.csv",
    dtype={
        "hazardous_materials": str,
        "party_safety_equipment_2": str,
        "school_bus_related": str,
    },
)

In [13]:
# Show the dtype pandas assigned to every column of `party`.
print(party.dtypes)

at_fault                          int64
case_id                           int64
cellphone_use                    object
financial_responsibility         object
hazardous_materials              object
id                                int64
movement_preceding_collision     object
other_associated_factor_1        object
other_associated_factor_2        object
party_age                       float64
party_drug_physical              object
party_number                      int64
party_safety_equipment_1         object
party_safety_equipment_2         object
party_sex                        object
party_sobriety                   object
party_type                      float64
school_bus_related               object
statewide_vehicle_type           object
vehicle_make                     object
vehicle_year                    float64
dtype: object


In [18]:
# Load the cleaned victims export. The victims file has no
# `party_safety_equipment_2` column; its second safety-equipment column is
# `victim_safety_equipment_2`, so that is the one pinned to text here.
victim = pd.read_csv(
    "../victims_clean.csv",
    dtype={"victim_safety_equipment_2": str},
)

In [19]:
# Show the dtype pandas assigned to every column of `victim`.
print(victim.dtypes)

case_id                        int64
id                             int64
party_number                   int64
victim_age                   float64
victim_degree_of_injury      float64
victim_ejected               float64
victim_role                    int64
victim_safety_equipment_1     object
victim_safety_equipment_2     object
victim_seating_position      float64
victim_sex                    object
dtype: object


## Location

We read the `county_city_location` and `population` columns from `collision` and keep each distinct pair once.

In [20]:
# Location table: the two location columns of `collision`, with `population`
# stored as nullable Int32 (so missing values survive), one row per distinct pair.
df_location = (
    collision[['county_city_location', 'population']]
    .astype({'population': 'Int32'})
    .drop_duplicates()
)

In [21]:
# Write the location table to ../location.csv; index=False keeps the
# DataFrame index out of the file.
df_location.to_csv(r'../location.csv', index = False)

In [22]:
# Round-trip check: read the exported file back and preview its first 20 rows.
test = pd.read_csv("../location.csv")
print(test.head(n=20))

    county_city_location  population
0                   1900         9.0
1                   1500         9.0
2                   1502         6.0
3                   3711         7.0
4                   3318         4.0
5                   1942         7.0
6                   1953         6.0
7                   1902         5.0
8                   1925         6.0
9                   1000         9.0
10                  1005         7.0
11                  3701         5.0
12                  3019         7.0
13                  3026         6.0
14                  3001         7.0
15                  5400         9.0
16                  5405         4.0
17                  1515         1.0
18                   900         9.0
19                  4313         7.0


## Road
We read the `road_surface`, `lighting`, `location_type` and `ramp_intersection` columns from `collision`, keep each distinct combination once, and generate a primary key `road_id`.

In [23]:
# Road table: the four road-description columns of `collision`, with
# `ramp_intersection` as nullable Int32, one row per distinct combination.
df_road = (
    collision[['road_surface', 'lighting', 'location_type', 'ramp_intersection']]
    .astype({'ramp_intersection': 'Int32'})
    .drop_duplicates()
)

# Surrogate primary key: 0..n-1 in the order the distinct rows were kept.
df_road['road_id'] = range(len(df_road))

In [24]:
# Export the road table without the DataFrame index ...
df_road.to_csv(r'../road.csv', index=False)

# ... then read the file back and preview its first 20 rows as a sanity check.
test = pd.read_csv("../road.csv")
print(test.head(n=20))

   road_surface lighting location_type  ramp_intersection  road_id
0             A        A           NaN                NaN        0
1             A      NaN           NaN                NaN        1
2             A        A             H                NaN        2
3             A        B           NaN                NaN        3
4             A        C           NaN                NaN        4
5             A        A             I                5.0        5
6             A        D           NaN                NaN        6
7             A        D             H                NaN        7
8             B        A             H                NaN        8
9             A        A             R                1.0        9
10            A        A             R                2.0       10
11            A        C             H                NaN       11
12            A        D             R                2.0       12
13            A        C             R                2.0     

## PCF
We read the `pcf_violation`, `pcf_violation_category`, `pcf_violation_subsection` and `primary_collision_factor` columns from `collision`, keep each distinct combination once, and generate a primary key `pcf_id`.

In [29]:
# Pcf table: the four primary-collision-factor columns of `collision`,
# one row per distinct combination.
df_pcf = (
    collision[[
        'pcf_violation',
        'pcf_violation_category',
        'pcf_violation_subsection',
        'primary_collision_factor',
    ]]
    # Both numeric code columns are float64 in `collision`. Casting them to
    # nullable Int32 keeps missing values and makes to_csv write e.g. "22107"
    # rather than "22107.0".
    # Assumes every non-missing code is a whole number -- astype raises otherwise.
    .astype({'pcf_violation': 'Int32', 'pcf_violation_category': 'Int32'})
    .drop_duplicates()
)

# Surrogate primary key: 0..n-1 in the order the distinct rows were kept.
df_pcf['pcf_id'] = range(len(df_pcf))

all 561 unique 561


In [26]:
# Export the pcf table without the DataFrame index ...
df_pcf.to_csv(r'../pcf.csv', index=False)

# ... then read the file back and preview its first 20 rows as a sanity check.
test = pd.read_csv("../pcf.csv")
print(test.head(n=20))

    pcf_violation  pcf_violation_category pcf_violation_subsection  \
0         22107.0                     8.0                      NaN   
1         22515.0                    13.0                        A   
2         23114.0                    17.0                        A   
3         22450.0                    12.0                        A   
4         22350.0                     3.0                      NaN   
5         22106.0                    19.0                      NaN   
6         21658.0                     7.0                        A   
7         21801.0                     9.0                        A   
8         21451.0                    17.0                        A   
9         21202.0                     5.0                        A   
10        21453.0                    12.0                        A   
11        23152.0                     1.0                        A   
12        21453.0                     9.0                        C   
13            NaN   

## Vehicle
We read the `statewide_vehicle_type`, `vehicle_make` and `vehicle_year` columns from `party`, keep each distinct combination once, and generate a primary key `vehicle_id`.

In [27]:
# Vehicle table: the three vehicle-description columns of `party`,
# one row per distinct combination.
df_vehicle = party[
    ['statewide_vehicle_type', 'vehicle_make', 'vehicle_year']
].drop_duplicates()

# Surrogate primary key: 0..n-1 in the order the distinct rows were kept.
df_vehicle['vehicle_id'] = range(len(df_vehicle))

# Store the year as nullable Int32 so missing years survive the integer cast.
df_vehicle = df_vehicle.astype({'vehicle_year': 'Int32'})

In [28]:
# Export the vehicle table without the DataFrame index ...
df_vehicle.to_csv(r'../vehicle.csv', index=False)

# ... then read the file back and preview its first 20 rows as a sanity check.
test = pd.read_csv("../vehicle.csv")
print(test.head(n=20))

   statewide_vehicle_type  vehicle_make  vehicle_year  vehicle_id
0                       A          FORD        2000.0           0
1                       A         BUICK        1992.0           1
2                       D        TOYOTA           NaN           2
3                       A          FORD        1995.0           3
4                       D           NaN           NaN           4
5                       A         HONDA           NaN           5
6                       A        TOYOTA        2001.0           6
7                       G  FREIGHTLINER        2001.0           7
8                       A     CHEVROLET        1997.0           8
9                       D          FORD        2001.0           9
10                      A     CHEVROLET        1991.0          10
11                      A         DODGE        1996.0          11
12                      D         DODGE        1997.0          12
13                      D     CHEVROLET        1994.0          13
14        