In [8]:
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd
from sqlalchemy import create_engine
import pymongo

In [3]:
# Load the ACCIDENT CSV into its own data frame.
# The path uses a forward slash: it resolves on Windows, macOS and Linux alike,
# and avoids the invalid "\A" escape sequence a backslash-separated literal
# would contain (flagged with a warning by current Python versions).
accident_df = pd.read_csv("resources/ACCIDENT.csv")
accident_df.head(2)

Unnamed: 0,ACCIDENT_NO,ACCIDENTDATE,ACCIDENTTIME,ACCIDENT_TYPE,Accident Type Desc,DAY_OF_WEEK,Day Week Description,DCA_CODE,DCA Description,DIRECTORY,...,NO_PERSONS,NO_PERSONS_INJ_2,NO_PERSONS_INJ_3,NO_PERSONS_KILLED,NO_PERSONS_NOT_INJ,POLICE_ATTEND,ROAD_GEOMETRY,Road Geometry Desc,SEVERITY,SPEED_ZONE
0,T20060000010,13/01/2006,12:42:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,6,0,1,0,5,1,1,Cross intersection,3,60
1,T20060000018,13/01/2006,19:10:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,4,0,1,0,3,1,2,T intersection,3,70


In [4]:
# Load the ACCIDENT_EVENT CSV into its own data frame.
# The path uses a forward slash: it resolves on Windows, macOS and Linux alike,
# and avoids the invalid "\A" escape sequence a backslash-separated literal
# would contain (flagged with a warning by current Python versions).
accident_event_df = pd.read_csv("resources/ACCIDENT_EVENT.csv")
accident_event_df.head(2)

Unnamed: 0,ACCIDENT_NO,EVENT_SEQ_NO,EVENT_TYPE,Event Type Desc,VEHICLE_1_ID,VEHICLE_1_COLL_PT,Vehicle 1 Coll Pt Desc,VEHICLE_2_ID,VEHICLE_2_COLL_PT,Vehicle 2 Coll Pt Desc,PERSON_ID,OBJECT_TYPE,Object Type Desc
0,T20060000010,1.0,C,Collision,B,2,Right side (forwards),A,F,Front,,99.0,Not Applicable
1,T20060000018,1.0,C,Collision,B,F,Front,A,9,Not known or Not Applicable,,99.0,Not Applicable


In [5]:
# Outer-join the ACCIDENT frame with the ACCIDENT_EVENT frame on the shared
# ACCIDENT_NO key, keeping rows that appear in either input.
combined_accident_event_df = accident_df.merge(
    accident_event_df,
    on="ACCIDENT_NO",
    how="outer",
)
combined_accident_event_df.head(2)

Unnamed: 0,ACCIDENT_NO,ACCIDENTDATE,ACCIDENTTIME,ACCIDENT_TYPE,Accident Type Desc,DAY_OF_WEEK,Day Week Description,DCA_CODE,DCA Description,DIRECTORY,...,Event Type Desc,VEHICLE_1_ID,VEHICLE_1_COLL_PT,Vehicle 1 Coll Pt Desc,VEHICLE_2_ID,VEHICLE_2_COLL_PT,Vehicle 2 Coll Pt Desc,PERSON_ID,OBJECT_TYPE,Object Type Desc
0,T20060000010,13/01/2006,12:42:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,Collision,B,2,Right side (forwards),A,F,Front,,99.0,Not Applicable
1,T20060000018,13/01/2006,19:10:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,Collision,B,F,Front,A,9,Not known or Not Applicable,,99.0,Not Applicable


In [6]:
# Load the ACCIDENT_LOCATION CSV into its own data frame.
# The path uses a forward slash: it resolves on Windows, macOS and Linux alike,
# and avoids the invalid "\A" escape sequence a backslash-separated literal
# would contain (flagged with a warning by current Python versions).
accident_location_df = pd.read_csv("resources/ACCIDENT_LOCATION.csv")
accident_location_df.head(2)

Unnamed: 0,ACCIDENT_NO,NODE_ID,ROAD_ROUTE_1,ROAD_NAME,ROAD_TYPE,ROAD_NAME_INT,ROAD_TYPE_INT,DISTANCE_LOCATION,DIRECTION_LOCATION,NEAREST_KM_POST,OFF_ROAD_LOCATION
0,T20060000010,43078,2090.0,FOSTER,STREET,MCCRAE,STREET,0.0,SW,,
1,T20060000018,29720,5057.0,HALLAM,ROAD,BELGRAVE-HALLAM,ROAD,70.0,S,,


In [9]:
# Outer-join the already combined ACCIDENT / ACCIDENT_EVENT frame with the
# ACCIDENT_LOCATION frame on the shared ACCIDENT_NO key.
combined_accident_df = combined_accident_event_df.merge(
    accident_location_df,
    on="ACCIDENT_NO",
    how="outer",
)
combined_accident_df.head(2)

Unnamed: 0,ACCIDENT_NO,ACCIDENTDATE,ACCIDENTTIME,ACCIDENT_TYPE,Accident Type Desc,DAY_OF_WEEK,Day Week Description,DCA_CODE,DCA Description,DIRECTORY,...,NODE_ID_y,ROAD_ROUTE_1,ROAD_NAME,ROAD_TYPE,ROAD_NAME_INT,ROAD_TYPE_INT,DISTANCE_LOCATION,DIRECTION_LOCATION,NEAREST_KM_POST,OFF_ROAD_LOCATION
0,T20060000010,13/01/2006,12:42:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,43078,2090.0,FOSTER,STREET,MCCRAE,STREET,0.0,SW,,
1,T20060000018,13/01/2006,19:10:00,1,Collision with vehicle,6,Friday,113,RIGHT NEAR (INTERSECTIONS ONLY),MEL,...,29720,5057.0,HALLAM,ROAD,BELGRAVE-HALLAM,ROAD,70.0,S,,


In [18]:
# Columns carried over from the combined frame into the transformed table
accident_cols = [
    "ACCIDENT_NO",
    "Day Week Description",
    "ACCIDENTDATE",
    "ACCIDENTTIME",
    "Accident Type Desc",
    "DCA Description",
    "DIRECTORY",
    "ROAD_NAME",
    "ROAD_TYPE",
    "ROAD_TYPE_INT",
]

# Take an independent copy of just those columns and index it by ACCIDENT_NO
accident_transformed_df = (
    combined_accident_df[accident_cols]
    .copy()
    .set_index("ACCIDENT_NO")
)

accident_transformed_df.head()

Unnamed: 0_level_0,Day Week Description,ACCIDENTDATE,ACCIDENTTIME,Accident Type Desc,DCA Description,DIRECTORY,ROAD_NAME,ROAD_TYPE,ROAD_TYPE_INT
ACCIDENT_NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
T20060000010,Friday,13/01/2006,12:42:00,Collision with vehicle,RIGHT NEAR (INTERSECTIONS ONLY),MEL,FOSTER,STREET,STREET
T20060000018,Friday,13/01/2006,19:10:00,Collision with vehicle,RIGHT NEAR (INTERSECTIONS ONLY),MEL,HALLAM,ROAD,ROAD
T20060000022,Saturday,14/01/2006,12:10:00,Fall from or in moving vehicle,FELL IN/FROM VEHICLE,MEL,BROWNS,ROAD,ROAD
T20060000023,Saturday,14/01/2006,11:49:00,Collision with vehicle,REAR END(VEHICLES IN SAME LANE),MEL,SPRINGVALE,ROAD,AVENUE
T20060000026,Saturday,14/01/2006,10:45:00,Collision with vehicle,RIGHT THROUGH,MEL,ELIZABETH,AVENUE,CRESCENT


In [19]:

# Write the transformed accident table to a CSV file (relative path)
accident_transformed_df.to_csv(path_or_buf="Victorian_Accident_Data_2006-2020.csv")

In [17]:
# Postgres connection setup
# Creating database connection
# connection_string = "postgres:postgres@localhost:5432/customer_db"
# engine = create_engine(f'postgresql://{connection_string}')


In [None]:
# Postgres connection setup
# Confirm tables
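# inspect(engine).get_table_names()  # requires `from sqlalchemy import inspect`; Engine.table_names() was removed in SQLAlchemy 2.0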

In [None]:
# Load DataFrame into database
# accident_transformed_df.to_sql(name='premise', con=engine, if_exists='append', index=True)

In [None]:
#Mongo setup
# The default port used by MongoDB is 27017
#conn = 'mongodb://localhost:27017'
#client = pymongo.MongoClient(conn)

# Define the database in Mongo
#db = client.accident_transformed