In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import linregress

In [13]:
# Load the cleaned Victorian crash statistics from the project's Resources folder (relative path)
crash_data = pd.read_csv(
    filepath_or_buffer="Resources/Crash_Statistics_Victoria_clean.csv",
)
# Preview the first five records
crash_data.head(n=5)

Unnamed: 0,OBJECTID,ACCIDENT_DATE,ACCIDENT_TIME,ALCOHOLTIME,ACCIDENT_TYPE,DAY_OF_WEEK,LIGHT_CONDITION,ROAD_GEOMETRY,SEVERITY,SPEED_ZONE,...,YOUNG_DRIVER,ALCOHOL_RELATED,UNLICENCSED,NO_OF_VEHICLES,HEAVYVEHICLE,PASSENGERVEHICLE,MOTORCYCLE,PUBLICVEHICLE,RMA,STAT_DIV_NAME
0,3401744,1/07/2013,18.30.00,Yes,Struck Pedestrian,Monday,Dark Street lights on,Not at intersection,Serious injury accident,60 km/hr,...,0,No,0,1,0,1,0,0,Local Road,Metro
1,3401745,2/07/2013,16.40.00,No,Collision with vehicle,Tuesday,Day,T intersection,Serious injury accident,60 km/hr,...,0,No,0,3,0,3,0,0,Arterial Other,Metro
2,3401746,2/07/2013,13.15.00,No,Collision with a fixed object,Tuesday,Day,T intersection,Serious injury accident,50 km/hr,...,0,Yes,0,1,0,1,0,0,Local Road,Metro
3,3401747,2/07/2013,16.45.00,No,Collision with a fixed object,Tuesday,Day,Not at intersection,Serious injury accident,110 km/hr,...,0,No,0,1,0,1,0,0,Freeway,Country
4,3401748,2/07/2013,15.48.00,No,Collision with vehicle,Tuesday,Day,Not at intersection,Other injury accident,50 km/hr,...,0,No,0,2,0,1,0,1,Local Road,Metro


In [3]:
# Size of the raw crash dataset as (rows, columns)
(len(crash_data.index), len(crash_data.columns))

(65435, 47)

In [4]:
# Count the missing entries in every column before cleaning
crash_data.isna().sum()

OBJECTID                0
ACCIDENT_DATE           0
ACCIDENT_TIME           0
ALCOHOLTIME             0
ACCIDENT_TYPE           0
DAY_OF_WEEK          1146
LIGHT_CONDITION         0
ROAD_GEOMETRY           0
SEVERITY                0
SPEED_ZONE              0
NODE_ID                 0
LONGITUDE               0
LATITUDE                0
NODE_TYPE               6
LGA_NAME                0
REGION_NAME             0
VICGRID_X               0
VICGRID_Y               0
TOTAL_PERSONS           0
INJ_OR_FATAL            0
FATALITY                0
SERIOUSINJURY           0
OTHERINJURY             0
NONINJURED              0
MALES                   0
FEMALES                 0
BICYCLIST               0
PASSENGER               0
DRIVER                  0
PEDESTRIAN              0
PILLION                 0
MOTORIST                0
UNKNOWN                 0
PED_CYCLIST_5_12        0
PED_CYCLIST_13_18       0
OLD_PEDESTRIAN          0
OLD_DRIVER              0
YOUNG_DRIVER            0
ALCOHOL_RELA

In [5]:
# Discard every record (row) that has at least one missing value
crash_data = crash_data.dropna(axis="index", how="any")

In [6]:
# Re-count missing entries per column to confirm the dropna step removed them all
crash_data.isna().sum(axis=0)


OBJECTID             0
ACCIDENT_DATE        0
ACCIDENT_TIME        0
ALCOHOLTIME          0
ACCIDENT_TYPE        0
DAY_OF_WEEK          0
LIGHT_CONDITION      0
ROAD_GEOMETRY        0
SEVERITY             0
SPEED_ZONE           0
NODE_ID              0
LONGITUDE            0
LATITUDE             0
NODE_TYPE            0
LGA_NAME             0
REGION_NAME          0
VICGRID_X            0
VICGRID_Y            0
TOTAL_PERSONS        0
INJ_OR_FATAL         0
FATALITY             0
SERIOUSINJURY        0
OTHERINJURY          0
NONINJURED           0
MALES                0
FEMALES              0
BICYCLIST            0
PASSENGER            0
DRIVER               0
PEDESTRIAN           0
PILLION              0
MOTORIST             0
UNKNOWN              0
PED_CYCLIST_5_12     0
PED_CYCLIST_13_18    0
OLD_PEDESTRIAN       0
OLD_DRIVER           0
YOUNG_DRIVER         0
ALCOHOL_RELATED      0
UNLICENCSED          0
NO_OF_VEHICLES       0
HEAVYVEHICLE         0
PASSENGERVEHICLE     0
MOTORCYCLE 

In [7]:
# Size of the crash dataset as (rows, columns) after dropping incomplete rows
(len(crash_data.index), len(crash_data.columns))

(63037, 47)

In [8]:
# Eyeball ten random records; the fixed seed keeps the displayed rows
# identical across "Restart Kernel -> Run All" executions
crash_data.sample(10, random_state=42)

Unnamed: 0,OBJECTID,ACCIDENT_DATE,ACCIDENT_TIME,ALCOHOLTIME,ACCIDENT_TYPE,DAY_OF_WEEK,LIGHT_CONDITION,ROAD_GEOMETRY,SEVERITY,SPEED_ZONE,...,YOUNG_DRIVER,ALCOHOL_RELATED,UNLICENCSED,NO_OF_VEHICLES,HEAVYVEHICLE,PASSENGERVEHICLE,MOTORCYCLE,PUBLICVEHICLE,RMA,STAT_DIV_NAME
27648,3429416,28/05/2015,15.10.00,No,Collision with vehicle,Thursday,Day,Cross intersection,Other injury accident,60 km/hr,...,0,No,0,2,0,2,0,0,Arterial Other,Metro
26891,3428659,25/05/2015,16.30.00,Yes,Collision with a fixed object,Sunday,Dusk/Dawn,Not at intersection,Other injury accident,100 km/hr,...,1,No,0,1,0,1,0,0,Local Road,Country
63472,3465313,1/06/2017,13.00.00,No,No collision and no object struck,Thursday,Day,Not at intersection,Other injury accident,80 km/hr,...,0,No,0,1,0,0,0,0,Arterial Highway,Country
47471,3449257,15/12/2016,13.30.00,No,Collision with vehicle,Thursday,Day,Not at intersection,Other injury accident,80 km/hr,...,1,No,0,3,1,2,0,0,Freeway,Metro
57499,3459334,18/09/2017,19.45.00,Yes,Collision with vehicle,Sunday,Dark Street lights on,T intersection,Other injury accident,60 km/hr,...,1,No,0,2,0,2,0,0,Arterial Other,Metro
63735,3465576,31/03/2018,19.40.00,Yes,Collision with a fixed object,Saturday,Dusk/Dawn,Not at intersection,Other injury accident,100 km/hr,...,1,No,0,1,0,1,0,0,Arterial Other,Country
56478,3458304,3/07/2017,18.00.00,Yes,Collision with vehicle,Monday,Dark Street lights on,Not at intersection,Other injury accident,Not known,...,1,No,0,3,0,3,0,0,Arterial Highway,Metro
4716,3406463,16/10/2013,18.20.00,Yes,Collision with vehicle,Wednesday,Dusk/Dawn,Cross intersection,Other injury accident,80 km/hr,...,0,No,0,2,0,2,0,0,Arterial Other,Metro
53410,3455220,10/03/2017,05.50.00,Yes,Collision with vehicle,Friday,Dusk/Dawn,T intersection,Other injury accident,60 km/hr,...,0,No,0,2,0,0,0,0,Arterial Other,Metro
61852,3463690,1/12/2017,15.40.00,No,Struck Pedestrian,Thursday,Day,Not at intersection,Other injury accident,50 km/hr,...,0,No,0,1,0,1,0,0,Local Road,Metro


In [9]:
#load local government expenditure data, skip first row
# Use a path relative to the notebook (same Resources folder as the crash data)
# instead of an absolute path into one user's home directory, so the notebook
# runs on any checkout of the repository.
localgov = pd.read_csv("Resources/lge_cleanfile.csv", skiprows = 1)
localgov.head(10)

Unnamed: 0,LGA_NAME_ALL,Local Roads - Sealed-length,Local Roads - Unsealed - Formed & Sheeted-length,Local Roads - Unsealed - Natural Surface_length,Total_roadlength,Local Roads - Sealed-expenditure,Local Roads - Unsealed - Formed & Sheeted-expenditure,Local Roads - Unsealed - Natural Surface-expenditure,Roads Ancillary-expenditure,Total-expenditure
0,Alpine,412,233,215,860,3748129,265116,0,0,4013244
1,Ararat,764,1420,240,2424,7303459,2975868,2281,1361455,11845829
2,Ballarat,1044,351,40,1435,41874084,3196942,363289,1045194,46775611
3,Banyule,538,6,1,545,10329600,0,0,0,10329600
4,Bass Coast,563,373,7,943,7355926,1677983,0,789457,9923525
5,Baw Baw,1110,721,184,2015,13512590,3092182,0,1493686,19073493
6,Bayside,355,1,0,356,5547236,0,0,6045707,11592943
7,Benalla,560,713,78,1351,2504772,1474785,0,0,3979557
8,Boroondara,562,0,0,562,11436082,0,0,0,11436082
9,Brimbank,889,5,0,894,25715082,344474,0,1997635,28078056


In [10]:
# Size of the local government dataset as (rows, columns)
(len(localgov.index), len(localgov.columns))

(79, 10)

In [11]:
# Count the missing entries in every column of the local government dataset
localgov.isna().sum()

LGA_NAME_ALL                                              0
Local Roads - Sealed-length                               0
Local Roads - Unsealed - Formed &  Sheeted-length         0
Local Roads - Unsealed - Natural Surface_length           0
Total_roadlength                                          0
Local Roads - Sealed-expenditure                          0
Local Roads - Unsealed - Formed &  Sheeted-expenditure    0
Local Roads - Unsealed - Natural Surface-expenditure      0
Roads Ancillary-expenditure                               0
Total-expenditure                                         0
dtype: int64

In [12]:
# Number of distinct LGA names present in the crash dataset
crash_data.loc[:, "LGA_NAME"].nunique()

79

In [None]:
#unique LGA names on local gov dataset
# A cell only renders its last expression, so the distinct count is printed
# explicitly; otherwise its result would be silently discarded.
print("Unique LGA names:", localgov["LGA_NAME_ALL"].nunique())
# Occurrences of each LGA name (rendered as the cell output)
localgov["LGA_NAME_ALL"].value_counts()

In [None]:
# Number of crash records per local government area.
# The crash dataset's LGA column is "LGA_NAME"; "LGA_NAME_ALL" exists only in
# the local government dataset, so grouping by it raised a KeyError.
df = crash_data.groupby("LGA_NAME")["OBJECTID"].count()
# sort_values has to be called -- the bare attribute only displays the bound
# method instead of the sorted counts.
df.sort_values()

In [None]:
# The previous row indexer passed the Total_roadlength *values* to .loc, which
# treats them as row labels; those values are not labels of the default
# RangeIndex, so the lookup raised a KeyError.
# NOTE(review): assumed intent is the total road length per LGA -- confirm.
df1 = localgov.loc[:, ["LGA_NAME_ALL", "Total_roadlength"]]
df1.head()