## Traffic Cameras.ipynb (Traffic Cameras Analysis file)

### Libraries and functions

In [None]:
%matplotlib inline
import matplotlib

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from csv import reader
import folium

\pagebreak

### Reading and merging data

In [None]:
# Load the traffic camera locations.
# Forward slashes are used because the backslash form relied on invalid
# string escapes ("\.", "\C", "\T") and only resolved on Windows.
camera_df = pd.read_csv('../../CSV_files/Traffic_Camera_Locations.csv')
# Tag every row with its source so it can still be identified after a merge.
camera_df['data'] = 'TrafficCameraLocations'
#camera_df.head() #<-- visual QC of dataframe

In [None]:
# Load the traffic incidents.
# Forward slashes are used because the backslash form relied on invalid
# string escapes ("\.", "\C", "\T") and only resolved on Windows.
incidents_df = pd.read_csv('../../CSV_files/Traffic_Incidents.csv')
# New column identifying which dataframe a row came from after the merge.
incidents_df['data'] = 'Camera - Incident'
# Rename columns so they match the other dataframe's columns - makes the merge easier.
incidents_df = incidents_df.rename(
    columns={'Latitude': 'latitude', 'Longitude': 'longitude', 'Count': 'Incidents'}
)
#incidents_df.head() #<-- visual QC of dataframe

In [None]:
# Round both coordinate columns to 4 decimal places in each dataframe, so that
# a camera and an incident recorded within roughly 10 m of each other end up
# with identical latitude/longitude values and can be matched by the merge.
incidents_df, camera_df = (
    frame.round({'latitude': 4, 'longitude': 4})
    for frame in (incidents_df, camera_df)
)
#camera_df.head() #<-- visual QC of dataframe

In [None]:
# Merge the incidents onto the cameras (right join keeps every camera row) so we
# can determine which cameras had an incident occur near them.
#
# Keep one row per distinct (rounded) incident location first. Without this, a
# camera with several incidents at its location is repeated once per incident,
# and the later group-sum would count incidents rather than cameras.
incident_sites = incidents_df.drop_duplicates(subset=['latitude', 'longitude'])
df_total = pd.merge(
    left=incident_sites,
    right=camera_df,
    how='right',
    on=['latitude', 'longitude'],
)
# NaN in "data_x" marks camera rows that matched no incident; label them so the
# later group-by can separate "Camera - no incident" from "Camera - Incident".
df_total['data_x'] = df_total['data_x'].fillna("Camera - no incident")
# Every row now stands for exactly one camera, so give each a weight of 1;
# summing this column per group then yields the number of cameras.
df_total['Incidents'] = 1
# Rename "Incidents" to "Count" so the table heading makes more sense.
df_total = df_total.rename(columns={"Incidents": "Count"})
#df_total.head() #<-- visual QC of dataframe

\pagebreak

### Percentage of cameras that caught an incident within ~10 m of the camera (Table)

In [None]:
# Total the 'Count' column for the incident rows and the non-incident rows.
# Only 'Count' is aggregated: summing every column would also add up the
# coordinates and concatenate the text columns (pandas >= 2.0 no longer drops
# non-numeric columns by default), none of which is used afterwards.
df_total = df_total.groupby('data_x')[['Count']].sum()
#df_total   #4 decimal points =~10m radius around camera latitudes and longitudes

In [None]:
# Express each group's count as a percentage of the overall total.
count_per_group = df_total['Count']
totalCount = count_per_group.sum()  # denominator for the percentage
#totalCount #<-- visual QC check
df_total['%'] = 100 * (count_per_group / totalCount)
# Keep only the two columns that belong in the summary table.
df_total = df_total.loc[:, ['Count', '%']]
df_total #<-- show table

\pagebreak

### Cameras near incidents (bar graph)

In [None]:
# Bar plot of the percentages from the table above.
# reset_index turns the 'data_x' group labels back into a regular column for seaborn.
df_total = df_total.reset_index()
fig, ax = plt.subplots(figsize=(15, 7))

sns.barplot(data=df_total, x='data_x', y='%', ax=ax)

ax.set_ylabel("Percentage (%)", fontsize=20)
ax.set_xlabel("Cameras", fontsize=20)
ax.set_title("Cameras that Caught Incidents", fontsize=24)

\pagebreak

### Traffic Cameras (part 2)

In [None]:
# Re-doing the above tables and graphs, but merging onto the incident dataframe,
# so that we can determine how many incidents occurred near cameras (the
# opposite of the above).
# Rounding latitude/longitude to 4 decimal places lets rows be matched within
# roughly 10 m, so incidents and cameras do not need to sit exactly on top of
# each other.
incidents_df = incidents_df.round({'latitude': 4, 'longitude': 4})
camera_df = camera_df.round({'latitude': 4, 'longitude': 4})
# Relabel the 'data' column to set up the category name for the bar plot later.
# Bracket assignment is used because attribute-style assignment
# (camera_df.data = ...) only sets a plain attribute if the column is missing.
camera_df['data'] = 'Incidents with Cameras'
#camera_df#<-- visual QC of dataframe

In [None]:
# Merge onto the traffic incidents this time (left join keeps every incident),
# so we have the total number of incidents, of which only some have camera data.
# This allows us to compare how many incidents had a camera near them.
#
# Keep one row per distinct (rounded) camera location first. Otherwise an
# incident matching several cameras at the same rounded location would be
# repeated once per camera and inflate the incident totals.
camera_sites = camera_df.drop_duplicates(subset=['latitude', 'longitude'])
df_total = pd.merge(
    left=incidents_df,
    right=camera_sites,
    how='left',
    on=['latitude', 'longitude'],
)
# 'data_x' comes from the incident side, which a left join always keeps, so it
# needs no fill. 'data_y' is NaN wherever no camera matched; give those rows a
# proper name so we can group by it.
df_total['data_y'] = df_total['data_y'].fillna('Incidents with NO Cameras')
df_total = df_total.rename(columns={"Incidents": "Count"})
#df_total.tail(20)#<-- visual QC of dataframe

In [None]:
# Sum of the 'Count' column over all merged rows; used as the denominator when
# the percentages are calculated.
totalCount = df_total.loc[:, 'Count'].sum()
#totalCount #<-- visual QC of output

\pagebreak

### Number and percentage of incidents that occur with and without a camera nearby (Table)

In [None]:
# Joined onto incidents, so only incident rows are present and we can see how
# many of them have a camera nearby.
# Only 'Count' is aggregated: summing every column would also add up the
# coordinates and concatenate the text columns (pandas >= 2.0 no longer drops
# non-numeric columns by default), none of which is used in the table.
df_total2 = df_total.groupby('data_y')[['Count']].sum()
# Share of all incidents per category; totalCount is computed in the previous cell.
df_total2['%'] = (df_total2['Count'] / totalCount) * 100
df_total2 #<-- show table

\pagebreak

### Incidents that occurred with and without a camera nearby (bar graph)

In [None]:
# Bar plot of the percentage of incidents with and without a camera nearby.
# reset_index turns the 'data_y' group labels back into a regular column for seaborn.
df_total2 = df_total2.reset_index()
fig, ax = plt.subplots(figsize=(15, 7))
sns.barplot(data=df_total2, x='data_y', y='%', ax=ax)

ax.set_ylabel("Percentage (%)", fontsize=20)
ax.set_xlabel("Incidents", fontsize=20)
ax.set_title("Incidents Caught by Camera", fontsize=24)