In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from sqlalchemy import create_engine, text
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from shapely import wkt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from sqlalchemy import create_engine, text
import geopy.distance

In [None]:
pd.set_option('display.max_columns', None)

### Read in all the source datasets

In [None]:
### signals shape file
signals_geo = gpd.read_file('../data/traffic_signals_geo.zip')

In [None]:
### intelligent devices shape file
devices_geo = gpd.read_file('../data/intelligent_traffic_system_devices.zip')

In [None]:
### accidents csv
accidents_geo = pd.read_csv('../data/traffic_accidents.csv')

In [None]:
### zipcodes geo file (a .zip archive read with geopandas, not a csv)
zipcodes = gpd.read_file('../data/CO_Zips.zip')

In [None]:
accidents_geo.head(2)

In [None]:
### date formatting
accidents_geo['reported_date'] = pd.to_datetime(accidents_geo['reported_date'], format='mixed')


In [None]:
### convert Accidents to geodf
# The point geometry is built from the raw 'lon'/'lat' columns, so the CRS is
# declared explicitly as WGS84 (EPSG:4326) instead of being borrowed from
# signals_geo. The label then stays correct whatever CRS the signals file
# uses, and it matches the CRS that zipcodes is reprojected to before the
# spatial join.
# NOTE(review): assumes 'lon'/'lat' hold decimal degrees - confirm against the
# data source.
accidents_geo = gpd.GeoDataFrame(
    accidents_geo,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(accidents_geo['lon'], accidents_geo['lat']),
)

In [None]:
zipcodes = zipcodes.to_crs("epsg:4326") 

In [None]:
zipcodes.plot()

In [None]:
accidents_geo.head(2)

In [None]:
accidents_geo.top_traffic_accident_offense.value_counts()

#### Old approach: a function with an if/elif chain (kept commented out for reference)

In [None]:
# def set_type(type):
#     if type["top_traffic_accident_offense"].strip() == "TRAF - ACCIDENT":
#         return "ACCIDENT"
#     elif type["top_traffic_accident_offense"].strip() == "TRAF - ACCIDENT - HIT & RUN":
#         return "HIT & RUN"
#     elif type["top_traffic_accident_offense"].strip() == "TRAF - ACCIDENT - DUI/DUID":
#         return "DUI"
#     elif type["top_traffic_accident_offense"].strip() == "TRAF - ACCIDENT - SBI":
#         return "SBI"
#     elif type["top_traffic_accident_offense"].strip() == "TRAF - ACCIDENT - POLICE":
#         return "POLICE"
#     elif type["top_traffic_accident_offense"].strip() == "TRAF - ACCIDENT - FATAL":
#         return "FATAL"
#     else:
#         pass
    
# accidents_geo = accidents_geo.assign(offense=accidents_geo.apply(set_type, axis=1))

In [None]:
# accidents_geo[accidents_geo["top_traffic_accident_offense"].strip() == "TRAF - ACCIDENT - HIT & RUN"]

#### ---------------------------------------------------------------------------

#### Build the column from the last "-"-separated piece of each label instead

In [None]:
# Keep only the text after the last "-" of each offense label, trimmed of
# surrounding whitespace (e.g. "TRAF - ACCIDENT - FATAL" -> "FATAL").
# The .str accessor processes the whole column at once and leaves missing
# values as NaN, whereas calling value.split() row by row raises on them.
# x is a Series sharing accidents_geo's index, so assigning it as a column
# lines up row for row.
x = accidents_geo['top_traffic_accident_offense'].str.split("-").str[-1].str.strip()

In [None]:
accidents_geo['offense_clean'] = x

In [None]:
accidents_geo[accidents_geo['offense_clean'] == 'ACCIDENT']

#### more date info extraction

In [None]:
# Make sure 'reported_date' is datetime, then derive the calendar parts from
# it through the .dt accessor. Columns are added in the order listed.
accidents_geo['reported_date'] = pd.to_datetime(accidents_geo['reported_date'], format='ISO8601')
reported = accidents_geo['reported_date'].dt
date_parts = {
    'month': reported.month,
    'month_name': reported.month_name(),
    'year': reported.year,
    'day_name': reported.day_name(),
    'hour': reported.hour,
}
for column_name, column_values in date_parts.items():
    accidents_geo[column_name] = column_values

In [None]:
accidents_geo.head()

In [None]:
accidents_geo.offense_id.nunique()

In [None]:
accidents_geo['TU1_TRAVEL_DIRECTION'] = accidents_geo['TU1_TRAVEL_DIRECTION'].str.lower()

In [None]:
accidents_geo['TU1_TRAVEL_DIRECTION'].value_counts()

In [None]:
### join on zipcodes
# Attach the zipcode attributes to every accident point lying within a
# zipcode geometry (same spatial join, written in method form).
accidents_geo = accidents_geo.sjoin(zipcodes, predicate='within')

In [None]:
# Drop three columns that are not kept, then rename 'ZCTA5CE10' to 'zipcode'.
accidents_geo = (
    accidents_geo
    .drop(columns=['index_right', 'OBJECTID', 'GEOID10'])
    .rename(columns={'ZCTA5CE10': 'zipcode'})
)
accidents_geo.head()

In [None]:
### function to build season column based on month
def get_season(x):
    """Return the season label for a row based on its 'month_name' value.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must support ``x['month_name']``.

    Returns
    -------
    str
        "WINTER", "SPRING" or "SUMMER" for the months listed below, and
        "FALL" for every other value (October, November, or anything that
        is not a recognised month name).
    """
    # NOTE(review): March is labelled WINTER and September SUMMER, which
    # leaves SPRING with April-May and FALL with October-November only.
    # Confirm this grouping is intentional.
    season_months = (
        ("WINTER", ("December", "January", "February", "March")),
        ("SPRING", ("April", "May")),
        ("SUMMER", ("June", "July", "August", "September")),
    )
    month_name = x['month_name']
    for season, months in season_months:
        if month_name in months:
            return season
    return "FALL"

# get_season only reads the 'month_name' field, so pass it that single value
# per row instead of building every full row with apply(axis=1). The labels
# are the same; only the cost on a large frame changes.
accidents_geo = accidents_geo.assign(
    season=accidents_geo['month_name'].map(
        lambda month_name: get_season({'month_name': month_name})
    )
)

In [None]:
accidents_geo.head()

In [None]:
signals_geo.head()

In [None]:
accidents_geo.head()

In [None]:
### extract lat and long
# Read the x / y coordinates of each signal geometry into plain columns.
# NOTE(review): naming them lon/lat assumes the geometries are points in a
# longitude/latitude CRS - confirm against signals_geo.crs.
signal_points = signals_geo['geometry']
signals_geo['lon'], signals_geo['lat'] = signal_points.x, signal_points.y

In [None]:
# for crash in accidents_geo.index:
#     distances = []
#     for sig in signals_geo.index:
#         distances.append(geopy.distance.distance(accidents_geo.loc[crash, ['lat','lon']], signals_geo.loc[sig,['lat','lon']]))
#     min_dist = min(distances)
#     accidents_geo.loc[crash, 'min_distance'] = min_dist

# accidents_geo.head()

### Testing the distance for loop on a small subset (first 10 rows of each frame)

In [None]:
# Take an independent copy of the first 10 rows. The distance loop further
# down adds a 'min_distance' column to acc100, and writing into a bare
# .head() slice can trigger pandas' SettingWithCopyWarning.
acc100 = accidents_geo.head(10).copy()

In [None]:
sig100 = signals_geo.head(10)

In [None]:
# For each sampled crash, measure the distance to every sampled signal and
# keep the smallest one in 'min_distance'.
# NOTE(review): the value stored is whatever geopy.distance.distance returns
# (presumably a Distance object, not a plain number) - confirm whether a
# unit attribute such as .km or .miles should be stored instead.
for crash in acc100.index:
    # The crash coordinates do not change across the inner iteration.
    crash_point = acc100.loc[crash, ['lat','lon']]
    min_dist = min(
        geopy.distance.distance(crash_point, sig100.loc[sig, ['lat','lon']])
        for sig in sig100.index
    )
    acc100.loc[crash, 'min_distance'] = min_dist

In [None]:
acc100

In [None]:
# # # def get_distance(self):
    
# for i,row in accidents_geo.iterrows(): # A
#     a = row.lat, row.lon
#     distances = []
#     for j,row2 in signals_geo.iterrows(): # B
#         b = row2.lat, row2.lon
#         distances.append(geopy.distance.geodesic(a, b).miles)

#     min_distance = min(distances)
#     min_index = distances.index(min_distance)
# #         return(min_distance)    
# print(min_distance, "miles")

In [None]:
# accidents_geo = accidents_geo.assign(distance_to_signal=accidents_geo.apply(get_distance))

In [None]:
accidents_geo.head(2)

In [None]:
accidents_geo['LIGHT_CONDITION'] = accidents_geo['LIGHT_CONDITION'].str.lower()

In [None]:
accidents_geo['LIGHT_CONDITION'] = accidents_geo['LIGHT_CONDITION'].str.replace('-',' ')

In [None]:
accidents_geo['LIGHT_CONDITION'].value_counts()

In [None]:
accidents_geo['reported_date'].min()

In [None]:
accidents_geo['reported_date'].max()

In [None]:
accidents_geo['ROAD_CONDITION'] = accidents_geo['ROAD_CONDITION'].str.lower()

In [None]:
accidents_geo['ROAD_DESCRIPTION'] = accidents_geo['ROAD_DESCRIPTION'].str.lower()

In [None]:
accidents_geo['ROAD_DESCRIPTION'] = accidents_geo['ROAD_DESCRIPTION'].str.replace('-',' ')

In [None]:
accidents_geo['ROAD_DESCRIPTION'].value_counts()

In [None]:
traffic = pd.read_csv('../data/MASTER_TRAFFIC_COUNT.csv')
traffic.head()

In [None]:
# Save each frame as CSV without the index column.
# NOTE(review): '../data/traffic_accidents.csv' is also the file the accidents
# data is read from near the top of this notebook, so this save replaces the
# input and a later run would load the already-processed data.
csv_exports = [
    (signals_geo, '../data/traffic_signals_geo.csv'),
    (devices_geo, '../data/intelligent_traffic_system_devices.csv'),
    (accidents_geo, '../data/traffic_accidents.csv'),
]
for frame, csv_path in csv_exports:
    frame.to_csv(csv_path, index=False)

In [None]:
accidents_geo.head()

### No more on this notebook. Quit running it.