\pagebreak

## stats.ipynb (computing grid data file)

### Libraries and functions

In [None]:
import numpy as np
import pandas as pd
import folium
import geopandas as gpd
from shapely.geometry import Polygon
from shapely.geometry import box

\pagebreak

### Creating 10x10 grid

In [None]:
# Load the city boundary layer. Forward slashes are used in the relative path: they avoid
# the invalid backslash escape sequences of a non-raw string literal and resolve on
# Windows as well as on other systems.
df_city_boundary_layer = pd.read_csv('../../CSV_files/City_Boundary_Layer.csv')

In [None]:
# Reduce the boundary string of the first row to a flat list of number tokens:
# drop the "POLYGON" tag, then delete every parenthesis and comma in a single pass.
string = df_city_boundary_layer["the_geom"].values[0]
string_stripped = string.replace("POLYGON", "").translate(str.maketrans("", "", "(),"))
string_split = string_stripped.split()

In [None]:
# the tokens follow a long/lat/long/lat... pattern:
# even positions hold longitudes, odd positions hold latitudes
list_long = [float(token) for token in string_split[0::2]]
list_lat = [float(token) for token in string_split[1::2]]

# extent of the boundary along each axis
min_long, max_long = min(list_long), max(list_long)
min_lat, max_lat = min(list_lat), max(list_lat)

In [None]:
# Rectangle spanning the extent of the boundary, exposed through the geo interface
# so that it can be handed to folium.GeoJson.
coord_box = box(minx=min_long, miny=min_lat, maxx=max_long, maxy=max_lat)
geo = gpd.GeoSeries(data=[coord_box]).__geo_interface__

In [None]:
# Centre the view on the middle of the bounding box (folium expects [latitude, longitude])
# so the box sits in the middle of the view instead of the view being anchored on the
# box's south-west corner.
map_osm = folium.Map(location=[(min_lat + max_lat) / 2, (min_long + max_long) / 2], zoom_start=10)
folium.GeoJson(geo).add_to(map_osm)

In [None]:
# Show the map with the bounding box layer (the bare expression is the cell's output)
map_osm

In [None]:
# 11 evenly spaced edges per axis delimit 10 columns and 10 rows
long_ten_split = np.linspace(start=min_long, stop=max_long, num=11)
lat_ten_split = np.linspace(start=min_lat, stop=max_lat, num=11)

In [None]:
grid_array = []

# Walk the rows from the lowest latitude upwards and, inside each row, the columns from
# the lowest longitude upwards, so cell 0 is the bottom-left cell and the last cell is
# the top-right one. Each cell is stored as [bot_left, bot_right, top_left, top_right],
# every corner being a [longitude, latitude] pair.
for south, north in zip(lat_ten_split[:-1], lat_ten_split[1:]):
    for west, east in zip(long_ten_split[:-1], long_ten_split[1:]):
        bot_left, bot_right = [west, south], [east, south]
        top_left, top_right = [west, north], [east, north]
        grid_array.append([bot_left, bot_right, top_left, top_right])

        # outline the cell on the map
        coord_box = box(west, south, east, north)
        geo = gpd.GeoSeries([coord_box]).__geo_interface__
        folium.GeoJson(geo).add_to(map_osm)

map_osm

\pagebreak

### Calculations for each 1x1 grid

In [None]:
# This method deals with csv files whose geometry column holds a multi-coordinate string
# (longitude latitude, longitude latitude, ...).

def compute_average_with_multistring(fileName, columnName, stringName, columnName2, grid_array):
    """Collect, for every grid cell, the values of the rows whose geometry touches the cell.

    Parameters
    ----------
    fileName : path or file-like object accepted by ``pd.read_csv``.
    columnName : name of the column holding the geometry string. After removing
        ``stringName``, parentheses and commas, the string must consist of
        whitespace-separated numbers in longitude, latitude, longitude, latitude... order.
    stringName : tag to strip from the geometry string (e.g. ``'MULTILINESTRING'``).
    columnName2 : name of the column whose value is collected (e.g. speed limit or volume).
    grid_array : list of cells, each ``[bot_left, bot_right, top_left, top_right]`` with
        every corner a ``[longitude, latitude]`` pair. Only ``bot_left`` (index 0) and
        ``top_right`` (index 3) are read.

    Returns
    -------
    list of lists, one per cell and in the order of ``grid_array``. A row's value is
    appended to a cell's list when at least one of the row's coordinates lies inside the
    cell (edges included). A row is counted at most once per cell, however many of its
    coordinates fall inside, but it can be counted in several cells.

    Notes
    -----
    * The bounding box of a row is computed from that row's own coordinates only.
    * A cell whose rectangle does not overlap the row's bounding box is skipped, and the
      search continues with the next cell.
    * Rows whose geometry is not a string (e.g. a missing value) or contains no
      coordinate pair are skipped.
    """
    df = pd.read_csv(fileName)

    # one result list per grid cell
    resultList = [[] for _ in range(len(grid_array))]

    for geometry, value in zip(df[columnName].values, df[columnName2].values):
        if not isinstance(geometry, str):
            continue

        # parse the geometry once per row into (longitude, latitude) pairs
        tokens = geometry.replace(stringName, '').replace('(', '').replace(',', '').replace(')', '').split()
        points = list(zip(map(float, tokens[0::2]), map(float, tokens[1::2])))
        if not points:
            continue

        # bounding box of this row only
        longs, lats = zip(*points)
        row_min_long, row_max_long = min(longs), max(longs)
        row_min_lat, row_max_lat = min(lats), max(lats)

        for i, cell in enumerate(grid_array):
            cell_min_long, cell_min_lat = cell[0][0], cell[0][1]
            cell_max_long, cell_max_lat = cell[3][0], cell[3][1]

            # To reduce search time: when the row's bounding box and the cell do not
            # overlap on either axis, no coordinate of the row can lie within the cell.
            if (row_min_long > cell_max_long or row_max_long < cell_min_long
                    or row_min_lat > cell_max_lat or row_max_lat < cell_min_lat):
                continue

            # any() stops at the first coordinate inside the cell, so a road that appears
            # several times within one cell is counted only once for that cell.
            if any(cell_min_long <= point_long <= cell_max_long and cell_min_lat <= point_lat <= cell_max_lat
                   for point_long, point_lat in points):
                resultList[i].append(value)

    return resultList

In [None]:
# This method deals with data containing a single longitude/latitude value per row

def compute_result(df, columnLong, columnLat, grid_array):
    """Mark, for every grid cell, each row of ``df`` whose coordinate lies inside the cell.

    Parameters
    ----------
    df : DataFrame with one longitude column and one latitude column.
    columnLong, columnLat : names of those two columns.
    grid_array : list of cells, each ``[bot_left, bot_right, top_left, top_right]`` with
        every corner a ``[longitude, latitude]`` pair. Only ``bot_left`` (index 0) and
        ``top_right`` (index 3) are read.

    Returns
    -------
    list of lists, one per cell and in the order of ``grid_array``. Each list holds one
    ``1`` per row located inside the cell, so its sum is the number of such rows.
    Cell edges are inclusive: a coordinate lying exactly on an edge shared by several
    cells is recorded in each of them.
    """
    resultList = [[] for _ in range(len(grid_array))]

    # iterate over the raw column values so the outcome does not depend on the index labels
    for point_long, point_lat in zip(df[columnLong].values, df[columnLat].values):
        for i, cell in enumerate(grid_array):
            if (cell[0][0] <= point_long <= cell[3][0]) and (cell[0][1] <= point_lat <= cell[3][1]):
                resultList[i].append(1)

    return resultList

\pagebreak

### Computing average speed limit for each area/grid

In [None]:
# convert to dataframe (one row per grid cell, one column per collected speed limit)
# forward slashes in the path avoid invalid backslash escape sequences and also resolve on non-Windows systems
df_average_speed_limit = pd.DataFrame(compute_average_with_multistring('../../CSV_files/Speed_Limits.csv', 'multiline', 'MULTILINESTRING', 'SPEED', grid_array))
# compute average
df_average_speed_limit['Average Speed Limit'] = df_average_speed_limit.mean(axis=1)
df_average_speed_limit = df_average_speed_limit[['Average Speed Limit']].copy()

\pagebreak

### Computing average traffic volume for each area/grid

In [None]:
# convert to dataframe (one row per grid cell, one column per collected volume)
# forward slashes in the path avoid invalid backslash escape sequences and also resolve on non-Windows systems
df_average_traffic_volumes = pd.DataFrame(compute_average_with_multistring('../../CSV_files/Traffic_Volumes_for_2018.csv', 'multilinestring', 'MULTILINESTRING', 'VOLUME', grid_array))
# compute average
df_average_traffic_volumes['Average Traffic Volume'] = df_average_traffic_volumes.mean(axis=1)
df_average_traffic_volumes = df_average_traffic_volumes[['Average Traffic Volume']].copy()

\pagebreak

### Computing traffic cameras for each area/grid

In [None]:
# forward slashes in the path avoid invalid backslash escape sequences and also resolve on non-Windows systems
df_traffic_cameras = pd.read_csv('../../CSV_files/Traffic_Camera_Locations.csv')
# convert to dataframe (one row per grid cell, one 1 per camera inside the cell)
df_traffic_cameras = pd.DataFrame(compute_result(df_traffic_cameras, 'longitude', 'latitude', grid_array))
# compute sum
df_traffic_cameras['Traffic Cameras'] = df_traffic_cameras.sum(axis=1)
df_traffic_cameras = df_traffic_cameras[['Traffic Cameras']].copy()

\pagebreak

### Computing traffic signals for each area/grid

In [None]:
# forward slashes in the path avoid invalid backslash escape sequences and also resolve on non-Windows systems
df_traffic_signals = pd.read_csv('../../CSV_files/Traffic_Signals.csv')
# convert to dataframe (one row per grid cell, one 1 per signal inside the cell)
df_traffic_signals = pd.DataFrame(compute_result(df_traffic_signals, 'longitude', 'latitude', grid_array))
# compute sum
df_traffic_signals['Traffic Signals'] = df_traffic_signals.sum(axis=1)
df_traffic_signals = df_traffic_signals[['Traffic Signals']].copy()

\pagebreak

### Computing traffic signs for each area/grid

In [None]:
# forward slashes in the path avoid invalid backslash escape sequences and also resolve on non-Windows systems
df_traffic_signs = pd.read_csv('../../CSV_files/Traffic_Signs.csv')

# extracting relevant signs
relevant_blade_types = [
    'Regulatory',
    'Warning',
    'Stop',
    'Playground',
    'Yield',
    'Pedestrian',
    'Disabled Parking',
    'Speed',
    'Bicycle / Pathway',
    'School',
]
df_traffic_signs = df_traffic_signs.loc[df_traffic_signs['BLADE_TYPE'].isin(relevant_blade_types)]

# reset index
df_traffic_signs = df_traffic_signs.reset_index(drop=True)

In [None]:
# one list per grid cell; a 1 is appended for every sign located in the cell
signs_per_grid = [[] for _ in range(len(grid_array))]

for point_text in df_traffic_signs['POINT'].values:
    # parse the "POINT (longitude latitude)" string once per sign, not once per cell
    coordinates = point_text.replace('POINT', '').replace('(', '').replace(')', '').split()
    sign_long, sign_lat = float(coordinates[0]), float(coordinates[1])

    for i, cell in enumerate(grid_array):
        if (cell[0][0] <= sign_long <= cell[3][0]) and (cell[0][1] <= sign_lat <= cell[3][1]):
            signs_per_grid[i].append(1)
            # a sign is attributed to the first matching cell only
            break

# convert to dataframe
df_traffic_signs = pd.DataFrame(signs_per_grid)
# compute sum
df_traffic_signs['Traffic Signs'] = df_traffic_signs.sum(axis=1)
df_traffic_signs = df_traffic_signs[['Traffic Signs']].copy()

\pagebreak

### Computing traffic incidents for each area/grid

In [None]:
# forward slashes in the path avoid invalid backslash escape sequences and also resolve on non-Windows systems
df_traffic_incidents = pd.read_csv('../../CSV_files/Traffic_Incidents.csv')
# extract 2018 incidents (na=False: a row with a missing id is excluded instead of
# putting a missing value into the boolean mask)
df_traffic_incidents = df_traffic_incidents[df_traffic_incidents['id'].str.startswith(str(2018), na=False)]

# convert to dataframe (one row per grid cell, one 1 per incident inside the cell)
df_traffic_incidents = pd.DataFrame(compute_result(df_traffic_incidents, 'Longitude', 'Latitude', grid_array))
# compute sum
df_traffic_incidents['Traffic Incidents'] = df_traffic_incidents.sum(axis=1)
df_traffic_incidents = df_traffic_incidents[['Traffic Incidents']].copy()

\pagebreak

### Combining results into 1 dataframe

In [None]:
# Place the per-cell statistics side by side, one column per statistic.
# The row index identifies the grid cell:
# grid 0 is the bottom-left cell and the last grid is the top-right cell.
df = pd.concat(
    [
        df_average_speed_limit,
        df_average_traffic_volumes,
        df_traffic_cameras,
        df_traffic_signals,
        df_traffic_signs,
        df_traffic_incidents,
    ],
    axis=1,
).reindex(df_average_speed_limit.index)
df