In [1]:
not_already_runned = True

## Librerías

In [2]:
if not_already_runned:
  !pip install plotly --upgrade
  not_already_runned = False

Collecting plotly
  Downloading plotly-5.6.0-py2.py3-none-any.whl (27.7 MB)
[K     |████████████████████████████████| 27.7 MB 24.2 MB/s 
Installing collected packages: plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 5.5.0
    Uninstalling plotly-5.5.0:
      Successfully uninstalled plotly-5.5.0
Successfully installed plotly-5.6.0


In [3]:
import os
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

import requests
from bs4 import BeautifulSoup

from scipy.signal import find_peaks
from sklearn.preprocessing import MinMaxScaler

### Settings

In [4]:
pd.set_option('plotting.backend', 'plotly')

## General

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Mapas

In [6]:
# Mapbox access token used by setupMap. Prefer supplying it through the
# MAPBOX_ACCESS_TOKEN environment variable; the literal below is kept only as a
# fallback so existing runs keep working.
# NOTE(review): a token committed to a notebook should be rotated and removed.
access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiZGllZ28tZHBoIiwiYSI6ImNreXF4d243ZzBvbWYybm55eTluM3dmNXAifQ.54kwpDBJZyQ7z1Yr8F7SfQ',
)
def setupMap(data, var_of_zones = None, zones = [], legend = True, dynamic = False):
  """Create the base Mapbox figure, optionally marking the centroid of each zone.

  Parameters:
    data: DataFrame with 'Latitude' and 'Longitude' columns; their means are
      used as the map centre.
    var_of_zones: name of the column in `data` that identifies a zone. Only
      read when `zones` is non-empty.
    zones: iterable of zone values; one marker is drawn at the mean
      latitude/longitude of the rows belonging to each zone.
    legend: whether the per-zone markers appear in the legend.
    dynamic: when True, no zone markers are added at all.

  Returns:
    The plotly Figure, styled with the "outdoors" Mapbox style at zoom 5.
  """
  figure = go.Figure()

  # Centroid of every requested zone (empty lists when no zones are given).
  lat = [data['Latitude'][data[var_of_zones]==post_code].mean() for post_code in zones]
  lon = [data['Longitude'][data[var_of_zones]==post_code].mean() for post_code in zones]

  if not dynamic:
    # Slightly larger black markers drawn first act as an outline for the
    # coloured per-zone markers added below.
    figure.add_trace(
        go.Scattermapbox(
            lat = lat,
            lon = lon,
            mode = 'markers+text',
            marker = dict(size = 25, color = 'black'),
            showlegend = False
        )
    )

    for i, post_code in enumerate(zones):
      figure.add_trace(
          go.Scattermapbox(
              # One-element lists so coordinates line up with the one-element
              # `text` array below.
              lat = [lat[i]],
              lon = [lon[i]],
              mode = 'markers',
              # Trace names must be strings; zone values may be numeric.
              name = str(post_code),
              marker = dict(size = 23),
              text = [post_code],
              hovertemplate = 'Código Postal' + ": %{text}" + '<br><br>' + "<extra></extra>",
              showlegend = legend
          )
      )

  figure.update_layout(
      mapbox_style = "outdoors",
      width = 1500,
      height = 900,
      mapbox_center_lat = data['Latitude'].mean(),
      mapbox_center_lon = data['Longitude'].mean(),
      mapbox = dict(accesstoken = access_token),
  )
  figure.update_mapboxes(zoom = 5)

  return figure

In [7]:
def getNumericMap(map, data, heat_var, radius, z_range):
  """Add a density (heat) layer weighted by a numeric column.

  Parameters:
    map: plotly Figure to draw on (it is modified and returned).
    data: DataFrame with 'Latitude', 'Longitude' and `heat_var` columns.
    heat_var: name of the numeric column used both as heat weight and as
      hover text.
    radius: radius passed to the density trace.
    z_range: two-element sequence (zmin, zmax) for the colour scale.

  Returns:
    The same figure with the density trace appended.
  """
  # The weights are read straight from `data`; no copy of the frame is needed
  # because nothing is written back.
  heat_values = data[heat_var]

  map.add_trace(
      go.Densitymapbox(
          lat = data['Latitude'],
          lon = data['Longitude'],
          z = heat_values,
          radius = radius,
          zmin = z_range[0],
          zmax = z_range[1],
          text = heat_values,
          hovertemplate = heat_var + ": %{text}" + '<br><br>' +
                          "Count: %{z}" + '<br>' +
                          "Coords: (%{lat:.4f},%{lon:.4f})" +
                          "<extra></extra>"
      )
  )
  return map

In [8]:
def getCategoryMap(map, data, cat_var, legend = True):
  """Add one scatter trace per distinct value of a categorical column.

  Parameters:
    map: plotly Figure to draw on (it is modified and returned).
    data: DataFrame with 'Latitude', 'Longitude' and `cat_var` columns.
    cat_var: name of the column whose unique values define the traces.
    legend: whether the traces appear in the legend.

  Returns:
    The same figure with one marker trace per category appended.
  """
  for label in data[cat_var].unique():
    # Boolean mask selecting the rows of this category.
    in_category = data[cat_var] == label

    map.add_trace(
        go.Scattermapbox(
            lat = data['Latitude'][in_category],
            lon = data['Longitude'][in_category],
            mode = 'markers',
            name = str(label),
            marker = dict(size = 5),
            text = data[cat_var][in_category],
            hovertemplate = cat_var + ": %{text}" + '<br><br>' + "<extra></extra>",
            showlegend = legend
        )
    )

  return map

In [9]:
def addSectors(map, data, sectors_to_plot, color = 'orange', legend = False):
  """Draw each requested sector as a filled polygon plus a centre marker.

  `data` is looked up with `.loc[sector]`; each row must expose a `Sector`
  object (with `geohash_bounds`) and an `IncidentsTotal` value, which is shown
  as hover text on the centre marker. Returns the figure that was passed in.
  """
  hover = 'Cantidad de incidentes' + ": %{text}" + '<br><br>' + "<extra></extra>"

  for sector in sectors_to_plot:
    row = data.loc[sector]
    outline = np.array(row.Sector.geohash_bounds)
    lons, lats = outline[:,0], outline[:,1]

    # Filled outline of the sector.
    polygon = go.Scattermapbox(
        fill = "toself",
        lon = lons,
        lat = lats,
        marker = { 'size': 10, 'color': color },
        showlegend = legend,
    )
    map.add_trace(polygon)

    # Centre marker; the last vertex is left out of the mean (it repeats the
    # first one when the ring is closed).
    centre = go.Scattermapbox(
        lon = [np.mean(lons[:-1])],
        lat = [np.mean(lats[:-1])],
        marker = { 'size': 10, 'color': color },
        text = [int(row.IncidentsTotal)],
        hovertemplate = hover,
        showlegend = legend,
    )
    map.add_trace(centre)

  return map

In [11]:
def plotSectorsAndNeighbors(map, data, sectors_to_plot, legend = False):
  """Plot the given sectors in red and their neighbouring sectors in the default colour.

  Neighbours are obtained from each sector's `get_neighbors` against the whole
  `data['Sector']` column; sectors that are themselves in `sectors_to_plot`
  are not drawn a second time as neighbours. Returns the figure.
  """
  adjacent = [
      neighbor
      for sector in sectors_to_plot
      for neighbor in data.loc[sector].Sector.get_neighbors(data['Sector'])
  ]

  # De-duplicate both groups and keep the neighbours disjoint from the focus set.
  focus = list(set(sectors_to_plot))
  surrounding = list(set(adjacent) - set(focus))

  map = addSectors(map, data, focus, color = 'red', legend = legend)
  map = addSectors(map, data, surrounding)

  return map

### Geotab

In [208]:
geotab = pd.read_csv('/content/drive/Shareddrives/SAS Safe Roads 2022 Hackathon/Datos/Canada/geotab_canada_on.csv')

In [13]:
geotab

Unnamed: 0,Geohash,GeohashBounds,Latitude_SW,Longitude_SW,Latitude_NE,Longitude_NE,Location,Latitude,Longitude,City,County,State,Country,ISO_3166_2,SeverityScore,IncidentsTotal,UpdateDate,Version
0,dpzc3my,"POLYGON((-79.0383911132812 43.8450622558594, -...",43.845,-79.038,43.846,-79.037,POINT(-79.03796 43.84536),43.845,-79.038,Ajax,,Ontario,Canada,CA-ON,0.040,33.0,2021-11-02,5.1
1,dpzcdkx,"POLYGON((-78.9930725097656 43.8821411132812, -...",43.882,-78.993,43.884,-78.992,POINT(-78.99196 43.88332),43.883,-78.992,Ajax,,Ontario,Canada,CA-ON,0.083,34.0,2021-11-02,5.1
2,dpzc3qk,"POLYGON((-79.0411376953125 43.8478088378906, -...",43.848,-79.041,43.849,-79.040,POINT(-79.04025 43.84861),43.849,-79.040,Ajax,,Ontario,Canada,CA-ON,0.006,83.0,2021-11-02,5.1
3,dpzc3qj,"POLYGON((-79.0397644042969 43.846435546875, -7...",43.846,-79.040,43.848,-79.038,POINT(-79.03931 43.84687),43.847,-79.039,Ajax,,Ontario,Canada,CA-ON,0.028,34.0,2021-11-02,5.1
4,dpzk375,"POLYGON((-79.7456359863281 44.3572998046875, -...",44.357,-79.746,44.359,-79.744,POINT(-79.74456 44.35797),44.358,-79.745,Essa,,Ontario,Canada,CA-ON,0.361,117.0,2021-11-02,5.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9370,dpz24m1,"POLYGON((-79.7044372558594 43.6212158203125, -...",43.621,-79.704,43.623,-79.703,POINT(-79.70378 43.6223),43.622,-79.704,Mississauga,,Ontario,Canada,CA-ON,0.003,28.0,2021-11-02,5.1
9371,dpz25z9,"POLYGON((-79.6385192871094 43.6349487304688, -...",43.635,-79.639,43.636,-79.637,POINT(-79.6381 43.63549),43.635,-79.638,Mississauga,,Ontario,Canada,CA-ON,0.070,28.0,2021-11-02,5.1
9372,dpz269w,"POLYGON((-79.6865844726562 43.6459350585938, -...",43.646,-79.687,43.647,-79.685,POINT(-79.6854 43.64681),43.647,-79.685,Mississauga,,Ontario,Canada,CA-ON,0.037,28.0,2021-11-02,5.1
9373,f20rndr,"POLYGON((-78.1031799316406 46.2428283691406, -...",46.243,-78.103,46.244,-78.102,POINT(-78.10265 46.24409),46.244,-78.103,"Head, Clara and Maria",,Ontario,Canada,CA-ON,0.111,28.0,2021-11-02,5.1


In [14]:
def getBoundsFromString(string):
  """Parse a polygon string such as 'POLYGON((x1 y1, x2 y2, ...))' into float tuples.

  The vertex list is taken from between the first '((' and the last '))', so
  both 'POLYGON((' and 'POLYGON ((' spellings are accepted, as are commas with
  or without a following space.

  Returns:
    A list of tuples of floats, one per vertex, in the order they appear.

  Raises:
    ValueError: if the parentheses are missing or a number cannot be parsed.
  """
  start = string.index('((') + 2
  end = string.rindex('))')
  return [
      tuple(float(num) for num in pair.split())
      for pair in string[start:end].split(',')
  ]

In [15]:
class Sector:
  """A geohash cell described by its identifier and its polygon vertices."""

  def __init__(self, geohash, geohash_bounds):
    """Store the geohash and parse `geohash_bounds` (a polygon string) into vertex tuples."""
    super().__init__()
    self.geohash = geohash
    self.geohash_bounds = getBoundsFromString(geohash_bounds)

  def get_neighbors(self, sector_list):
    """Return the geohashes of sectors that share at least one vertex with this one.

    Parameters:
      sector_list: iterable of objects exposing `geohash` and `geohash_bounds`.

    Returns:
      List of geohashes (this sector's own geohash is excluded). The list is
      also stored on the instance as `self.neighbors`.
    """
    # Built once: it does not change while scanning the candidates.
    own_vertices = set(self.geohash_bounds)
    self.neighbors = [
        other.geohash
        for other in sector_list
        if other.geohash != self.geohash
        and not own_vertices.isdisjoint(other.geohash_bounds)
    ]
    return self.neighbors

In [209]:
geotab['Sector'] = [Sector(*pair) for pair in zip(geotab.Geohash, geotab.GeohashBounds)]

In [210]:
geotab = geotab.set_index('Geohash')

In [211]:
geotab['SeverityScore'] = geotab.SeverityScore/geotab.SeverityScore.max()

In [19]:
geotab.sort_values('IncidentsTotal', ascending = False).IncidentsTotal[:10].plot.bar()

In [20]:
getNumericMap(plotSectorsAndNeighbors(setupMap(geotab),
                                      geotab,
                                      geotab.sort_values('IncidentsTotal', ascending = False).index[:10]), 
              geotab, 
              'IncidentsTotal', 
              20, 
              [0, geotab.IncidentsTotal.max()])

### KSI

In [212]:
ksi = pd.read_csv('/content/drive/Shareddrives/SAS Safe Roads 2022 Hackathon/Datos/Canada/ksi.csv')

In [20]:
ksi

Unnamed: 0,X,Y,INDEX_,ACCNUM,YEAR,DATE,TIME,HOUR,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,DIVISION,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,POLICE_DIVISION,HOOD_ID,NEIGHBOURHOOD,ObjectId
0,-8.844611e+06,5.412414e+06,3387730.0,892658.0,2006.0,2006/03/11 05:00:00+00,852.0,8.0,BLOOR ST W,DUNDAS ST W,<Null>,Major Arterial,Toronto and East York,4,11,43.656345,-79.452490,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Driver,unknown,,<Null>,South,"Automobile, Station Wagon",Turning Left,Failed to Yield Right of Way,Unknown,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,D11,88.0,High Park North (88),1.0
1,-8.844611e+06,5.412414e+06,3387731.0,892658.0,2006.0,2006/03/11 05:00:00+00,852.0,8.0,BLOOR ST W,DUNDAS ST W,<Null>,Major Arterial,Toronto and East York,4,11,43.656345,-79.452490,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Pedestrian,65 to 69,Fatal,<Null>,North,Other,<Null>,<Null>,<Null>,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Unknown,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,D11,88.0,High Park North (88),2.0
2,-8.816480e+06,5.434843e+06,3388101.0,892810.0,2006.0,2006/03/11 05:00:00+00,915.0,9.0,MORNINGSIDE AVE,SHEPPARD AVE E,<Null>,Major Arterial,Scarborough,25,42,43.801943,-79.199786,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Turning Movement,Motorcycle Driver,45 to 49,Fatal,<Null>,East,Motorcycle,Turning Right,Disobeyed Traffic Control,Unknown,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,Yes,<Null>,<Null>,D42,131.0,Rouge (131),3.0
3,-8.816480e+06,5.434843e+06,3388102.0,892810.0,2006.0,2006/03/11 05:00:00+00,915.0,9.0,MORNINGSIDE AVE,SHEPPARD AVE E,<Null>,Major Arterial,Scarborough,25,42,43.801943,-79.199786,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Turning Movement,Driver,unknown,,<Null>,South,"Automobile, Station Wagon",Going Ahead,Driving Properly,Unknown,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,Yes,<Null>,<Null>,D42,131.0,Rouge (131),4.0
4,-8.822759e+06,5.424516e+06,3387793.0,892682.0,2006.0,2006/03/12 05:00:00+00,240.0,2.0,EGLINTON AVE E,COMMONWEALTH AVE,<Null>,Major Arterial,Scarborough,2120,41,43.734945,-79.256190,Mid-Block,<Null>,No Control,Clear,Dark,Dry,Fatal,Pedestrian Collisions,Driver,25 to 29,,<Null>,West,"Automobile, Station Wagon",Going Ahead,Other,"Ability Impaired, Alcohol",<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,D41,138.0,Eglinton East (138),5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16855,-8.820837e+06,5.421411e+06,81509784.0,1636966.0,2020.0,2020/08/30 04:00:00+00,1340.0,13.0,BRIMLEY RD,BARKDENE HILLS,50 m South of,<Null>,<Null>,<Null>,<Null>,43.714793,-79.238926,Mid-Block,Non Intersection,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Cyclist,25 to 29,Major,<Null>,North,Bicycle,Going Ahead,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Driving Properly,Normal,<Null>,Yes,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,D41,123.0,Cliffcrest (123),16856.0
16856,-8.820068e+06,5.425334e+06,81505452.0,1650701.0,2020.0,2020/09/01 04:00:00+00,1205.0,12.0,EGLINTON AVE E,BELLAMY RD N,<Null>,Major Arterial,Scarborough,2120,43,43.740256,-79.232021,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Turning Movement,Driver,65 to 69,,<Null>,East,"Automobile, Station Wagon",Turning Left,Failed to Yield Right of Way,Inattentive,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,D43,138.0,Eglinton East (138),16857.0
16857,-8.820068e+06,5.425334e+06,81505453.0,1650701.0,2020.0,2020/09/01 04:00:00+00,1205.0,12.0,EGLINTON AVE E,BELLAMY RD N,<Null>,Major Arterial,Scarborough,2120,43,43.740256,-79.232021,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Turning Movement,Driver,30 to 34,Minor,<Null>,West,"Automobile, Station Wagon",Going Ahead,Driving Properly,Normal,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,D43,138.0,Eglinton East (138),16858.0
16858,-8.820068e+06,5.425334e+06,81505454.0,1650701.0,2020.0,2020/09/01 04:00:00+00,1205.0,12.0,EGLINTON AVE E,BELLAMY RD N,<Null>,Major Arterial,Scarborough,2120,43,43.740256,-79.232021,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Turning Movement,Passenger,10 to 14,Major,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,D43,138.0,Eglinton East (138),16859.0


In [213]:
ksi['Latitude'] = ksi.LATITUDE
ksi['Longitude'] = ksi.LONGITUDE

In [24]:
ksi.VEHTYPE.plot.hist()

In [None]:
getCategoryMap(setupMap(geotab, legend = False), ksi, 'VEHTYPE')

### Traffic collisions

In [214]:
traffic = pd.read_csv('/content/drive/Shareddrives/SAS Safe Roads 2022 Hackathon/Datos/Canada/traffic_collisions_asr_t_tbl_001.csv')

In [23]:
traffic

Unnamed: 0,X,Y,OBJECTID,EventUniqueId,OccurrenceDate,Month,Day_of_Week,Year,Hour,Division,Atom,Neighbourhood,Fatalities,Injury_Collisions,FTR_Collisions,PD_Collisions,Longitude,Latitude,ObjectId2
0,-8.834615e+06,5.412065e+06,126.0,GO-20141274487,2014/01/03 05:00:00+00,January,Friday,2014.0,14.0,D51,73,Moss Park (73),0.0,NO,NO,YES,-79.362699,43.654078,1.0
1,-8.832662e+06,5.414068e+06,127.0,GO-20141274697,2014/01/03 05:00:00+00,January,Friday,2014.0,15.0,D54/D55,68,North Riverdale (68),0.0,YES,NO,NO,-79.345155,43.667095,2.0
2,-8.847529e+06,5.419398e+06,128.0,GO-20141274712,2014/01/03 05:00:00+00,January,Friday,2014.0,15.0,D12,30,Brookhaven-Amesbury (30),0.0,NO,NO,YES,-79.478704,43.701719,3.0
3,-8.851156e+06,5.406576e+06,129.0,GO-20141274922,2014/01/03 05:00:00+00,January,Friday,2014.0,15.0,D22,17,Mimico (includes Humber Bay Shores) (17),0.0,NO,NO,YES,-79.511291,43.618394,4.0
4,-8.831959e+06,5.422290e+06,130.0,GO-20141275114,2014/01/03 05:00:00+00,January,Friday,2014.0,16.0,D33,42,Banbury-Don Mills (42),0.0,YES,NO,NO,-79.338834,43.720501,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415913,-8.847325e+06,5.408403e+06,482131.0,GO-2020921177,2020/05/18 04:00:00+00,May,Monday,2020.0,20.0,D22,17,Mimico (includes Humber Bay Shores) (17),0.0,NO,NO,YES,-79.476876,43.630270,415914.0
415914,-8.839730e+06,5.413850e+06,482132.0,GO-2020921386,2020/05/18 04:00:00+00,May,Monday,2020.0,21.0,D14,95,Annex (95),0.0,NO,NO,YES,-79.408642,43.665676,415915.0
415915,-8.843856e+06,5.433373e+06,482134.0,GO-2020921567,2020/05/18 04:00:00+00,May,Monday,2020.0,22.0,D32,35,Westminster-Branson (35),0.0,YES,NO,NO,-79.445714,43.792408,415916.0
415916,-8.833117e+06,5.412011e+06,482135.0,GO-2020921807,2020/05/18 04:00:00+00,May,Monday,2020.0,21.0,D54/D55,70,South Riverdale (70),0.0,YES,YES,NO,-79.349240,43.653729,415917.0


### Meshing

In [24]:
# Use the first sector as the reference cell for the mesh step sizes.
# .iloc[0] selects by position: geotab is indexed by Geohash strings here, and
# integer keys on a label index rely on a deprecated positional fallback.
coords_example = np.array(geotab['Sector'].iloc[0].geohash_bounds)
# Horizontal step: difference in the first coordinate between vertices 0 and 1.
long_delta = abs(coords_example[0,0] - coords_example[1,0])
# Vertical step: difference in the second coordinate between vertices 0 and 2.
lat_delta = abs(coords_example[0,1] - coords_example[2,1])

In [25]:
longs = list(geotab['Longitude']) + list(ksi['Longitude']) + list(traffic['Longitude'])
lats = list(geotab['Latitude']) + list(ksi['Latitude']) + list(traffic['Latitude'])

In [26]:
min_long, max_long = min(longs), max(longs)
min_lat, max_lat = min(lats), max(lats)

In [79]:
def getCoordsFromString(string):
  """Turn a mesh key like '(x1, y1)|(x2, y2)|...' back into a list of float tuples."""
  vertices = []
  for chunk in string.split('|'):
    # Drop the surrounding parentheses, then split the two numbers.
    numbers = chunk[1:-1].split(', ')
    vertices.append(tuple(map(float, numbers)))
  return vertices

In [192]:
# mesh_freqs:    cell key -> number of points that fall in the cell
# mesh_coords:   cell key -> list of the cell's corner tuples (closed ring)
# coords_class:  str((longitude, latitude)) of a point -> cell key
# hash_ciphered: cell key -> short id (index, as a string, of the last point seen in the cell)
mesh_freqs = {}
mesh_coords = {}
coords_class = {}
hash_ciphered = {}

for k, (lon, lat) in enumerate(zip(longs, lats)):
  # Column/row of the grid cell that contains the point.
  col = np.floor((lon - min_long) / long_delta)
  row = np.floor((lat - min_lat) / lat_delta)

  # The far edges use (index + 1) rather than ceil(): for a point lying exactly
  # on a grid line floor == ceil, which produced a zero-width cell. Both edges
  # keep the form min + index * delta so adjacent cells share bit-identical
  # corners (the neighbour search compares corner tuples for equality).
  # float() keeps NumPy scalar reprs out of the key string.
  left = float(min_long + col * long_delta)
  right = float(min_long + (col + 1) * long_delta)
  down = float(min_lat + row * lat_delta)
  up = float(min_lat + (row + 1) * lat_delta)

  corners = [(left, down), (right, down), (right, up), (left, up), (left, down)]
  cell_key = '|'.join(str(corner) for corner in corners)

  coords_class[str((lon, lat))] = cell_key
  hash_ciphered[cell_key] = str(k)
  mesh_coords[cell_key] = getCoordsFromString(cell_key)
  mesh_freqs[cell_key] = mesh_freqs.get(cell_key, 0) + 1

In [132]:
mesh_freqs = dict(sorted(mesh_freqs.items(), key = lambda x: x[1], reverse = True))
mesh_sectors = list(mesh_freqs.keys())

In [123]:
def addHashes(data, coords_lookup = None):
  """Add a 'Hash' column holding the mesh cell key of every row.

  Parameters:
    data: DataFrame with 'Longitude' and 'Latitude' columns. It is modified
      in place.
    coords_lookup: mapping from str((longitude, latitude)) to a cell key.
      Defaults to the notebook-level `coords_class` dictionary.

  Returns:
    The same DataFrame, with the 'Hash' column set.

  Raises:
    KeyError: if a coordinate pair is not present in the lookup.
  """
  if coords_lookup is None:
    coords_lookup = coords_class

  # Iterating the columns yields plain Python floats, the same kind of value
  # the lookup keys were built from (list(Series)); .iloc would return NumPy
  # scalars, whose repr is not guaranteed to match.
  data['Hash'] = [
      coords_lookup[str((lon, lat))]
      for lon, lat in zip(data['Longitude'], data['Latitude'])
  ]
  return data

In [215]:
geotab = addHashes(geotab)
ksi = addHashes(ksi)
traffic = addHashes(traffic)

In [216]:
def cipherHashes(data):
  """Add a 'Ciphered Hash' column: the short id stored in `hash_ciphered` for each row's 'Hash'.

  The DataFrame is modified in place and returned.
  """
  data['Ciphered Hash'] = [hash_ciphered[cell_key] for cell_key in data.Hash]
  return data

In [217]:
geotab = cipherHashes(geotab)
ksi = cipherHashes(ksi)
traffic = cipherHashes(traffic)

In [125]:
def getNeighborsMeshing(sector, sector_list):
  """Return the mesh cells in `sector_list` that share at least one corner with `sector`.

  Corners are read from the notebook-level `mesh_coords` dictionary; `sector`
  itself is never included in the result.
  """
  # Built once instead of once per candidate.
  own_corners = set(mesh_coords[sector])
  return [
      candidate
      for candidate in sector_list
      if candidate != sector
      and not own_corners.isdisjoint(mesh_coords[candidate])
  ]

In [126]:
def addSectorsMeshing(map, data, sectors_to_plot, coords_dict, color = 'orange', legend = False):
  """Draw mesh cells as filled polygons with a centre marker showing their point count.

  Corner coordinates come from `coords_dict[sector]`; the count shown on hover
  comes from the notebook-level `mesh_freqs`. `data` is accepted for signature
  compatibility but is not read. Returns the figure.
  """
  hover = 'Cantidad de incidentes' + ": %{text}" + '<br><br>' + "<extra></extra>"

  for sector in sectors_to_plot:
    outline = np.array(coords_dict[sector])
    lons, lats = outline[:,0], outline[:,1]

    # Filled outline of the cell.
    polygon = go.Scattermapbox(
        fill = "toself",
        lon = lons,
        lat = lats,
        marker = { 'size': 10, 'color': color },
        showlegend = legend,
    )
    map.add_trace(polygon)

    # Centre marker (mean of the vertices, leaving out the last one).
    centre = go.Scattermapbox(
        lon = [np.mean(lons[:-1])],
        lat = [np.mean(lats[:-1])],
        marker = { 'size': 10, 'color': color },
        text = [int(mesh_freqs[sector])],
        hovertemplate = hover,
        showlegend = legend,
    )
    map.add_trace(centre)

  return map

In [127]:
def plotSectorsAndNeighborsMesh(map, data, sectors_to_plot, legend = False):
  """Plot the given mesh cells in red and the cells touching them in the default colour.

  Neighbours are searched among the notebook-level `mesh_sectors`; cells that
  are themselves in `sectors_to_plot` are not drawn again as neighbours.
  Returns the figure.
  """
  adjacent = [
      neighbor
      for sector in sectors_to_plot
      for neighbor in getNeighborsMeshing(sector, mesh_sectors)
  ]

  # De-duplicate both groups and keep the neighbours disjoint from the focus set.
  focus = list(set(sectors_to_plot))
  surrounding = list(set(adjacent) - set(focus))

  map = addSectorsMeshing(map, data, focus, mesh_coords, color = 'red', legend = legend)
  map = addSectorsMeshing(map, data, surrounding, mesh_coords)

  return map

In [133]:
plotSectorsAndNeighborsMesh(setupMap(geotab),
                                      mesh_sectors,
                                      mesh_sectors[0:10])

### Conditional Probability

In [255]:
def getConditionalProb(data, A, B):
  """Estimate P(A = value | B) for every value of column `A`.

  Parameters:
    data: DataFrame holding the observations.
    A: name of the target column.
    B: dict mapping a column name to the list of accepted values for that
      column. Values within one column are OR-ed; columns are AND-ed.

  Returns:
    A DataFrame indexed by str(value) with a 'Probabilidad' column, sorted in
    descending order. When there are more than 10 values, only those whose
    probability is at least the mean probability are kept.
    If no row satisfies B, the string
    'No hay casos con esas especificaciones' is returned instead.

  Raises:
    KeyError: if the first column of B (or `A`) is not in `data`.

  Notes:
    A condition column other than the first that is missing from `data` is
    skipped with a warning instead of being dropped silently; the remaining
    conditions are still applied.
  """
  events = list(B.keys())

  # First condition: a missing column raises KeyError, as it always did.
  general_filter = data[events[0]].isin(B[events[0]]).to_numpy()

  for event in events[1:]:
    if event not in data.columns:
      warnings.warn(
          "getConditionalProb: column %r not found in data; condition ignored" % (event,)
      )
      continue
    general_filter = general_filter & data[event].isin(B[event]).to_numpy()

  # Target values restricted to the rows that satisfy B.
  conditioned = data.loc[general_filter, A]
  n_B = len(conditioned)
  if n_B == 0:
    return 'No hay casos con esas especificaciones'

  # P(A = t | B) = #(A = t and B) / #(B); values never seen under B get 0.
  probs = {
      str(target): (conditioned == target).sum() / n_B
      for target in data[A].unique()
  }

  probs = pd.DataFrame({'Probabilidad': list(probs.values())}, index = list(probs.keys()))
  probs = probs.sort_values(by = 'Probabilidad', ascending = False)
  if len(probs['Probabilidad']) > 10:
    probs = probs[probs['Probabilidad'] >= probs['Probabilidad'].mean()]

  return probs

#### Explore and generate insights

In [219]:
A = 'HOUR'
B = {'INVTYPE':['Motorcycle Driver']}
data = ksi
getConditionalProb(data, A, B).plot.bar()

In [184]:
A = 'HOUR'
B = {'INVTYPE':['Pedestrian']}
data = ksi
getConditionalProb(data, A, B).plot.bar()

In [185]:
A = 'HOUR'
# ksi has STREET1 and STREET2 columns but no STREET column, so the street
# condition has to name one of them to take effect.
B = {'INVTYPE':['Motorcycle Driver'],
     'STREET1':['MORNINGSIDE AVE', 'BLOOR ST W']}

data = ksi
getConditionalProb(data, A, B).plot.bar()

#### Probability Map

In [221]:
def getProbabilityMap(map, graph_data, data, zone_kind):
  """Add a density layer with one point per zone, weighted by its probability.

  Parameters:
    map: plotly Figure to draw on (it is modified and returned).
    graph_data: DataFrame indexed by zone label (as strings) with a
      'Probabilidad' column.
    data: DataFrame with 'Latitude', 'Longitude', 'Ciphered Hash' and
      `zone_kind` columns.
    zone_kind: name of the column in `data` that identifies the zone.

  Returns:
    The same figure with the density trace appended. Each point sits at the
    mean latitude/longitude of the zone's rows; zones with no matching rows get
    NaN coordinates.
  """
  zones = list(graph_data.index)

  # graph_data is indexed by str(value), so the zone column is compared as
  # strings; a single grouping pass replaces one full scan per zone.
  zone_keys = data[zone_kind].astype(str).to_numpy()
  per_zone = data.groupby(zone_keys)
  centroids = per_zone[['Latitude', 'Longitude']].mean().reindex(zones)

  # One hover label per plotted zone (the first ciphered hash found in it), so
  # the text array lines up with lat/lon instead of with the rows of `data`.
  zone_labels = per_zone['Ciphered Hash'].first().reindex(zones)

  probability = graph_data['Probabilidad']
  max_probability = probability.max()

  map.add_trace(
      go.Densitymapbox(
          lat = centroids['Latitude'],
          lon = centroids['Longitude'],
          z = probability,
          radius = 50*(probability/max_probability),
          zmin = 0,
          zmax = max_probability,
          text = zone_labels,
          hovertemplate = 'Zona' + ": %{text}" + '<br><br>' +
                          "Probability: %{z}" + '<br>' +
                          "Coords: (%{lat:.4f},%{lon:.4f})" +
                          "<extra></extra>"
      )
  )
  return map

In [222]:
def showProbabilityMap(data,zone_kind,B):
  """Show a map of P(zone | B) for the zones of column `zone_kind`.

  Parameters:
    data: DataFrame passed to getConditionalProb, setupMap and getProbabilityMap.
    zone_kind: name of the column that identifies the zone.
    B: conditions dict, in the format expected by getConditionalProb.

  Returns:
    None. The figure is displayed; when no rows satisfy B the message returned
    by getConditionalProb is printed and no map is drawn.
  """
  probabilities = getConditionalProb(data, zone_kind, B)

  # getConditionalProb returns a plain message string when nothing matches B.
  if isinstance(probabilities, str):
    print(probabilities)
    return

  map = setupMap(data, legend = False)
  map = getProbabilityMap(map, probabilities, data, zone_kind)
  map.show()

In [253]:
ksi

Unnamed: 0,X,Y,INDEX_,ACCNUM,YEAR,DATE,TIME,HOUR,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,DIVISION,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,POLICE_DIVISION,HOOD_ID,NEIGHBOURHOOD,ObjectId,Latitude,Longitude,Hash,Ciphered Hash
0,-8.844611e+06,5.412414e+06,3387730.0,892658.0,2006.0,2006/03/11 05:00:00+00,852.0,8.0,BLOOR ST W,DUNDAS ST W,<Null>,Major Arterial,Toronto and East York,4,11,43.656345,-79.452490,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Driver,unknown,,<Null>,South,"Automobile, Station Wagon",Turning Left,Failed to Yield Right of Way,Unknown,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,D11,88.0,High Park North (88),1.0,43.656345,-79.452490,"(-79.45338769562662, 43.65548339840396)|(-79.4...",442046
1,-8.844611e+06,5.412414e+06,3387731.0,892658.0,2006.0,2006/03/11 05:00:00+00,852.0,8.0,BLOOR ST W,DUNDAS ST W,<Null>,Major Arterial,Toronto and East York,4,11,43.656345,-79.452490,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Pedestrian,65 to 69,Fatal,<Null>,North,Other,<Null>,<Null>,<Null>,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Unknown,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,D11,88.0,High Park North (88),2.0,43.656345,-79.452490,"(-79.45338769562662, 43.65548339840396)|(-79.4...",442046
2,-8.816480e+06,5.434843e+06,3388101.0,892810.0,2006.0,2006/03/11 05:00:00+00,915.0,9.0,MORNINGSIDE AVE,SHEPPARD AVE E,<Null>,Major Arterial,Scarborough,25,42,43.801943,-79.199786,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Turning Movement,Motorcycle Driver,45 to 49,Fatal,<Null>,East,Motorcycle,Turning Right,Disobeyed Traffic Control,Unknown,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,Yes,<Null>,<Null>,D42,131.0,Rouge (131),3.0,43.801943,-79.199786,"(-79.20070214875685, 43.801052246057196)|(-79....",440812
3,-8.816480e+06,5.434843e+06,3388102.0,892810.0,2006.0,2006/03/11 05:00:00+00,915.0,9.0,MORNINGSIDE AVE,SHEPPARD AVE E,<Null>,Major Arterial,Scarborough,25,42,43.801943,-79.199786,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Turning Movement,Driver,unknown,,<Null>,South,"Automobile, Station Wagon",Going Ahead,Driving Properly,Unknown,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,Yes,<Null>,<Null>,D42,131.0,Rouge (131),4.0,43.801943,-79.199786,"(-79.20070214875685, 43.801052246057196)|(-79....",440812
4,-8.822759e+06,5.424516e+06,3387793.0,892682.0,2006.0,2006/03/12 05:00:00+00,240.0,2.0,EGLINTON AVE E,COMMONWEALTH AVE,<Null>,Major Arterial,Scarborough,2120,41,43.734945,-79.256190,Mid-Block,<Null>,No Control,Clear,Dark,Dry,Fatal,Pedestrian Collisions,Driver,25 to 29,,<Null>,West,"Automobile, Station Wagon",Going Ahead,Other,"Ability Impaired, Alcohol",<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,D41,138.0,Eglinton East (138),5.0,43.734945,-79.256190,"(-79.2570070803963, 43.733760986292964)|(-79.2...",436202
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16855,-8.820837e+06,5.421411e+06,81509784.0,1636966.0,2020.0,2020/08/30 04:00:00+00,1340.0,13.0,BRIMLEY RD,BARKDENE HILLS,50 m South of,<Null>,<Null>,<Null>,<Null>,43.714793,-79.238926,Mid-Block,Non Intersection,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Cyclist,25 to 29,Major,<Null>,North,Bicycle,Going Ahead,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Driving Properly,Normal,<Null>,Yes,Yes,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,D41,123.0,Cliffcrest (123),16856.0,43.714793,-79.238926,"(-79.23915429719355, 43.71453491207461)|(-79.2...",407656
16856,-8.820068e+06,5.425334e+06,81505452.0,1650701.0,2020.0,2020/09/01 04:00:00+00,1205.0,12.0,EGLINTON AVE E,BELLAMY RD N,<Null>,Major Arterial,Scarborough,2120,43,43.740256,-79.232021,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Turning Movement,Driver,65 to 69,,<Null>,East,"Automobile, Station Wagon",Turning Left,Failed to Yield Right of Way,Inattentive,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,D43,138.0,Eglinton East (138),16857.0,43.740256,-79.232021,"(-79.23228784211557, 43.73925415035535)|(-79.2...",441506
16857,-8.820068e+06,5.425334e+06,81505453.0,1650701.0,2020.0,2020/09/01 04:00:00+00,1205.0,12.0,EGLINTON AVE E,BELLAMY RD N,<Null>,Major Arterial,Scarborough,2120,43,43.740256,-79.232021,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Turning Movement,Driver,30 to 34,Minor,<Null>,West,"Automobile, Station Wagon",Going Ahead,Driving Properly,Normal,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,D43,138.0,Eglinton East (138),16858.0,43.740256,-79.232021,"(-79.23228784211557, 43.73925415035535)|(-79.2...",441506
16858,-8.820068e+06,5.425334e+06,81505454.0,1650701.0,2020.0,2020/09/01 04:00:00+00,1205.0,12.0,EGLINTON AVE E,BELLAMY RD N,<Null>,Major Arterial,Scarborough,2120,43,43.740256,-79.232021,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Turning Movement,Passenger,10 to 14,Major,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,<Null>,<Null>,<Null>,Yes,<Null>,Yes,<Null>,<Null>,<Null>,D43,138.0,Eglinton East (138),16859.0,43.740256,-79.232021,"(-79.23228784211557, 43.73925415035535)|(-79.2...",441506


In [257]:
data = ksi
B = {'MANOEUVER':['Turning Left', 'Turning Right']}
showProbabilityMap(ksi, 'Hash', B)