In [111]:
import pandas as pd
from geopy.geocoders import Nominatim
import folium
import os

In [112]:
df_circ = pd.read_csv('../Data/Circulation Data/circulation_data.csv')

#format Date column as datetimem and take only rows with 2019-10-01 to 2023-09-30
df_circ['Date'] = pd.to_datetime(df_circ['Date'])
df_circ = df_circ[(df_circ['Date'] >= '2019-10-01') & (df_circ['Date'] <= '2023-09-30')]
df_circ = df_circ.reset_index(drop=True)

df_circ.head()

Unnamed: 0,Id_Reference,Id_Intersection,Nom_Intersection,Date,Periode,Heure,Minute,Seconde,Code_Banque,Description_Code_Banque,...,WBT,WBRT,Approche_Nord,Approche_Sud,Approche_Est,Approche_Ouest,Localisation_X,Localisation_Y,Longitude,Latitude
0,8797,19157,Boul. Thiemens / Accès au 2707 Bibliothèque du...,2019-11-26,08:00:00,8,0,0,0,Autos,...,212,13,0,0,0,0,288925.6032,5040576.0,-73.703144,45.504924
1,8797,19157,Boul. Thiemens / Accès au 2707 Bibliothèque du...,2019-11-26,08:15:00,8,15,0,0,Autos,...,222,7,0,0,0,0,288925.6032,5040576.0,-73.703144,45.504924
2,8797,19157,Boul. Thiemens / Accès au 2707 Bibliothèque du...,2019-11-26,08:30:00,8,30,0,0,Autos,...,197,4,0,0,0,0,288925.6032,5040576.0,-73.703144,45.504924
3,8797,19157,Boul. Thiemens / Accès au 2707 Bibliothèque du...,2019-11-26,08:45:00,8,45,0,0,Autos,...,208,5,0,0,0,0,288925.6032,5040576.0,-73.703144,45.504924
4,8797,19157,Boul. Thiemens / Accès au 2707 Bibliothèque du...,2019-11-26,09:00:00,9,0,0,0,Autos,...,143,4,0,0,0,0,288925.6032,5040576.0,-73.703144,45.504924


In [113]:
df_circ.to_csv('../Data/Circulation Data/circulation_data.csv', index=False)

In [114]:
#To determine the data that will be used, we will plot each location related to the bus lines in a map and see which ones are relevant

#Bus lines and their associated streets:
# 100: Crémazie, Côte de Liesse, Hickmore, Montée de Liesse
# 460: Métropolitain, Crémazie, Côte de Liesse, Marshall
# 121: Sauvé, Côte-Vertu
# 139, 439: Pie-IX
# 67, 467: Saint-Michel
# 80: du Parc, Champagneur
# 480: du Parc, Champagneur, René-Lévesque

#Create a dictionary with dataframes for street
streets_dict = {}
streets = ['Crémazie', 'Côte de Liesse', 'Hickmore', 'Montée de Liesse', 'Métropolitain', 'Marshall', 
           'Sauvé', 'Côte-Vertu', 'Pie-IX', 'Saint-Michel', 'Parc', 'Champagneur', 'René-Lévesque']
for street in streets:
    streets_dict[street] = df_circ[df_circ['Nom_Intersection'].str.contains(street)]

#Create a list of colors for each street
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 
          'cadetblue', 'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray'
            ]

#Create a dictionary mapping each street to a color
colors_dict = {street: colors[i % len(colors)] for i, street in enumerate(streets_dict)}

#create a map 
m=folium.Map(location=[df_circ['Latitude'].mean(), df_circ['Longitude'].mean()], zoom_start=12)

#add markers to the map
for street in streets_dict:
    for _, row in streets_dict[street].iterrows():
        folium.CircleMarker(
            location=[row['Latitude'], row['Longitude']],
            radius=10,
            color=colors_dict[street],
            color_opacity=0.2,
            fill=True,
            fill_color=colors_dict[street],
            fill_opacity=0.2, 
            popup=row['Nom_Intersection'], 
            tooltip=row['Nom_Intersection']
        ).add_to(m)                   

# Display the map
#display map
#m

In [115]:
# From the map, we can see that the following bus lines are relevant:
# 121: Sauvé, Côte-Vertu
# 67, 467: Saint-Michel
# 480: du Parc, Champagneur, René-Lévesque
# 80: du Parc, Champagneur


# These lines have less locations available, so we should see less accurate results
# 100: Crémazie, Côte de Liesse, Hickmore, Montée de Liesse
# 460: Métropolitain, Crémazie, Côte de Liesse, Marshall
# 139, 439: Pie-IX

In [116]:
#make a dataframe from df_circ with all rows that include streets in streets_dict except for the following:
# Christophe-Colomb / Parc-La Fontaine/Rachel
# Pont de la Concorde / avenue Pierre-Dupuy / accès Parc de Dieppe
# René-Lévesque / Sanguinet
# René-Lévesque / Saint-Denis
# Atateken / René-Lévesque
# Panet / René-Lévesque
# rue Alexandre-DeSève / boulevard René-Lévesque
# Papineau / René-Lévesque
# La Vérendrye / Parc Angrignon
# Bégin / Côte-Vertu
# avenue Champagneur / rue Jean-Talon

non_rl = ['Christophe-Colomb / Parc-La Fontaine/Rachel', 'Pont de la Concorde / avenue Pierre-Dupuy / accès Parc de Dieppe', 
            'René-Lévesque / Sanguinet', 'René-Lévesque / Saint-Denis', 'Atateken / René-Lévesque', 'Panet / René-Lévesque', 
            'rue Alexandre-DeSève / boulevard René-Lévesque', 'Papineau / René-Lévesque', 'La Vérendrye / Parc Angrignon', 
            'Bégin / Côte-Vertu', 'avenue Champagneur / rue Jean-Talon']

df_circ_f = df_circ[df_circ['Nom_Intersection'].str.contains('|'.join(streets_dict.keys()))]
df_circ_f = df_circ_f[~df_circ['Nom_Intersection'].isin(non_rl)]





  df_circ_f = df_circ_f[~df_circ['Nom_Intersection'].isin(non_rl)]


In [117]:
df_circ_f.head()

Unnamed: 0,Id_Reference,Id_Intersection,Nom_Intersection,Date,Periode,Heure,Minute,Seconde,Code_Banque,Description_Code_Banque,...,WBT,WBRT,Approche_Nord,Approche_Sud,Approche_Est,Approche_Ouest,Localisation_X,Localisation_Y,Longitude,Latitude
48,8709,745,Pie-IX / Rouen,2019-11-06,00:00:00,0,0,0,0,Autos,...,3,1,0,0,0,0,301302.82,5045726.0,-73.54479,45.551443
67,8705,745,Pie-IX / Rouen,2019-11-05,00:00:00,0,0,0,0,Autos,...,2,2,0,0,0,0,301302.82,5045726.0,-73.54479,45.551443
70,8713,745,Pie-IX / Rouen,2019-11-07,00:00:00,0,0,0,0,Autos,...,2,3,0,0,0,0,301302.82,5045726.0,-73.54479,45.551443
158,8784,7207,Métropolitain Nord / Provencher,2019-10-29,00:00:00,0,0,0,0,Autos,...,180,4,0,0,0,0,297092.5,5047996.0,-73.59875,45.571835
159,8785,7207,Métropolitain Nord / Provencher,2019-10-30,00:00:00,0,0,0,0,Autos,...,211,4,0,0,0,0,297092.5,5047996.0,-73.59875,45.571835


In [118]:
#make a map with all unique locations in df_circ_f

#Create a dictionary with dataframes for street
streets_dict_f = {}
streets = df_circ_f['Nom_Intersection'].unique()
for street in streets:
    streets_dict_f[street] = df_circ_f[df_circ_f['Nom_Intersection'] == street]


#create a map
m=folium.Map(location=[df_circ_f['Latitude'].mean(), df_circ_f['Longitude'].mean()], zoom_start=12)

#add markers to the map
for street in streets_dict_f:
    for _, row in streets_dict_f[street].iterrows():
        folium.CircleMarker(
            location=[row['Latitude'], row['Longitude']],
            radius=10,
            color='darkred',
            color_opacity=0.2,
            fill=True,
            fill_color='darkred',
            fill_opacity=0.2, 
            popup=row['Nom_Intersection'], 
            tooltip=row['Nom_Intersection']
        ).add_to(m)

#display map

#m

In [120]:
# Remove unnecessary columns

df_circ_f=df_circ_f.drop(columns=['Code_Banque', 'Seconde', 'Description_Code_Banque', 'Localisation_X', 'Localisation_Y', 'Latitude', 'Longitude', 'Id_Intersection'])

df_circ_f.head()

Unnamed: 0,Id_Reference,Nom_Intersection,Date,Periode,Heure,Minute,NBLT,NBT,NBRT,SBLT,...,EBLT,EBT,EBRT,WBLT,WBT,WBRT,Approche_Nord,Approche_Sud,Approche_Est,Approche_Ouest
48,8709,Pie-IX / Rouen,2019-11-06,00:00:00,0,0,1,15,3,7,...,2,1,0,0,3,1,0,0,0,0
67,8705,Pie-IX / Rouen,2019-11-05,00:00:00,0,0,3,7,1,1,...,1,5,0,4,2,2,0,0,0,0
70,8713,Pie-IX / Rouen,2019-11-07,00:00:00,0,0,0,23,1,3,...,2,3,2,0,2,3,0,0,0,0
158,8784,Métropolitain Nord / Provencher,2019-10-29,00:00:00,0,0,6,19,0,0,...,0,0,0,9,180,4,0,0,0,0
159,8785,Métropolitain Nord / Provencher,2019-10-30,00:00:00,0,0,1,9,0,0,...,0,0,0,7,211,4,0,0,0,0


In [121]:
#add a column for the street that passes through each location
df_circ_f['Bus'] = df_circ_f['Nom_Intersection'].apply(lambda x: ', '.join([street for street in streets_dict.keys() if street in x]))

#rename streets in Bus to thebus lines they are associated with (e.g. Crémazie to 100_460)
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('Sauvé', '121')
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('Côte-Vertu', '121')
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('Saint-Michel', '67_467')
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('du Parc', '80_480')
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('Champagneur', '80_480')
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('René-Lévesque', '480')

df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('Crémazie', '100_460')
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('Côte de Liesse', '100_460')
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('Métropolitain', '460')
df_circ_f['Bus'] = df_circ_f['Bus'].str.replace('Pie-IX', '139_439')

df_circ_f.head()

Unnamed: 0,Id_Reference,Nom_Intersection,Date,Periode,Heure,Minute,NBLT,NBT,NBRT,SBLT,...,EBT,EBRT,WBLT,WBT,WBRT,Approche_Nord,Approche_Sud,Approche_Est,Approche_Ouest,Bus
48,8709,Pie-IX / Rouen,2019-11-06,00:00:00,0,0,1,15,3,7,...,1,0,0,3,1,0,0,0,0,139_439
67,8705,Pie-IX / Rouen,2019-11-05,00:00:00,0,0,3,7,1,1,...,5,0,4,2,2,0,0,0,0,139_439
70,8713,Pie-IX / Rouen,2019-11-07,00:00:00,0,0,0,23,1,3,...,3,2,0,2,3,0,0,0,0,139_439
158,8784,Métropolitain Nord / Provencher,2019-10-29,00:00:00,0,0,6,19,0,0,...,0,0,9,180,4,0,0,0,0,460
159,8785,Métropolitain Nord / Provencher,2019-10-30,00:00:00,0,0,1,9,0,0,...,0,0,7,211,4,0,0,0,0,460


In [124]:
# add a column that adds all circulation data for each row 
# i.e. add all columns except for Id_Reference,Id_Intersection,Nom_Intersection,Date,Periode,Heure,Minute,Bus

df_circ_f['Circulation'] = df_circ_f.iloc[:, 6:22].sum(axis=1)


In [125]:
df_circ_f.to_csv('../Data/Circulation Data/circulation_data_filtered.csv', index=False)