### Connect to the database

In [1]:
import html
import json
import sqlite3
from datetime import datetime

import folium
import geopandas as gpd
import pandas as pd
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster

# Open the scraped-itinerary SQLite database and keep one cursor for the
# queries in the following cells.
conn = sqlite3.connect('../max-experiments/itinerary-scraping/journeys.db')
cursor = conn.cursor()

# Show which tables the database contains (sqlite_master is SQLite's own
# catalogue table; execute() hands back the cursor, so the fetch can be chained).
table_names = cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
print(table_names)

[('journeys',), ('stops',), ('outages',)]


### Import Scraped Data

In [2]:
# Read the three scraped tables fully into memory as lists of row tuples.
# The cursor and connection are released in `finally`, so a failing query
# does not leave the database handle open in the kernel.
try:
    cursor.execute('SELECT * FROM outages')
    outages = cursor.fetchall()

    cursor.execute('SELECT * FROM journeys')
    journeys = cursor.fetchall()

    cursor.execute('SELECT * FROM stops')
    stops = cursor.fetchall()
finally:
    cursor.close()
    conn.close()

### Set Up Dataframes

In [3]:
# Column names for the raw row tuples fetched from the `outages` and `stops` tables.
outage_columns = ['outage_id', 'stop_id', 'effect', 'updated_at', 'outage_data']
stop_columns = ['stop_id', 'stop_name', 'stop_coords', 'stop_acc']

outages_df = pd.DataFrame(outages, columns=outage_columns)
stops_df = pd.DataFrame(stops, columns=stop_columns)

# Attach the stop details to every outage via the shared stop_id
# (pandas' default inner join).
merged_df = outages_df.merge(stops_df, on='stop_id')

### Import TCL Data

In [4]:
# Load the TCL metro layer and discard the date/update columns that are not
# used anywhere below.
unused_metro_columns = ['date_debut', 'date_fin', 'last_update', 'last_update_fme']
tcl_metro = gpd.read_file('data/tcl_metro.json').drop(columns=unused_metro_columns)

In [5]:
# Metro line colors, keyed by the value of the `ligne` column
line_colors = {
    'A': '#E62E86',  # pink line
    'B': '#0065B1',  # blue line
    'C': '#F48E06',  # yellow line
    'D': '#029C41',  # green line
}
fallback_color = "#93BF38"  # funicular line, light green (used for any other `ligne` value)

# One color per row, in row order
colors = [line_colors.get(ligne, fallback_color) for ligne in tcl_metro['ligne']]

# Per-feature style dict; the map cells read it back from each feature's properties
tcl_metro['style'] = [
    {"color": color, "weight": 5, "opacity": 1}
    for color in colors
]

### Expand Condensed Data

In [6]:
# --- unpack stop_coords ---------------------------------------------------
# stop_coords is a text field: the longitude is cut out of the first
# comma-separated piece and the latitude out of the second, using fixed
# character offsets. Both values stay as text.
def _lon_text(coords):
    return coords.split(',')[0][9:-1]


def _lat_text(coords):
    return coords.split(',')[1][9:-2]


merged_df['lon'] = merged_df['stop_coords'].apply(_lon_text)
merged_df['lat'] = merged_df['stop_coords'].apply(_lat_text)
merged_df = merged_df.drop(columns=['stop_coords'])

# --- unpack outage_data ---------------------------------------------------
# Both columns hold JSON text; decode them into Python objects.
merged_df['outage_data'] = merged_df['outage_data'].apply(json.loads)
merged_df['stop_acc'] = merged_df['stop_acc'].apply(json.loads)


def _iso_with_offset(stamp):
    # current format: YYYYMMDDTHHMMSS
    # required format: YYYY-MM-DDTHH:MM:SS+02:00
    return datetime.strptime(stamp, '%Y%m%dT%H%M%S').isoformat() + '+02:00'


payloads = list(merged_df['outage_data'])

### Start and End Times
# Only the first entry of each outage's `periods` list is used.
first_periods = [payload['periods'][0] for payload in payloads]
begin = [_iso_with_offset(period['begin']) for period in first_periods]
end = [_iso_with_offset(period['end']) for period in first_periods]

### Cause and Effect
cause = [payload['cause'] for payload in payloads]
effect = [payload['effect'] for payload in payloads]

# `effect` overwrites the column of the same name that came from the outages
# table; the raw payload is no longer needed afterwards.
merged_df = merged_df.assign(begin=begin, end=end, cause=cause, effect=effect)
merged_df = merged_df.drop(columns=['outage_data'])

In [7]:
def _label_of(entry):
    # Each cause/effect entry is a mapping; only its 'label' value is kept.
    return entry['label']


for column in ('effect', 'cause'):
    merged_df[column] = merged_df[column].apply(_label_of)
merged_df.head()

Unnamed: 0,outage_id,stop_id,effect,updated_at,stop_name,stop_acc,lon,lat,begin,end,cause
0,828,stop_point:tcl:SP:30205,.,2024-05-25T17:28:00+02:00,Gorge de Loup,"[has_wheelchair_boarding, has_elevator, has_es...",4.805544,45.766428,2024-05-25T00:00:00+02:00,2024-05-25T19:30:00+02:00,Accès fermé pour travaux
1,8201,stop_point:tcl:SP:30205,.,2024-05-26T05:33:00+02:00,Gorge de Loup,"[has_wheelchair_boarding, has_elevator, has_es...",4.805544,45.766428,2024-05-26T00:00:00+02:00,2024-05-26T07:30:00+02:00,Panne
2,822,stop_point:tcl:SP:30205,Accès impossible direction Gare de Vénissieux,2024-05-29T05:08:00+02:00,Gorge de Loup,"[has_wheelchair_boarding, has_elevator, has_es...",4.805544,45.766428,2024-05-29T00:00:00+02:00,2024-05-29T07:30:00+02:00,Panne
3,8202,stop_point:tcl:SP:30205,.,2024-06-04T05:08:00+02:00,Gorge de Loup,"[has_wheelchair_boarding, has_elevator, has_es...",4.805544,45.766428,2024-06-04T00:00:00+02:00,2024-06-04T07:30:00+02:00,Panne
4,8203,stop_point:tcl:SP:30205,.,2024-06-04T08:38:00+02:00,Gorge de Loup,"[has_wheelchair_boarding, has_elevator, has_es...",4.805544,45.766428,2024-06-04T00:00:00+02:00,2024-06-04T10:30:00+02:00,Vandalisme


In [8]:
# Plot every outage on a map: a heat layer for density, clustered markers with
# per-outage details, and the metro lines.
m = folium.Map(location=[45.75, 4.85], zoom_start=13, tiles='cartodb voyager')

# lat/lon were extracted as text, so convert them to numbers once and reuse the
# same points for both layers.
points = [[float(lat), float(lon)] for lat, lon in zip(merged_df['lat'], merged_df['lon'])]

# heatmap
heat_data = points
HeatMap(heat_data).add_to(m)

# marker cluster
marker_cluster = MarkerCluster().add_to(m)
for point, (idx, row) in zip(points, merged_df.iterrows()):
    # The tooltip is rendered as HTML and the values come from scraped data, so
    # they are escaped; only the <br> separators are real markup.
    tooltip_fields = [
        ("Stop Name", row['stop_name']),
        ("Outage ID", row['outage_id']),
        ("Start", row['begin']),
        ("End", row['end']),
        ("Cause", row['cause']),
        ("Effect", row['effect']),
    ]
    tooltip = "<br>".join(
        f"{label}: {html.escape(str(value))}" for label, value in tooltip_fields
    )

    folium.Marker(point, tooltip=tooltip).add_to(marker_cluster)

# tcl metro lines, each drawn with the style dict stored on its own feature
folium.GeoJson(tcl_metro, style_function=lambda x: x['properties']['style']).add_to(m)


# Outage database only contains data on metro line/stop outages, so bus lines are not displayed by default.
# The bus line data is still useful to see how metro lines are connected to bus lines and thus how outages
# on metro lines could affect bus lines.
# Uncomment the following lines to display the bus lines.
# tcl_bus = gpd.read_file('data/tcl_bus.json')
# tcl_bus = tcl_bus.drop(columns=['date_debut', 'date_fin', 'last_update', 'last_update_fme'])
# folium.GeoJson(tcl_bus, style_function=lambda x: {'color': '#555', 'weight': 1.5, 'dashArray': '5, 5'}).add_to(m)

m.save('../public-folium-maps/outages_heatmap.html')

In [9]:
# Drop every outage whose effect is just '.', keeping only the rows that carry
# an actual effect description.
has_effect_text = merged_df['effect'].ne('.')
merged_df = merged_df.loc[has_effect_text]
merged_df.head()

Unnamed: 0,outage_id,stop_id,effect,updated_at,stop_name,stop_acc,lon,lat,begin,end,cause
2,822,stop_point:tcl:SP:30205,Accès impossible direction Gare de Vénissieux,2024-05-29T05:08:00+02:00,Gorge de Loup,"[has_wheelchair_boarding, has_elevator, has_es...",4.805544,45.766428,2024-05-29T00:00:00+02:00,2024-05-29T07:30:00+02:00,Panne
5,503,stop_point:tcl:SP:46036,Accès à la station et accès SNCF impossibles,2024-05-25T17:28:00+02:00,Gare d'Oullins,[has_wheelchair_boarding],4.814519,45.716706,2024-05-24T00:00:00+02:00,2024-05-30T19:30:00+02:00,Panne
6,501,stop_point:tcl:SP:46036,Sortie de la station impossible,2024-05-26T01:58:00+02:00,Gare d'Oullins,[has_wheelchair_boarding],4.814519,45.716706,2024-05-26T00:00:00+02:00,2024-05-26T04:00:00+02:00,Panne
8,502,stop_point:tcl:SP:46036,Accès impossible direction Charpennes,2024-06-02T19:38:00+02:00,Gare d'Oullins,[has_wheelchair_boarding],4.814519,45.716706,2024-06-02T00:00:00+02:00,2024-06-02T21:30:00+02:00,Panne
9,242,stop_point:tcl:SP:30156,"Direction Vaulx-en-Velin La Soie, prendre l'au...",2024-05-25T17:28:00+02:00,Hôtel de Ville L. Pradel,"[has_wheelchair_boarding, has_elevator, has_es...",4.835948,45.768132,2024-05-25T00:00:00+02:00,2024-05-25T19:30:00+02:00,Panne


In [10]:
# Map of the outages currently in merged_df: a heat layer for density,
# clustered markers with per-outage details, and the metro lines.
m = folium.Map(location=[45.75, 4.85], zoom_start=13, tiles='cartodb voyager')

# lat/lon were extracted as text, so convert them to numbers once and reuse the
# same points for both layers.
points = [[float(lat), float(lon)] for lat, lon in zip(merged_df['lat'], merged_df['lon'])]

# heatmap
heat_data = points
HeatMap(heat_data).add_to(m)

# marker cluster
marker_cluster = MarkerCluster().add_to(m)

for point, (idx, row) in zip(points, merged_df.iterrows()):
    # The tooltip is rendered as HTML and the values come from scraped data, so
    # they are escaped; only the <br> separators are real markup.
    tooltip_fields = [
        ("Stop Name", row['stop_name']),
        ("Outage ID", row['outage_id']),
        ("Start", row['begin']),
        ("End", row['end']),
        ("Cause", row['cause']),
        ("Effect", row['effect']),
    ]
    tooltip = "<br>".join(
        f"{label}: {html.escape(str(value))}" for label, value in tooltip_fields
    )

    folium.Marker(point, tooltip=tooltip).add_to(marker_cluster)

# tcl metro lines, each drawn with the style dict stored on its own feature
folium.GeoJson(tcl_metro, style_function=lambda x: x['properties']['style']).add_to(m)

# Outage database only contains data on metro line/stop outages, so bus lines are not displayed by default.
# The bus line data is still useful to see how metro lines are connected to bus lines and thus how outages
# on metro lines could affect bus lines.
# Uncomment the following lines to display the bus lines.
# tcl_bus = gpd.read_file('data/tcl_bus.json')
# tcl_bus = tcl_bus.drop(columns=['date_debut', 'date_fin', 'last_update', 'last_update_fme'])
# folium.GeoJson(tcl_bus, style_function=lambda x: {'color': '#555', 'weight': 1.5, 'dashArray': '5, 5'}).add_to(m)

m.save('../public-folium-maps/filtered_effect_outages.html')