In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.physics import Physics
from pyvis.network import Network
from IPython.display import IFrame
from IPython.display import HTML


### Comments from Prof
- As you are planning to replicate the analysis in the paper, input modelling using Toronto's data (fitting appropriate models for arrivals, trip durations, destinations, etc.) is an important part of the project. Make sure to clearly explain your methods and choice of models in the final report. 
- We will discuss simulation optimization later in the course. You should be able to implement the heuristic presented in the WSC paper as well as develop your own heuristics using the methods that we discuss. The idea of accounting for limited capacity for movement is interesting and could be a good extension. 
- As discussed, start with a subset of stations to build the initial model and validate it. If time permits, you can also extend it to all stations in Toronto.

# 1. Data Loading
- https://www.kaggle.com/datasets/jackywang529/toronto-bikeshare-data

In [2]:
# Load the August 2022 ridership export into a DataFrame.
# A forward slash is used as the path separator: it resolves on Windows, macOS
# and Linux alike, whereas a backslash followed by "B" is an invalid string
# escape (a warning on current Python versions) and only acts as a separator on Windows.
BSD = pd.read_csv("BikeShare Data/Bike share ridership 2022-08.csv")


In [3]:
# Show the loaded frame; pandas truncates the rendering to the first and last rows.
BSD

Unnamed: 0,Trip Id,Trip Duration,Start Station Id,Start Time,Start Station Name,End Station Id,End Time,End Station Name,Bike Id,User Type
0,17515458,1174,7259,08/01/2022 00:00,Lower Spadina Ave / Lake Shore Blvd,7712.0,08/01/2022 00:20,,3328,Casual Member
1,17515440,468,7208,08/01/2022 00:00,80 Clinton St (North of College),7534.0,08/01/2022 00:07,Walnut Ave / Queen St W,4662,Casual Member
2,17515442,1217,7259,08/01/2022 00:00,Lower Spadina Ave / Lake Shore Blvd,7712.0,08/01/2022 00:20,,4510,Casual Member
3,17515441,1124,7269,08/01/2022 00:00,Toronto Eaton Centre (Yonge St),7076.0,08/01/2022 00:18,York St / Queens Quay W,1013,Casual Member
4,17515443,1992,7052,08/01/2022 00:00,Wellington St W / Bay St,7193.0,08/01/2022 00:33,Queen St W / Gladstone Ave,4017,Casual Member
...,...,...,...,...,...,...,...,...,...,...
705145,18317445,1670,7100,08/31/2022 23:58,Dundas St E / Regent Park Blvd,7322.0,09/01/2022 00:26,King St W / Brant St,6417,Casual Member
705146,18317447,574,7245,08/31/2022 23:58,Tecumseth St / Queen St W - SMART,7069.0,09/01/2022 00:08,Queen St W / Spadina Ave,141,Annual Member
705147,18317448,533,7257,08/31/2022 23:59,Dundas St W / St. Patrick St,7037.0,09/01/2022 00:08,Bathurst St / Dundas St W,1583,Casual Member
705148,18317449,615,7368,08/31/2022 23:59,Lisgar St / Dundas St SMART,7521.0,09/01/2022 00:09,Emerson Ave / Bloor St W,2059,Casual Member


# 2. Data Exploration

In [4]:
# Summary statistics for trip duration.
# NOTE: the column label really contains two consecutive spaces ("Trip  Duration");
# do not "tidy" it to a single space or the lookup raises KeyError.
BSD.loc[:, "Trip  Duration"].describe()


count    7.051500e+05
mean     1.013313e+03
std      3.778077e+03
min      0.000000e+00
25%      4.570000e+02
50%      7.600000e+02
75%      1.197000e+03
max      1.232765e+06
Name: Trip  Duration, dtype: float64

In [5]:
# Summarise the text-valued columns: non-null count, number of distinct values,
# most frequent value and how often it occurs.
categorical_cols = ["Start Station Name", "End Station Name", "User Type"]
BSD[categorical_cols].describe()


Unnamed: 0,Start Station Name,End Station Name,User Type
count,672386,672375,705150
unique,597,597,2
top,York St / Queens Quay W,York St / Queens Quay W,Casual Member
freq,7247,7963,413345


## 2.1 Top 50 Routes Subset

In [6]:
# Columns that identify a route (origin station -> destination station).
route_cols = ['Start Station Name', 'End Station Name']

# One row per trip, restricted to the route columns.
stations = BSD[route_cols].copy()

# Number of trips for every (start, end) pair, busiest pairs first.
# groupby leaves out rows whose start or end station name is missing (its default).
counts = (
    stations
    .groupby(route_cols)
    .size()
    .reset_index(name="NumOfTrips")
    .sort_values(by='NumOfTrips', ascending=False)
)

# The 50 busiest routes, kept as an independent frame with the trip count alongside.
top_50 = counts.head(50)
top50_routes = top_50[route_cols + ['NumOfTrips']].copy()
top50_routes.head()


Unnamed: 0,Start Station Name,End Station Name,NumOfTrips
100209,Tommy Thompson Park (Leslie Street Spit),Tommy Thompson Park (Leslie Street Spit),1127
49643,HTO Park (Queens Quay W),HTO Park (Queens Quay W),528
114149,York St / Queens Quay W,York St / Queens Quay W,462
53037,Humber Bay Shores Park / Marine Parade Dr,Humber Bay Shores Park / Marine Parade Dr,387
13027,Bay St / Queens Quay W (Ferry Terminal),Bay St / Queens Quay W (Ferry Terminal),357


In [7]:
# Build a graph with one edge per (start, end) station pair among the top routes.
# No edge attributes are requested, so NumOfTrips is not carried onto the edges,
# and networkx's default graph type is used.
G = nx.from_pandas_edgelist(
    top50_routes,
    source='Start Station Name',
    target='End Station Name',
)

# Hand the graph to Pyvis; resources are inlined into the generated HTML.
net = Network(notebook=True, cdn_resources='in_line')
net.from_nx(G)

# Write the interactive view to disk and render it in the notebook.
net.show('my_network.html', local=True, notebook=True)


my_network.html
