In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import pickle
import pygsp

In [None]:
# Load the March 2020 Citi Bike trip table; every later cell reads `df`.
df = pd.read_csv('./data/202003-citibike-tripdata.csv')

# Distinct station ids seen as a trip origin / as a trip destination.
n_start_station, n_end_station = (
    df[column].unique().size
    for column in ('start station id', 'end station id')
)


In [None]:
# Every station id that appears on either end of at least one trip.
total_station_id = set(df['start station id']) | set(df['end station id'])
n_tot_station = len(total_station_id)

In [None]:
# Map each station id to a dense index 0..n_tot_station-1, assigned in
# ascending station-id order. These indices are the matrix rows/columns and
# graph node labels used by the rest of the notebook.
id_index = dict(zip(sorted(total_station_id), np.arange(n_tot_station)))

In [None]:
# Preview the first rows of the raw trip table.
df.head()

# Location Parser #

In [None]:
# Column positions to inspect for the location step
# (presumably the station id / latitude / longitude columns — confirm against df.columns).
useful = [3, 5, 6, 7, 9, 10]
df.take(useful, axis=1)

# Find Locations #

In [None]:
# Build {node index: (latitude, longitude)} for every station, taken from the
# first trip row that mentions the station: as a start station when it has
# one, otherwise as an end station.
# drop_duplicates keeps the first row per station id, so one pass per side
# replaces re-filtering the whole frame several times for every station.
# NOTE(review): networkx reads these tuples as (x, y), so the drawing cells
# put latitude on the x axis — confirm whether (longitude, latitude) was meant.
start_first = df.drop_duplicates('start station id').set_index('start station id')
end_first = df.drop_duplicates('end station id').set_index('end station id')

locations = dict()
for station_id, node in id_index.items():
    if station_id in start_first.index:
        first_row, side = start_first.loc[station_id], 'start'
    else:
        first_row, side = end_first.loc[station_id], 'end'
    locations[node] = (first_row[side + ' station latitude'],
                       first_row[side + ' station longitude'])
 

In [None]:
# Draw the 2-6 March graph with each station at its stored coordinates.
# NOTE: `g_2_6` is only built in a later cell of this notebook, so this cell
# fails on a fresh top-to-bottom run until that cell has been executed.
fig, ax = plt.subplots(figsize=(30, 20))
nx.draw_networkx_nodes(g_2_6, locations, ax=ax)
nx.draw_networkx_edges(g_2_6, locations, alpha=0.1, ax=ax)
plt.show()

In [None]:
# Draw the 9-13 March graph with each station at its stored coordinates.
# NOTE: `g_9_13` is only built in a later cell of this notebook, so this cell
# fails on a fresh top-to-bottom run until that cell has been executed.
fig, ax = plt.subplots(figsize=(30, 20))
nx.draw_networkx_nodes(g_9_13, locations, ax=ax)
nx.draw_networkx_edges(g_9_13, locations, alpha=0.1, ax=ax)
plt.show()

In [None]:
# Draw the 16-20 March graph with each station at its stored coordinates.
# NOTE: `g_16_20` is only built in a later cell of this notebook, so this cell
# fails on a fresh top-to-bottom run until that cell has been executed.
fig, ax = plt.subplots(figsize=(30, 20))
nx.draw_networkx_nodes(g_16_20, locations, ax=ax)
nx.draw_networkx_edges(g_16_20, locations, alpha=0.1, ax=ax)
plt.show()

In [None]:
# Draw the 23-27 March graph with each station at its stored coordinates.
# NOTE: `g_23_27` is only built in a later cell of this notebook, so this cell
# fails on a fresh top-to-bottom run until that cell has been executed.
fig, ax = plt.subplots(figsize=(30, 20))
nx.draw_networkx_nodes(g_23_27, locations, ax=ax)
nx.draw_networkx_edges(g_23_27, locations, alpha=0.1, ax=ax)
plt.show()

# Parser #

In [None]:
# Column positions to inspect for the ride-parsing step
# (presumably start time, duration, station ids and rider fields — confirm against df.columns).
useful = [1, 0, 3, 7, 11, 13, 14]
df.take(useful, axis=1)

# Util Functions #

In [None]:
# Find indexes to extract daily rides

def days_index(frame=None):
    """Map each day of the month to the index label of its first ride.

    Args:
        frame: Trip table with a 'starttime' column of strings shaped like
            'YYYY-MM-DD <time>'. Defaults to the notebook-level `df`.

    Returns:
        dict: {day of month (int): index label of the first row whose
        'starttime' falls on that day}, in order of first appearance.
    """
    if frame is None:
        frame = df
    if frame.empty:
        return {}
    # Same parsing as a per-row split (date token, then its third '-' field),
    # but applied to the whole column at once instead of via iterrows().
    day_of_month = (frame['starttime'].str.split().str[0]
                    .str.split('-').str[2].astype(int))
    # Keep only the first row of each day.
    first_rows = day_of_month[~day_of_month.duplicated()]
    return {int(day): index for index, day in first_rows.items()}

In [None]:
# Dictionary with days starting indexes
# NOTE: the code below is disabled by being wrapped in a string literal. It is
# the one-off step that calls days_index() and writes the pickle file that a
# later cell loads from the same path.
"""d = days_index()
with open('./data/variables/march_index.pickle', 'wb') as file:
    pickle.dump(d, file)"""


# MARCH #

In [None]:
# Load day indexes for March
# `d` maps day of month -> index of that day's first ride (see days_index()).
# If the cached pickle is missing, rebuild it instead of failing, so the
# notebook still runs top to bottom on a fresh checkout.
# NOTE: pickle.load can execute arbitrary code; only load a file you produced.
march_index_path = './data/variables/march_index.pickle'
try:
    with open(march_index_path, 'rb') as file:
        d = pickle.load(file)
except FileNotFoundError:
    d = days_index()
    try:
        with open(march_index_path, 'wb') as file:
            pickle.dump(d, file)
    except OSError as err:
        # Caching is best-effort (e.g. the target directory does not exist).
        print('Could not cache day indexes: {}'.format(err))

## 2nd-6th of March ##

In [None]:
# 2 -> Monday
# Weekdays of this week
days = [2, 3, 4, 5, 6]

# Find all connections with weights.
# Symmetric ride-count matrix: each ride adds 1 to [start, end] and 1 to
# [end, start], so a ride with start == end adds 2 to the diagonal.
adj_2_6_tot = np.zeros((n_tot_station, n_tot_station))
for day in days:
    # Rows of this day; relies on `d` holding positional row offsets and on
    # day + 1 also being a key of `d`.
    rides = df.iloc[d[day]:d[day + 1]]
    src = rides['start station id'].map(id_index).astype(np.intp).to_numpy()
    dst = rides['end station id'].map(id_index).astype(np.intp).to_numpy()
    # np.add.at accumulates repeated (row, col) pairs, which a plain
    # fancy-indexed `+=` would not; this replaces the per-row iterrows() loop.
    np.add.at(adj_2_6_tot, (src, dst), 1)
    np.add.at(adj_2_6_tot, (dst, src), 1)
    print('Day {} loaded...'.format(day))
    

In [None]:
# Every ride was added to the matrix twice (once per direction), so halve the sum.
print("Total number of rides : {}".format(np.sum(adj_2_6_tot) / 2))

In [None]:
# Binarise the ride counts: 1.0 wherever at least one ride links two stations.
adj_2_6 = (adj_2_6_tot > 0).astype(float)
print("Unweighted number of rides : {}".format(np.sum(adj_2_6) / 2))

# Remove Self-loops (done after the print, so the count above still includes them)
np.fill_diagonal(adj_2_6, 0)

In [None]:
# Create Undirected graph
# from_numpy_array replaces from_numpy_matrix, which networkx 3.0 removed.
g_2_6 = nx.from_numpy_array(adj_2_6)
# Degrees are taken before any node is removed from the graph, so there is
# one entry per station index.
degs_2_6 = np.array([degree for _, degree in g_2_6.degree])
_ = plt.hist(degs_2_6)

In [None]:
# Degree of every station, plotted against its dense station index.
plt.scatter(np.arange(n_tot_station), degs_2_6)

### Clustering ###

In [None]:
# Drop every node that belongs to a connected component of fewer than 3
# stations. All members of such a component are collected: taking a single
# member with set.pop() left the other station of a 2-node component behind.
isolated_nodes_2_6 = []
for component in nx.connected_components(g_2_6):
    if len(component) < 3:
        isolated_nodes_2_6.extend(sorted(component))

g_2_6.remove_nodes_from(isolated_nodes_2_6)

# Sanity check: nothing should be printed here.
for component in nx.connected_components(g_2_6):
    if len(component) < 3:
        print(component)

In [None]:
# Wrap the adjacency matrix of the pruned graph in a PyGSP graph.
G_2_6 = pygsp.graphs.Graph(nx.adjacency_matrix(g_2_6))

# Later cells read G_2_6.U, so both calls must have run before them.
G_2_6.compute_laplacian()
G_2_6.compute_fourier_basis()

In [None]:
# Spectral embedding: plot eigenvector 1 against eigenvector 2, one point per
# station. PyGSP stores the eigenvectors as the columns of U (the spectrum
# cell projects with U.T), so index columns rather than rows.
plt.scatter(G_2_6.U[:, 1], G_2_6.U[:, 2])

In [None]:
# Export the pruned 2-6 March graph as GEXF (assumes ./graphs exists — confirm).
nx.write_gexf(g_2_6, "./graphs/g_2_6.gexf")

In [None]:
# Number of stations left in the graph after removing small components.
g_2_6.number_of_nodes()

## 9th-13th of March ##

In [None]:
# 9 -> Monday
# Weekdays of this week
days = [9, 10, 11, 12, 13]

# Find all connections with weights.
# Symmetric ride-count matrix: each ride adds 1 to [start, end] and 1 to
# [end, start], so a ride with start == end adds 2 to the diagonal.
adj_9_13_tot = np.zeros((n_tot_station, n_tot_station))
for day in days:
    # Rows of this day; relies on `d` holding positional row offsets and on
    # day + 1 also being a key of `d`.
    rides = df.iloc[d[day]:d[day + 1]]
    src = rides['start station id'].map(id_index).astype(np.intp).to_numpy()
    dst = rides['end station id'].map(id_index).astype(np.intp).to_numpy()
    # np.add.at accumulates repeated (row, col) pairs, which a plain
    # fancy-indexed `+=` would not; this replaces the per-row iterrows() loop.
    np.add.at(adj_9_13_tot, (src, dst), 1)
    np.add.at(adj_9_13_tot, (dst, src), 1)
    print('Day {} loaded...'.format(day))
    

In [None]:
# Every ride was added to the matrix twice (once per direction), so halve the sum.
print("Total number of rides : {}".format(np.sum(adj_9_13_tot) / 2))

In [None]:
# Binarise the ride counts: 1.0 wherever at least one ride links two stations.
adj_9_13 = (adj_9_13_tot > 0).astype(float)
print("Unweighted number of rides : {}".format(np.sum(adj_9_13) / 2))

# Remove Self-loops (done after the print, so the count above still includes them)
np.fill_diagonal(adj_9_13, 0)

In [None]:
# Create Undirected graph
# from_numpy_array replaces from_numpy_matrix, which networkx 3.0 removed.
g_9_13 = nx.from_numpy_array(adj_9_13)
# Degrees are taken before any node is removed from the graph, so there is
# one entry per station index.
degs_9_13 = np.array([degree for _, degree in g_9_13.degree])
_ = plt.hist(degs_9_13)

In [None]:
# Degree of every station, plotted against its dense station index.
plt.scatter(np.arange(n_tot_station), degs_9_13)

### Clustering ###

In [None]:
# Drop every node that belongs to a connected component of fewer than 3
# stations. All members of such a component are collected: taking a single
# member with set.pop() left the other station of a 2-node component behind.
# NOTE: the list name `isolated_nodes` is shared with the 16-20 March cell,
# which overwrites it.
isolated_nodes = []
for component in nx.connected_components(g_9_13):
    if len(component) < 3:
        isolated_nodes.extend(sorted(component))

g_9_13.remove_nodes_from(isolated_nodes)

# Sanity check: nothing should be printed here.
for component in nx.connected_components(g_9_13):
    if len(component) < 3:
        print(component)

In [None]:
# Wrap the adjacency matrix of the pruned graph in a PyGSP graph.
G_9_13 = pygsp.graphs.Graph(nx.adjacency_matrix(g_9_13))

# The scatter cell that follows reads G_9_13.U, so both calls must run first.
G_9_13.compute_laplacian()
G_9_13.compute_fourier_basis()

In [None]:
# Spectral embedding: plot eigenvector 1 against eigenvector 2, one point per
# station. PyGSP stores the eigenvectors as the columns of U, so index
# columns rather than rows.
plt.scatter(G_9_13.U[:, 1], G_9_13.U[:, 2])

In [None]:
# Number of stations left in the graph after removing small components.
g_9_13.number_of_nodes()

## 16th-20th of March ##

In [None]:
# 16 -> Monday
# Weekdays of this week
days = [16, 17, 18, 19, 20]

# Find all connections with weights.
# Symmetric ride-count matrix: each ride adds 1 to [start, end] and 1 to
# [end, start], so a ride with start == end adds 2 to the diagonal.
adj_16_20_tot = np.zeros((n_tot_station, n_tot_station))
for day in days:
    # Rows of this day; relies on `d` holding positional row offsets and on
    # day + 1 also being a key of `d`.
    rides = df.iloc[d[day]:d[day + 1]]
    src = rides['start station id'].map(id_index).astype(np.intp).to_numpy()
    dst = rides['end station id'].map(id_index).astype(np.intp).to_numpy()
    # np.add.at accumulates repeated (row, col) pairs, which a plain
    # fancy-indexed `+=` would not; this replaces the per-row iterrows() loop.
    np.add.at(adj_16_20_tot, (src, dst), 1)
    np.add.at(adj_16_20_tot, (dst, src), 1)
    print('Day {} loaded...'.format(day))
    

In [None]:
# Every ride was added to the matrix twice (once per direction), so halve the sum.
print("Total number of rides : {}".format(np.sum(adj_16_20_tot) / 2))

In [None]:
# Binarise the ride counts: 1.0 wherever at least one ride links two stations.
adj_16_20 = (adj_16_20_tot > 0).astype(float)
print("Unweighted number of rides : {}".format(np.sum(adj_16_20) / 2))

# Remove Self-loops (done after the print, so the count above still includes them)
np.fill_diagonal(adj_16_20, 0)

In [None]:
# Create Undirected graph
# from_numpy_array replaces from_numpy_matrix, which networkx 3.0 removed.
g_16_20 = nx.from_numpy_array(adj_16_20)
# Degrees are taken before any node is removed from the graph, so there is
# one entry per station index.
degs_16_20 = np.array([degree for _, degree in g_16_20.degree])
_ = plt.hist(degs_16_20)

In [None]:
# Degree of every station, plotted against its dense station index.
plt.scatter(np.arange(n_tot_station), degs_16_20)

### Clustering ###

In [None]:
# Drop every node that belongs to a connected component of fewer than 3
# stations. All members of such a component are collected: taking a single
# member with set.pop() left the other station of a 2-node component behind.
# NOTE: the list name `isolated_nodes` is shared with the 9-13 March cell;
# this assignment overwrites that week's list.
isolated_nodes = []
for component in nx.connected_components(g_16_20):
    if len(component) < 3:
        isolated_nodes.extend(sorted(component))

g_16_20.remove_nodes_from(isolated_nodes)

# Sanity check: nothing should be printed here.
for component in nx.connected_components(g_16_20):
    if len(component) < 3:
        print(component)

In [None]:
# Wrap the adjacency matrix of the pruned graph in a PyGSP graph.
G_16_20 = pygsp.graphs.Graph(nx.adjacency_matrix(g_16_20))

# Both calls must run before G_16_20.U is read below.
G_16_20.compute_laplacian()
G_16_20.compute_fourier_basis()

# Spectral embedding: plot eigenvector 1 against eigenvector 2, one point per
# station. PyGSP stores the eigenvectors as the columns of U, so index
# columns rather than rows.
plt.scatter(G_16_20.U[:, 1], G_16_20.U[:, 2])

In [None]:
# Number of stations left in the graph after removing small components.
g_16_20.number_of_nodes()

## 23rd-27th of March ##

In [None]:
# 23 -> Monday
# Weekdays of this week
days = [23, 24, 25, 26, 27]

# Find all connections with weights.
# Symmetric ride-count matrix: each ride adds 1 to [start, end] and 1 to
# [end, start], so a ride with start == end adds 2 to the diagonal.
adj_23_27_tot = np.zeros((n_tot_station, n_tot_station))
for day in days:
    # Rows of this day; relies on `d` holding positional row offsets and on
    # day + 1 also being a key of `d`.
    rides = df.iloc[d[day]:d[day + 1]]
    src = rides['start station id'].map(id_index).astype(np.intp).to_numpy()
    dst = rides['end station id'].map(id_index).astype(np.intp).to_numpy()
    # np.add.at accumulates repeated (row, col) pairs, which a plain
    # fancy-indexed `+=` would not; this replaces the per-row iterrows() loop.
    np.add.at(adj_23_27_tot, (src, dst), 1)
    np.add.at(adj_23_27_tot, (dst, src), 1)
    print('Day {} loaded...'.format(day))
    

In [None]:
# Every ride was added to the matrix twice (once per direction), so halve the sum.
print("Total number of rides : {}".format(np.sum(adj_23_27_tot) / 2))

In [None]:
# Binarise the ride counts: 1.0 wherever at least one ride links two stations.
adj_23_27 = (adj_23_27_tot > 0).astype(float)
print("Unweighted number of rides : {}".format(np.sum(adj_23_27) / 2))

# Remove Self-loops (done after the print, so the count above still includes them)
np.fill_diagonal(adj_23_27, 0)

In [None]:
# Create Undirected graph
# from_numpy_array replaces from_numpy_matrix, which networkx 3.0 removed.
g_23_27 = nx.from_numpy_array(adj_23_27)
# Degrees are taken before any node is removed from the graph, so there is
# one entry per station index.
degs_23_27 = np.array([degree for _, degree in g_23_27.degree])
_ = plt.hist(degs_23_27)

In [None]:
# Degree of every station, plotted against its dense station index.
plt.scatter(np.arange(n_tot_station), degs_23_27)

### Clustering ###

In [None]:
# Drop every node that belongs to a connected component of fewer than 3
# stations. All members of such a component are collected: taking a single
# member with set.pop() left the other station of a 2-node component behind.
isolated_nodes_23_27 = []
for component in nx.connected_components(g_23_27):
    if len(component) < 3:
        isolated_nodes_23_27.extend(sorted(component))

g_23_27.remove_nodes_from(isolated_nodes_23_27)

# Sanity check: nothing should be printed here.
for component in nx.connected_components(g_23_27):
    if len(component) < 3:
        print(component)

In [None]:
# Wrap the adjacency matrix of the pruned graph in a PyGSP graph.
G_23_27 = pygsp.graphs.Graph(nx.adjacency_matrix(g_23_27))

# Both calls must run before G_23_27.U is read below and in the spectrum cell.
G_23_27.compute_laplacian()
G_23_27.compute_fourier_basis()

# Spectral embedding: plot eigenvector 1 against eigenvector 2, one point per
# station. PyGSP stores the eigenvectors as the columns of U, so index
# columns rather than rows.
plt.scatter(G_23_27.U[:, 1], G_23_27.U[:, 2])

In [None]:
# Number of stations left in the graph after removing small components.
g_23_27.number_of_nodes()

## Deg-Sorted Nodes over time ##

In [None]:
# Station order by ascending degree in the 2-6 March graph. The same order is
# reused for every week below so the x axes are comparable.
sort_deg = np.argsort(degs_2_6)

In [None]:
# 2-6 March degrees in the reference order (non-decreasing, since the order comes from this week).
plt.scatter(np.arange(n_tot_station), degs_2_6[sort_deg])

In [None]:
# 9-13 March degrees, with stations in the 2-6 March degree order.
plt.scatter(np.arange(n_tot_station), degs_9_13[sort_deg])

In [None]:
# 16-20 March degrees, with stations in the 2-6 March degree order.
plt.scatter(np.arange(n_tot_station), degs_16_20[sort_deg])

In [None]:
# 23-27 March degrees, with stations in the 2-6 March degree order.
plt.scatter(np.arange(n_tot_station), degs_23_27[sort_deg])

# Function analysis #

In [None]:
# Restrict the ride-count matrix to the stations kept in g_2_6 / G_2_6.
# The matrix is overwritten in place, so the deletion is guarded: re-running
# this cell must not remove rows and columns a second time.
if adj_2_6_tot.shape[0] == n_tot_station:
    adj_2_6_tot = np.delete(adj_2_6_tot, isolated_nodes_2_6, 0)
    adj_2_6_tot = np.delete(adj_2_6_tot, isolated_nodes_2_6, 1)

print(adj_2_6_tot.shape)
assert adj_2_6_tot.shape[0] == G_2_6.U.shape[0], "matrix and graph disagree on node count"

# Compute number of accesses per station: column sums of the symmetric
# ride-count matrix.
f_2_6 = np.sum(adj_2_6_tot, axis=0)

# Project the per-station signal onto the Fourier basis (columns of U).
f_2_6_spect = np.dot(G_2_6.U.T, f_2_6)

In [None]:
# Magnitude of each spectral coefficient of the 2-6 March signal.
plt.plot(np.abs(f_2_6_spect))

In [None]:
# Restrict the ride-count matrix to the stations kept in g_23_27 / G_23_27.
# The matrix is overwritten in place, so the deletion is guarded: re-running
# this cell must not remove rows and columns a second time.
if adj_23_27_tot.shape[0] == n_tot_station:
    adj_23_27_tot = np.delete(adj_23_27_tot, isolated_nodes_23_27, 0)
    adj_23_27_tot = np.delete(adj_23_27_tot, isolated_nodes_23_27, 1)

print(adj_23_27_tot.shape)
assert adj_23_27_tot.shape[0] == G_23_27.U.shape[0], "matrix and graph disagree on node count"

# Compute number of accesses per station: column sums of the symmetric
# ride-count matrix.
f_23_27 = np.sum(adj_23_27_tot, axis=0)

# Project the per-station signal onto the Fourier basis (columns of U).
f_23_27_spect = np.dot(G_23_27.U.T, f_23_27)

In [None]:
# Magnitude of each spectral coefficient of the 23-27 March signal.
plt.plot(np.abs(f_23_27_spect))