In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import pickle

In [None]:
# Read the March 2020 trip records and count the distinct start / end station ids.
df = pd.read_csv('./data/202003-citibike-tripdata.csv')
n_start_station = df['start station id'].unique().size
n_end_station = df['end station id'].unique().size


In [None]:
# Every station id that appears either as a trip start or as a trip end.
total_station_id = set(df['start station id']) | set(df['end station id'])
n_tot_station = len(total_station_id)

In [None]:
# Map each station id (in ascending order) to its row/column position in the adjacency matrices.
id_index = {
    station: position
    for station, position in zip(sorted(total_station_id), np.arange(n_tot_station))
}

In [None]:
# Preview the first rows of the trip table loaded from the CSV.
df.head()

# Location Parser #

In [None]:
# Select columns by position and display the resulting sub-table.
# NOTE(review): presumably the station id / latitude / longitude columns -
# confirm against df.columns, since positional picks break if the CSV layout changes.
useful = [3, 5, 6, 7, 9, 10]
df.iloc[:,useful]

# Parser #

In [None]:
# Select columns by position (in the listed order) and display the resulting sub-table.
# NOTE(review): presumably start time, duration, station ids and rider attributes -
# confirm against df.columns. This rebinds `useful` from the previous cell.
useful = [1, 0, 3, 7, 11, 13, 14]
df.iloc[:,useful]

# Util Functions #

In [None]:
# Find indexes to extract daily rides

def days_index(frame=None):
    """Map each day of the month to the index label of its first ride.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Trip table with a 'starttime' column holding strings shaped like
        'YYYY-MM-DD <time>'. Defaults to the notebook-level ``df``.

    Returns
    -------
    dict
        ``{day: index label of the first row whose starttime falls on that day}``,
        with keys in order of first appearance. Empty for an empty frame.
    """
    if frame is None:
        frame = df
    if frame.empty:
        return {}
    # Vectorized version of row['starttime'].split()[0].split('-')[2]:
    # keep the date part, then take its third '-'-separated field (the day).
    day_of_month = (
        frame['starttime']
        .str.split().str[0]
        .str.split('-').str[2]
        .astype(int)
    )
    # The first occurrence of each day marks where that day's rides begin.
    first_rows = day_of_month.drop_duplicates(keep='first')
    return {int(day): label for label, day in first_rows.items()}

In [None]:
# Dictionary with days starting indexes
# One-off generation of the day -> first-row index dictionary and its pickle file.
# The code is disabled by wrapping it in a string literal; remove the triple
# quotes and run it once to (re)create ./data/variables/march_index.pickle.
"""d = days_index()
with open('./data/variables/march_index.pickle', 'wb') as file:
    pickle.dump(d, file)"""


# MARCH #

In [None]:
# Load day indexes for March (day of month -> first row of that day in df).
# If the cache file is missing, rebuild the mapping from df and try to save it,
# so the notebook still runs from a fresh kernel without the pickle present.
# NOTE: pickle.load can execute arbitrary code - only load a file you created yourself.
try:
    with open('./data/variables/march_index.pickle', 'rb') as file:
        d = pickle.load(file)
except FileNotFoundError:
    d = days_index()
    try:
        with open('./data/variables/march_index.pickle', 'wb') as file:
            pickle.dump(d, file)
    except OSError as err:
        # e.g. the target directory does not exist; the in-memory mapping is still usable.
        print('Could not cache day indexes: {}'.format(err))

## 2nd-6th of March ##

In [None]:
# 2 -> Monday
# Days of the month included in this week's graph
days = [2, 3, 4, 5, 6]

# Symmetric station-by-station matrix of ride counts for the selected days.
# Each ride adds 1 to [start, end] and 1 to [end, start], so a ride that
# starts and ends at the same station adds 2 to its diagonal entry.
adj_2_6 = np.zeros((n_tot_station, n_tot_station))
for day in days:
    # Rows of one day: from its first row up to the first row of the next day.
    rides = df.iloc[d[day]:d[day + 1]]
    src = rides['start station id'].map(id_index).values
    dst = rides['end station id'].map(id_index).values
    # np.add.at accumulates repeated (row, col) pairs, which adj[src, dst] += 1 would not.
    np.add.at(adj_2_6, (src, dst), 1)
    np.add.at(adj_2_6, (dst, src), 1)
    print('Day {} loaded...'.format(day))
    

In [None]:
# Every ride was added to the matrix twice (once per direction), so halve the grand total.
print("Total number of rides : {}".format(adj_2_6.sum() / 2))

In [None]:
# Collapse ride counts to a 0/1 adjacency matrix (in place: the counts are lost after this cell).
adj_2_6[adj_2_6 > 0] = 1
# Count every undirected connection once by looking only at the upper triangle
# (diagonal included). Halving the full sum would count each self-loop as 0.5.
print("Unweighted number of rides : {}".format(np.count_nonzero(np.triu(adj_2_6))))

In [None]:
# Create Undirected graph
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its replacement.
g_2_6 = nx.from_numpy_array(adj_2_6)
# Degree of each node, in node order (node k corresponds to row/column k of the matrix).
degs_2_6 = np.array([deg for _, deg in g_2_6.degree])
_ = plt.hist(degs_2_6)
plt.xlabel('Degree')
plt.ylabel('Number of stations')
plt.title('Degree distribution, 2-6 March')
plt.show()

In [None]:
# Degree of every station, plotted against its matrix index.
plt.scatter(np.arange(n_tot_station), degs_2_6)
plt.xlabel('Station index')
plt.ylabel('Degree')
plt.title('Station degree, 2-6 March')
plt.show()

## 9th-13th of March ##

In [None]:
# 9 -> Monday
# Days of the month included in this week's graph
days = [9, 10, 11, 12, 13]

# Symmetric station-by-station matrix of ride counts for the selected days.
# Each ride adds 1 to [start, end] and 1 to [end, start], so a ride that
# starts and ends at the same station adds 2 to its diagonal entry.
adj_9_13 = np.zeros((n_tot_station, n_tot_station))
for day in days:
    # Rows of one day: from its first row up to the first row of the next day.
    rides = df.iloc[d[day]:d[day + 1]]
    src = rides['start station id'].map(id_index).values
    dst = rides['end station id'].map(id_index).values
    # np.add.at accumulates repeated (row, col) pairs, which adj[src, dst] += 1 would not.
    np.add.at(adj_9_13, (src, dst), 1)
    np.add.at(adj_9_13, (dst, src), 1)
    print('Day {} loaded...'.format(day))
    

In [None]:
# Every ride was added to the matrix twice (once per direction), so halve the grand total.
print("Total number of rides : {}".format(adj_9_13.sum() / 2))

In [None]:
# Collapse ride counts to a 0/1 adjacency matrix (in place: the counts are lost after this cell).
adj_9_13[adj_9_13 > 0] = 1
# Count every undirected connection once by looking only at the upper triangle
# (diagonal included). Halving the full sum would count each self-loop as 0.5.
print("Unweighted number of rides : {}".format(np.count_nonzero(np.triu(adj_9_13))))

In [None]:
# Create Undirected graph
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its replacement.
g_9_13 = nx.from_numpy_array(adj_9_13)
# Degree of each node, in node order (node k corresponds to row/column k of the matrix).
degs_9_13 = np.array([deg for _, deg in g_9_13.degree])
_ = plt.hist(degs_9_13)
plt.xlabel('Degree')
plt.ylabel('Number of stations')
plt.title('Degree distribution, 9-13 March')
plt.show()

In [None]:
# Degree of every station, plotted against its matrix index.
plt.scatter(np.arange(n_tot_station), degs_9_13)
plt.xlabel('Station index')
plt.ylabel('Degree')
plt.title('Station degree, 9-13 March')
plt.show()

## 16th-20th of March ##

In [None]:
# 16 -> Monday
# Days of the month included in this week's graph
days = [16, 17, 18, 19, 20]

# Symmetric station-by-station matrix of ride counts for the selected days.
# Each ride adds 1 to [start, end] and 1 to [end, start], so a ride that
# starts and ends at the same station adds 2 to its diagonal entry.
adj_16_20 = np.zeros((n_tot_station, n_tot_station))
for day in days:
    # Rows of one day: from its first row up to the first row of the next day.
    rides = df.iloc[d[day]:d[day + 1]]
    src = rides['start station id'].map(id_index).values
    dst = rides['end station id'].map(id_index).values
    # np.add.at accumulates repeated (row, col) pairs, which adj[src, dst] += 1 would not.
    np.add.at(adj_16_20, (src, dst), 1)
    np.add.at(adj_16_20, (dst, src), 1)
    print('Day {} loaded...'.format(day))
    

In [None]:
# Every ride was added to the matrix twice (once per direction), so halve the grand total.
print("Total number of rides : {}".format(adj_16_20.sum() / 2))

In [None]:
# Collapse ride counts to a 0/1 adjacency matrix (in place: the counts are lost after this cell).
adj_16_20[adj_16_20 > 0] = 1
# Count every undirected connection once by looking only at the upper triangle
# (diagonal included). Halving the full sum would count each self-loop as 0.5.
print("Unweighted number of rides : {}".format(np.count_nonzero(np.triu(adj_16_20))))

In [None]:
# Create Undirected graph
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its replacement.
g_16_20 = nx.from_numpy_array(adj_16_20)
# Degree of each node, in node order (node k corresponds to row/column k of the matrix).
degs_16_20 = np.array([deg for _, deg in g_16_20.degree])
_ = plt.hist(degs_16_20)
plt.xlabel('Degree')
plt.ylabel('Number of stations')
plt.title('Degree distribution, 16-20 March')
plt.show()

In [None]:
# Degree of every station, plotted against its matrix index.
plt.scatter(np.arange(n_tot_station), degs_16_20)
plt.xlabel('Station index')
plt.ylabel('Degree')
plt.title('Station degree, 16-20 March')
plt.show()

## 23rd-27th of March ##

In [None]:
# 23 -> Monday
# Days of the month included in this week's graph
days = [23, 24, 25, 26, 27]

# Symmetric station-by-station matrix of ride counts for the selected days.
# Each ride adds 1 to [start, end] and 1 to [end, start], so a ride that
# starts and ends at the same station adds 2 to its diagonal entry.
adj_23_27 = np.zeros((n_tot_station, n_tot_station))
for day in days:
    # Rows of one day: from its first row up to the first row of the next day.
    rides = df.iloc[d[day]:d[day + 1]]
    src = rides['start station id'].map(id_index).values
    dst = rides['end station id'].map(id_index).values
    # np.add.at accumulates repeated (row, col) pairs, which adj[src, dst] += 1 would not.
    np.add.at(adj_23_27, (src, dst), 1)
    np.add.at(adj_23_27, (dst, src), 1)
    print('Day {} loaded...'.format(day))
    

In [None]:
# Every ride was added to the matrix twice (once per direction), so halve the grand total.
print("Total number of rides : {}".format(adj_23_27.sum() / 2))

In [None]:
# Collapse ride counts to a 0/1 adjacency matrix (in place: the counts are lost after this cell).
adj_23_27[adj_23_27 > 0] = 1
# Count every undirected connection once by looking only at the upper triangle
# (diagonal included). Halving the full sum would count each self-loop as 0.5.
print("Unweighted number of rides : {}".format(np.count_nonzero(np.triu(adj_23_27))))

In [None]:
# Create Undirected graph
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its replacement.
g_23_27 = nx.from_numpy_array(adj_23_27)
# Degree of each node, in node order (node k corresponds to row/column k of the matrix).
degs_23_27 = np.array([deg for _, deg in g_23_27.degree])
_ = plt.hist(degs_23_27)
plt.xlabel('Degree')
plt.ylabel('Number of stations')
plt.title('Degree distribution, 23-27 March')
plt.show()

In [None]:
# Degree of every station, plotted against its matrix index.
plt.scatter(np.arange(n_tot_station), degs_23_27)
plt.xlabel('Station index')
plt.ylabel('Degree')
plt.title('Station degree, 23-27 March')
plt.show()

## Sorted ##

In [None]:
# Station ordering by ascending degree in the 2-6 March graph.
sort_deg = degs_2_6.argsort()

In [None]:
# Degrees of 2-6 March, with stations ordered by their 2-6 March degree (sort_deg).
plt.scatter(np.arange(n_tot_station), degs_2_6[sort_deg])
plt.xlabel('Stations ordered by degree in 2-6 March')
plt.ylabel('Degree')
plt.title('Station degree, 2-6 March')
plt.show()

In [None]:
# Degrees of 9-13 March, with stations kept in the 2-6 March degree order (sort_deg).
plt.scatter(np.arange(n_tot_station), degs_9_13[sort_deg])
plt.xlabel('Stations ordered by degree in 2-6 March')
plt.ylabel('Degree')
plt.title('Station degree, 9-13 March')
plt.show()

In [None]:
# Degrees of 16-20 March, with stations kept in the 2-6 March degree order (sort_deg).
plt.scatter(np.arange(n_tot_station), degs_16_20[sort_deg])
plt.xlabel('Stations ordered by degree in 2-6 March')
plt.ylabel('Degree')
plt.title('Station degree, 16-20 March')
plt.show()

In [None]:
# Degrees of 23-27 March, with stations kept in the 2-6 March degree order (sort_deg).
plt.scatter(np.arange(n_tot_station), degs_23_27[sort_deg])
plt.xlabel('Stations ordered by degree in 2-6 March')
plt.ylabel('Degree')
plt.title('Station degree, 23-27 March')
plt.show()

In [None]:
# Look up the matrix row/column position assigned to station id 3919.
id_index[3919]

In [None]:
# Show every ride that started at station id 3919.
df.loc[df['start station id'] == 3919]

In [None]:
# Degree of the node at matrix index 898 in the 2-6 March graph.
# NOTE(review): 898 is presumably the value of id_index[3919] - confirm before relying on it.
degs_2_6[898]

In [None]:
# Degree of the node at matrix index 898 in the 9-13 March graph.
# NOTE(review): 898 is presumably the value of id_index[3919] - confirm before relying on it.
degs_9_13[898]

In [None]:
# Degree of the node at matrix index 898 in the 16-20 March graph.
# NOTE(review): 898 is presumably the value of id_index[3919] - confirm before relying on it.
degs_16_20[898]

In [None]:
# Degree of the node at matrix index 898 in the 23-27 March graph.
# NOTE(review): 898 is presumably the value of id_index[3919] - confirm before relying on it.
degs_23_27[898]