# Analyzing London’s Underground Network

In [143]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
import seaborn as sns
from math import sqrt
import scipy.stats

## Part 1: London’s underground resilience

In [117]:
# Read in the edgelist file from csv
file_network = pd.read_csv("london_flows.csv")

# Print data (first five rows)
file_network.head()

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097
1,Abbey Road,Beckton,1,599,442,8510.121774
2,Abbey Road,Blackwall,3,599,665,3775.448872
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422
4,Abbey Road,Canning Town,37,599,15428,2228.923167


In [118]:
# Remove the station named Battersea Park
file_network = file_network[~file_network['station_origin'].str.contains('Battersea Park')]
file_network = file_network[~file_network['station_destination'].str.contains('Battersea Park')]

In [112]:
check_remove = file_network.apply(lambda row: 'Battersea Park' in row.values, axis=1).any()
print("Does the DataFrame contain 'Battersea Park'?", check_remove)

Does the DataFrame contain 'Battersea Park'? False


### 1 Topological network


#### 1.1  Centrality measures

In [20]:
G_origin = nx.from_pandas_edgelist(file_network, source='station_origin', target='station_destination')

In [21]:
print("node：", G_origin.nodes())
print("edge：", G_origin.edges())

node： ['Abbey Road', 'Bank and Monument', 'Beckton', 'Blackwall', 'Canary Wharf', 'Canning Town', 'Crossharbour', 'Custom House', 'Cutty Sark', 'Cyprus', 'Devons Road', 'East India', 'Island Gardens', 'King George V', 'Langdon Park', 'Lewisham', 'Limehouse', 'London City Airport', 'Pontoon Dock', 'Poplar', 'Prince Regent', 'Royal Albert', 'Shadwell', 'South Quay', 'Star Lane', 'Stratford', 'Stratford High Street', 'Stratford International', 'West Ham', 'West Silvertown', 'Westferry', 'Woolwich Arsenal', 'Acton Central', 'Barking', 'Blackhorse Road', 'Brondesbury', 'Brondesbury Park', 'Bushey', 'Caledonian Road & Barnsbury', 'Camden Road', 'Canada Water', 'Canonbury', 'Carpenders Park', 'Crouch Hill', 'Dalston Kingsland', 'Denmark Hill', 'Edmonton Green', 'Euston', 'Finchley Road & Frognal', 'Gospel Oak', 'Gunnersbury', 'Hackney Central', 'Hackney Wick', 'Haggerston', 'Hampstead Heath', 'Harlesden', 'Harringay Green Lanes', 'Harrow & Wealdstone', 'Hatch End', 'Headstone Lane', 'Highams 

In [22]:
degree_centrality = nx.degree_centrality(G_origin)
betweenness_centrality = nx.betweenness_centrality(G_origin)
closeness_centrality = nx.closeness_centrality(G_origin)

In [23]:
top_10_degree = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
top_10_betweenness = sorted(betweenness_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
top_10_closeness = sorted(closeness_centrality.items(), key=lambda x: x[1], reverse=True)[:10]

In [24]:
result_top10 = pd.DataFrame({
    'Degree': [node[0] for node in top_10_degree],
    'Betweenness': [node[0] for node in top_10_betweenness],
    'Closeness': [node[0] for node in top_10_closeness]})
result_top10

Unnamed: 0,Degree,Betweenness,Closeness
0,Stratford,Stratford,Stratford
1,Highbury & Islington,Liverpool Street,Highbury & Islington
2,Whitechapel,Bank and Monument,Whitechapel
3,West Brompton,Canary Wharf,West Brompton
4,Canary Wharf,Canning Town,Canada Water
5,Canada Water,West Ham,Bank and Monument
6,Liverpool Street,Highbury & Islington,Canary Wharf
7,Bank and Monument,Whitechapel,Richmond
8,Richmond,Shadwell,Canning Town
9,Canning Town,Canada Water,Liverpool Street


#### 1.3  Node removal

*A: Non-sequential removal*

Connectivity Analysis

In [68]:
# degree centrality

connect_degree = []
G_copy = G_origin.copy()

for i in range(10):
    node_to_remove = top_10_degree[i][0] 
    G_copy.remove_node(node_to_remove)

    is_connected = nx.is_connected(G_copy)
    num_components = nx.number_connected_components(G_copy)

    connect_degree.append({
        'removed_node': node_to_remove,
        'is_connected': is_connected,
        'num_components': num_components})

connect_degree = pd.DataFrame(connect_degree)
connect_degree

Unnamed: 0,removed_node,is_connected,num_components
0,Stratford,True,1
1,Highbury & Islington,True,1
2,Whitechapel,True,1
3,West Brompton,True,1
4,Canary Wharf,True,1
5,Canada Water,True,1
6,Liverpool Street,True,1
7,Bank and Monument,True,1
8,Richmond,True,1
9,Canning Town,True,1


In [69]:
# betweenness centrality

connect_betweenness = []
G_copy = G_origin.copy()

for i in range(10):
    node_to_remove = top_10_betweenness[i][0] 
    G_copy.remove_node(node_to_remove)

    is_connected = nx.is_connected(G_copy)
    num_components = nx.number_connected_components(G_copy)

    connect_betweenness.append({
        'removed_node': node_to_remove,
        'is_connected': is_connected,
        'num_components': num_components})

connect_betweenness = pd.DataFrame(connect_betweenness)
connect_betweenness

Unnamed: 0,removed_node,is_connected,num_components
0,Stratford,True,1
1,Liverpool Street,True,1
2,Bank and Monument,True,1
3,Canary Wharf,True,1
4,Canning Town,True,1
5,West Ham,True,1
6,Highbury & Islington,True,1
7,Whitechapel,True,1
8,Shadwell,False,2
9,Canada Water,False,2


In [70]:
# closeness centrality

connect_closeness = []
G_copy = G_origin.copy()

for i in range(10):
    node_to_remove = top_10_closeness[i][0] 
    G_copy.remove_node(node_to_remove)

    is_connected = nx.is_connected(G_copy)
    num_components = nx.number_connected_components(G_copy)

    connect_closeness.append({
        'removed_node': node_to_remove,
        'is_connected': is_connected,
        'num_components': num_components})

connect_closeness = pd.DataFrame(connect_closeness)
connect_closeness

Unnamed: 0,removed_node,is_connected,num_components
0,Stratford,True,1
1,Highbury & Islington,True,1
2,Whitechapel,True,1
3,West Brompton,True,1
4,Canada Water,True,1
5,Bank and Monument,True,1
6,Canary Wharf,True,1
7,Richmond,True,1
8,Canning Town,True,1
9,Liverpool Street,True,1


Network Efficiency

In [94]:
def global_efficiency(G):
    paths = dict(nx.all_pairs_shortest_path_length(G))
    n = len(G)
    efficiency = 0
    for source in paths:
        for target, length in paths[source].items():
            if source != target:
                efficiency += 1 / length
    return efficiency / (n * (n - 1))

initial_efficiency = global_efficiency(G_origin)
print(initial_efficiency)

0.6985958339134433


In [71]:
# degree centrality

efficiency_degree = []
G_copy = G_origin.copy()

for i in range(10):
    node_to_remove = top_10_degree[i][0]
    G_copy.remove_node(node_to_remove)

    new_efficiency = global_efficiency(G_copy)
    efficiency_drop = initial_efficiency - new_efficiency

    efficiency_degree.append({
        'removed_node': node_to_remove,
        'new_efficiency': new_efficiency,
        'efficiency_drop': efficiency_drop})

efficiency_degree = pd.DataFrame(efficiency_degree)
efficiency_degree

Unnamed: 0,removed_node,new_efficiency,efficiency_drop
0,Stratford,0.693079,0.005517
1,Highbury & Islington,0.691996,0.0066
2,Whitechapel,0.690991,0.007604
3,West Brompton,0.689987,0.008609
4,Canary Wharf,0.689003,0.009593
5,Canada Water,0.686576,0.01202
6,Liverpool Street,0.67598,0.022616
7,Bank and Monument,0.674859,0.023737
8,Richmond,0.673782,0.024814
9,Canning Town,0.670714,0.027882


In [72]:
# betweenness centrality

efficiency_betweenness = []
G_copy = G_origin.copy()

for i in range(10):
    node_to_remove = top_10_betweenness[i][0]
    G_copy.remove_node(node_to_remove)

    new_efficiency = global_efficiency(G_copy)
    efficiency_drop = initial_efficiency - new_efficiency

    efficiency_betweenness.append({
        'removed_node': node_to_remove,
        'new_efficiency': new_efficiency,
        'efficiency_drop': efficiency_drop})

efficiency_betweenness = pd.DataFrame(efficiency_betweenness)
efficiency_betweenness

Unnamed: 0,removed_node,new_efficiency,efficiency_drop
0,Stratford,0.693079,0.005517
1,Liverpool Street,0.68266,0.015936
2,Bank and Monument,0.681664,0.016932
3,Canary Wharf,0.680578,0.018018
4,Canning Town,0.677601,0.020995
5,West Ham,0.65721,0.041386
6,Highbury & Islington,0.655972,0.042624
7,Whitechapel,0.654798,0.043798
8,Shadwell,0.591431,0.107165
9,Canada Water,0.588805,0.109791


In [73]:
# closeness centrality

efficiency_closeness = []
G_copy = G_origin.copy()

for i in range(10):
    node_to_remove = top_10_closeness[i][0]
    G_copy.remove_node(node_to_remove)

    new_efficiency = global_efficiency(G_copy)
    efficiency_drop = initial_efficiency - new_efficiency

    efficiency_closeness.append({
        'removed_node': node_to_remove,
        'new_efficiency': new_efficiency,
        'efficiency_drop': efficiency_drop})

efficiency_closeness = pd.DataFrame(efficiency_closeness)
efficiency_closeness

Unnamed: 0,removed_node,new_efficiency,efficiency_drop
0,Stratford,0.693079,0.005517
1,Highbury & Islington,0.691996,0.0066
2,Whitechapel,0.690991,0.007604
3,West Brompton,0.689987,0.008609
4,Canada Water,0.687574,0.011022
5,Bank and Monument,0.686576,0.01202
6,Canary Wharf,0.685486,0.013109
7,Richmond,0.684445,0.014151
8,Canning Town,0.681405,0.017191
9,Liverpool Street,0.670714,0.027882


*B: Sequential*

Connectivity Analysis

In [44]:
# degree centrality

G = G_origin.copy()
connect_degree = []

for i in range(10):  
    degree_centrality = nx.degree_centrality(G)
    node_to_remove = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[0][0]
    
    G_copy = G.copy()
    G_copy.remove_node(node_to_remove)

    is_connected = nx.is_connected(G_copy)
    num_components = nx.number_connected_components(G_copy)
    
    connect_degree.append({
        'removed_node': node_to_remove,
        'is_connected': is_connected,
        'num_components': num_components})

    G = G_copy

connect_degree = pd.DataFrame(connect_degree)
connect_degree

Unnamed: 0,removed_node,is_connected,num_components
0,Stratford,True,1
1,Highbury & Islington,True,1
2,Whitechapel,True,1
3,West Brompton,True,1
4,Canary Wharf,True,1
5,Canada Water,True,1
6,Liverpool Street,True,1
7,Bank and Monument,True,1
8,Richmond,True,1
9,Canning Town,True,1


In [45]:
# betweenness centrality

G = G_origin.copy()
connect_betweenness = []

for i in range(10):  
    betweenness_centrality = nx.betweenness_centrality(G)
    node_to_remove = sorted(betweenness_centrality.items(), key=lambda x: x[1], reverse=True)[0][0]
    
    G_copy = G.copy()
    G_copy.remove_node(node_to_remove)

    is_connected = nx.is_connected(G_copy)
    num_components = nx.number_connected_components(G_copy)
    
    connect_betweenness.append({
        'removed_node': node_to_remove,
        'is_connected': is_connected,
        'num_components': num_components})

    G = G_copy

connect_betweenness = pd.DataFrame(connect_betweenness)
connect_betweenness

Unnamed: 0,removed_node,is_connected,num_components
0,Stratford,True,1
1,Liverpool Street,True,1
2,Upminster,False,2
3,Bank and Monument,False,2
4,Canary Wharf,False,2
5,Canning Town,False,2
6,West Ham,False,2
7,Shadwell,False,3
8,Highbury & Islington,False,3
9,Whitechapel,False,3


In [46]:
# closeness centrality

G = G_origin.copy()
connect_closeness = []

for i in range(10):  
    closeness_centrality = nx.closeness_centrality(G)
    node_to_remove = sorted(closeness_centrality.items(), key=lambda x: x[1], reverse=True)[0][0]
    
    G_copy = G.copy()
    G_copy.remove_node(node_to_remove)

    is_connected = nx.is_connected(G_copy)
    num_components = nx.number_connected_components(G_copy)
    
    connect_closeness.append({
        'removed_node': node_to_remove,
        'is_connected': is_connected,
        'num_components': num_components})

    G = G_copy

connect_closeness = pd.DataFrame(connect_closeness)
connect_closeness

Unnamed: 0,removed_node,is_connected,num_components
0,Stratford,True,1
1,Highbury & Islington,True,1
2,Whitechapel,True,1
3,West Brompton,True,1
4,Canada Water,True,1
5,Bank and Monument,True,1
6,Canary Wharf,True,1
7,Richmond,True,1
8,Canning Town,True,1
9,Liverpool Street,True,1


Network Efficiency

In [47]:
# degree centrality

G = G_origin.copy()
efficiency_degree = []

for i in range(10): 
    
    degree_centrality = nx.degree_centrality(G)
    node_to_remove = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[0][0]

    G_copy = G.copy()
    G_copy.remove_node(node_to_remove)

    new_efficiency = global_efficiency(G_copy)
    efficiency_drop = initial_efficiency - new_efficiency 

    efficiency_degree.append({
        'removed_node': node_to_remove,
        'new_efficiency': new_efficiency,
        'efficiency_drop': efficiency_drop})

    G = G_copy 

efficiency_degree = pd.DataFrame(efficiency_degree)
efficiency_degree

Unnamed: 0,removed_node,new_efficiency,efficiency_drop
0,Stratford,0.693079,0.005517
1,Highbury & Islington,0.691996,0.0066
2,Whitechapel,0.690991,0.007604
3,West Brompton,0.689987,0.008609
4,Canary Wharf,0.689003,0.009593
5,Canada Water,0.686576,0.01202
6,Liverpool Street,0.67598,0.022616
7,Bank and Monument,0.674859,0.023737
8,Richmond,0.673782,0.024814
9,Canning Town,0.670714,0.027882


In [48]:
# betweenness centrality

G = G_origin.copy()
efficiency_betweenness = []

for i in range(10): 
    
    betweenness_centrality = nx.betweenness_centrality(G)
    node_to_remove = sorted(betweenness_centrality.items(), key=lambda x: x[1], reverse=True)[0][0]

    G_copy = G.copy()
    G_copy.remove_node(node_to_remove)

    new_efficiency = global_efficiency(G_copy)
    efficiency_drop = initial_efficiency - new_efficiency 

    efficiency_betweenness.append({
        'removed_node': node_to_remove,
        'new_efficiency': new_efficiency,
        'efficiency_drop': efficiency_drop})

    G = G_copy 

efficiency_betweenness = pd.DataFrame(efficiency_betweenness)
efficiency_betweenness

Unnamed: 0,removed_node,new_efficiency,efficiency_drop
0,Stratford,0.693079,0.005517
1,Liverpool Street,0.68266,0.015936
2,Upminster,0.662843,0.035752
3,Bank and Monument,0.661806,0.03679
4,Canary Wharf,0.660678,0.037918
5,Canning Town,0.657672,0.040924
6,West Ham,0.637569,0.061027
7,Shadwell,0.575517,0.123079
8,Highbury & Islington,0.57415,0.124445
9,Whitechapel,0.572873,0.125723


In [49]:
# closeness centrality

G = G_origin.copy()
efficiency_closeness = []

for i in range(10): 
    
    closeness_centrality = nx.closeness_centrality(G)
    node_to_remove = sorted(closeness_centrality.items(), key=lambda x: x[1], reverse=True)[0][0]

    G_copy = G.copy()
    G_copy.remove_node(node_to_remove)

    new_efficiency = global_efficiency(G_copy)
    efficiency_drop = initial_efficiency - new_efficiency 

    efficiency_closeness.append({
        'removed_node': node_to_remove,
        'new_efficiency': new_efficiency,
        'efficiency_drop': efficiency_drop})

    G = G_copy 

efficiency_closeness = pd.DataFrame(efficiency_closeness)
efficiency_closeness

Unnamed: 0,removed_node,new_efficiency,efficiency_drop
0,Stratford,0.693079,0.005517
1,Highbury & Islington,0.691996,0.0066
2,Whitechapel,0.690991,0.007604
3,West Brompton,0.689987,0.008609
4,Canada Water,0.687574,0.011022
5,Bank and Monument,0.686576,0.01202
6,Canary Wharf,0.685486,0.013109
7,Richmond,0.684445,0.014151
8,Canning Town,0.681405,0.017191
9,Liverpool Street,0.670714,0.027882


### 2 Flows: weighted network

#### 2.1

In [53]:
G_flow_origin = nx.from_pandas_edgelist(file_network, source='station_origin', target='station_destination', edge_attr='flows')

In [54]:
list(G_flow_origin.edges(data = True))[0:10]

[('Abbey Road', 'Bank and Monument', {'flows': 0}),
 ('Abbey Road', 'Beckton', {'flows': 0}),
 ('Abbey Road', 'Blackwall', {'flows': 0}),
 ('Abbey Road', 'Canary Wharf', {'flows': 2}),
 ('Abbey Road', 'Canning Town', {'flows': 5}),
 ('Abbey Road', 'Crossharbour', {'flows': 0}),
 ('Abbey Road', 'Custom House', {'flows': 0}),
 ('Abbey Road', 'Cutty Sark', {'flows': 0}),
 ('Abbey Road', 'Cyprus', {'flows': 0}),
 ('Abbey Road', 'Devons Road', {'flows': 0})]

In [75]:
v_flows = {}
for e1, e2, flows in G_flow_origin.edges(data='flows'):
    if flows == 0:
        v_flows[(e1, e2)] = float('inf')
    else:
        v_flows[(e1, e2)] = round(1. / flows, 7)

nx.set_edge_attributes(G_flow_origin, v_flows, 'v_flows')

In [76]:
list(G_flow_origin.edges(data = True))[0:10]

[('Abbey Road', 'Bank and Monument', {'flows': 0, 'v_flows': inf}),
 ('Abbey Road', 'Beckton', {'flows': 0, 'v_flows': inf}),
 ('Abbey Road', 'Blackwall', {'flows': 0, 'v_flows': inf}),
 ('Abbey Road', 'Canary Wharf', {'flows': 2, 'v_flows': 0.5}),
 ('Abbey Road', 'Canning Town', {'flows': 5, 'v_flows': 0.2}),
 ('Abbey Road', 'Crossharbour', {'flows': 0, 'v_flows': inf}),
 ('Abbey Road', 'Custom House', {'flows': 0, 'v_flows': inf}),
 ('Abbey Road', 'Cutty Sark', {'flows': 0, 'v_flows': inf}),
 ('Abbey Road', 'Cyprus', {'flows': 0, 'v_flows': inf}),
 ('Abbey Road', 'Devons Road', {'flows': 0, 'v_flows': inf})]

In [77]:
degree_centrality_flow = nx.degree_centrality(G_flow_origin)
betweenness_centrality_flow = nx.betweenness_centrality(G_flow_origin, weight='v_flows')
closeness_centrality_flow = nx.closeness_centrality(G_flow_origin, distance='v_flows')

In [78]:
top_10_degree_flow = sorted(degree_centrality_flow.items(), key=lambda x: x[1], reverse=True)[:10]
top_10_betweenness_flow = sorted(betweenness_centrality_flow.items(), key=lambda x: x[1], reverse=True)[:10]
top_10_closeness_flow = sorted(closeness_centrality_flow.items(), key=lambda x: x[1], reverse=True)[:10]

In [79]:
result_top10_flow = pd.DataFrame({
    'Degree': [node[0] for node in top_10_degree_flow],
    'Betweenness': [node[0] for node in top_10_betweenness_flow],
    'Closeness': [node[0] for node in top_10_closeness_flow]})
result_top10_flow

Unnamed: 0,Degree,Betweenness,Closeness
0,Stratford,Waterloo,Waterloo
1,Highbury & Islington,Bank and Monument,Canary Wharf
2,Whitechapel,Canary Wharf,Bank and Monument
3,West Brompton,Stratford,Stratford
4,Canary Wharf,Liverpool Street,Liverpool Street
5,Canada Water,Oxford Circus,London Bridge
6,Liverpool Street,Hammersmith,Oxford Circus
7,Bank and Monument,Farringdon,Farringdon
8,Richmond,Victoria,Victoria
9,Canning Town,Canada Water,King's Cross St. Pancras


#### 2.3

Change in total flow

In [81]:
total_flow_initial = sum(nx.get_edge_attributes(G_flow_origin, 'flows').values())
print("Initial total flow:", total_flow_initial)

flow_change_result = []

for i in range(3):
    node_to_remove = top_10_betweenness_flow[i][0]

    G_copy = G_flow_origin.copy()
    G_copy.remove_node(node_to_remove)

    total_flow_after = sum(nx.get_edge_attributes(G_copy, 'flows').values())
    flow_change = total_flow_initial - total_flow_after
    change_rate = flow_change/total_flow_initial

    flow_change_result.append({
        'Removed_Node': node_to_remove,
        'Total_Flow_After_Removal': total_flow_after,
        'Flow_Change': flow_change,
        'Change_rate':change_rate})

flow_change_result = pd.DataFrame(flow_change_result)
flow_change_result

Initial total flow: 808427


Unnamed: 0,Removed_Node,Total_Flow_After_Removal,Flow_Change,Change_rate
0,Waterloo,741459,66968,0.082837
1,Bank and Monument,731470,76957,0.095194
2,Canary Wharf,755643,52784,0.065292


Network efficiency -> Sum of maximum flow lines

In [91]:
def global_flow(G, path_weight_attr, accumulate_attr):
    
    shortest_paths = dict(nx.all_pairs_dijkstra_path(G, weight=path_weight_attr))
    accumulated_sum = 0

    for source, targets in shortest_paths.items():
        for target, path in targets.items():
            if source != target:
                for u, v in zip(path[:-1], path[1:]):
                    accumulated_sum += G[u][v][accumulate_attr]

    return accumulated_sum

initial_flow = global_flow(G_flow_origin,'v_flows','flows')

In [93]:
node_flow_results = []

for i in range(3):
    node_to_remove = top_10_betweenness_flow[i][0]

    G_copy = G_flow_origin.copy()
    G_copy.remove_node(node_to_remove)

    new_flow = global_flow(G_copy,'v_flows','flows')
    flow_change = initial_flow - new_flow

    node_flow_results.append({
        "Removed_Node": node_to_remove,
        "new_flow": new_flow,
        "flow_change": flow_change})

node_flow_results = pd.DataFrame(node_flow_results)
node_flow_results

Unnamed: 0,Removed_Node,new_flow,flow_change
0,Waterloo,976260824,1128908148
1,Bank and Monument,1344085786,761083186
2,Canary Wharf,1557420494,547748478


## Part 2: Spatial Interaction models

### 3 Models and calibration

#### 3.2

In [96]:
origin_flow_data = pd.pivot_table(file_network, values ="flows", index="station_origin", columns = "station_destination",
                            aggfunc=np.sum, margins=True)
origin_flow_data

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,32.0,599
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1224
Acton Town,,,,3.0,17.0,,35.0,0.0,,11.0,...,77.0,3.0,6.0,9.0,,0.0,,0.0,,3745
Aldgate,,,0.0,,0.0,,,0.0,,17.0,...,0.0,,4.0,8.0,,0.0,,0.0,,2886
Aldgate East,,,2.0,0.0,,,0.0,0.0,,20.0,...,24.0,0.0,0.0,12.0,,1.0,,1.0,,3172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,2.0,5.0,47.0,,,,,22.0,...,2.0,,1.0,,,,,,,4868
Woodgrange Park,,0.0,,,,,,,,,...,,,,,,,,,,530
Woodside Park,,,1.0,26.0,11.0,,0.0,,,59.0,...,0.0,,0.0,,,,,,,3093
Woolwich Arsenal,20.0,,,,,7.0,,,,,...,,,,,,,,,,7892


In [144]:
def CalcRSquared(observed, estimated):
    """Calculate the r^2 from a series of observed and estimated target values
    inputs:
    Observed: Series of actual observed values
    estimated: Series of predicted values"""
    
    r, p = scipy.stats.pearsonr(observed, estimated)
    R2 = r **2
    
    return R2

def CalcRMSE(observed, estimated):
    """Calculate Root Mean Square Error between a series of observed and estimated values
    inputs:
    Observed: Series of actual observed values
    estimated: Series of predicted values"""
    
    res = (observed -estimated)**2
    RMSE = round(sqrt(res.mean()), 3)
    
    return RMSE

In [120]:
file_network = file_network[file_network['distance'] != 0]

In [121]:
file_network

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097
1,Abbey Road,Beckton,1,599,442,8510.121774
2,Abbey Road,Blackwall,3,599,665,3775.448872
3,Abbey Road,Canary Wharf,1,599,58772,5086.514220
4,Abbey Road,Canning Town,37,599,15428,2228.923167
...,...,...,...,...,...,...
61469,Woolwich Arsenal,Tower Gateway,127,7892,3342,13401.795549
61470,Woolwich Arsenal,West Ham,608,7892,5487,8701.454361
61471,Woolwich Arsenal,West India Quay,6,7892,400,9536.720451
61472,Woolwich Arsenal,West Silvertown,81,7892,893,5355.248554


In [123]:
file_network["log_population"] = np.log(file_network['population'])
file_network["log_jobs"] = np.log(file_network['jobs'])
file_network["log_distance"] = np.log(file_network['distance'])

In [124]:
formula = 'flows ~ station_origin + log_jobs + log_distance-1'
prodSim = smf.glm(formula = formula, data=file_network, family=sm.families.Poisson()).fit()
print(prodSim.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  flows   No. Observations:                61413
Model:                            GLM   Df Residuals:                    61013
Model Family:                 Poisson   Df Model:                          399
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -1.0169e+06
Date:                Mon, 29 Apr 2024   Deviance:                   1.8615e+06
Time:                        16:45:37   Pearson chi2:                 2.78e+06
No. Iterations:                     8   Pseudo R-squ. (CS):              1.000
Covariance Type:            nonrobust                                         
                                                  coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------

In [131]:
O_i = pd.DataFrame(file_network.groupby(["station_origin"])["flows"].agg(np.sum))
O_i.rename(columns={"flows":"O_i"}, inplace = True)
file_network = file_network.merge(O_i, on = "station_origin", how = "left" )

D_j = pd.DataFrame(file_network.groupby(["station_destination"])["flows"].agg(np.sum))
D_j.rename(columns={"flows":"D_j"}, inplace = True)
file_network = file_network.merge(D_j, on = "station_destination", how = "left" )

file_network.head()

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,O_i,D_j
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097,6.395262,11.271478,9.003504,599,78549
1,Abbey Road,Beckton,1,599,442,8510.121774,6.395262,6.09131,9.049012,599,442
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.395262,6.499787,8.236275,599,665
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,6.395262,10.981421,8.534348,599,58772
4,Abbey Road,Canning Town,37,599,15428,2228.923167,6.395262,9.643939,7.709274,599,15428


In [132]:
coefs = pd.DataFrame(prodSim.params)
coefs.reset_index(inplace=True)
coefs.rename(columns = {0:"alpha_i", "index":"coef"}, inplace = True)
to_repl = ["(station_origin)", "\[", "\]"]
for x in to_repl:
    coefs["coef"] = coefs["coef"].str.replace(x, "",regex=True)

file_network = file_network.merge(coefs, left_on="station_origin", right_on="coef", how = "left")
file_network.drop(columns = ["coef"], inplace = True)

file_network.head()

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,O_i,D_j,alpha_i
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097,6.395262,11.271478,9.003504,599,78549,3.250242
1,Abbey Road,Beckton,1,599,442,8510.121774,6.395262,6.09131,9.049012,599,442,3.250242
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.395262,6.499787,8.236275,599,665,3.250242
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,6.395262,10.981421,8.534348,599,58772,3.250242
4,Abbey Road,Canning Town,37,599,15428,2228.923167,6.395262,9.643939,7.709274,599,15428,3.250242


In [133]:
prodSim.params

station_origin[Abbey Road]          3.250242
station_origin[Acton Central]       5.016902
station_origin[Acton Town]          4.562892
station_origin[Aldgate]             3.323767
station_origin[Aldgate East]        3.457664
                                      ...   
station_origin[Woodgrange Park]     5.254667
station_origin[Woodside Park]       4.698635
station_origin[Woolwich Arsenal]    6.905590
log_jobs                            0.768616
log_distance                       -0.878119
Length: 400, dtype: float64

In [137]:
alpha_i = prodSim.params[0:397]
gamma = prodSim.params[398]
beta = -prodSim.params[399]

In [155]:
print(beta)

0.8781191183724159


In [139]:
file_network["prodsimest1"] = np.exp(file_network["alpha_i"]+gamma*file_network["log_jobs"] 
                                 - beta*file_network["log_distance"])
file_network.head(10)

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,O_i,D_j,alpha_i,prodsimest1
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097,6.395262,11.271478,9.003504,599,78549,3.250242,55.010681
1,Abbey Road,Beckton,1,599,442,8510.121774,6.395262,6.09131,9.049012,599,442,3.250242,0.986106
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.395262,6.499787,8.236275,599,665,3.250242,2.75564
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,6.395262,10.981421,8.534348,599,58772,3.250242,66.457296
4,Abbey Road,Canning Town,37,599,15428,2228.923167,6.395262,9.643939,7.709274,599,15428,3.250242,49.06097
5,Abbey Road,Crossharbour,1,599,1208,6686.47556,6.395262,7.096721,8.807842,599,1208,3.250242,2.639418
6,Abbey Road,Custom House,0,599,845,3824.85563,6.395262,6.739337,8.249276,599,845,3.250242,3.27512
7,Abbey Road,Cutty Sark,2,599,1748,8503.898909,6.395262,7.466228,9.04828,599,1748,3.250242,2.838948
8,Abbey Road,Cyprus,7,599,850,6532.099618,6.395262,6.745236,8.784484,599,850,3.250242,2.05631
9,Abbey Road,Devons Road,1,599,611,3958.324171,6.395262,6.415097,8.283576,599,611,3.250242,2.476929


In [145]:
CalcRSquared(file_network["flows"], file_network["prodsimest1"])

0.3882685022462576

In [146]:
CalcRMSE(file_network["flows"], file_network["prodsimest1"])

102.893

In [147]:
file_network["prodsimest1"] = round(file_network["prodsimest1"],0)
flow_data_1 = file_network.pivot_table(values ="prodsimest1", index="station_origin", columns = "station_destination",
                            aggfunc=np.sum, margins=True)
flow_data_1

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,5.0,599.0
Acton Central,,,,,,,,,,,...,,,,,,,1.0,,,1223.0
Acton Town,,,,18.0,18.0,,9.0,1.0,,20.0,...,16.0,3.0,5.0,13.0,,2.0,,2.0,,3749.0
Aldgate,,,2.0,,47.0,,,0.0,,21.0,...,4.0,,3.0,2.0,,1.0,,1.0,,2882.0
Aldgate East,,,2.0,52.0,,,1.0,0.0,,23.0,...,5.0,1.0,3.0,2.0,,1.0,,1.0,,3167.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,7.0,35.0,39.0,,,,,32.0,...,15.0,,10.0,,,,,,,4866.0
Woodgrange Park,,4.0,,,,,,,,,...,,,,,,,,,,532.0
Woodside Park,,,5.0,20.0,20.0,,2.0,,,25.0,...,11.0,,6.0,,,,,,,3092.0
Woolwich Arsenal,29.0,,,,,33.0,,,,,...,,,,,,,,,,7890.0


### 4 Scenarios

#### 4.1

In [149]:
def new_job(row):
    if row["station_destination"] == "Canary Wharf":
        val = row["jobs"]/2
    else:
        val = row["jobs"]
    return val
        
file_network["jobs_new"] = file_network.apply(new_job, axis =1)

file_network[file_network['station_destination'] == 'Canary Wharf']

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,O_i,D_j,alpha_i,prodsimest1,jobs_new
3,Abbey Road,Canary Wharf,1,599,58772,5086.514220,6.395262,10.981421,8.534348,599,58772,3.250242,66.0,29386.0
126,Acton Town,Canary Wharf,57,3745,58772,20398.165882,8.228177,10.981421,9.923200,3745,58772,4.562892,73.0,29386.0
348,Aldgate,Canary Wharf,1,2886,58772,6564.419680,7.967627,10.981421,8.789419,2886,58772,3.323767,57.0,29386.0
595,Aldgate East,Canary Wharf,3,3172,58772,5127.998899,8.062118,10.981421,8.542471,3172,58772,3.457664,81.0,29386.0
817,All Saints,Canary Wharf,67,740,58772,1340.088733,6.606650,10.981421,7.200491,740,58772,3.380602,244.0,29386.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60534,Wood Green,Canary Wharf,64,6667,58772,16849.246595,8.804925,10.981421,9.732061,6667,58772,5.137983,153.0,29386.0
60777,Wood Lane,Canary Wharf,0,1088,58772,17092.091760,6.992096,10.981421,9.746371,1088,58772,3.154679,21.0,29386.0
61001,Woodford,Canary Wharf,192,4868,58772,13963.787075,8.490438,10.981421,9.544223,4868,58772,5.160643,185.0,29386.0
61233,Woodside Park,Canary Wharf,42,3093,58772,22356.567178,8.036897,10.981421,10.014875,3093,58772,4.698635,77.0,29386.0


In [150]:
file_network["prodsimest2"] = np.exp(file_network["alpha_i"]+gamma*np.log(file_network["jobs_new"]) - beta*file_network["log_distance"])

file_network["prodsimest2"] = round(file_network["prodsimest2"],0)

flow_data_2 = file_network.pivot_table(values ="prodsimest2", index="station_origin", columns = "station_destination",
                            aggfunc=np.sum, margins=True)
flow_data_2

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,5.0,572.0
Acton Central,,,,,,,,,,,...,,,,,,,1.0,,,1223.0
Acton Town,,,,18.0,18.0,,9.0,1.0,,20.0,...,16.0,3.0,5.0,13.0,,2.0,,2.0,,3719.0
Aldgate,,,2.0,,47.0,,,0.0,,21.0,...,4.0,,3.0,2.0,,1.0,,1.0,,2859.0
Aldgate East,,,2.0,52.0,,,1.0,0.0,,23.0,...,5.0,1.0,3.0,2.0,,1.0,,1.0,,3134.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,7.0,35.0,39.0,,,,,32.0,...,15.0,,10.0,,,,,,,4790.0
Woodgrange Park,,4.0,,,,,,,,,...,,,,,,,,,,532.0
Woodside Park,,,5.0,20.0,20.0,,2.0,,,25.0,...,11.0,,6.0,,,,,,,3060.0
Woolwich Arsenal,29.0,,,,,33.0,,,,,...,,,,,,,,,,7297.0


In [151]:
#calculate some new wj^alpha and d_ij^beta values
Dj_gamma = file_network["jobs_new"]**gamma
dist_beta = file_network["distance"]**(-beta)
#calcualte the first stage of the Ai values
file_network["Ai1"] = Dj_gamma * dist_beta
#now do the sum over all js bit
A_i = pd.DataFrame(file_network.groupby(["station_origin"])["Ai1"].agg(np.sum))
#now divide into 1
A_i["Ai1"] = 1/A_i["Ai1"]
A_i.rename(columns={"Ai1":"A_i2"}, inplace=True)
#and write the A_i values back into the dataframe
file_network = file_network.merge(A_i, left_on="station_origin", right_index=True, how="left")

In [153]:
file_network["prodsimest_A"] = file_network["A_i2"]*file_network["O_i"]*Dj_gamma*dist_beta
file_network["prodsimest_A"] = round(file_network["prodsimest_A"])

In [154]:
flow_data_A = file_network.pivot_table(values ="prodsimest_A", index="station_origin", columns = "station_destination",
                            aggfunc=np.sum, margins=True)
flow_data_A

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,6.0,600.0
Acton Central,,,,,,,,,,,...,,,,,,,1.0,,,1223.0
Acton Town,,,,18.0,18.0,,9.0,1.0,,20.0,...,17.0,3.0,5.0,13.0,,2.0,,2.0,,3745.0
Aldgate,,,2.0,,47.0,,,0.0,,22.0,...,4.0,,3.0,2.0,,1.0,,1.0,,2884.0
Aldgate East,,,2.0,53.0,,,1.0,0.0,,23.0,...,5.0,1.0,3.0,2.0,,1.0,,1.0,,3166.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,7.0,36.0,39.0,,,,,33.0,...,16.0,,10.0,,,,,,,4863.0
Woodgrange Park,,4.0,,,,,,,,,...,,,,,,,,,,532.0
Woodside Park,,,5.0,21.0,21.0,,2.0,,,25.0,...,11.0,,6.0,,,,,,,3091.0
Woolwich Arsenal,32.0,,,,,36.0,,,,,...,,,,,,,,,,7893.0


#### 4.2

In [189]:
beta_2 = 1.5
beta_3 = 2.5

In [181]:
#calculate some new wj^alpha and d_ij^beta values
Dj_gamma = file_network["jobs"]**gamma
dist_beta = file_network["distance"]**(-beta_2)
#calcualte the first stage of the Ai values
file_network["Ai1"] = Dj_gamma * dist_beta
#now do the sum over all js bit
A_i = pd.DataFrame(file_network.groupby(["station_origin"])["Ai1"].agg(np.sum))
#now divide into 1
A_i["Ai1"] = 1/A_i["Ai1"]
A_i.rename(columns={"Ai1":"A_i_b2"}, inplace=True)
#and write the A_i values back into the dataframe
file_network = file_network.merge(A_i, left_on="station_origin", right_index=True, how="left")

In [182]:
file_network["prodsimest_b2"] = file_network["A_i_b2"]*file_network["O_i"]*Dj_gamma*dist_beta
file_network["prodsimest_b2"] = round(file_network["prodsimest_b2"])

In [183]:
flow_data_B2 = file_network.pivot_table(values ="prodsimest_b2", index="station_origin", columns = "station_destination",
                            aggfunc=np.sum, margins=True)
flow_data_B2

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,2.0,600.0
Acton Central,,,,,,,,,,,...,,,,,,,1.0,,,1224.0
Acton Town,,,,12.0,12.0,,13.0,0.0,,14.0,...,12.0,2.0,3.0,16.0,,1.0,,1.0,,3744.0
Aldgate,,,1.0,,60.0,,,0.0,,16.0,...,1.0,,1.0,1.0,,0.0,,0.0,,2875.0
Aldgate East,,,1.0,76.0,,,0.0,0.0,,18.0,...,1.0,0.0,1.0,1.0,,0.0,,0.0,,3159.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,5.0,35.0,40.0,,,,,29.0,...,9.0,,8.0,,,,,,,4871.0
Woodgrange Park,,2.0,,,,,,,,,...,,,,,,,,,,533.0
Woodside Park,,,4.0,18.0,18.0,,1.0,,,25.0,...,8.0,,5.0,,,,,,,3098.0
Woolwich Arsenal,29.0,,,,,31.0,,,,,...,,,,,,,,,,7894.0


In [190]:
#calculate some new wj^alpha and d_ij^beta values
Dj_gamma = file_network["jobs"]**gamma
dist_beta = file_network["distance"]**(-beta_3)
#calcualte the first stage of the Ai values
file_network["Ai1"] = Dj_gamma * dist_beta
#now do the sum over all js bit
A_i = pd.DataFrame(file_network.groupby(["station_origin"])["Ai1"].agg(np.sum))
#now divide into 1
A_i["Ai1"] = 1/A_i["Ai1"]
A_i.rename(columns={"Ai1":"A_i_b3"}, inplace=True)
#and write the A_i values back into the dataframe
file_network = file_network.merge(A_i, left_on="station_origin", right_index=True, how="left")

In [191]:
file_network["prodsimest_b3"] = file_network["A_i_b3"]*file_network["O_i"]*Dj_gamma*dist_beta
file_network["prodsimest_b3"] = round(file_network["prodsimest_b3"])

In [192]:
flow_data_B3 = file_network.pivot_table(values ="prodsimest_b3", index="station_origin", columns = "station_destination",
                            aggfunc=np.sum, margins=True)
flow_data_B3

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,0.0,595.0
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1219.0
Acton Town,,,,4.0,3.0,,13.0,0.0,,4.0,...,4.0,1.0,1.0,12.0,,0.0,,0.0,,3736.0
Aldgate,,,0.0,,52.0,,,0.0,,6.0,...,0.0,,0.0,0.0,,0.0,,0.0,,2875.0
Aldgate East,,,0.0,86.0,,,0.0,0.0,,8.0,...,0.0,0.0,0.0,0.0,,0.0,,0.0,,3162.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,2.0,22.0,29.0,,,,,17.0,...,3.0,,3.0,,,,,,,4869.0
Woodgrange Park,,0.0,,,,,,,,,...,,,,,,,,,,532.0
Woodside Park,,,1.0,9.0,9.0,,0.0,,,16.0,...,2.0,,2.0,,,,,,,3091.0
Woolwich Arsenal,24.0,,,,,23.0,,,,,...,,,,,,,,,,7894.0


4.3

In [196]:
file_network

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,O_i,...,prodsimest1,jobs_new,prodsimest2,Ai1,A_i2,prodsimest_A,prodsimest_b2,A_i_b2,prodsimest_b3,A_i_b3
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097,6.395262,11.271478,9.003504,599,...,55.0,78549.0,55.0,9.706252e-07,0.045134,58.0,20.0,4.162663,2.0,4153.273414
1,Abbey Road,Beckton,1,599,442,8510.121774,6.395262,6.091310,9.049012,599,...,1.0,442.0,1.0,1.616119e-08,0.045134,1.0,0.0,4.162663,0.0,4153.273414
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.395262,6.499787,8.236275,599,...,3.0,665.0,3.0,1.687498e-07,0.045134,3.0,2.0,4.162663,0.0,4153.273414
3,Abbey Road,Canary Wharf,1,599,58772,5086.514220,6.395262,10.981421,8.534348,599,...,66.0,29386.0,39.0,2.509622e-06,0.045134,41.0,32.0,4.162663,6.0,4153.273414
4,Abbey Road,Canning Town,37,599,15428,2228.923167,6.395262,9.643939,7.709274,599,...,49.0,15428.0,49.0,7.062565e-06,0.045134,51.0,39.0,4.162663,18.0,4153.273414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61408,Woolwich Arsenal,Tower Gateway,127,7892,3342,13401.795549,8.973605,8.114325,9.503144,7892,...,121.0,3342.0,121.0,2.458663e-08,0.136719,131.0,95.0,36.578767,54.0,276130.266660
61409,Woolwich Arsenal,West Ham,608,7892,5487,8701.454361,8.973605,8.610137,9.071245,7892,...,259.0,5487.0,259.0,1.059575e-07,0.136719,280.0,266.0,36.578767,231.0,276130.266660
61410,Woolwich Arsenal,West India Quay,6,7892,400,9536.720451,8.973605,5.991465,9.162905,7892,...,32.0,400.0,32.0,1.125865e-08,0.136719,35.0,31.0,36.578767,25.0,276130.266660
61411,Woolwich Arsenal,West Silvertown,81,7892,893,5355.248554,8.973605,6.794587,8.585832,7892,...,98.0,893.0,98.0,8.833269e-08,0.136719,106.0,137.0,36.578767,192.0,276130.266660


In [201]:
initial_origin = file_network.groupby('station_origin')['prodsimest1'].sum()
initial_origin = initial_origin.to_frame(name='initial_origin')
initial_origin.index.name = None

initial_destination = file_network.groupby('station_destination')['prodsimest1'].sum()
initial_destination = initial_destination.to_frame(name='initial_destination')
initial_destination.index.name = None

A_origin = file_network.groupby('station_origin')['prodsimest_A'].sum()
A_origin = A_origin.to_frame(name='A_origin')
A_origin.index.name = None

A_destination = file_network.groupby('station_destination')['prodsimest_A'].sum()
A_destination = A_destination.to_frame(name='A_destination')
A_destination.index.name = None

B2_origin = file_network.groupby('station_origin')['prodsimest_b2'].sum()
B2_origin = B2_origin.to_frame(name='B2_origin')
B2_origin.index.name = None

B2_destination = file_network.groupby('station_destination')['prodsimest_b2'].sum()
B2_destination = B2_destination.to_frame(name='B2_destination')
B2_destination.index.name = None

B3_origin = file_network.groupby('station_origin')['prodsimest_b3'].sum()
B3_origin = B3_origin.to_frame(name='B3_origin')
B3_origin.index.name = None

B3_destination = file_network.groupby('station_destination')['prodsimest_b3'].sum()
B3_destination = B3_destination.to_frame(name='B3_destination')
B3_destination.index.name = None

In [203]:
estimate_data = pd.concat([initial_origin, initial_destination, A_origin, A_destination, B2_origin, B2_destination, B3_origin, B3_destination], axis=1)
estimate_data

Unnamed: 0,initial_origin,initial_destination,A_origin,A_destination,B2_origin,B2_destination,B3_origin,B3_destination
Abbey Road,599.0,445.0,600.0,470.0,600.0,941.0,595.0,2526.0
Acton Central,1223.0,391.0,1223.0,393.0,1224.0,360.0,1219.0,351.0
Acton Town,3749.0,2156.0,3745.0,2182.0,3744.0,2249.0,3736.0,3042.0
Aldgate,2882.0,8753.0,2884.0,8836.0,2875.0,8678.0,2875.0,6651.0
Aldgate East,3167.0,9308.0,3166.0,9398.0,3159.0,9122.0,3162.0,7078.0
...,...,...,...,...,...,...,...,...
Wood Street,1366.0,341.0,1366.0,342.0,1368.0,618.0,1368.0,2003.0
Woodford,4866.0,654.0,4863.0,666.0,4871.0,641.0,4869.0,1336.0
Woodgrange Park,532.0,156.0,532.0,156.0,533.0,218.0,532.0,437.0
Woodside Park,3092.0,603.0,3091.0,609.0,3098.0,651.0,3091.0,1986.0


In [205]:
estimate_data['initial_flow'] = estimate_data['initial_destination'] - estimate_data['initial_origin']
estimate_data['A_flow'] = estimate_data['A_destination'] - estimate_data['A_origin']
estimate_data['B2_flow'] = estimate_data['B2_destination'] - estimate_data['B2_origin']
estimate_data['B3_flow'] = estimate_data['B3_destination'] - estimate_data['B3_origin']
estimate_data

Unnamed: 0,initial_origin,initial_destination,A_origin,A_destination,B2_origin,B2_destination,B3_origin,B3_destination,initial_flow,A_flow,B2_flow,B3_flow
Abbey Road,599.0,445.0,600.0,470.0,600.0,941.0,595.0,2526.0,-154.0,-130.0,341.0,1931.0
Acton Central,1223.0,391.0,1223.0,393.0,1224.0,360.0,1219.0,351.0,-832.0,-830.0,-864.0,-868.0
Acton Town,3749.0,2156.0,3745.0,2182.0,3744.0,2249.0,3736.0,3042.0,-1593.0,-1563.0,-1495.0,-694.0
Aldgate,2882.0,8753.0,2884.0,8836.0,2875.0,8678.0,2875.0,6651.0,5871.0,5952.0,5803.0,3776.0
Aldgate East,3167.0,9308.0,3166.0,9398.0,3159.0,9122.0,3162.0,7078.0,6141.0,6232.0,5963.0,3916.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Wood Street,1366.0,341.0,1366.0,342.0,1368.0,618.0,1368.0,2003.0,-1025.0,-1024.0,-750.0,635.0
Woodford,4866.0,654.0,4863.0,666.0,4871.0,641.0,4869.0,1336.0,-4212.0,-4197.0,-4230.0,-3533.0
Woodgrange Park,532.0,156.0,532.0,156.0,533.0,218.0,532.0,437.0,-376.0,-376.0,-315.0,-95.0
Woodside Park,3092.0,603.0,3091.0,609.0,3098.0,651.0,3091.0,1986.0,-2489.0,-2482.0,-2447.0,-1105.0


In [207]:
initial_sorted = estimate_data.sort_values(by='initial_flow', ascending=False)
A_sorted = estimate_data.sort_values(by='A_flow', ascending=False)
B2_sorted = estimate_data.sort_values(by='B2_flow', ascending=False)
B3_sorted = estimate_data.sort_values(by='B3_flow', ascending=False)

In [210]:
initial_top = initial_sorted.head(10).index
initial_top = pd.DataFrame(initial_top, columns=['Initial'])

A_top = A_sorted.head(10).index
A_top = pd.DataFrame(A_top, columns=['Scenario A'])

B2_top = B2_sorted.head(10).index
B2_top = pd.DataFrame(B2_top, columns=['Scenario B (β=1.5)'])

B3_top = B3_sorted.head(10).index
B3_top = pd.DataFrame(B3_top, columns=['Scenario B (β=2.5)'])

top_flow = pd.concat([initial_top, A_top, B2_top, B3_top], axis=1)
top_flow

Unnamed: 0,Initial,Scenario A,Scenario B (β=1.5),Scenario B (β=2.5)
0,Bank and Monument,Bank and Monument,Bank and Monument,Bank and Monument
1,Canary Wharf,Oxford Circus,Oxford Circus,Moorgate
2,Oxford Circus,Liverpool Street,Canary Wharf,Oxford Circus
3,Liverpool Street,Green Park,Moorgate,Canary Wharf
4,Green Park,Moorgate,Liverpool Street,Stratford High Street
5,Moorgate,Holborn,Green Park,Westminster
6,Holborn,Bond Street,Holborn,St. James's Park
7,Bond Street,Farringdon,Westminster,Green Park
8,Tottenham Court Road,Tottenham Court Road,Warren Street,Warren Street
9,Farringdon,Westminster,Bond Street,Southwark


In [211]:
initial_tail = initial_sorted.tail(10).index
initial_tail = pd.DataFrame(initial_tail, columns=['Initial'])

A_tail = A_sorted.tail(10).index
A_tail = pd.DataFrame(A_tail, columns=['Scenario A'])

B2_tail = B2_sorted.tail(10).index
B2_tail = pd.DataFrame(B2_tail, columns=['Scenario B (β=1.5)'])

B3_tail = B3_sorted.tail(10).index
B3_tail = pd.DataFrame(B3_tail, columns=['Scenario B (β=2.5)'])

tail_flow = pd.concat([initial_tail, A_tail, B2_tail, B3_tail], axis=1)
tail_flow

Unnamed: 0,Initial,Scenario A,Scenario B (β=1.5),Scenario B (β=2.5)
0,Morden,Woolwich Arsenal,Woolwich Arsenal,Canada Water
1,Paddington,Morden,Seven Sisters,Walthamstow Central
2,Stockwell,Stockwell,Stockwell,Ealing Broadway
3,Seven Sisters,Seven Sisters,Paddington,Paddington
4,Ealing Broadway,Ealing Broadway,Ealing Broadway,London Bridge
5,Victoria,Victoria,Victoria,Stratford
6,Walthamstow Central,Walthamstow Central,Walthamstow Central,Finsbury Park
7,Finsbury Park,Finsbury Park,Finsbury Park,Victoria
8,Brixton,Brixton,Brixton,Brixton
9,Waterloo,Waterloo,Waterloo,Waterloo
