In [373]:
# libraries
from operator import itemgetter
from IPython.display import display_html
from math import sqrt

import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

import folium
import scipy.stats
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [374]:
import warnings
warnings.filterwarnings('ignore')

In [375]:
# import data
london_flows = pd.read_csv("london_flows.csv")
london_flows.head(10)

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097
1,Abbey Road,Beckton,1,599,442,8510.121774
2,Abbey Road,Blackwall,3,599,665,3775.448872
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422
4,Abbey Road,Canning Town,37,599,15428,2228.923167
5,Abbey Road,Crossharbour,1,599,1208,6686.47556
6,Abbey Road,Custom House,0,599,845,3824.85563
7,Abbey Road,Cutty Sark,2,599,1748,8503.898909
8,Abbey Road,Cyprus,7,599,850,6532.099618
9,Abbey Road,Devons Road,1,599,611,3958.324171


In [376]:
#remove Battersea Park
london_flows = london_flows[
    (london_flows['station_origin'] != 'Battersea Park') &
    (london_flows['station_destination'] != 'Battersea Park')
]

In [377]:
#check intra-borough flows
intra_flows = london_flows[(london_flows['station_origin'] == london_flows['station_destination'])]

In [378]:
#now chop out the intra-borough flows
london_flows = london_flows[(london_flows['station_origin'] != london_flows['station_destination'])]

In [379]:
population_data = london_flows[['station_origin', 'population']].copy()
population_data.rename(columns={'station_origin': 'station_destination'}, inplace=True)
population_data.drop_duplicates(subset='station_destination', inplace=True)

jobs_data = london_flows[['station_destination', 'jobs']].copy()
jobs_data.rename(columns={'station_destination': 'station_origin'}, inplace=True)
jobs_data.drop_duplicates(subset='station_origin', inplace=True)

In [380]:
# 合并population_data到london_flows，基于station_destination
london_flows = london_flows.merge(population_data, left_on='station_destination', right_on='station_destination', how='left')

# 合并jobs_data到london_flows，基于station_origin
london_flows = london_flows.merge(jobs_data, left_on='station_origin', right_on='station_origin', how='left')

In [381]:
#rename
london_flows.rename(columns={
    'population_x': 'O_pop',
    'population_y': 'D_pop',
    'jobs_x': 'D_jobs',
    'jobs_y': 'O_jobs'
}, inplace=True)

new_column_order = ['station_origin', 'O_pop', 'O_jobs', 'station_destination', 'D_pop', 'D_jobs', 'flows', 'distance']

# 重排DataFrame的列
london_flows = london_flows[new_column_order]


In [382]:
london_flows_matrix = pd.pivot_table(london_flows, values ="flows", index="station_origin", columns = "station_destination",
                            aggfunc=np.sum, margins=True)

london_flows_matrix

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,32.0,599
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1224
Acton Town,,,,3.0,17.0,,35.0,0.0,,11.0,...,77.0,3.0,6.0,9.0,,0.0,,0.0,,3745
Aldgate,,,0.0,,0.0,,,0.0,,17.0,...,0.0,,4.0,8.0,,0.0,,0.0,,2886
Aldgate East,,,2.0,0.0,,,0.0,0.0,,20.0,...,24.0,0.0,0.0,12.0,,1.0,,1.0,,3172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,2.0,5.0,47.0,,,,,22.0,...,2.0,,1.0,,,,,,,4868
Woodgrange Park,,0.0,,,,,,,,,...,,,,,,,,,,530
Woodside Park,,,1.0,26.0,11.0,,0.0,,,59.0,...,0.0,,0.0,,,,,,,3093
Woolwich Arsenal,20.0,,,,,7.0,,,,,...,,,,,,,,,,7892


In [383]:
# define functions
def CalcRSquared(observed, estimated):
    """Calculate the r^2 from a series of observed and estimated target values
    inputs:
    Observed: Series of actual observed values
    estimated: Series of predicted values"""
    
    r, p = scipy.stats.pearsonr(observed, estimated)
    R2 = r **2
    
    return R2

def CalcRMSE(observed, estimated):
    """Calculate Root Mean Square Error between a series of observed and estimated values
    inputs:
    Observed: Series of actual observed values
    estimated: Series of predicted values"""
    
    res = (observed -estimated)**2
    RMSE = round(sqrt(res.mean()), 3)
    
    return RMSE

In [384]:
#apply inverse power
london_flows['log_distance'] = np.log(london_flows['distance'])
#Calculate log of destination employment count with small addition to solve div by zero error
london_flows['log_dest_jobs'] = np.log(london_flows['D_jobs'] + 0.001)

In [385]:
london_flows.head()

Unnamed: 0,station_origin,O_pop,O_jobs,station_destination,D_pop,D_jobs,flows,distance,log_distance,log_dest_jobs
0,Abbey Road,599,345,Bank and Monument,29494,78549,0,8131.525097,9.003504,11.271478
1,Abbey Road,599,345,Beckton,1179,442,1,8510.121774,9.049012,6.091312
2,Abbey Road,599,345,Blackwall,952,665,3,3775.448872,8.236275,6.499789
3,Abbey Road,599,345,Canary Wharf,14632,58772,1,5086.51422,8.534348,10.981421
4,Abbey Road,599,345,Canning Town,21125,15428,37,2228.923167,7.709274,9.643939


In [386]:
#Production-constrained Model-inverse

#create the formula (the "-1" indicates no intercept in the regression model).
formula1 = 'flows ~ station_origin + log_dest_jobs + log_distance-1'
#run a production constrained sim
Sim1 = smf.glm(formula = formula1, data=london_flows, family=sm.families.Poisson()).fit()
#let's have a look at it's summary
print(Sim1.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  flows   No. Observations:                61413
Model:                            GLM   Df Residuals:                    61013
Model Family:                 Poisson   Df Model:                          399
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -1.0169e+06
Date:                Sat, 20 Apr 2024   Deviance:                   1.8615e+06
Time:                        02:12:48   Pearson chi2:                 2.78e+06
No. Iterations:                     8   Pseudo R-squ. (CS):              1.000
Covariance Type:            nonrobust                                         
                                                  coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------

In [387]:
london_flows["fit1"] = np.round(Sim1.mu,0)

#Summary Statistics
print("RSquared =")
print(CalcRSquared(london_flows["flows"], london_flows["fit1"]))
print("RMSE =")
print(CalcRMSE(london_flows["flows"], london_flows["fit1"]))

RSquared =
0.3882763950178338
RMSE =
102.893


In [388]:
#Production-constrained Model-exponential

#create the formula (the "-1" indicates no intercept in the regression model).
formula2 = 'flows ~ station_origin + log_dest_jobs + distance-1'
#run a production constrained sim
Sim2 = smf.glm(formula = formula2, data=london_flows, family=sm.families.Poisson()).fit()
#let's have a look at it's summary
print(Sim2.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  flows   No. Observations:                61413
Model:                            GLM   Df Residuals:                    61013
Model Family:                 Poisson   Df Model:                          399
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -9.0994e+05
Date:                Sat, 20 Apr 2024   Deviance:                   1.6477e+06
Time:                        02:12:59   Pearson chi2:                 2.40e+06
No. Iterations:                     8   Pseudo R-squ. (CS):              1.000
Covariance Type:            nonrobust                                         
                                                  coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------

In [389]:
london_flows["fit2"] = np.round(Sim2.mu,0)

#Summary Statistics
print("RSquared =")
print(CalcRSquared(london_flows["flows"], london_flows["fit2"]))
print("RMSE =")
print(CalcRMSE(london_flows["flows"], london_flows["fit2"]))

RSquared =
0.46806533672572836
RMSE =
96.263


In [390]:
#Double-constrained Model-exponential

#create the formula (the "-1" indicates no intercept in the regression model).
formula3 = 'flows ~ station_origin + station_destination + log_distance-1'
#run a production constrained sim
Sim3 = smf.glm(formula = formula3, data=london_flows, family=sm.families.Poisson()).fit()
#let's have a look at it's summary
print(Sim3.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  flows   No. Observations:                61413
Model:                            GLM   Df Residuals:                    60617
Model Family:                 Poisson   Df Model:                          795
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -9.7074e+05
Date:                Sat, 20 Apr 2024   Deviance:                   1.7693e+06
Time:                        02:13:28   Pearson chi2:                 2.47e+06
No. Iterations:                     8   Pseudo R-squ. (CS):              1.000
Covariance Type:            nonrobust                                         
                                                         coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------

In [391]:
london_flows["fit3"] = np.round(Sim3.mu,0)

#Summary Statistics
print("RSquared =")
print(CalcRSquared(london_flows["flows"], london_flows["fit3"]))
print("RMSE =")
print(CalcRMSE(london_flows["flows"], london_flows["fit3"]))

RSquared =
0.4076853229295958
RMSE =
101.335


In [392]:
#Double-constrained Model-exponential

#create the formula (the "-1" indicates no intercept in the regression model).
formula4 = 'flows ~ station_origin + station_destination + distance-1'
#run a production constrained sim
Sim4 = smf.glm(formula = formula4, data=london_flows, family=sm.families.Poisson()).fit()
#let's have a look at it's summary
print(Sim4.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  flows   No. Observations:                61413
Model:                            GLM   Df Residuals:                    60617
Model Family:                 Poisson   Df Model:                          795
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -8.5105e+05
Date:                Sat, 20 Apr 2024   Deviance:                   1.5299e+06
Time:                        02:14:26   Pearson chi2:                 2.02e+06
No. Iterations:                     8   Pseudo R-squ. (CS):              1.000
Covariance Type:            nonrobust                                         
                                                         coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------

In [393]:
london_flows["fit4"] = np.round(Sim4.mu,0)

#Summary Statistics
print("RSquared =")
print(CalcRSquared(london_flows["flows"], london_flows["fit4"]))
print("RMSE =")
print(CalcRMSE(london_flows["flows"], london_flows["fit4"]))

RSquared =
0.4978427414632353
RMSE =
93.401


In [394]:
#here's the matrix
london_flows_matrix1 = london_flows.pivot_table(values ="fit4", index="station_origin", columns = "station_destination",
                                    aggfunc=np.sum, margins=True)
london_flows_matrix1

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,31.0,603.0
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1221.0
Acton Town,,,,11.0,10.0,,17.0,0.0,,12.0,...,40.0,4.0,2.0,19.0,,0.0,,1.0,,3752.0
Aldgate,,,1.0,,32.0,,,0.0,,23.0,...,7.0,,3.0,2.0,,1.0,,1.0,,2883.0
Aldgate East,,,2.0,38.0,,,0.0,0.0,,24.0,...,7.0,1.0,3.0,2.0,,1.0,,1.0,,3167.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,2.0,38.0,44.0,,,,,25.0,...,7.0,,7.0,,,,,,,4862.0
Woodgrange Park,,2.0,,,,,,,,,...,,,,,,,,,,528.0
Woodside Park,,,2.0,17.0,15.0,,0.0,,,25.0,...,10.0,,4.0,,,,,,,3093.0
Woolwich Arsenal,28.0,,,,,29.0,,,,,...,,,,,,,,,,7896.0


In [395]:
london_flows_matrix

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,32.0,599
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1224
Acton Town,,,,3.0,17.0,,35.0,0.0,,11.0,...,77.0,3.0,6.0,9.0,,0.0,,0.0,,3745
Aldgate,,,0.0,,0.0,,,0.0,,17.0,...,0.0,,4.0,8.0,,0.0,,0.0,,2886
Aldgate East,,,2.0,0.0,,,0.0,0.0,,20.0,...,24.0,0.0,0.0,12.0,,1.0,,1.0,,3172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,2.0,5.0,47.0,,,,,22.0,...,2.0,,1.0,,,,,,,4868
Woodgrange Park,,0.0,,,,,,,,,...,,,,,,,,,,530
Woodside Park,,,1.0,26.0,11.0,,0.0,,,59.0,...,0.0,,0.0,,,,,,,3093
Woolwich Arsenal,20.0,,,,,7.0,,,,,...,,,,,,,,,,7892


对于交通流量这样的大数值数据，845的差异相对于1,542,283来说比例非常小（约0.055%）。这通常被认为是一个很好的拟合结果，尤其是在社会科学和经济模型中，完全精确的预测是非常罕见的。

In [396]:
# 假设Sim4是你的模型结果对象
coefficients_indices = Sim4.params.index

# 分别计算各类索引的数量
origin_count = sum('station_origin' in index for index in coefficients_indices)
destination_count = sum('station_destination' in index for index in coefficients_indices)
distance_count = sum('distance' in index for index in coefficients_indices)

print("Number of 'station_origin' coefficients:", origin_count)
print("Number of 'station_destination' coefficients:", destination_count)
print("Number of 'distance' coefficients:", distance_count)

Number of 'station_origin' coefficients: 398
Number of 'station_destination' coefficients: 397
Number of 'distance' coefficients: 1


In [397]:
# 假设Sim4是你的模型结果对象
coefficients = Sim4.params
coefficients_indices = coefficients.index

# 创建两个空的Series来存储不同类别的系数
alpha_i = pd.Series(dtype='float')
gamma_j = pd.Series(dtype='float')

# 遍历所有系数及其索引，根据索引类别分配到相应的Series
for index, value in coefficients.items():
    if 'station_origin' in index:
        alpha_i[index] = value
    elif 'station_destination' in index:
        gamma_j[index] = value

# 输出结果检查
print("Alpha_i coefficients for 'station_origin':")
print(alpha_i)

print("\nGamma_j coefficients for 'station_destination':")
print(gamma_j)

Alpha_i coefficients for 'station_origin':
station_origin[Abbey Road]          1.354150
station_origin[Acton Central]       2.965314
station_origin[Acton Town]          2.780711
station_origin[Aldgate]             1.537233
station_origin[Aldgate East]        1.625544
                                      ...   
station_origin[Wood Street]         3.390992
station_origin[Woodford]            3.856952
station_origin[Woodgrange Park]     3.063112
station_origin[Woodside Park]       3.292851
station_origin[Woolwich Arsenal]    4.742926
Length: 398, dtype: float64

Gamma_j coefficients for 'station_destination':
station_destination[T.Acton Central]       1.486103
station_destination[T.Acton Town]          1.440836
station_destination[T.Aldgate]             2.222441
station_destination[T.Aldgate East]        2.153787
station_destination[T.All Saints]          0.163051
                                             ...   
station_destination[T.Wood Street]         0.213082
station_destination[T

In [398]:
beta = -Sim4.params[-1]

In [399]:
# 获取并打印系数索引的总数
total_coefficients = len(Sim4.params.index)
print("Total number of coefficients:", total_coefficients)

Total number of coefficients: 796


In [400]:
alpha_i

station_origin[Abbey Road]          1.354150
station_origin[Acton Central]       2.965314
station_origin[Acton Town]          2.780711
station_origin[Aldgate]             1.537233
station_origin[Aldgate East]        1.625544
                                      ...   
station_origin[Wood Street]         3.390992
station_origin[Woodford]            3.856952
station_origin[Woodgrange Park]     3.063112
station_origin[Woodside Park]       3.292851
station_origin[Woolwich Arsenal]    4.742926
Length: 398, dtype: float64

In [401]:
gamma_j

station_destination[T.Acton Central]       1.486103
station_destination[T.Acton Town]          1.440836
station_destination[T.Aldgate]             2.222441
station_destination[T.Aldgate East]        2.153787
station_destination[T.All Saints]          0.163051
                                             ...   
station_destination[T.Wood Street]         0.213082
station_destination[T.Woodford]            0.556487
station_destination[T.Woodgrange Park]     0.401733
station_destination[T.Woodside Park]       1.106866
station_destination[T.Woolwich Arsenal]    3.493220
Length: 397, dtype: float64

In [402]:
beta

0.00015436969215475965

In [403]:
#create some Oi and Dj columns in the dataframe and store row and column totals in them:
#to create O_i, take cdatasub ...then... group by origcodenew ...then... summarise by calculating the sum of Total
O_i = pd.DataFrame(london_flows.groupby(["station_origin"])["flows"].agg(np.sum))
O_i.rename(columns={"flows":"O_i"}, inplace = True)
london_flows = london_flows.merge(O_i, on = "station_origin", how = "left" )

D_j = pd.DataFrame(london_flows.groupby(["station_destination"])["flows"].agg(np.sum))
D_j.rename(columns={"flows":"D_j"}, inplace = True)
london_flows = london_flows.merge(D_j, on = "station_destination", how = "left" )

london_flows

Unnamed: 0,station_origin,O_pop,O_jobs,station_destination,D_pop,D_jobs,flows,distance,log_distance,log_dest_jobs,fit1,fit2,fit3,fit4,O_i,D_j
0,Abbey Road,599,345,Bank and Monument,29494,78549,0,8131.525097,9.003504,11.271478,55.0,78.0,55.0,77.0,599,78549
1,Abbey Road,599,345,Beckton,1179,442,1,8510.121774,9.049012,6.091312,1.0,1.0,2.0,3.0,599,442
2,Abbey Road,599,345,Blackwall,952,665,3,3775.448872,8.236275,6.499789,3.0,4.0,3.0,4.0,599,665
3,Abbey Road,599,345,Canary Wharf,14632,58772,1,5086.514220,8.534348,10.981421,66.0,99.0,77.0,112.0,599,58772
4,Abbey Road,599,345,Canning Town,21125,15428,37,2228.923167,7.709274,9.643939,49.0,56.0,45.0,48.0,599,15428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61408,Woolwich Arsenal,7892,4428,Tower Gateway,853,3342,127,13401.795549,9.503144,8.114325,121.0,99.0,257.0,185.0,7892,3342
61409,Woolwich Arsenal,7892,4428,West Ham,8879,5487,608,8701.454361,9.071245,8.610137,259.0,295.0,152.0,176.0,7892,5487
61410,Woolwich Arsenal,7892,4428,West India Quay,162,400,6,9536.720451,9.162905,5.991467,32.0,36.0,20.0,27.0,7892,400
61411,Woolwich Arsenal,7892,4428,West Silvertown,899,893,81,5355.248554,8.585832,6.794588,98.0,125.0,115.0,132.0,7892,893


In [404]:
# Scenarios A
#双限制模型尝试

# 创建london_flows的副本
london_flows_scenarios_a = london_flows.copy()

# 找出包含'Canary Wharf'的行
mask_origin = london_flows['station_origin'].str.contains('Canary Wharf')
mask_destination = london_flows['station_destination'].str.contains('Canary Wharf')

# 减少50%的O_jobs和D_jobs
london_flows_scenarios_a.loc[mask_origin, 'O_jobs'] *= 0.5
london_flows_scenarios_a.loc[mask_destination, 'D_jobs'] *= 0.5

In [405]:
# 显示原始数据和修改后数据的对比
print("Original Data:")
print(london_flows.loc[mask_origin, ['station_origin', 'station_destination', 'O_jobs', 'D_jobs']])
print("\nModified Data:")
print(london_flows_scenarios_a.loc[mask_origin, ['station_origin', 'station_destination', 'O_jobs', 'D_jobs']])

Original Data:
     station_origin station_destination  O_jobs  D_jobs
8779   Canary Wharf          Abbey Road   58772     345
8780   Canary Wharf          Acton Town   58772    2202
8781   Canary Wharf             Aldgate   58772    7782
8782   Canary Wharf        Aldgate East   58772    7932
8783   Canary Wharf          All Saints   58772     444
...             ...                 ...     ...     ...
9078   Canary Wharf          Wood Green   58772    2146
9079   Canary Wharf           Wood Lane   58772    1578
9080   Canary Wharf            Woodford   58772     706
9081   Canary Wharf       Woodside Park   58772     745
9082   Canary Wharf    Woolwich Arsenal   58772    4428

[304 rows x 4 columns]

Modified Data:
     station_origin station_destination  O_jobs  D_jobs
8779   Canary Wharf          Abbey Road   29386     345
8780   Canary Wharf          Acton Town   29386    2202
8781   Canary Wharf             Aldgate   29386    7782
8782   Canary Wharf        Aldgate East   29386  

In [406]:
print("Original Data:")
print(london_flows.loc[mask_destination, ['station_origin', 'station_destination', 'O_jobs', 'D_jobs']])
print("\nModified Data:")
print(london_flows_scenarios_a.loc[mask_destination, ['station_origin', 'station_destination', 'O_jobs', 'D_jobs']])

Original Data:
         station_origin station_destination  O_jobs  D_jobs
3            Abbey Road        Canary Wharf     345   58772
126          Acton Town        Canary Wharf    2202   58772
348             Aldgate        Canary Wharf    7782   58772
595        Aldgate East        Canary Wharf    7932   58772
817          All Saints        Canary Wharf     444   58772
...                 ...                 ...     ...     ...
60534        Wood Green        Canary Wharf    2146   58772
60777         Wood Lane        Canary Wharf    1578   58772
61001          Woodford        Canary Wharf     706   58772
61233     Woodside Park        Canary Wharf     745   58772
61381  Woolwich Arsenal        Canary Wharf    4428   58772

[304 rows x 4 columns]

Modified Data:
         station_origin station_destination  O_jobs  D_jobs
3            Abbey Road        Canary Wharf     345   29386
126          Acton Town        Canary Wharf    2202   29386
348             Aldgate        Canary Wharf  

In [407]:
# Here is the entropy maximising approach for a known beta.
# Plug in the required values in this function to solve.

def balance_doubly_constrained(pd, orig_field, dest_field, Oi_field, Dj_field, cij_field, beta, 
                               cost_function, Ai_name = "Ai_new", Bj_name = "Bj_new", converge=0.001):
    # Define some variables
    Oi = pd[[orig_field, Oi_field]]
    Dj = pd[[dest_field,Dj_field]]    
    if cost_function.lower() in ['power','pow']:
        beta_cij = np.exp(beta * np.log(pd[cij_field]))
    elif cost_function.lower() in ['exponential','exp']:
        beta_cij = np.exp(beta * pd[cij_field])
    else:
        return "Cost function not specified properly, use 'exp' or 'pow'"
    
    # Create some helper variables
    cnvg = 1
    iteration = 0
    # Now iteratively rebalance the Ai and Bj terms until convergence
    while cnvg > converge:
        if iteration == 0:
            # This first condition sets starting values for Ai and Bj
            # NB sets starting value of Ai assuming Bj is a vector of 1s.
            # We've already established beta_cij with the appropriate cost function, so...
            Oi = Oi.assign(Ai = Dj[Dj_field] * beta_cij)
            # Aggregate Ai and take inverse
            Ai = 1.0/Oi.groupby(orig_field)['Ai'].sum().to_frame()
            # Merge new Ais 
            Oi = Oi.merge(Ai,left_on = orig_field, right_index = True, suffixes = ('','_old'))
            # Drop the temporary Ai field we created, leaving Ai_old
            Oi.drop('Ai', axis=1, inplace=True)
            
            # Now set up Bjs using starting values of Ai
            Dj = Dj.assign(Bj = Oi['Ai_old'] * Oi[Oi_field] * beta_cij)
            # Aggregate Bj and take inverse
            Bj = 1.0/Dj.groupby(dest_field)['Bj'].sum().to_frame()
            # Merge new Bjs
            Dj = Dj.merge(Bj,left_on = dest_field, right_index = True, suffixes = ('','_old'))
            # Drop the temporary Bj field we created, leaving Bj_old
            Dj.drop('Bj', axis=1, inplace=True)
            
            # Increment loop
            iteration += 1
        else:
            # This bit is the iterated bit of the loop which refines the values of Ai and Bj
            # First Ai
            Oi['Ai'] = Dj['Bj_old'] * Dj[Dj_field] * beta_cij
            # Aggregate Ai and take inverse
            Ai = 1.0/Oi.groupby(orig_field)['Ai'].sum().to_frame()
            # Drop temporary Ai
            Oi.drop('Ai', axis=1, inplace=True)
            # Merge new Ais 
            Oi = Oi.merge(Ai,left_on = orig_field, right_index = True)
            # Calculate the difference between old and new Ais
            Oi['diff'] = np.absolute((Oi['Ai_old'] - Oi['Ai'])/Oi['Ai_old'])
            # Set new Ais to Ai_old
            Oi['Ai_old'] = Oi['Ai']
            # Drop the temporary Ai field we created, leaving Ai_old
            Oi.drop('Ai', axis=1, inplace=True)
            
            # Then Bj
            Dj['Bj'] = Oi['Ai_old'] * Oi[Oi_field] * beta_cij
            # Aggregate Bj and take inverse
            Bj = 1.0/Dj.groupby(dest_field)['Bj'].sum().to_frame()
            # Drop temporary Bj
            Dj.drop('Bj', axis=1, inplace=True)
            # Merge new Bjs
            Dj = Dj.merge(Bj,left_on = dest_field, right_index = True)
            # Calculate the difference between old and new Bjs
            Dj['diff'] = np.absolute((Dj['Bj_old'] - Dj['Bj'])/Dj['Bj_old'])
            # Set new Bjs to Bj_old
            Dj['Bj_old'] = Dj['Bj']
            # Drop the temporary Bj field we created, leaving Bj_old
            Dj.drop('Bj', axis=1, inplace=True)
            
            # Assign higher sum difference from Ai or Bj to cnvg
            cnvg = np.maximum(Oi['diff'].sum(),Dj['diff'].sum())
            
            # Print and increment loop
            print("Iteration:", iteration)
            iteration += 1

    # When the while loop finishes add the computed Ai_old and Bj_old to the dataframe and return
    pd[Ai_name] = Oi['Ai_old']
    pd[Bj_name] = Dj['Bj_old']
    return pd

In [408]:
# Use the beta we got from the negative exponential model
beta = -Sim4.params[-1]
# Get the balancing factors. NB Setting of new field names for Ai and Bj.
london_flows = balance_doubly_constrained(london_flows,'station_origin','station_destination','O_jobs','D_jobs','distance',-beta,'exponential','Ai_exp','Bj_exp')

# Now predict the model again using the new Ai and Dj fields.
london_flows['fit_exp'] = np.round(london_flows['O_i'] * london_flows['Ai_exp'] * london_flows['D_j'] * london_flows['Bj_exp'] * 
                                   np.exp(london_flows['distance']*-beta))
# Check out the matrix
pd.pivot_table(london_flows,values='fit_exp',index ='station_origin',columns='station_destination',fill_value=0,aggfunc=sum,margins=True)

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26


station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,600.0
Acton Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1224.0
Acton Town,0.0,0.0,0.0,9.0,8.0,0.0,24.0,0.0,0.0,10.0,...,39.0,4.0,3.0,18.0,0.0,0.0,0.0,1.0,0.0,3738.0
Aldgate,0.0,0.0,2.0,0.0,29.0,0.0,0.0,0.0,0.0,21.0,...,7.0,0.0,4.0,2.0,0.0,1.0,0.0,1.0,0.0,2877.0
Aldgate East,0.0,0.0,2.0,32.0,0.0,0.0,1.0,0.0,0.0,22.0,...,7.0,1.0,4.0,2.0,0.0,2.0,0.0,1.0,0.0,3167.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,0.0,0.0,2.0,27.0,33.0,0.0,0.0,0.0,0.0,19.0,...,6.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,4866.0
Woodgrange Park,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,533.0
Woodside Park,0.0,0.0,3.0,15.0,14.0,0.0,1.0,0.0,0.0,23.0,...,10.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,3099.0
Woolwich Arsenal,26.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7890.0


In [409]:
london_flows_matrix1

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,31.0,603.0
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1221.0
Acton Town,,,,11.0,10.0,,17.0,0.0,,12.0,...,40.0,4.0,2.0,19.0,,0.0,,1.0,,3752.0
Aldgate,,,1.0,,32.0,,,0.0,,23.0,...,7.0,,3.0,2.0,,1.0,,1.0,,2883.0
Aldgate East,,,2.0,38.0,,,0.0,0.0,,24.0,...,7.0,1.0,3.0,2.0,,1.0,,1.0,,3167.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,2.0,38.0,44.0,,,,,25.0,...,7.0,,7.0,,,,,,,4862.0
Woodgrange Park,,2.0,,,,,,,,,...,,,,,,,,,,528.0
Woodside Park,,,2.0,17.0,15.0,,0.0,,,25.0,...,10.0,,4.0,,,,,,,3093.0
Woolwich Arsenal,28.0,,,,,29.0,,,,,...,,,,,,,,,,7896.0


In [410]:
#50 percent increase in transport costs
beta_1 = beta*1.5
# Get the balancing factors. NB Setting of new field names for Ai and Bj.
london_flows = balance_doubly_constrained(london_flows,'station_origin','station_destination','O_jobs','D_jobs','distance',-beta_1,'exponential','Ai_exp_1','Bj_exp_1')

# Now predict the model again using the new Ai and Dj fields.
london_flows['fit_exp_1'] = np.round(london_flows['O_i'] * london_flows['Ai_exp_1'] * london_flows['D_j'] * london_flows['Bj_exp_1'] * 
                                   np.exp(london_flows['distance']*-beta_1))
# Check out the matrix
pd.pivot_table(london_flows,values='fit_exp_1',index ='station_origin',columns='station_destination',fill_value=0,aggfunc=sum,margins=True)

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45
Iteration: 46
Iteration: 47
Iteration: 48


station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,597.0
Acton Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1223.0
Acton Town,0.0,0.0,0.0,4.0,4.0,0.0,42.0,0.0,0.0,5.0,...,38.0,3.0,1.0,23.0,0.0,0.0,0.0,0.0,0.0,3738.0
Aldgate,0.0,0.0,1.0,0.0,36.0,0.0,0.0,0.0,0.0,23.0,...,4.0,0.0,3.0,1.0,0.0,1.0,0.0,1.0,0.0,2884.0
Aldgate East,0.0,0.0,1.0,39.0,0.0,0.0,0.0,0.0,0.0,23.0,...,4.0,0.0,3.0,1.0,0.0,1.0,0.0,1.0,0.0,3158.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,0.0,0.0,0.0,17.0,23.0,0.0,0.0,0.0,0.0,10.0,...,2.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4866.0
Woodgrange Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,530.0
Woodside Park,0.0,0.0,1.0,8.0,8.0,0.0,0.0,0.0,0.0,17.0,...,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,3091.0
Woolwich Arsenal,21.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7892.0


In [411]:
london_flows_matrix1

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,31.0,603.0
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1221.0
Acton Town,,,,11.0,10.0,,17.0,0.0,,12.0,...,40.0,4.0,2.0,19.0,,0.0,,1.0,,3752.0
Aldgate,,,1.0,,32.0,,,0.0,,23.0,...,7.0,,3.0,2.0,,1.0,,1.0,,2883.0
Aldgate East,,,2.0,38.0,,,0.0,0.0,,24.0,...,7.0,1.0,3.0,2.0,,1.0,,1.0,,3167.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,2.0,38.0,44.0,,,,,25.0,...,7.0,,7.0,,,,,,,4862.0
Woodgrange Park,,2.0,,,,,,,,,...,,,,,,,,,,528.0
Woodside Park,,,2.0,17.0,15.0,,0.0,,,25.0,...,10.0,,4.0,,,,,,,3093.0
Woolwich Arsenal,28.0,,,,,29.0,,,,,...,,,,,,,,,,7896.0


In [412]:
#Tenfold increase in transport costs 
beta_2 = beta*2

# Get the balancing factors. NB Setting of new field names for Ai and Bj.
london_flows = balance_doubly_constrained(london_flows,'station_origin','station_destination','O_jobs','D_jobs','distance',-beta_2,'exponential','Ai_exp_2','Bj_exp_2')

# Now predict the model again using the new Ai and Dj fields.
london_flows['fit_exp_2'] = np.round(london_flows['O_i'] * london_flows['Ai_exp_2'] * london_flows['D_j'] * london_flows['Bj_exp_2'] * 
                                   np.exp(london_flows['distance']*-beta_2))
# Check out the matrix
pd.pivot_table(london_flows,values='fit_exp_2',index ='station_origin',columns='station_destination',fill_value=0,aggfunc=sum,margins=True)

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45
Iteration: 46
Iteration: 47
Iteration: 48
Iteration: 49
Iteration: 50
Iteration: 51
Iteration: 52
Iteration: 53
Iteration: 54
Iteration: 55
Iteration: 56
Iteration: 57
Iteration: 58
Iteration: 59
Iteration: 60
Iteration: 61
Iteration: 62
Iteration: 63
Iteration: 64
Iteration: 65
Iteration: 66
Iteration: 67
Iteration: 68
Iteration: 69
Iteration: 70
Iteration: 71
Iteration: 72
I

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,600.0
Acton Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1224.0
Acton Town,0.0,0.0,0.0,2.0,2.0,0.0,54.0,0.0,0.0,3.0,...,29.0,1.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,3740.0
Aldgate,0.0,0.0,0.0,0.0,43.0,0.0,0.0,0.0,0.0,24.0,...,2.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,2871.0
Aldgate East,0.0,0.0,0.0,46.0,0.0,0.0,0.0,0.0,0.0,24.0,...,2.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,0.0,3162.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,0.0,0.0,0.0,9.0,13.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4859.0
Woodgrange Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,528.0
Woodside Park,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,9.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3089.0
Woolwich Arsenal,16.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7892.0


In [413]:
london_flows_matrix1

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,31.0,603.0
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1221.0
Acton Town,,,,11.0,10.0,,17.0,0.0,,12.0,...,40.0,4.0,2.0,19.0,,0.0,,1.0,,3752.0
Aldgate,,,1.0,,32.0,,,0.0,,23.0,...,7.0,,3.0,2.0,,1.0,,1.0,,2883.0
Aldgate East,,,2.0,38.0,,,0.0,0.0,,24.0,...,7.0,1.0,3.0,2.0,,1.0,,1.0,,3167.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,2.0,38.0,44.0,,,,,25.0,...,7.0,,7.0,,,,,,,4862.0
Woodgrange Park,,2.0,,,,,,,,,...,,,,,,,,,,528.0
Woodside Park,,,2.0,17.0,15.0,,0.0,,,25.0,...,10.0,,4.0,,,,,,,3093.0
Woolwich Arsenal,28.0,,,,,29.0,,,,,...,,,,,,,,,,7896.0


In [414]:
london_flows.head(33)

Unnamed: 0,station_origin,O_pop,O_jobs,station_destination,D_pop,D_jobs,flows,distance,log_distance,log_dest_jobs,...,D_j,Ai_exp,Bj_exp,fit_exp,Ai_exp_1,Bj_exp_1,fit_exp_1,Ai_exp_2,Bj_exp_2,fit_exp_2
0,Abbey Road,599,345,Bank and Monument,29494,78549,0,8131.525097,9.003504,11.271478,...,78549,7e-06,0.678063,60.0,8e-06,0.621769,37.0,1e-05,0.574974,22.0
1,Abbey Road,599,345,Beckton,1179,442,1,8510.121774,9.049012,6.091312,...,442,7e-06,5.018793,2.0,8e-06,6.128142,2.0,1e-05,7.299464,1.0
2,Abbey Road,599,345,Blackwall,952,665,3,3775.448872,8.236275,6.499789,...,665,7e-06,2.000126,3.0,8e-06,1.620643,2.0,1e-05,1.347408,2.0
3,Abbey Road,599,345,Canary Wharf,14632,58772,1,5086.51422,8.534348,10.981421,...,58772,7e-06,0.997791,105.0,8e-06,1.065421,97.0,1e-05,1.125452,83.0
4,Abbey Road,599,345,Canning Town,21125,15428,37,2228.923167,7.709274,9.643939,...,15428,7e-06,1.068527,46.0,8e-06,1.110254,51.0,1e-05,1.105063,52.0
5,Abbey Road,599,345,Crossharbour,1724,1208,1,6686.47556,8.807842,7.096722,...,1208,7e-06,2.274195,4.0,8e-06,1.939044,2.0,1e-05,1.674669,2.0
6,Abbey Road,599,345,Custom House,785,845,0,3824.85563,8.249276,6.739338,...,845,7e-06,2.551832,5.0,8e-06,2.316344,4.0,1e-05,2.143315,3.0
7,Abbey Road,599,345,Cutty Sark,2122,1748,2,8503.898909,9.04828,7.466228,...,1748,7e-06,2.944977,5.0,8e-06,2.759962,3.0,1e-05,2.596307,2.0
8,Abbey Road,599,345,Cyprus,500,850,7,6532.099618,8.784484,6.745238,...,850,7e-06,3.862127,5.0,8e-06,4.135458,4.0,1e-05,4.396153,3.0
9,Abbey Road,599,345,Devons Road,1387,611,1,3958.324171,8.283576,6.415099,...,611,7e-06,2.278408,3.0,8e-06,1.932427,2.0,1e-05,1.662428,2.0
