# I. Topological network

## I.1. Centrality measures:

In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
G = nx.read_graphml("london_graph.xml") 

In [3]:
print(nx.info(G))

Graph with 401 nodes and 467 edges



  print(nx.info(G))


In [4]:
type(G)

networkx.classes.graph.Graph

In [5]:
print(nx.info(G))

Graph with 401 nodes and 467 edges



  print(nx.info(G))


In [6]:
list(G.nodes(data = True))[0]

('Wembley Park', {'coords': '(519316.5590174915, 186389.32981656672)'})

In [7]:
for node in G.nodes():
    G.nodes[node]['coords'] = eval(G.nodes[node]['coords'])

In [8]:
list(G.nodes(data = True))[0]

('Wembley Park', {'coords': (519316.5590174915, 186389.32981656672)})

In [9]:
list(G.edges(data = True))[0]

('Wembley Park',
 'Kingsbury',
 {'length': 2916.7715580506483, 'line_name': 'Jubilee', 'flows': 12356})

fig, ax = plt.subplots(figsize=(25,20))

node_labels = nx.get_node_attributes(G, 'station_name')

pos = nx.get_node_attributes(G, 'coords')

nx.draw_networkx_nodes(G,pos,node_size=50,node_color='b')
nx.draw_networkx_edges(G,pos,arrows=False,width=0.2)
nx.draw_networkx_labels(G,pos, node_labels, font_size=10, font_color='black')

plt.title("London tube network",fontsize=15)
plt.axis("off")
plt.show()

 #We can print the dataframe from the shapefile to check the data
df = nx.to_pandas_edgelist(G)
df[0:10]

### Explore

In [10]:
#if want to know what is the maximum distance between stations
max_dist=max({weight for e1, e2, weight in G.edges(data='length')})
max_dist
#This distance in given in meters

7266.37392749648

In [11]:
#if want to know the edge connecting the stations farther away from each other
max(dict(G.edges).items(), key=lambda x: x[1]['length'])

(('Wembley Park', 'Finchley Road'),
 {'length': 7266.37392749648, 'line_name': 'Metropolitan', 'flows': 55216})

In [12]:
nx.diameter(G)

36

In [13]:
# diameter of the network considering the distance between stations (weighted diameter)

nlen = {n:nx.single_source_dijkstra_path_length(G, n, weight='length') for n in G.nodes() }
e = nx.eccentricity(G,sp=nlen)
d = nx.diameter(G, e)
d

78581.28589029584

### Centrality measures

#### Degree Centrality on nodes

In [14]:
#  We can calculate the degree centrality using networkx function:
deg_london =nx.degree_centrality(G)
nx.set_node_attributes(G,dict(deg_london),'degree')


In [15]:
# To dataframe using the nodes as the index
df = pd.DataFrame(index=G.nodes())
#df['station_name'] = pd.Series(nx.get_node_attributes(G, 'station_name'))
df['degree'] = pd.Series(nx.get_node_attributes(G, 'degree'))

df_sorted_deg = df.sort_values(["degree"], ascending=False)

df_sorted_deg[0:10]

Unnamed: 0,degree
Stratford,0.0225
Bank and Monument,0.02
King's Cross St. Pancras,0.0175
Baker Street,0.0175
Earl's Court,0.015
Oxford Circus,0.015
Liverpool Street,0.015
Waterloo,0.015
Green Park,0.015
Canning Town,0.015


In [16]:
df_top_deg = df_sorted_deg.reset_index()
column_names = {"index":"Station"}
df_top_deg = df_top_deg.rename(column_names, axis=1)
df_top_deg

Unnamed: 0,Station,degree
0,Stratford,0.0225
1,Bank and Monument,0.0200
2,King's Cross St. Pancras,0.0175
3,Baker Street,0.0175
4,Earl's Court,0.0150
...,...,...
396,High Barnet,0.0025
397,Battersea Park,0.0025
398,New Cross,0.0025
399,Crystal Palace,0.0025


In [17]:
degree_values=[(i[1]['degree']) for i in G.nodes(data=True)]

#### Betweenness Centrality on nodes

In [18]:
### Topological betweenness centrality:

#Let us compute the betweenness centrality for the network, without using weights:
bet_london_t=nx.betweenness_centrality(G, normalized=False)
# We can add these values to the nodes attributes:
nx.set_node_attributes(G,bet_london_t,'betweenness_t')

# To ataframe using the nodes as the index
df = pd.DataFrame(index=G.nodes())
#df['station_name'] = pd.Series(nx.get_node_attributes(G, 'station_name'))
df['betweenness_t'] = pd.Series(nx.get_node_attributes(G, 'betweenness_t'))

df_sorted_bet = df.sort_values(["betweenness_t"], ascending=False)
df_sorted_bet[0:10]

Unnamed: 0,betweenness_t
Stratford,23768.093434
Bank and Monument,23181.058947
Liverpool Street,21610.387049
King's Cross St. Pancras,20373.521465
Waterloo,19464.882323
Green Park,17223.622114
Euston,16624.275469
Westminster,16226.155916
Baker Street,15287.107612
Finchley Road,13173.758009


In [19]:
df_top_bet = df_sorted_bet.reset_index()
column_names = {"index":"Station"}
df_top_bet = df_top_bet.rename(column_names, axis=1)
df_top_bet

Unnamed: 0,Station,betweenness_t
0,Stratford,23768.093434
1,Bank and Monument,23181.058947
2,Liverpool Street,21610.387049
3,King's Cross St. Pancras,20373.521465
4,Waterloo,19464.882323
...,...,...
396,Edgware,0.000000
397,Woolwich Arsenal,0.000000
398,Stratford International,0.000000
399,Watford Junction,0.000000


In [20]:
betweenness_t_values=[(i[1]['betweenness_t']) for i in G.nodes(data=True)]
betweenness_t_values

[10780.813492063491,
 1191.0,
 796.0,
 399.0,
 0.0,
 23768.09343434341,
 11119.621248196234,
 7674.09841269841,
 2526.4904761904795,
 5848.217099567098,
 8590.281385281378,
 5815.533333333337,
 6491.394155844155,
 1379.3071428571413,
 19464.882323232327,
 16226.155916305914,
 17223.622113997128,
 11298.961255411277,
 15287.107611832655,
 144.25714285714298,
 34.66666666666667,
 13173.758008657984,
 335.33333333333337,
 35.0,
 96.66666666666666,
 458.66666666666663,
 4539.465945165944,
 659.7000000000002,
 542.3333333333334,
 3321.1999999999994,
 0.0,
 399.0,
 796.0,
 1191.0,
 1584.0,
 1975.0,
 2364.0,
 3242.1999999999994,
 3564.866666666666,
 7158.103174603172,
 810.7611832611843,
 2102.9274531024566,
 864.7778499278496,
 1070.277849927851,
 1295.277849927853,
 4296.746031746024,
 694.5365079365091,
 1190.66626984127,
 774.8833333333329,
 801.4499999999998,
 23181.05894660899,
 21610.387049062047,
 2944.9343795093787,
 1245.0750721500708,
 7239.0,
 6911.5,
 4051.5,
 3674.5,
 3330.0,
 1

#### Closeness Centrality:

In [21]:
#topological closeness centrality
clos_t=nx.closeness_centrality(G)
# We can add these values to the nodes attributes:
nx.set_node_attributes(G,clos_t,'closeness_t')

# To ataframe using the nodes as the index
df = pd.DataFrame(index=G.nodes())
#df['station_name'] = pd.Series(nx.get_node_attributes(G, 'station_name'))
df['closeness_t'] = pd.Series(nx.get_node_attributes(G, 'closeness_t'))

df_sorted_clo = df.sort_values(["closeness_t"], ascending=False)
df_sorted_clo[0:10]

Unnamed: 0,closeness_t
Green Park,0.114778
Bank and Monument,0.113572
King's Cross St. Pancras,0.113443
Westminster,0.112549
Waterloo,0.112265
Oxford Circus,0.111204
Bond Street,0.110988
Farringdon,0.110742
Angel,0.110742
Moorgate,0.110314


In [22]:
df_top_clo = df_sorted_clo.reset_index()
column_names = {"index":"Station"}
df_top_clo = df_top_clo.rename(column_names, axis=1)
df_top_clo

Unnamed: 0,Station,closeness_t
0,Green Park,0.114778
1,Bank and Monument,0.113572
2,King's Cross St. Pancras,0.113443
3,Westminster,0.112549
4,Waterloo,0.112265
...,...,...
396,Heathrow Terminals 2 & 3,0.045096
397,Heathrow Terminal 4,0.045091
398,Watford High Street,0.044302
399,Heathrow Terminal 5,0.043155


In [23]:
closeness_t_values=[(i[1]['closeness_t']) for i in G.nodes(data=True)]

In [24]:
# number of connected components
nx.number_connected_components(G)

1

## I.2. Impact measures:

clustering coefficient and average shortest paths
（or global efficiency or largest component）

## I.3. Node removal:

## A

### Degree

In [25]:
#G = nx.read_graphml("london_graph.xml")
G5 = G.copy()
for i in range(0,10):
    #valbet = [n for n in G3.nodes() if G3.nodes[n]['betweenness_t'] == betweenness_t_values[i]]
    valdeg = [df_top_deg["Station"][i]]
    G5.remove_nodes_from(valdeg)
    
    #G5_c = nx.average_clustering(G5)
    G5_ge = nx.global_efficiency(G5)
    
    components = nx.connected_components(G5)
    largest_component = max(components, key=len)
    largest_subgraph = G5.subgraph(largest_component)
    G5_lc = largest_subgraph.number_of_nodes()
    
    
    
    print("remove", i, valdeg,
          "G5_lc = ", G5_lc, 
          "G5_ge = ", G5_ge)

remove 0 ['Stratford'] G5_lc =  379 G5_ge =  0.08891736066510689
remove 1 ['Bank and Monument'] G5_lc =  378 G5_ge =  0.08586164448742485
remove 2 ["King's Cross St. Pancras"] G5_lc =  377 G5_ge =  0.08028700838265396
remove 3 ['Baker Street'] G5_lc =  374 G5_ge =  0.07570039409751211
remove 4 ["Earl's Court"] G5_lc =  373 G5_ge =  0.0740361229198828
remove 5 ['Oxford Circus'] G5_lc =  371 G5_ge =  0.07274535237569829
remove 6 ['Liverpool Street'] G5_lc =  365 G5_ge =  0.07094844226118287
remove 7 ['Waterloo'] G5_lc =  364 G5_ge =  0.06898194318071488
remove 8 ['Green Park'] G5_lc =  363 G5_ge =  0.06825731584971681
remove 9 ['Canning Town'] G5_lc =  349 G5_ge =  0.06338447666647608


### Betweeness

In [26]:
G6 = G.copy()
for i in range(0,10):
    #valbet = [n for n in G3.nodes() if G3.nodes[n]['betweenness_t'] == betweenness_t_values[i]]
    valbet = [df_top_bet["Station"][i]]
    G6.remove_nodes_from(valbet)
    #G6 = G.copy()
    
    #G6_c = nx.average_clustering(G6)
    
    G6_ge = nx.global_efficiency(G6)
    
    components = nx.connected_components(G6)
    largest_component = max(components, key=len)
    largest_subgraph = G6.subgraph(largest_component)
    G6_lc = largest_subgraph.number_of_nodes()
    
    
    
    print("remove", i, valbet,
          "G6_lc = ", G6_lc, 
          "G6_ge = ", G6_ge)

remove 0 ['Stratford'] G6_lc =  379 G6_ge =  0.08891736066510689
remove 1 ['Bank and Monument'] G6_lc =  378 G6_ge =  0.08586164448742485
remove 2 ['Liverpool Street'] G6_lc =  377 G6_ge =  0.08496349266423939
remove 3 ["King's Cross St. Pancras"] G6_lc =  371 G6_ge =  0.07849775440713821
remove 4 ['Waterloo'] G6_lc =  370 G6_ge =  0.07594226578366223
remove 5 ['Green Park'] G6_lc =  369 G6_ge =  0.07415154167648695
remove 6 ['Euston'] G6_lc =  346 G6_ge =  0.06820564659789057
remove 7 ['Westminster'] G6_lc =  345 G6_ge =  0.06765950327361094
remove 8 ['Baker Street'] G6_lc =  342 G6_ge =  0.064700058053009
remove 9 ['Finchley Road'] G6_lc =  339 G6_ge =  0.06313903700825897


### Closeness

In [27]:
G7 = G.copy()
for i in range(0,10):
    valclo = [df_top_clo["Station"][i]]
    G7.remove_nodes_from(valclo)
    
    #G7_c = nx.average_clustering(G7)
    G7_ge = nx.global_efficiency(G7)
    
    components = nx.connected_components(G7)
    largest_component = max(components, key=len)
    largest_subgraph = G7.subgraph(largest_component)
    G7_lc = largest_subgraph.number_of_nodes()

        
    
    print("remove", i, valclo,"G7_lc = ", G7_lc, "G7_ge = ", G7_ge)

remove 0 ['Green Park'] G7_lc =  400 G7_ge =  0.09918991960788402
remove 1 ['Bank and Monument'] G7_lc =  399 G7_ge =  0.09487232544791133
remove 2 ["King's Cross St. Pancras"] G7_lc =  398 G7_ge =  0.08793385149140875
remove 3 ['Westminster'] G7_lc =  397 G7_ge =  0.08737164566976727
remove 4 ['Waterloo'] G7_lc =  396 G7_ge =  0.08485943799789313
remove 5 ['Oxford Circus'] G7_lc =  395 G7_ge =  0.08278135073141742
remove 6 ['Bond Street'] G7_lc =  394 G7_ge =  0.08258086417012774
remove 7 ['Farringdon'] G7_lc =  393 G7_ge =  0.08260040537396239
remove 8 ['Angel'] G7_lc =  392 G7_ge =  0.08262233108950982
remove 9 ['Moorgate'] G7_lc =  389 G7_ge =  0.08166991436767818


## B

### Degree

There are some values with multiple stations, delete the top 10 is not as accurate as other strategies. As a result, degree is discarded.

G2 = G.copy()
for i in range(0, 10):
    
        maxdeg=[n for n in G2.nodes() if G2.nodes[n]['degree'] == max(degree_values)]
        G2.remove_nodes_from(maxdeg)
    
        deg_london =nx.degree_centrality(G2)
        nx.set_node_attributes(G2,dict(deg_london),'degree')

        # To dataframe using the nodes as the index
        df = pd.DataFrame(index=G2.nodes())
        df['degree'] = pd.Series(nx.get_node_attributes(G2, 'degree'))

        df_sorted = df.sort_values(["degree"], ascending=False)
        G2_c = nx.average_clustering(G2) 
        G2_ge = nx.global_efficiency(G2)
        degree_values =[(i[1]['degree']) for i in G2.nodes(data=True)]

        print("remove ",i, maxdeg, "G2_c = ", G2_c, "G2_ge = ", G2_ge)
        
      

### Betweeness

In [28]:
G3 = G.copy()
for i in range(0, 11):
    
        maxbet=[n for n in G3.nodes() if G3.nodes[n]['betweenness_t'] == max(betweenness_t_values)]
        G3.remove_nodes_from(maxbet)
    
        bet_london_t=nx.betweenness_centrality(G3, normalized=False)
        nx.set_node_attributes(G3,bet_london_t,'betweenness_t')

        # To dataframe using the nodes as the index
        df = pd.DataFrame(index=G3.nodes())
        df['betweenness_t'] = pd.Series(nx.get_node_attributes(G3, 'betweenness_t'))
        betweenness_t_values=[(i[1]['betweenness_t']) for i in G3.nodes(data=True)]

        components = nx.connected_components(G3)
        largest_component = max(components, key=len)
        largest_subgraph = G3.subgraph(largest_component)
        G3_lc = largest_subgraph.number_of_nodes()
    
        G3_ge = nx.global_efficiency(G3)
        

        print("remove ",i, maxbet, "G3_lc = ", G3_lc, "G3_ge = ", G3_ge)
        
      

remove  0 ['Stratford'] G3_lc =  379 G3_ge =  0.08891736066510689
remove  1 ["King's Cross St. Pancras"] G3_lc =  378 G3_ge =  0.08460293133575152
remove  2 ['Waterloo'] G3_lc =  377 G3_ge =  0.08182895253292936
remove  3 ['Bank and Monument'] G3_lc =  376 G3_ge =  0.07767794342812263
remove  4 ['Canada Water'] G3_lc =  375 G3_ge =  0.07283234083472483
remove  5 ['West Hampstead'] G3_lc =  227 G3_ge =  0.053210203984026455
remove  6 ["Earl's Court"] G3_lc =  226 G3_ge =  0.05165629952389727
remove  7 ["Shepherd's Bush"] G3_lc =  196 G3_ge =  0.0458442134055722
remove  8 ['Euston'] G3_lc =  173 G3_ge =  0.04163076968121037
remove  9 ['Baker Street'] G3_lc =  170 G3_ge =  0.0381637040943985
remove  10 ['Acton Town'] G3_lc =  147 G3_ge =  0.032996526260496956


### Closeness

In [29]:
G4 = G.copy()
for i in range(0, 11):
    
        maxclo=[n for n in G4.nodes() if G4.nodes[n]['closeness_t'] == max(closeness_t_values)]
        G4.remove_nodes_from(maxclo)
    
        clos_t=nx.closeness_centrality(G4)
        nx.set_node_attributes(G4,clos_t,'closeness_t')

        # To dataframe using the nodes as the index
        df = pd.DataFrame(index=G4.nodes())
        df['closeness_t'] = pd.Series(nx.get_node_attributes(G4, 'closeness_t'))
        closeness_t_values=[(i[1]['closeness_t']) for i in G4.nodes(data=True)]

        components = nx.connected_components(G4)
        largest_component = max(components, key=len)
        largest_subgraph = G4.subgraph(largest_component)
        G4_lc = largest_subgraph.number_of_nodes()
        
        G4_ge = nx.global_efficiency(G4)
        

        print("remove ",i, maxclo, "G4_lc = ", G4_lc, "G4_ge = ", G4_ge)

remove  0 ['Green Park'] G4_lc =  400 G4_ge =  0.09918991960788402
remove  1 ["King's Cross St. Pancras"] G4_lc =  399 G4_ge =  0.09443475025566316
remove  2 ['Waterloo'] G4_lc =  398 G4_ge =  0.09181648060183005
remove  3 ['Bank and Monument'] G4_lc =  397 G4_ge =  0.08542563066911478
remove  4 ['West Hampstead'] G4_lc =  396 G4_ge =  0.08054424756502003
remove  5 ['Canada Water'] G4_lc =  226 G4_ge =  0.05810104159173278
remove  6 ['Stratford'] G4_lc =  226 G4_ge =  0.051883620553389555
remove  7 ["Earl's Court"] G4_lc =  225 G4_ge =  0.05035000093626794
remove  8 ["Shepherd's Bush"] G4_lc =  195 G4_ge =  0.04439458727102797
remove  9 ['Oxford Circus'] G4_lc =  194 G4_ge =  0.04295771061337044
remove  10 ['Paddington'] G4_lc =  193 G4_ge =  0.041119379269026736


# II

In [30]:
G_II = nx.read_graphml("london_graph.xml")

In [31]:
for node in G_II.nodes():
    G_II.nodes[node]['coords'] = eval(G_II.nodes[node]['coords'])

In [32]:
list(G_II.nodes(data = True))[0]

('Wembley Park', {'coords': (519316.5590174915, 186389.32981656672)})

In [33]:
list(G_II.edges(data = True))[0]

('Wembley Park',
 'Kingsbury',
 {'length': 2916.7715580506483, 'line_name': 'Jubilee', 'flows': 12356})

In [34]:
df_edge = nx.to_pandas_edgelist(G_II)
df_edge

Unnamed: 0,source,target,flows,line_name,length
0,Wembley Park,Kingsbury,12356,Jubilee,2916.771558
1,Wembley Park,Neasden,6744,Jubilee,2353.165938
2,Wembley Park,Preston Road,36601,Metropolitan,1419.735166
3,Wembley Park,Finchley Road,55216,Metropolitan,7266.373927
4,Kingsbury,Queensbury,9419,Jubilee,1245.995234
...,...,...,...,...,...
462,Hounslow Central,Hounslow East,13469,Piccadilly,745.684383
463,Hounslow East,Osterley,15935,Piccadilly,967.494672
464,Osterley,Boston Manor,17445,Piccadilly,2560.814291
465,Boston Manor,Northfields,18598,Piccadilly,930.852145


In [35]:
df_edge = df_edge[df_edge.flows != 0]

In [36]:
df_edge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 464 entries, 0 to 466
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   source     464 non-null    object 
 1   target     464 non-null    object 
 2   flows      464 non-null    int64  
 3   line_name  464 non-null    object 
 4   length     464 non-null    float64
dtypes: float64(1), int64(1), object(3)
memory usage: 21.8+ KB


In [37]:
G_II_cleaned = nx.Graph()
G_II_cleaned = nx.from_pandas_edgelist(df_edge, 'source', 'target','flows')

In [38]:
G_II_cleaned.edges

EdgeView([('Wembley Park', 'Kingsbury'), ('Wembley Park', 'Neasden'), ('Wembley Park', 'Preston Road'), ('Wembley Park', 'Finchley Road'), ('Kingsbury', 'Queensbury'), ('Neasden', 'Dollis Hill'), ('Preston Road', 'Northwick Park'), ('Finchley Road', 'Baker Street'), ('Finchley Road', 'Swiss Cottage'), ('Finchley Road', 'West Hampstead'), ('Queensbury', 'Canons Park'), ('Canons Park', 'Stanmore'), ('Stratford', 'West Ham'), ('Stratford', 'Mile End'), ('Stratford', 'Leyton'), ('Stratford', 'Hackney Wick'), ('Stratford', 'Stratford High Street'), ('Stratford', 'Pudding Mill Lane'), ('Stratford', 'Stratford International'), ('Stratford', 'Liverpool Street'), ('Stratford', 'Maryland'), ('West Ham', 'Canning Town'), ('West Ham', 'Bromley-by-Bow'), ('West Ham', 'Plaistow'), ('West Ham', 'Abbey Road'), ('West Ham', 'Star Lane'), ('Mile End', 'Bethnal Green'), ('Mile End', 'Stepney Green'), ('Mile End', 'Bow Road'), ('Leyton', 'Leytonstone'), ('Hackney Wick', 'Homerton'), ('Stratford High Stree

In [39]:
G_II_cleaned.nodes

NodeView(('Wembley Park', 'Kingsbury', 'Neasden', 'Preston Road', 'Finchley Road', 'Queensbury', 'Canons Park', 'Stanmore', 'Stratford', 'West Ham', 'Mile End', 'Leyton', 'Hackney Wick', 'Stratford High Street', 'Pudding Mill Lane', 'Stratford International', 'Liverpool Street', 'Maryland', 'Canning Town', 'Bromley-by-Bow', 'Plaistow', 'Abbey Road', 'Star Lane', 'North Greenwich', 'Royal Victoria', 'East India', 'West Silvertown', 'Canary Wharf', 'Canada Water', 'West India Quay', 'Heron Quays', 'Bermondsey', 'Rotherhithe', 'Surrey Quays', 'London Bridge', 'Southwark', 'Bank and Monument', 'Borough', 'Waterloo', 'Westminster', 'Embankment', 'Kennington', 'Lambeth North', 'Green Park', "St. James's Park", 'Bond Street', 'Hyde Park Corner', 'Piccadilly Circus', 'Victoria', 'Oxford Circus', 'Baker Street', 'Marble Arch', "St. John's Wood", 'Great Portland Street', 'Edgware Road', 'Marylebone', "Regent's Park", 'Swiss Cottage', 'West Hampstead', 'Dollis Hill', 'Willesden Green', 'Kilburn',

## II.1 Centrality measures for weighted network

### Betweenness Centrality on nodes for the Weighted Network

In [40]:
# Inverse weights:
inv_flows = {(e1, e2):round(1./weight,7) for e1, e2, weight in G_II_cleaned.edges(data='flows')}
# Let us add the inversed weight as an attribute to the edges in the graph
nx.set_edge_attributes(G_II_cleaned, inv_flows, 'inv_flows')

In [41]:
### Weighted betweenness centrality:
#Let us compute the betweenness centrality for the network, considering the distance between stations:

bet_london_w=nx.betweenness_centrality(G_II_cleaned,weight='inv_flows',normalized=False)

# We can add these values to the nodes attributes:
nx.set_node_attributes(G_II_cleaned,bet_london_w,'betweenness_w')

In [42]:
# To dataframe using the nodes as the index
df = pd.DataFrame(index=G_II_cleaned.nodes())

df['betweenness_w'] = pd.Series(nx.get_node_attributes(G_II_cleaned, 'betweenness_w'))

df_sorted_bet_w = df.sort_values(["betweenness_w"], ascending=False)
df_sorted_bet_w[0:10]

Unnamed: 0,betweenness_w
Green Park,44945.0
Bank and Monument,39913.0
Waterloo,32331.0
Westminster,29830.0
Liverpool Street,26617.0
Stratford,26203.0
Bond Street,23084.0
Euston,22419.0
Oxford Circus,21320.0
Warren Street,20023.0


In [43]:
df_wei_bet = df_sorted_bet_w.reset_index()
column_names = {"index":"Station"}
df_wei_bet = df_wei_bet.rename(column_names, axis=1)
df_wei_bet

Unnamed: 0,Station,betweenness_w
0,Green Park,44945.0
1,Bank and Monument,39913.0
2,Waterloo,32331.0
3,Westminster,29830.0
4,Liverpool Street,26617.0
...,...,...
393,Aldgate,0.0
394,Russell Square,0.0
395,Stratford High Street,0.0
396,Woolwich Arsenal,0.0


### Closeness Centrality on nodes

In [44]:
#Weighted closeness centrality: 
clos_w=nx.closeness_centrality(G_II, distance='length')
# We can add these values to the nodes attributes:
nx.set_node_attributes(G_II,clos_w,'closeness_w')

# To ataframe using the nodes as the index
df = pd.DataFrame(index=G_II.nodes())

df['closeness_w'] = pd.Series(nx.get_node_attributes(G_II, 'closeness_w'))

df_sorted_clo_w = df.sort_values(["closeness_w"], ascending=False)
df_sorted_clo_w[0:10]

Unnamed: 0,closeness_w
Holborn,7.9e-05
King's Cross St. Pancras,7.9e-05
Tottenham Court Road,7.9e-05
Oxford Circus,7.9e-05
Leicester Square,7.8e-05
Piccadilly Circus,7.8e-05
Charing Cross,7.8e-05
Chancery Lane,7.8e-05
Covent Garden,7.8e-05
Embankment,7.8e-05


In [45]:
df_wei_clo = df_sorted_clo_w.reset_index()
column_names = {"index":"Station"}
df_wei_clo = df_wei_clo.rename(column_names, axis=1)
df_wei_clo

Unnamed: 0,Station,closeness_w
0,Holborn,0.000079
1,King's Cross St. Pancras,0.000079
2,Tottenham Court Road,0.000079
3,Oxford Circus,0.000079
4,Leicester Square,0.000078
...,...,...
396,Chorleywood,0.000025
397,Shenfield,0.000024
398,Chalfont & Latimer,0.000023
399,Amersham,0.000021


## II.2

In [46]:
print(nx.average_shortest_path_length(G_II_cleaned))

13.567155677632496


In [47]:
# average shorest path
print(nx.average_shortest_path_length(G_II_cleaned, weight = 'inv_flows'))

0.0008157978810931216


In [48]:
components = nx.connected_components(G_II_cleaned)
largest_component = max(components, key=len)
largest_subgraph = G_II_cleaned.subgraph(largest_component)
G_II_cleaned_lc = largest_subgraph.number_of_nodes()
        
G_II_cleaned_lc

398

## II.3

In [49]:
G_II3_original = G.copy()

In [50]:
components = nx.connected_components(G_II3_original)
largest_component = max(components, key=len)
largest_subgraph = G_II3_original.subgraph(largest_component)
G_II3_original_lc = largest_subgraph.number_of_nodes()
        
G_II3_original_lc

401

In [51]:
print(nx.average_shortest_path_length(G_II3_original))

13.545997506234414


In [52]:
G_II3 = G.copy()

In [53]:
# remove 
highestnode = df_top_bet['Station'][0]

In [54]:
highestnode

'Stratford'

In [55]:
G_II3.remove_nodes_from([highestnode])

In [56]:
components = nx.connected_components(G_II3)
largest_component = max(components, key=len)
largest_subgraph = G_II3.subgraph(largest_component)
G_II3_lc = largest_subgraph.number_of_nodes()
        
G_II3_lc

379

In [61]:
for C in (G_II3.subgraph(c).copy() for c in nx.connected_components(G_II3)):
    print(nx.average_shortest_path_length(C))

14.496447069006436
4.821052631578947
0


In [62]:
# Repeat on the weighted network
df_wei_bet

Unnamed: 0,Station,betweenness_w
0,Green Park,44945.0
1,Bank and Monument,39913.0
2,Waterloo,32331.0
3,Westminster,29830.0
4,Liverpool Street,26617.0
...,...,...
393,Aldgate,0.0
394,Russell Square,0.0
395,Stratford High Street,0.0
396,Woolwich Arsenal,0.0


In [63]:
# remove 
highestnode_w = df_wei_bet['Station'][0]
highestnode_w

'Green Park'

In [64]:
G_II3_w = G_II_cleaned.copy()

In [65]:
G_II3_w.remove_nodes_from([highestnode_w])

In [66]:
components = nx.connected_components(G_II3_w)
largest_component = max(components, key=len)
largest_subgraph = G_II3_w.subgraph(largest_component)
G_II3_lc_w = largest_subgraph.number_of_nodes()
        
G_II3_lc_w

397

In [67]:
print(nx.average_shortest_path_length(G_II3_w, weight = 'inv_flows'))

0.0008434015316896885
