This file is used to create and pickle the 'base' graph files and shortest paths so they can be used later

In [1]:
import pandas as pd
import networkx as nx
import scripts as scr
import fileloader as fl

First, grabbing the required files using fileloader.py

In [2]:
# Load the inputs through the project's fileloader helpers.
graph = fl.get_network()  # network graph; its edges are iterated in later cells
od = fl.get_OD_LU()  # OD data, passed to the shortest-path and n1_analysis calls
frequency = fl.get_frequency()  # passed to scr.create_base_graphs

Next, we want to save the 'basegraphs' - unaltered network graphs which have their capacity assigned. 

In [7]:
# Build the base graphs from the network and the frequency data; the cell
# output shows one "Added capacity" line per timeslot.
basegraphs = scr.create_base_graphs(graph, frequency)

Added capacity for timeslot: Morning (0500-0700)
Added capacity for timeslot: AM Peak (0700-1000)
Added capacity for timeslot: Inter Peak (1000-1600)
Added capacity for timeslot: PM Peak (1600-1900)
Added capacity for timeslot: Evening (1900-2200)
Added capacity for timeslot: Late (2200-0030)
Added capacity for timeslot: Night (0030-0300)
Added capacity for timeslot: Early (0300-0500)


In certain (late) timeslots the train frequency is very low, so some links have a capacity of 0. This skews the results; therefore we iteratively replace each zero capacity with the average capacity of the neighbouring links. This slightly increases the average capacity, but allows for better analysis of the results.

In [None]:
def fixCapacity(bgraph, depth, max_depth=100):
    """Replace zero link capacities with the average capacity around them.

    For every edge whose ``'capacity'`` is 0, the mean capacity of the links
    at its start node and the mean capacity of the links at its end node are
    combined as ``(start_mean + end_mean) // 2`` and written back to the edge.
    Passes are repeated until no zero-capacity edge is left or the pass
    counter exceeds ``max_depth``. The graph is modified in place.

    Args:
        bgraph: Graph object providing ``edges(data=True)``,
            ``neighbors(node)`` and ``bgraph[u][v]['capacity']``.
        depth: Number of passes already performed; callers pass 0.
        max_depth: Stop (and print a message) once the pass counter exceeds
            this value while zero-capacity edges still existed in that pass.

    Returns:
        None.
    """

    def mean_capacity(node):
        # Average capacity of the links between `node` and its neighbours.
        neighbors = list(bgraph.neighbors(node))
        if not neighbors:
            return 0
        total = sum(bgraph[n][node]['capacity'] for n in neighbors)
        return total / len(neighbors)

    # Iterate instead of recursing so the pass limit is not tied to the
    # interpreter's recursion limit.
    while True:
        depth += 1
        # Edges that currently have no capacity.
        nocap = [(a, b) for a, b, data in bgraph.edges(data=True)
                 if data['capacity'] == 0]
        if not nocap:
            return

        for start, end in nocap:
            # Each side is divided by its OWN neighbour count.
            savg = mean_capacity(start)
            eavg = mean_capacity(end)
            # Floor division: small averages can round back down to 0, which
            # is why the process may not converge.
            bgraph[start][end]['capacity'] = (savg + eavg)//2

        if depth > max_depth:  # did not converge within the pass limit
            print('no solution found')
            return

# Repair zero capacities in place for each graph stored in basegraphs.
for bgraph in basegraphs.values(): 
    fixCapacity(bgraph, 0)  # 0 = initial value of the pass counter

One of the heavier calculations is getting the shortest paths for the ~68k OD-pairs. Therefore, we also want to save the result as a pickle object.

In [8]:
#TODO remove prints
# Compute the shortest paths from the network graph and the OD data.
baseshortest = scr.create_shortest_paths(graph, od)
#Saving the base shortestpaths
fl.save_obj(baseshortest, 'baseshortest')

inal 5 LU
Barbican Heathrow Terminal 5 LU
Aldgate Heathrow Terminal 5 LU
Aldgate East Heathrow Terminal 5 LU
Alperton Heathrow Terminal 5 LU
Amersham Heathrow Terminal 5 LU
Angel Heathrow Terminal 5 LU
Archway Heathrow Terminal 5 LU
Arnos Grove Heathrow Terminal 5 LU
Arsenal Heathrow Terminal 5 LU
Baker Street Heathrow Terminal 5 LU
Balham LU Heathrow Terminal 5 LU
Bank and Monument Heathrow Terminal 5 LU
Barking Heathrow Terminal 5 LU
Barkingside Heathrow Terminal 5 LU
Barons Court Heathrow Terminal 5 LU
Bayswater Heathrow Terminal 5 LU
Becontree Heathrow Terminal 5 LU
Belsize Park Heathrow Terminal 5 LU
Bethnal Green LU Heathrow Terminal 5 LU
Blackfriars LU Heathrow Terminal 5 LU
Blackhorse Road Heathrow Terminal 5 LU
Bond Street Heathrow Terminal 5 LU
Borough Heathrow Terminal 5 LU
Boston Manor Heathrow Terminal 5 LU
Bounds Green Heathrow Terminal 5 LU
Bow Road Heathrow Terminal 5 LU
Brent Cross Heathrow Terminal 5 LU
Bromley-by-Bow Heathrow Terminal 5 LU
Buckhurst Hill Heathrow Ter

In [3]:
# Reload the object previously saved under the name 'baseshortest'.
baseshortest = fl.load_obj('baseshortest')

In [16]:
#TODO remove prints ('no new path..')
# Walk over all edges, but only the Bromley-by-Bow / West Ham link is
# processed: its updated shortest paths are computed and saved under
# 'shortest/' using the comma-joined edge as the object name.
for edge in graph.edges():
    name = ",".join(edge)
    if edge != ('Bromley-by-Bow', 'West Ham'):
        continue
    print('yes')
    shortest_paths = scr.update_shortest_paths(graph, edge, baseshortest)
    fl.save_obj(shortest_paths, name, 'shortest/')

yes


TODO: add basemodel


Creating the networkx models, assigning passengers and travel time

In [None]:
# Trial run of scr.n1_analysis for the first five edges only.
# The edge list is built once; slicing also copes with graphs that have
# fewer than five edges.
edges = list(graph.edges())
for edge in edges[:5]: #graph.edges():
    name = ",".join(edge)
    # Shortest paths saved earlier for this edge under 'shortest/'.
    tempshortest = fl.load_obj(name, 'shortest/')
    # NOTE(review): a later cell unpacks two values from scr.n1_analysis
    # (graphs, passengers) - confirm what is returned here.
    tempgraphs = scr.n1_analysis(basegraphs, od, tempshortest, edge)
    fl.save_obj(tempgraphs, name, 'n1graphs/')
    print('added for edge: {}'.format(name))

Grabbing travel time and amount of passengers traveling over link, creating pandas series object

SECOND ITERATION

Create base estimate for passengers: 

In [6]:
# Reload the inputs, then run scr.n1_analysis with None in place of an edge.
# NOTE(review): no cell visible here saves an object named 'basegraphs' -
# confirm it is created elsewhere.
basegraphs = fl.load_obj('basegraphs')
od = fl.get_OD_LU()
baseshortest = fl.load_obj('baseshortest')
tempgraphs, passengers = scr.n1_analysis(basegraphs, od, baseshortest, None)
# Save both results: the graphs as 'basepassengers' and the passengers
# object as 'base' under 'passengers/'.
fl.save_obj(tempgraphs, 'basepassengers')
fl.save_obj(passengers, 'base', 'passengers/')

None
Added passengers for timeslot: Morning (0500-0700), total rows: 68545
Added passengers for timeslot: AM Peak (0700-1000), total rows: 68545
Added passengers for timeslot: Inter Peak (1000-1600), total rows: 68545
Added passengers for timeslot: PM Peak (1600-1900), total rows: 68545
Added passengers for timeslot: Evening (1900-2200), total rows: 68545
Added passengers for timeslot: Late (2200-0030), total rows: 68545
Added passengers for timeslot: Night (0030-0300), total rows: 68545
Added passengers for timeslot: Early (0300-0500), total rows: 68545


Capacity needs to be fixed in order to give insight into the capacity utilization of the network:

In [7]:
# Continue from the graphs saved under the name 'basepassengers'.
basegraphs = fl.load_obj('basepassengers')

def fixCapacity(bgraph, depth, max_depth=100):
    """Replace zero link capacities with the average capacity around them.

    Every edge whose ``'capacity'`` is 0 gets
    ``(start_mean + end_mean) // 2``, where each mean is the average capacity
    of the links at that end of the edge. Passes repeat until no
    zero-capacity edge remains or the pass counter exceeds ``max_depth``.
    The graph is modified in place.

    Args:
        bgraph: Graph object providing ``edges(data=True)``,
            ``neighbors(node)`` and ``bgraph[u][v]['capacity']``.
        depth: Number of passes already performed; callers pass 0.
        max_depth: Stop (and print a message) once the pass counter exceeds
            this value while zero-capacity edges still existed in that pass.

    Returns:
        None.
    """

    def mean_capacity(node):
        # Average capacity of the links between `node` and its neighbours.
        neighbors = list(bgraph.neighbors(node))
        if not neighbors:
            return 0
        total = sum(bgraph[n][node]['capacity'] for n in neighbors)
        return total / len(neighbors)

    # A loop replaces the recursion, so the pass limit does not depend on the
    # interpreter's recursion limit.
    while True:
        depth += 1
        nocap = [(a, b) for a, b, data in bgraph.edges(data=True)
                 if data['capacity'] == 0]
        if not nocap:
            return

        for start, end in nocap:
            savg = mean_capacity(start)  # average of neighbours at start of link
            eavg = mean_capacity(end)  # average of neighbours at end of link
            # Floor division: small averages can round back down to 0, so the
            # process may not converge.
            bgraph[start][end]['capacity'] = (savg + eavg)//2

        if depth > max_depth:  # it might not find a solution, then stop
            print('no solution found')
            return
# Repair zero capacities in place for each graph, then save the whole
# collection under the name 'basepassfixed'.
for bgraph in basegraphs.values(): 
    fixCapacity(bgraph, 0)
fl.save_obj(basegraphs, 'basepassfixed')

no solution found


AWS STUFF:

In [None]:
import os.path
from os import path
import networkx as nx
import scripts2 as scr
import fileloader as fl
import dataframes as df
import time

# Batch run: for the first half of the edges, load the shortest paths saved
# for that edge, run scr.n1_analysis and save the resulting graphs and
# passengers objects.
graph = fl.get_network()
basegraphs = fl.load_obj('basegraphs')
od = fl.get_OD_LU()
edges = list(graph.edges())  # build the edge list once, not once per iteration
half = len(edges)//2
for i in range(half):
    starttime = time.time()
    #i += half #I ended up creating 2 aws.py files, one starting halfway down, this way my computing power was better optimised (redneck multithreading)
    edge = edges[i]
    name = ",".join(edge)
    # NOTE(review): this check looks in 'Pickles/test/', while the results
    # below are saved under 'awsgraphs/' and 'passengers/' - confirm the path.
    if path.exists('Pickles/test/'+ name + '.pickle'): continue #if I already created this file, skip
    tempshortest = fl.load_obj(name, 'shortest/')
    tempgraphs, passengers = scr.n1_analysis(basegraphs, od, tempshortest, edge)
    fl.save_obj(tempgraphs, name, 'awsgraphs/')
    fl.save_obj(passengers, name, 'passengers/')
    # Elapsed time is "now minus start" so the reported value is positive.
    print('added for edge: {}, time taken {}'.format(name, time.time()-starttime))