In [92]:
class Station:
    """A station (node) of the railway network.

    Attributes:
        name:  station name, matched against the 'Ankomstplats' column.
        id:    identifier handed in by the caller.
        delay: total delay at the station at the current time step
               (may need to become a per-time-step list later).
        T_ij:  trains moving to this station at time t (TODO, not filled yet).
        N_out: stations reachable over an edge leaving this station.
        N_in:  stations that have an edge into this station.
        Bi:    turnover rate (TODO, currently always None).
        si:    fraction of arriving trains that terminate at this station.
    """

    def __init__(self, name, id):
        self.name, self.id = name, id
        self.delay = 0
        self.T_ij = []
        # Neighbour lists start empty; they are filled via set_N_in / set_N_out.
        self.N_in, self.N_out = [], []
        # Derived quantities, filled in by initiate_station().
        self.si = None
        self.Bi = None

    def initiate_station(self, df):
        """Compute si and Bi from the rows of `df` that arrive at this station."""
        arrivals = df.loc[df['Ankomstplats'] == self.name]
        self.si = self.get_si(arrivals)
        self.Bi = self.get_Bi(arrivals)

    def get_si(self, rows):
        """Return the share of `rows` whose 'UppehållstypAnkomst' is 'Sista'.

        `rows` holds the movements arriving at this station. Returns 0 when
        there are no such rows.
        """
        n_arrivals = len(rows)
        if not n_arrivals:
            return 0
        n_terminating = int((rows['UppehållstypAnkomst'] == 'Sista').sum())
        return n_terminating / n_arrivals

    def get_Bi(self, rows):
        """Turnover rate of the station. TODO: not implemented, returns None."""
        return None

    def set_N_in(self, neighbours_in):
        """Replace the list of inbound neighbour stations."""
        self.N_in = neighbours_in

    def set_N_out(self, neighbours_out):
        """Replace the list of outbound neighbour stations."""
        self.N_out = neighbours_out
    


Network init calculations
=========================
- A (adjacency matrix): for each station, put 1 if it is connected to another station, otherwise 0.
- B = for each station, calculate the turnover rate
- fij = for each edge, calculate the train frequency from i to j
- tij = for each edge, calculate the average travel time from i to j
- pij = for all trains that have arrived at i, calculate the fraction that continue to j (used as a probability)
- rij = for all trains that do not have i as end station, calculate the fraction that continue to j (used as a probability)
- sj = for all trains that arrive at station j, calculate the fraction of trains that end at j (taken over the edges into j)
- T(i,j) = extract from data


In [140]:
import pandas as pd
import numpy as np

class Edge:
    """A directed edge i -> j of the railway network.

    Attributes:
        id:  identifier handed in by the caller.
        i:   name of the start station.
        j:   name of the end station.
        Aij: adjacency entry for i -> j (TODO, never set here).
        fij: train frequency on this edge (TODO, never set here).
        tij: average travel time i -> j in minutes, rounded.
        pij: departures i -> j divided by arrivals at i.
        rij: share of the departures from i that go to j.
    """

    def __init__(self, id, start, end):
        self.id = id
        self.i, self.j = start, end
        self.Aij = None
        self.fij = None
        # Filled in by initiate_edge().
        self.tij = None
        self.pij = None
        self.rij = None

    def initiate_edge(self, df):
        """Compute tij, pij and rij from the movement table `df`."""
        departs_i = df['Avgångsplats'] == self.i
        arrives_i = df['Ankomstplats'] == self.i
        arrives_j = df['Ankomstplats'] == self.j

        # Movements that run on this very edge.
        self.tij = self.get_average_travel_time(df[departs_i & arrives_j])
        # Movements that arrive at i or depart from i.
        # TODO: should the timings be checked as well?
        self.pij = self.get_pij(df[arrives_i | departs_i])
        # Movements that depart from i.
        self.rij = self.get_rij(df[departs_i])

    def get_average_travel_time(self, rows):
        """Mean of 'UtfAnkTid' - 'UtfAvgTid' over `rows`, in rounded minutes.

        Rows missing either timestamp are ignored. With no usable rows the
        mean, and therefore the result, is NaN.
        """
        timed = rows.dropna(subset=['UtfAnkTid', 'UtfAvgTid'])
        arrived = pd.to_datetime(timed['UtfAnkTid'])
        departed = pd.to_datetime(timed['UtfAvgTid'])
        minutes = (arrived - departed).dt.total_seconds() / 60
        return np.round(minutes.mean())

    # TODO: Check with rest of group to find best way to write the input values.
    def get_pij(self, rows):
        """Departures i -> j divided by arrivals at i; 0 when nothing arrives at i.

        `rows` holds the movements that arrive at or depart from station i.
        NOTE(review): the numerator counts every departure i -> j, including
        trains that start at i, so the ratio can exceed 1 - confirm whether
        that is intended for a probability.
        """
        arrivals_at_i = int((rows['Ankomstplats'] == self.i).sum())
        if arrivals_at_i == 0:  # also covers an empty `rows`
            return 0
        on_this_edge = (rows['Avgångsplats'] == self.i) & (rows['Ankomstplats'] == self.j)
        return int(on_this_edge.sum()) / arrivals_at_i

    def get_rij(self, rows):
        """Share of `rows` (movements departing from i) that arrive at j; 0 if empty."""
        departures_from_i = len(rows)
        if departures_from_i == 0:
            return 0
        departures_to_j = int((rows['Ankomstplats'] == self.j).sum())
        return departures_to_j / departures_from_i

    

In [3]:
import pandas as pd

class Network:
    """Directed railway network built from a table of train movements.

    Every row of the input DataFrame is read as one movement from the station
    named in 'Avgångsplats' (departure) to the station named in 'Ankomstplats'
    (arrival).

    Attributes:
        N:        number of stations currently stored in `stations`.
        stations: dict {station name: Station}.
        edges:    dict {start name + end name: Edge}.
    """

    def __init__(self):
        self.N = 0
        self.stations = {}
        self.edges = {}

    @staticmethod
    def _edge_key(start, end):
        """Dictionary key of the edge start -> end (the two names concatenated).

        NOTE(review): concatenation is ambiguous ('AB'+'C' == 'A'+'BC'). The
        format is kept so existing lookups into `edges` keep working; a
        (start, end) tuple would be collision free.
        """
        return start + end

    @staticmethod
    def _station_pairs(df):
        """Unique (departure, arrival) pairs, skipping rows with a missing name."""
        return df[['Avgångsplats', 'Ankomstplats']].dropna().drop_duplicates()

    def initate_network(self, df):
        """Create all edges and then all stations found in `df`."""
        self.extract_edges(df)
        self.extract_stations(df)

    def add_station(self, name, id, df):
        """Create, initialise and store the station `name`.

        Adding a name that already exists replaces the stored Station.
        """
        station = Station(name, id)
        station.initiate_station(df)
        self.stations[name] = station
        # Derive N from the dict so that re-adding a station (for example when
        # initate_network is run twice on the same object) does not inflate it.
        self.N = len(self.stations)

    def add_edge(self, id, start, end, df):
        """Create, initialise and store the edge start -> end."""
        edge = Edge(id, start, end)
        edge.initiate_edge(df)
        self.edges[self._edge_key(start, end)] = edge

    def extract_stations(self, df):
        """Add every station named in `df` and wire up the neighbour lists.

        Stations are numbered in sorted name order so the ids are the same on
        every run; iterating a plain set of strings is not reproducible across
        interpreter sessions. Missing names are ignored.
        """
        names = set(df['Avgångsplats'].dropna().unique())
        names |= set(df['Ankomstplats'].dropna().unique())
        for i, name in enumerate(sorted(names, key=str)):
            self.add_station(name, i, df)

        # The pairs are already de-duplicated, so for a fixed station every
        # neighbour shows up once, in order of first appearance in `df`.
        pairs = self._station_pairs(df)
        for station_name, station in self.stations.items():
            in_names = pairs.loc[pairs['Ankomstplats'] == station_name, 'Avgångsplats']
            out_names = pairs.loc[pairs['Avgångsplats'] == station_name, 'Ankomstplats']
            station.set_N_in([self.stations[name] for name in in_names])
            station.set_N_out([self.stations[name] for name in out_names])

    def extract_edges(self, df):
        """Add one edge per unique (departure, arrival) pair in `df`.

        Rows with a missing station name are skipped; they cannot form an edge
        and would otherwise fail when the edge key is built.
        """
        pairs = self._station_pairs(df).values.tolist()
        for i, (start, end) in enumerate(pairs):
            self.add_edge(i, start, end, df)

    def print_station_info(self, station_name):
        """Print neighbours, si and Bi of a station. Raises KeyError if unknown."""
        station = self.stations[station_name]
        print(f"Station: {station_name}")
        print(f"Neighbours in: {[neighbour.name for neighbour in station.N_in]}")
        print(f"Neighbours out: {[neighbour.name for neighbour in station.N_out]}")
        print(f"si: {station.si}")
        print(f"Bi: {station.Bi}")

    def print_edge_info(self, start, end):
        """Print travel time, pij and rij of an edge. Raises KeyError if unknown."""
        edge = self.edges[self._edge_key(start, end)]
        print(f"Edge from {start} to {end}")
        print(f"Travel time: {edge.tij}")
        print(f"pij: {edge.pij}")
        print(f"rij: {edge.rij}")




In [112]:
import pandas as pd

# The test network lives in the "data" folder next to the notebook.
file_name = "smaller_test_network.csv"
file_path = "/".join(["data", file_name])

# Comma-separated, UTF-8 encoded CSV -> DataFrame.
# (An earlier variant read the same path with sep=';' into `df`.)
df_network = pd.read_csv(file_path, encoding='utf-8', sep=',')

# Show the loaded frame as the cell output.
df_network




Unnamed: 0,Tåguppdrag,Datum_PAU,Tågslag,Tågsort,UppehållstypAvgång,UppehållstypAnkomst,AktivitetskodAvgång,AktivitetskodBeskrivningAvgång,AktivitetskodAnkomst,AktivitetskodBeskrivningAnkomst,...,PlanAnkTid_vid_AvgPlats,UtfAnkTid_vid_AvgPlats,PlanUppehållstidAvgång,UtfUppehållstidAvgång,PlanGångtid,UtfGångtid,FörseningGångtid,AvgFörsening,AnkFörsening,FörseningUppehållAvgång
0,97621.0,2019-03-01,TJT,REGION,Första,Sista,0,Uppehåll av teknisk/personalrelaterad typ,0,Uppehåll av teknisk/personalrelaterad typ,...,,,,,7,5.0,-2.0,-5.0,-7.0,
1,97621.0,2019-03-22,TJT,REGION,Första,Sista,0,Uppehåll av teknisk/personalrelaterad typ,0,Uppehåll av teknisk/personalrelaterad typ,...,,,,,7,5.0,-2.0,-5.0,-7.0,
2,9149.0,2019-03-10,TJT,-,Passage,Passage,0,-,0,-,...,2019-03-11 04:21:00.000,2019-03-11 03:25:00.000,0.0,0.0,7,5.0,-2.0,-56.0,-58.0,0.0
3,84392.0,2019-03-07,TJT,-,Passage,Passage,0,-,0,-,...,2019-03-07 12:13:00.000,2019-03-07 12:12:00.000,0.0,0.0,6,5.0,-1.0,-1.0,-2.0,0.0
4,92667.0,2019-03-28,TJT,-,Passage,Passage,0,-,0,-,...,2019-03-28 21:21:00.000,2019-03-28 21:18:00.000,0.0,0.0,3,1.0,-2.0,-3.0,-5.0,0.0
5,99950.0,2019-03-29,TJT,-,Passage,Passage,0,-,0,Uppehåll av teknisk/personalrelaterad typ,...,2019-03-29 23:40:00.000,2019-03-29 23:36:00.000,0.0,0.0,2,2.0,0.0,-4.0,-4.0,0.0
6,92563.0,2019-03-26,TJT,-,Passage,Uppehåll,0,-,0,-,...,2019-03-26 20:46:00.000,2019-03-26 20:46:00.000,0.0,0.0,4,3.0,-1.0,0.0,-1.0,0.0
7,33310.0,2019-03-24,TJT,-,Passage,Passage,0,-,0,-,...,2019-03-24 08:58:00.000,2019-03-24 08:43:00.000,0.0,0.0,5,8.0,3.0,-15.0,-12.0,0.0
8,82188.0,2019-03-26,TJT,-,Passage,Passage,0,-,0,-,...,2019-03-26 06:04:00.000,2019-03-26 06:03:00.000,0.0,0.0,8,6.0,-2.0,-1.0,-3.0,0.0
9,92552.0,2019-03-17,TJT,-,Första,Passage,0,Uppehåll av teknisk/personalrelaterad typ,0,-,...,,,,,2,3.0,1.0,-8.0,-7.0,


In [141]:
# Create an empty network; stations and edges are added by initate_network below.
network = Network()

# Disabled filter kept for reference: it would restrict a frame `df` to the rows
# whose 'Tågslag' is 'TJT' before building the network.
#df_network = df[df['Tågslag'] == 'TJT'] 
# Builds the edges first and the stations (including neighbour lists) second.
network.initate_network(df_network)


In [142]:
# Sanity check: print the derived quantities for one station and one edge.
network.print_station_info('Hagalund')
network.print_edge_info('Hagalund', 'Stockholm')

# NOTE(review): pij looks like it might not be correct. Edge.get_pij divides the
# number of departures Hagalund -> Stockholm by the number of arrivals at Hagalund,
# so a train that starts at Hagalund is counted in the numerator but not in the
# denominator - TODO confirm the intended definition with the group.

Station: Hagalund
Neighbours in: ['Solna']
Neighbours out: ['Stockholm']
si: 0.3333333333333333
Bi: None
Edge from Hagalund to Stockholm
Travel time: 5.0
pij: 0.6666666666666666
rij: 1.0


In [34]:
import pandas as pd

# Step 1: Take the first ten rows of the loaded data as an independent copy.
# The explicit .copy() matters: .head() can hand back a view of df_network, and
# assigning into a view raises SettingWithCopyWarning and may write through to
# (or silently miss) the original frame.
smaller_test_network = df_network.head(10).copy().reset_index(drop=True)
assert len(smaller_test_network) == 10, "expected at least 10 rows in df_network"

# Step 2: Overwrite the station columns so the rows form a tiny line network
# Solna -> Hagalund -> Stockholm. Each entry is (row labels, departure, arrival).
test_routes = [
    (range(0, 6), 'Solna', 'Hagalund'),       # rows 0-5
    (range(6, 10), 'Hagalund', 'Stockholm'),  # rows 6-9
]
for route_rows, departure, arrival in test_routes:
    route_rows = list(route_rows)
    smaller_test_network.loc[route_rows, 'Avgångsplats'] = departure
    smaller_test_network.loc[route_rows, 'Ankomstplats'] = arrival

# The first two Solna -> Hagalund movements terminate at Hagalund.
smaller_test_network.loc[[0, 1], 'UppehållstypAnkomst'] = 'Sista'

# Step 3: Save the modified DataFrame to a new CSV file.
# NOTE(review): this writes to the working directory, while the loading cell
# reads "data/smaller_test_network.csv" - confirm the file ends up where the
# loader expects it.
smaller_test_network.to_csv('smaller_test_network.csv', index=False)
