In [119]:
class Station:
    def __init__(self, name, id):
        self.name = name
        self.id = id
        self.delay = 0 # total delay at the station at the current time step, maybe this needs to be a list so we store the delay for each time step
        self.T_ij = []  # set of trains moving to station i at time t #TODO
        self.N_out = [] # set of stations to which there is a edge from station i (neightbours out)
        self.N_in = [] # set of stations from which there is an edge to station i (neighbours in)
        self.Bi = None #TODO turnover rate
        self.si = None #fraction of trains on the edge towards this station that end at this station

    def initiate_station(self, df):
        #rows to all trains that are going to this station
        rows = df[df['Ankomstplats'] == self.name]
        self.si = self.get_si(rows)
        self.Bi = self.get_Bi(rows)

    def get_si(self, rows):
        #rows are all rows that are going to this station
        if len(rows) == 0:
            return 0
        final_station_rows = rows[rows['UppehållstypAnkomst'] == 'Sista']
        total_rows = len(rows)
        final_rows = len(final_station_rows)
        return final_rows/total_rows
    
    def get_Bi(self, rows):
        #TODO
        pass

    def set_N_in(self, neighbours_in):
        self.N_in = neighbours_in
    
    def set_N_out(self, neighbours_out):
        self.N_out = neighbours_out
    


Network init calculations
=========================
- A (adjacency matrix): for each station, put 1 if it is connected to another station, otherwise 0.
- B = for each station, calculate the turnover rate
- fij = for each edge, calculate the train frequency from i to j
- tij = for each edge, calculate the time it took to travel, and average it 
- pij = for all trains that have gone to i, calculate the probability of going to j (in a fraction)
- rij =  for all trains that do not have i as end station, calculate the probability of those trains going to j (in a fraction)
- sj = for each train that pass through station j, calculate the number of trains that end at j. (On that edge)
- T(i,j) = extract from data


In [120]:
import pandas as pd
import numpy as np

class Edge: 
    def __init__(self, id, start, end):
        self.id = id
        self.i = start #station i = start
        self.j = end # station j = end
        self.Aij = None #TODO adjacency matrix for edge i to 
        self.fij = None #TODO frequency of trains on this edge i to j
        self.tij = None # travel time on this edge average i to j
        self.pij = None # fraction of trains to i that continues to j. It is a probability.
        self.rij = None # fraction of trains to i that continue to j if they do not end at i
    
    def initiate_edge(self, df):
        #average travel time on edge i to j
        rows = df[(df['Avgångsplats'] == self.i) & (df['Ankomstplats'] == self.j)]
        
        average_travel_time = self.get_average_travel_time(rows)
        self.tij = average_travel_time

        #fraction of trains that continue from i to j
        rows = df[df['Ankomstplats'] == self.i] #trains that arrive at i
        self.pij = self.get_pij(rows)

        # fraction of trains that continue from i to j if they do not end at i
        rows = df[df['Avgångsplats'] == self.i] #trains that depart from i
        self.rij = self.get_rij(rows)
    

    def get_average_travel_time(self, rows): 
        #rows are the trains that travel from station i to j
        rows = rows.dropna(subset=['UtfAnkTid', 'UtfAvgTid'])
        time_diff = pd.to_datetime(rows['UtfAnkTid']) - pd.to_datetime(rows['UtfAvgTid'])
        # Convert the time difference to minutes
        time_diff = time_diff.dt.total_seconds() / 60
        mean_time_diff = time_diff.mean()
        rounded = np.round(mean_time_diff)
        return rounded #in minutes
    
    def get_pij(self, rows):
        #rows are the trains that arrive at station i
        if len(rows) == 0:
            return 0
        fraction_i_to_j = len(rows[rows['Ankomstplats'] == self.j]) / len(rows)
        return fraction_i_to_j

    def get_rij(self, rows):
        #rows are the trains that depart from station i
        if len(rows) == 0:
            return 0
        fraction_i_to_j = len(rows[rows['Ankomstplats'] == self.j]) / len(rows)
        return fraction_i_to_j

    

In [121]:
import pandas as pd

class Network:
    def __init__(self):
        self.N = 0 # number of stations
        self.stations = {} # dictionar of stations {name: Station}
        self.edges = {} #dict of edges {[start, end]: Edge}
    
    def initate_network(self, df):
        self.extract_edges(df)
        self.extract_stations(df)

    def add_station(self, name, id, df):
        station = Station(name, id)
        station.initiate_station(df)
        self.stations[name] = station
        self.N += 1

    def add_edge(self, id, start, end, df):
        edge = Edge(id, start, end)
        edge.initiate_edge(df)
        key = start+end
        self.edges[key] = edge
    
    def extract_stations(self, df):
        stations_depart = df['Avgångsplats'].unique()
        stations_arrive = df['Ankomstplats'].unique()
        stations = set(stations_depart).union(set(stations_arrive))
        for i, station in enumerate(stations):
            self.add_station(station, i, df)
        for station_name in self.stations:
            #station is the value from the dictionary, which is the sation object
            #add neighbours
            neighbours_in_names = df[df['Ankomstplats'] == station_name]
            neighbours_in_names = neighbours_in_names['Avgångsplats'].unique()
            neighbours_in = [self.stations[name] for name in neighbours_in_names]

            neighbours_out_names = df[df['Avgångsplats'] == station_name]
            neighbours_out_names = neighbours_out_names['Ankomstplats'].unique()
            neighbours_out = [self.stations[name] for name in neighbours_out_names]
        
            station = self.stations[station_name]
            station.set_N_in(neighbours_in)
            station.set_N_out(neighbours_out)
    
    def extract_edges(self, df):
        unique_edges = df[['Avgångsplats', 'Ankomstplats']].drop_duplicates()
        # Convert to a list of lists (if that's what you want)
        edges = unique_edges.values.tolist()
        for i, edge in enumerate(edges):
            self.add_edge(i, edge[0], edge[1], df)

    def print_station_info(self, station_name):
        station = self.stations[station_name]
        print(f"Station: {station_name}")
        print(f"Neighbours in: {[neighbour.name for neighbour in station.N_in]}")
        print(f"Neighbours out: {[neighbour.name for neighbour in station.N_out]}")
        print(f"si: {station.si}")
        print(f"Bi: {station.Bi}")
    
    def print_edge_info(self, start, end):
        key = start+end
        edge = self.edges[key]
        print(f"Edge from {start} to {end}")
        print(f"Travel time: {edge.tij}")
        print(f"pij: {edge.pij}")
        print(f"rij: {edge.rij}")




In [122]:
import pandas as pd

file_name = "month_data.csv"
# Define the file path
file_path = r"data" + "/" + file_name

# Load the CSV file into a DataFrame
df = pd.read_csv(file_path, sep=';', encoding='utf-8')



In [124]:
network = Network()

df_network = df[df['Tågslag'] == 'TJT']
network.initate_network(df_network)


In [125]:
network.print_station_info('Stockholm C')
network.print_edge_info('Stockholm C', 'Stockholm Södra')

#looks like the pij might not be correct

Station: Stockholm C
Neighbours in: ['Karlberg', 'Norra bantorget', 'Stockholm Södra']
Neighbours out: ['Norra bantorget', 'Karlberg', 'Stockholm Södra']
si: 0.904851183501289
Bi: None
Edge from Stockholm C to Stockholm Södra
Travel time: 3.0
pij: 0.0
rij: 0.06034687574245664
