In [30]:
import pandas as pd
import traintools
from trainconstants import *
from collections import defaultdict
import networkx as nx
from typing import List, Tuple

In [31]:
def create_network_schedule(df: pd.DataFrame) -> pd.DataFrame:
    """Creates a DataFrame which can be turned into a networkx graph.
    """

    network_schedule = df[['first_class', 'second_class']].copy()
    network_schedule['start'] = list(zip(df.start, df.departure_time))
    network_schedule['end'] = list(zip(df.end, df.arrival_time))

    return network_schedule


def connect_stationary_nodes(G: nx.DiGraph) -> None:
    """Connects all nodes to the next node in time at the same station.
    """
    
    nodes_per_station = defaultdict(list)

    # create dict with each station as a key and all stops to / from that station in a list as the value 
    for node in G.nodes:
        nodes_per_station[node[0]].append(node)


    for station in nodes_per_station.keys():
        # sort the stations so they appear in chronological order in the list
        nodes_per_station[station].sort()

        # connect each stop to the next stop in time at the same station
        for index, stop in enumerate(nodes_per_station[station][:-1]):
            G.add_edge(stop, nodes_per_station[station][index + 1], first_class=0, second_class=0)


def graph_from_schedule(df: pd.DataFrame) -> nx.DiGraph:
    """Creates a digraph from the schedule.
    """
    
    # create the dataframe in the needed format for the creation of a graph
    schedule_network = create_network_schedule(df)

    # create the graph
    G = nx.from_pandas_edgelist(
        df=schedule_network, 
        source='start', 
        target='end', 
        edge_attr=['first_class', 'second_class'], 
        create_using=nx.DiGraph
        )
    
    # add the stationary connections that correspond to staying at any given station
    connect_stationary_nodes(G)

    return G


In [32]:
def attempt_solution_1(G: nx.DiGraph) -> Tuple[int, defaultdict, defaultdict]:
    """Here is a first attempt at a solution for trains of only one type.

    The solution works a follows:
    * start by finding the longest path in the graph
    * next go through that longest path and subtract the 
    maximum number of passengers that can fit in the train on each
    part of the trip
    * recalculate a new longest path in the altered graph and repeat until the 
    longest path consists of only one node.
    * meanwhile keep track of how many trains were needed and at which stations the trains start and end.

    Known problems:
    * The algorithm will think that it is "better" to choose a trip where there are 1000
    passengers to be transported than to choose a trip where there are 500 passengers to be transported
    eventhough both exeed the capacity of the train so they would both be equilly valid options.
    * the number of trains starting and ending at each station is not the same 
    * the solution is incorrect because of the point mentioned above.
    """

    # to keep track of at which stations the trains start and end.
    starting_trains = defaultdict(int)
    ending_trains = defaultdict(int)

    # find the longest path in the graph
    path = nx.dag_longest_path(G, weight='second_class')

    # keep track of the number of trains, and where they start and end
    starting_trains[path[0][0]] += 1
    ending_trains[path[-1][0]] += 1
    number_of_trains = 1

    # repeat until the longest path consists of only one node
    while len(path) != 1:
        # go through the longest path and pick up the maximal number of passengers on the way
        for index, current_stop in enumerate(path[:-1]):
            next_stop = path[index + 1]
            G[current_stop][next_stop]['first_class'] -= min(TYPE_3_TRAIN[0], G[current_stop][next_stop]['first_class'])
            G[current_stop][next_stop]['second_class'] -= min(TYPE_3_TRAIN[1], G[current_stop][next_stop]['second_class'])
        
        # recompute the longest path 
        path = nx.dag_longest_path(G, weight='second_class')

        # keep track of the number of trains, and where they start and end
        starting_trains[path[0][0]] += 1
        ending_trains[path[-1][0]] += 1
        number_of_trains += 1

    return(number_of_trains, starting_trains, ending_trains)

In [33]:
df = traintools.read_schedule("datasets/nsdata1.txt")
G = graph_from_schedule(df)


print(attempt_solution_1(G.copy()))

(25, defaultdict(<class 'int'>, {4: 8, 2: 13, 3: 4}), defaultdict(<class 'int'>, {3: 5, 2: 4, 1: 5, 4: 11}))


In [38]:
def attempt_solution_2(G: nx.DiGraph) -> Tuple[int, defaultdict, defaultdict]:
    """Here is a second attempt, it is very similar but tries to fix the endpoints.

    This is done through the use of the fact that nx.dag_longest_path returns "the shortest" 
    longest path. Meaning that when the weight of the edges is 0, it does not add any of those edges 
    to the front or to the end of the longest path. This means that we can try to
    add (all the trains that do not pick up passengers everywhere) empty trips to the front and the back of 
    these trains and see if we can connect them in a way such that the number of trains at each station 
    at the beginning of the day is equal to the number of trains at the end of the day. 

    Note:
    * Did not actually implement this yet, as of now this is pretty much the same as solution above.
    * I think this solution will be the one for the trains of just one type once the weighting of the passengers is also fixed, 
    which means we can slowly start thinking about the solution with mixed trains.
    """

    # to keep track of at which stations the trains start and end.
    starting_trains = defaultdict(list)
    ending_trains = defaultdict(list)

    # find the longest path in the graph
    path = nx.dag_longest_path(G, weight='second_class')

    # keep track of the number of trains, and where they start and end
    starting_trains[path[0][0]].append(path)
    ending_trains[path[-1][0]].append(path)
    number_of_trains = 1

    # repeat until the longest path consists of only one node
    while len(path) != 1:
        # go through the longest path and pick up the maximal number of passengers on the way
        for index, current_stop in enumerate(path[:-1]):
            next_stop = path[index + 1]
            G[current_stop][next_stop]['first_class'] -= min(TYPE_3_TRAIN[0], G[current_stop][next_stop]['first_class'])
            G[current_stop][next_stop]['second_class'] -= min(TYPE_3_TRAIN[1], G[current_stop][next_stop]['second_class'])
        
        # recompute the longest path 
        path = nx.dag_longest_path(G, weight='second_class')

        # keep track of the number of trains, and where they start and end
        starting_trains[path[0][0]].append(path)
        ending_trains[path[-1][0]].append(path)
        number_of_trains += 1

    number_of_starting_trains = {key: len(value) for key, value in starting_trains.items()}
    number_of_ending_trains = {key: len(value) for key, value in ending_trains.items()}

    return(number_of_trains, number_of_starting_trains, number_of_ending_trains)

In [39]:
print(attempt_solution_2(G.copy()))

(25, {4: 8, 2: 13, 3: 4}, {3: 5, 2: 4, 1: 5, 4: 11})
