In [2]:
import pandas as pd
import traintools
from trainconstants import *
from collections import defaultdict
import networkx as nx
from typing import List, Tuple

In [3]:
def create_network_schedule(df: pd.DataFrame) -> pd.DataFrame:
    """Creates a DataFrame which can be turned into a networkx graph.
    """

    network_schedule = df[['first_class', 'second_class']].copy()
    network_schedule['start'] = list(zip(df.start, df.departure_time))
    network_schedule['end'] = list(zip(df.end, df.arrival_time))

    return network_schedule


def connect_stationary_nodes(G: nx.DiGraph) -> None:
    """Connects all nodes to the next node in time at the same station.
    """
    
    nodes_per_station = defaultdict(list)

    for node in G.nodes:
        nodes_per_station[node[0]].append(node)

    for station in nodes_per_station.keys():
        nodes_per_station[station].sort()

        for index, stop in enumerate(nodes_per_station[station][:-1]):
            G.add_edge(stop, nodes_per_station[station][index + 1], first_class=0, second_class=0)


def graph_from_schedule(df: pd.DataFrame) -> nx.DiGraph:
    """Creates a digraph from the schedule.
    """
    
    schedule_network = create_network_schedule(df)

    G = nx.from_pandas_edgelist(
        df=schedule_network, 
        source='start', 
        target='end', 
        edge_attr=['first_class', 'second_class'], 
        create_using=nx.DiGraph
        )
    
    connect_stationary_nodes(G)

    return G


In [11]:
def attempt_solution_1(G: nx.DiGraph) -> Tuple[int, defaultdict, defaultdict]:
    """Here is a first attempt at a solution for trains of only one type.

    The solution works a follows:
    * start by finding the longest path in the graph
    * next go through that longest path and subtract the 
    maximum number of passengers that can fit in the train on each
    part of the trip
    * recalculate a new longest path in the altered graph and repeat until the 
    longest path consists of only one node.

    * meanwhile keep track of how many trains were needed and at which stations the trains start and end.

    Known problems:
    * The algorithm will think that it is "better" to choose a trip where there are 1000
    passengers to be transported than to choose a trip where there are 500 passengers to be transported
    eventhough the 
    * the number of trains starting and ending at each station is not the same 
    * the solution is incorrect because of the point mentioned above.
    """

    # to keep track of at which stations the trains start and end.
    starting_trains = defaultdict(int)
    ending_trains = defaultdict(int)

    # find the longest path in the graph
    path = nx.dag_longest_path(G, weight='second_class')

    # keep track of the number of trains, and where they start and end
    starting_trains[path[0][0]] += 1
    ending_trains[path[-1][0]] += 1
    number_of_trains = 1

    # repeat until the longest path consists of only one node
    while len(path) != 1:
        # go through the longest path and pick up the maximal number of passengers on the way
        for index, stop in enumerate(path[:-1]):
            G[stop][path[index + 1]]['first_class'] -= min(TYPE_3_TRAIN[0], G[stop][path[index + 1]]['first_class'])
            G[stop][path[index + 1]]['second_class'] -= min(TYPE_3_TRAIN[1], G[stop][path[index + 1]]['second_class'])
        
        # recompute the longest path 
        path = nx.dag_longest_path(G, weight='second_class')

        # keep track of the number of trains, and where they start and end
        starting_trains[path[0][0]] += 1
        ending_trains[path[-1][0]] += 1
        number_of_trains += 1

    return(number_of_trains, starting_trains, ending_trains)

In [13]:
df = traintools.read_schedule("datasets/nsdata1.txt")
G = graph_from_schedule(df)

print(attempt_solution_1(G))

(25, defaultdict(<class 'int'>, {4: 14, 2: 4, 3: 7}), defaultdict(<class 'int'>, {3: 5, 2: 4, 1: 5, 4: 11}))
