# Recreating Train Lines and Their Time Deltas

This includes, to the best of my knowledge, all lines. The data was basically copied out of the S-Bahn live interface at the following website:

https://s-bahn-muenchen-live.de/?disruption=true&mode=schematic&routeinfo=false&x=1290872&y=6117890&z=10

The only line missing is the S4 extension, which runs at a very limited frequency. I therefore deemed it not important for this first step.

## Imports

In [1]:
from pathlib import Path
import os
import sys
import pandas as pd

## Constants

In [2]:
# Directory the notebook is started from; the copied timetables live in a
# 'copied_data' folder next to it.
WD = Path.cwd()
DATA = WD.joinpath('copied_data')
# NOTE: os.listdir returns the entries in arbitrary order, and the position
# of a file in this list later determines its line_id.
FILES = os.listdir(DATA)

## Loading Data

### Helper Functions

In [3]:
def load_data(path, file):
    """Read one copied timetable file into a DataFrame with one row per stop.

    The file is read as alternating lines: a time on the first line, the
    station it belongs to on the next line, and so on.

    Parameters
    ----------
    path : pathlib.Path
        Directory containing the file (joined with ``/``).
    file : str
        Name of the file inside ``path``; also stored in the ``file`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``file``, ``start_time`` (the raw time string), ``from``
        (station name), ``order`` (1-based position in the file), ``start``
        and ``end`` (first and last station of the file, repeated on every
        row). An empty file yields an empty frame with the same columns.

    Raises
    ------
    ValueError
        If the number of time lines differs from the number of station lines.
    """
    times = []
    station_names = []
    # Station names contain umlauts, so decode explicitly instead of relying
    # on the platform's default encoding.
    with open(path / file, mode='r', encoding='utf-8') as f:
        for index, line in enumerate(f):
            if index % 2 == 0:
                times.append(line.strip())
            else:
                station_names.append(line.strip())

    if len(times) != len(station_names):
        raise ValueError('Inequal number of times and stations')

    n_stops = len(times)
    # Guard the terminal-station lookups so an empty file does not crash.
    first_station = station_names[0] if station_names else None
    last_station = station_names[-1] if station_names else None

    data = {
        'file': [file] * n_stops,
        'start_time': times,
        'from': station_names,
        'order': range(1, n_stops + 1),
        'start': [first_station] * n_stops,
        'end': [last_station] * n_stops,
    }
    return pd.DataFrame(data)

def process_line(df):
    """Add destination, arrival time and travel time for the file's direction.

    Works on a copy of ``df``; the input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least a ``start_time`` column (time strings) and a
        ``from`` column (station names), rows in travel order.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` where ``start_time`` is parsed to timestamps and the
        columns ``to``, ``arrival_time`` and ``delta`` are added. Each row
        takes ``to`` and ``arrival_time`` from the following row, so the last
        row holds missing values in those columns and in ``delta``.
    """
    forward = df.copy()
    # The times carry no date, so a fixed dummy date is prepended. It is
    # written day-month-year, hence dayfirst=True to parse it unambiguously.
    forward['start_time'] = pd.to_datetime(
        forward['start_time'].apply(lambda t: '20-12-2020 {}'.format(t)),
        dayfirst=True,
    )
    # Shift only the columns that are needed instead of the whole frame.
    forward['to'] = forward['from'].shift(periods=-1)
    forward['arrival_time'] = forward['start_time'].shift(periods=-1)
    forward['delta'] = forward['arrival_time'] - forward['start_time']
    return forward

def reverse_line(df):
    """Build the opposite travel direction from a frame in file order.

    Works on a copy of ``df``; the input frame is not modified. The rows are
    re-numbered and sorted so the last stop comes first, ``start`` and ``end``
    swap their names, and the recorded time of each stop is stored as
    ``arrival_time``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``start_time`` (time strings), ``from``,
        ``order``, ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        Reversed copy with the original index labels kept. ``to`` and
        ``start_time`` are taken from the following row, and ``delta`` is
        ``arrival_time - start_time``; the last row holds missing values in
        those three columns.
    """
    reverse = df.copy()
    # Mirror the numbering (1 <-> max). The maximum is computed once rather
    # than once per row.
    highest_order = reverse['order'].max()
    reverse['order'] = highest_order + 1 - reverse['order']
    reverse = reverse.sort_values(by='order')

    # Only the names that actually change are listed; start and end swap.
    reverse = reverse.rename(columns={
        'start_time': 'arrival_time',
        'start': 'end',
        'end': 'start',
    })

    # The times carry no date, so a fixed dummy date is prepended. It is
    # written day-month-year, hence dayfirst=True to parse it unambiguously.
    reverse['arrival_time'] = pd.to_datetime(
        reverse['arrival_time'].apply(lambda t: '20-12-2020 {}'.format(t)),
        dayfirst=True,
    )
    # Shift only the columns that are needed instead of the whole frame.
    reverse['to'] = reverse['from'].shift(periods=-1)
    reverse['start_time'] = reverse['arrival_time'].shift(periods=-1)
    reverse['delta'] = reverse['arrival_time'] - reverse['start_time']
    return reverse

### Loading Data

In [4]:
# Build both directions for every file and stack them into one table.
# Forward directions get line_id 1, 2, ...; the matching reverse
# directions get the same number plus 100.
direction_frames = []
for index, file in enumerate(FILES):
    df = load_data(DATA, file)
    normal_df = process_line(df)
    rev_df = reverse_line(df)
    normal_df['line_id'] = index+1
    rev_df['line_id'] = index+101
    direction_frames.extend([normal_df, rev_df])

# Concatenate once at the end; stays None when there were no files.
line_information = pd.concat(direction_frames) if direction_frames else None

In [5]:
line_information

Unnamed: 0,file,start_time,from,order,start,end,to,arrival_time,delta,line_id
0,S8.txt,2020-12-20 16:23:00,Flughafen München,1,Flughafen München,Herrsching,Flughafen Besucherpark,2020-12-20 16:25:00,0 days 00:02:00,1
1,S8.txt,2020-12-20 16:25:00,Flughafen Besucherpark,2,Flughafen München,Herrsching,Hallbergmoos,2020-12-20 16:30:00,0 days 00:05:00,1
2,S8.txt,2020-12-20 16:30:00,Hallbergmoos,3,Flughafen München,Herrsching,Ismaning,2020-12-20 16:37:00,0 days 00:07:00,1
3,S8.txt,2020-12-20 16:37:00,Ismaning,4,Flughafen München,Herrsching,Unterföhring,2020-12-20 16:41:00,0 days 00:04:00,1
4,S8.txt,2020-12-20 16:41:00,Unterföhring,5,Flughafen München,Herrsching,Johanneskirchen,2020-12-20 16:45:00,0 days 00:04:00,1
...,...,...,...,...,...,...,...,...,...,...
4,S2_2.txt,2020-12-20 16:26:00,Ottenhofen,33,Altomünster,Erding,St. Koloman,2020-12-20 16:29:00,0 days 00:03:00,110
3,S2_2.txt,2020-12-20 16:22:00,St. Koloman,34,Altomünster,Erding,Aufhausen,2020-12-20 16:26:00,0 days 00:04:00,110
2,S2_2.txt,2020-12-20 16:19:00,Aufhausen,35,Altomünster,Erding,Altenerding,2020-12-20 16:22:00,0 days 00:03:00,110
1,S2_2.txt,2020-12-20 16:18:00,Altenerding,36,Altomünster,Erding,Erding,2020-12-20 16:19:00,0 days 00:01:00,110


### Final Preprocessing

In [6]:
# Express the travel time in minutes, using the seconds component of each
# timedelta. The nullable Int64 dtype keeps the rows without a delta as
# missing values.
delta_in_minutes = line_information['delta'].map(lambda d: d.seconds/60)
line_information['delta'] = delta_in_minutes.astype('Int64')

In [7]:
# Derive the line name from the file name: the leading letters plus all
# digits that follow (e.g. 'S8.txt' -> 'S8', 'S2_2.txt' -> 'S2'). A fixed
# two-character slice would cut a three-character name such as 'S20' down
# to 'S2'. Names without that pattern fall back to their first two
# characters.
line_information['line'] = line_information['file'].str.extract(
    r'^([A-Za-z]+\d+|.{0,2})', expand=False
)

In [8]:
line_information.columns

Index(['file', 'start_time', 'from', 'order', 'start', 'end', 'to',
       'arrival_time', 'delta', 'line_id', 'line'],
      dtype='object')

In [9]:
# Keep only these columns, in this order.
selected_columns = ['line', 'line_id', 'start', 'end', 'from', 'to', 'order', 'delta']
line_information = line_information[selected_columns]

In [10]:
line_information

Unnamed: 0,line,line_id,start,end,from,to,order,delta
0,S8,1,Flughafen München,Herrsching,Flughafen München,Flughafen Besucherpark,1,2
1,S8,1,Flughafen München,Herrsching,Flughafen Besucherpark,Hallbergmoos,2,5
2,S8,1,Flughafen München,Herrsching,Hallbergmoos,Ismaning,3,7
3,S8,1,Flughafen München,Herrsching,Ismaning,Unterföhring,4,4
4,S8,1,Flughafen München,Herrsching,Unterföhring,Johanneskirchen,5,4
...,...,...,...,...,...,...,...,...
4,S2,110,Altomünster,Erding,Ottenhofen,St. Koloman,33,3
3,S2,110,Altomünster,Erding,St. Koloman,Aufhausen,34,4
2,S2,110,Altomünster,Erding,Aufhausen,Altenerding,35,3
1,S2,110,Altomünster,Erding,Altenerding,Erding,36,1


## Saving Data

In [11]:
# Write the table to the working directory without the index column.
output_file = 'line_data.csv'
line_information.to_csv(output_file, index=False)

In [13]:
# Show all rows that belong to line_id 103.
line_information.loc[line_information['line_id'] == 103]

Unnamed: 0,line,line_id,start,end,from,to,order,delta
23,S4,103,Trudering,Geltendorf,Trudering,Berg am Laim,1,2.0
22,S4,103,Trudering,Geltendorf,Berg am Laim,Leuchtenbergring,2,3.0
21,S4,103,Trudering,Geltendorf,Leuchtenbergring,Ostbahnhof München,3,2.0
20,S4,103,Trudering,Geltendorf,Ostbahnhof München,Rosenheimer Platz,4,1.0
19,S4,103,Trudering,Geltendorf,Rosenheimer Platz,Isartor,5,2.0
18,S4,103,Trudering,Geltendorf,Isartor,Marienplatz,6,2.0
17,S4,103,Trudering,Geltendorf,Marienplatz,Karlsplatz,7,2.0
16,S4,103,Trudering,Geltendorf,Karlsplatz,Hauptbahnhof,8,1.0
15,S4,103,Trudering,Geltendorf,Hauptbahnhof,Hackerbrücke,9,2.0
14,S4,103,Trudering,Geltendorf,Hackerbrücke,Donnersbergerbrücke,10,1.0
