# DB API - first look

In [393]:
import pandas as pd
import pyhafas
import datetime as dt
import time

from pyhafas import HafasClient
from pyhafas.profile import VSNProfile
from pyhafas.types.fptf import Leg
from datetime import datetime
from datetime import timedelta
from typing import List
from pyhafas.profile import DBProfile
from pyhafas.types.fptf import Leg

### The API operates with "journeys", which are classes representing a trip from origin to destination. In each trip there are legs, representing the number of train changes. You can specify a number of conditions for each journey

In [394]:
client = HafasClient(VSNProfile())

date_time_str = '18/09/22 01:55:19' # you can play around with date and time, but looks like it only looks into the future
date_time_obj = datetime.strptime(date_time_str, '%d/%m/%y %H:%M:%S')
    
origin = client.locations("München Hbf")[0]
connections = client.locations("Mannheim")[0] # can specify here whether you want a connection 
destination = client.locations("Köln Messe")[0]

journeys = client.journeys(
    origin=origin,
    via=[connections],
    destination=destination,
    date=date_time_obj,#datetime.datetime.now(),
    max_changes=2, # we can delimit here how many changes we want to work with
    min_change_time=1,
    max_journeys=-1,
        products={
        'long_distance_express': True,
        'regional_express': False,
        'regional': False,
        'suburban': False,
        'bus': False,
        'ferry': False,
        'subway': False,
        'tram': False,
        'taxi': False
    }

)
    
#journey = client.journey(journeys[0].id)

print('Journey 1 - direct Munich to Cologne')
print(journeys[0].legs[0].name)
print(journeys[0].legs[0].origin.name)
print(journeys[0].legs[0].destination.name)
print('\n')
print('Journey 2 - Munich to Cologne with stop in Mannheim and Frankfurt Flughafen')
print(journeys[2].legs[0].name)
print(journeys[2].legs[0].origin.name)
print(journeys[2].legs[0].destination.name)
print(journeys[2].legs[1].name)
print(journeys[2].legs[1].origin.name)
print(journeys[2].legs[1].destination.name)
print(journeys[2].legs[2].name)
print(journeys[2].legs[2].origin.name)
print(journeys[2].legs[2].destination.name)

Journey 1 - direct Munich to Cologne
ICE 616
München Hbf
Köln Hbf


Journey 2 - Munich to Cologne with stop in Mannheim and Frankfurt Flughafen
ICE 692
München Hbf
Mannheim Hbf
ICE 202
Mannheim Hbf
Frankfurt(Main) Flughafen Fernbf
ICE 728
Frankfurt(Main) Flughafen Fernbf
Köln Messe/Deutz Gl.11-12


## Overview

The "legs" class also has some extra info like expected arrival time and coordinates. Particularly the latter could be useful for plotting

In [395]:
print('Station 1')
print(journeys[2].legs[0].origin.name)
print(journeys[2].legs[0].origin.latitude)
print(journeys[2].legs[0].origin.longitude)

print('\nStation 2')
print(journeys[2].legs[1].origin.name)
print(journeys[2].legs[1].origin.latitude)
print(journeys[2].legs[1].origin.longitude)

print('\nStation 3')
print(journeys[2].legs[2].origin.name)
print(journeys[2].legs[2].origin.latitude)
print(journeys[2].legs[2].origin.longitude)

print('\nStation 4')
print(journeys[2].legs[2].destination.name)
print(journeys[2].legs[2].destination.latitude)
print(journeys[2].legs[2].destination.longitude)


Station 1
München Hbf
48.140229
11.558339

Station 2
Mannheim Hbf
49.479352
8.468917

Station 3
Frankfurt(Main) Flughafen Fernbf
50.053169
8.570181

Station 4
Köln Messe/Deutz Gl.11-12
50.941717
6.974065


## Basic plotting

In [396]:
import folium

lat = [journeys[2].legs[0].origin.latitude, journeys[2].legs[1].origin.latitude, journeys[2].legs[2].origin.latitude, journeys[2].legs[2].destination.latitude]
lon = [journeys[2].legs[0].origin.longitude, journeys[2].legs[1].origin.longitude, journeys[2].legs[2].origin.longitude, journeys[2].legs[2].destination.longitude]
coords = list(zip(lat,lon))
newdict =({'stations': stations }) 
df = pd.DataFrame([newdict])
m = folium.Map(location=coords[1], zoom_start=6, tiles='Cartodb dark_matter')

aline=folium.PolyLine(locations=coords,weight=2,color = 'blue')
m.add_child(aline)

for i in coords:
    folium.CircleMarker(location=[i[0], i[1]],
                        radius=2,
                        color='#00C4B3',
                        fill_color='#00C4B3',
                        fill=True).add_to(m)

# We can add a different journey to see the difference
date_time_str = '18/09/22 01:55:19' # you can play around with date and time, but looks like it only looks into the future
date_time_obj = datetime.strptime(date_time_str, '%d/%m/%y %H:%M:%S')
    
origin = client.locations("München Hbf")[0]
connections = client.locations("Nürnberg")[0] # different path
destination = client.locations("Köln Messe")[0]


journeys2 = client.journeys(
    origin=origin,
    via=[connections],
    destination=destination,
    date=date_time_obj,#datetime.datetime.now(),
    max_changes=2,
    min_change_time=1,
    max_journeys=-1,
        products={
        'long_distance_express': True,
        'regional_express': False,
        'regional': False,
        'suburban': False,
        'bus': False,
        'ferry': False,
        'subway': False,
        'tram': False,
        'taxi': False
    }

)

lat2 = [journeys2[3].legs[0].origin.latitude, journeys2[3].legs[1].origin.latitude, journeys2[3].legs[1].destination.latitude]
lon2 = [journeys2[3].legs[0].origin.longitude, journeys2[3].legs[1].origin.longitude, journeys2[3].legs[1].destination.longitude]
coords2 = list(zip(lat2,lon2))

aline=folium.PolyLine(locations=coords2,weight=2,color = 'red')
m.add_child(aline)

for i in coords2:
    folium.CircleMarker(location=[i[0], i[1]],
                        radius=2,
                        color='red',
                        fill_color='red',
                        fill=True).add_to(m)


m

In [397]:
def journeys(dateandtime,origin,destination):
    journey = client.journeys(
    origin=origin,
    destination=destination,
    date=date_time_obj,
    max_changes=2,
    min_change_time=5,
    max_journeys=-1,
        products={
        'long_distance_express': True,
        'regional_express': False,
        'regional': False,
        'suburban': False,
        'bus': False,
        'ferry': False,
        'subway': False,
        'tram': False,
        'taxi': False
    })
    return journey

In [None]:
def journeymaker(dateandtime,origin,destination):
    journey = client.journeys(
    origin=origin,
    destination=destination,
    date=dateandtime,
    max_changes=2,
    min_change_time=5,
    max_journeys=-1,
        products={
        'long_distance_express': True,
        'regional_express': False,
        'regional': False,
        'suburban': False,
        'bus': False,
        'ferry': False,
        'subway': False,
        'tram': False,
        'taxi': False
    })
    return journey

journeys(dateandtime,origin,destination)


In [245]:
journeylist = []
#connection1 = []
#connection2 = []
dateandtime0 = '04/09/22 02:00:00' 
dateandtime = datetime.strptime(dateandtime0, '%d/%m/%y %H:%M:%S')
origin = client.locations("München Hbf")[0]
destination = client.locations("Köln Messe")[0]
daylimit = dateandtime.day + 7

while dateandtime.day < daylimit:
    tempjourneys = journeymaker(dateandtime,origin,destination)
    dateandtime = tempjourneys[-1].legs[0].departure + timedelta(minutes = 1) # this is the departure time of the last trip the API gave us, we are adding a minute to it to get more 
    print(f'Fetching trains from this date: {dateandtime}')
    for journey in journeys:
        journeylist.append(journey)
    time.sleep(1) #just so we don't go over the API limit

Fetching trains from this date: 2022-09-04 06:30:00+02:00
Fetching trains from this date: 2022-09-04 07:29:00+02:00
Fetching trains from this date: 2022-09-04 08:28:00+02:00
Fetching trains from this date: 2022-09-04 09:29:00+02:00
Fetching trains from this date: 2022-09-04 10:29:00+02:00
Fetching trains from this date: 2022-09-04 12:28:00+02:00
Fetching trains from this date: 2022-09-04 13:29:00+02:00
Fetching trains from this date: 2022-09-04 14:28:00+02:00
Fetching trains from this date: 2022-09-04 15:29:00+02:00
Fetching trains from this date: 2022-09-04 16:28:00+02:00
Fetching trains from this date: 2022-09-04 18:28:00+02:00
Fetching trains from this date: 2022-09-05 03:25:00+02:00
Fetching trains from this date: 2022-09-05 05:34:00+02:00
Fetching trains from this date: 2022-09-05 06:47:00+02:00
Fetching trains from this date: 2022-09-05 08:28:00+02:00
Fetching trains from this date: 2022-09-05 09:29:00+02:00
Fetching trains from this date: 2022-09-05 10:29:00+02:00
Fetching train

### Now we can get some information about number of journeys and trains, and also categorize trains according to routes

In [302]:
legs = []
trains = []
changeovers = []

for journey in journeylist:
    for leg in journey.legs:
        legs.append(leg)
        trains.append(leg.name)
        changeovers.append(leg.origin.name)

changeoverset = set(changeovers)
changeoverset.remove(origin.name) # this is the origin station for the whole trip, we don't want to count that as a changeover
  
print(f'Number of journeys: {len(journeylist)}')
print(f'Number of legs: {len(legs)}')
print(f'Number of trains: {len(set(trains))}')
print(f'Which trains: {set(trains)}')
print(f'Stations where trains connect: {changeoverset}')

Number of journeys: 313
Number of legs: 637
Number of trains: 76
Which trains: {'ICE 580', 'ICE 626', 'ICE 314', 'ICE 254', 'ICE 510', 'ICE 524', 'ICE 516', 'ICE 574', 'ICE 622', 'IC 1290', 'ICE 720', 'ICE 624', 'ICE 820', 'ICE 578', 'ICE 528', 'ICE 104', 'ICE 822', 'ICE 570', 'IC 1298', 'ICE 16', 'ICE 610', 'ICE 588', None, 'ICE 592', 'ICE 536', 'ICE 594', 'ICE 514', 'IC 2016', 'ICE 620', 'ICE 810', 'IC 2014', 'ICE 728', 'ICE 726', 'ICE 586', 'ICE 572', 'ICE 612', 'ICE 512', 'ICE 984', 'ICE 582', 'ICE 590', 'ICE 692', 'ICE 108', 'ICE 100', 'ICE 584', 'TGV 9576', 'IC 1296', 'ICE 102', 'ICE 200', 'ICE 614', 'ICE 1118', 'ICE 828', 'ICE 628', 'ICE 722', 'ICE 724', 'ICE 824', 'ICE 874', 'IC 266', 'ICE 202', 'ICE 526', 'ICE 596', 'IC 2266', 'ICE 690', 'ICE 616', 'ICE 518', 'ICE 106', 'ICE 204', 'ICE 770', 'ICE 598', 'ICE 122', 'ICE 680', 'ICE 152', 'ICE 618', 'ICE 1164', 'ICE 918', 'ICE 10', 'ICE 1010'}
Stations where trains connect on first change: {'Frankfurt(Main) Hbf', 'Mannheim Hbf', '

## Dataset integration

In [333]:
# Let's import our dataset and see if we can integrate this info

df0 = pd.read_csv('raw_data/select_2020.csv',encoding='iso-8859-2',sep=';')
df1 = pd.read_csv('raw_data/select_2021.csv',encoding='iso-8859-2',sep=';')
df2 = pd.read_csv('raw_data/select_2022.csv',encoding='iso-8859-2',sep=';')
df = pd.concat([df0,df1,df2])

# downsample as we don't need that much data for this test run
#df = df.sample(frac=0.01)

In [340]:
df[df["zugnr"].apply(lambda x: 'ICE 596' in x)]

Unnamed: 0,zugnr,datum,bhf,arrTime,adelay,depTime,ddelay
266507,ICE 596,2019-12-15,Augsburg Hbf,1300,0,1301,0
266508,ICE 596,2019-12-15,Berlin Südkreuz,2022,0,9999,0
266509,ICE 596,2019-12-15,Eisenach,1800,0,1801,0
266510,ICE 596,2019-12-15,Erfurt Hbf,1826,0,1827,0
266511,ICE 596,2019-12-15,Frankfurt(Main)Hbf,1608,0,1612,0
...,...,...,...,...,...,...,...
166867,ICE 596,2022-05-15,Ulm Hbf,1346,0,1348,0
166868,ICE 596,2022-05-16,Augsburg Hbf,1300,0,1302,0
166869,ICE 596,2022-05-16,München Hbf,9999,0,1227,1
166870,ICE 596,2022-05-16,München-Pasing,1235,0,1237,0


In [354]:
df.zugnr.unique()

array(['EC 6', 'EC 8', 'IC 118', 'IC 119', 'IC 2011', 'IC 2013',
       'IC 2210', 'IC 2213', 'IC 2216', 'IC 2265', 'IC 2266', 'IC 2269',
       'IC 2312', 'IC 2319', 'ICE 100', 'ICE 1003', 'ICE 1004',
       'ICE 1005', 'ICE 1006', 'ICE 1007', 'ICE 1009', 'ICE 101',
       'ICE 102', 'ICE 1020', 'ICE 1022', 'ICE 103', 'ICE 104', 'ICE 105',
       'ICE 106', 'ICE 107', 'ICE 108', 'ICE 109', 'ICE 1123', 'ICE 1554',
       'ICE 1627', 'ICE 1656', 'ICE 200', 'ICE 201', 'ICE 202', 'ICE 203',
       'ICE 204', 'ICE 22', 'ICE 228', 'ICE 229', 'ICE 23', 'ICE 26',
       'ICE 27', 'ICE 28', 'ICE 29', 'ICE 510', 'ICE 511', 'ICE 512',
       'ICE 513', 'ICE 514', 'ICE 515', 'ICE 516', 'ICe 516', 'ICE 517',
       'ICE 518', 'ICE 519', 'ICE 521', 'ICE 522', 'ICE 523', 'ICE 524',
       'ICE 525', 'ICE 526', 'ICE 527', 'ICE 528', 'ICE 529', 'ICE 573',
       'ICE 579', 'ICE 590', 'ICE 591', 'ICE 592', 'ICE 593', 'ICE 594',
       'ICE 595', 'ICe 595', 'ICE 596', 'ICE 597', 'ICE 598', 'ICe 598',
  

In [384]:
len(changeovers)

637

In [398]:
cu = pd.DataFrame(columns = ['journeys','origin','destination','routes','number_stations','changeovers','trains'])

In [399]:
cu

Unnamed: 0,journeys,origin,destination,routes,number_stations,changeovers,trains
