In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import xml.etree.ElementTree as ET

from functools import reduce
from datetime import datetime, timedelta

pd.set_option('display.max_columns', None)
%config IPCompleter.greedy=True

In [113]:
# load all data time chunks
df = pd.read_csv('../assets/data/2019-UT-ASD-Full/2019-UT-ASD-scrapped.csv', parse_dates=['PlannedDepartureTime',
                                                                                          'PlannedArrivalTime',
                                                                                          'ActualArrivalTime',
                                                                                          'ActualDepartureTime'
                                                                                         ])

In [114]:
df.shape

(94963, 20)

In [115]:
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00


# Merge data with weather

In [116]:
# if 'RideInstance' in dfa:
#     del dfa['RideInstance']
# dfa.insert(0, 'RideInstance', dfa.RideId.astype(str) + '#' + dfa.RideTime )

In [117]:
dfa_weather_U = pd.read_csv('../assets/data/de_bilt_weather_2019.csv')  
dfa_weather_A = pd.read_csv('../assets/data/schiphol_weather_2019.csv') 

In [118]:
dfa_weather_A['WeatherKey'] = dfa_weather_A['Timestamp'].astype(str) + '-' + dfa_weather_A['Hour'].astype(str) + '-' + dfa_weather_A['StationCode'].astype(str)
dfa_weather_U['WeatherKey'] = dfa_weather_U['Timestamp'].astype(str) + '-' + dfa_weather_U['Hour'].astype(str)+ '-'+ dfa_weather_U['StationCode'].astype(str)

In [119]:
print(dfa_weather_A.head())
print(dfa_weather_U.head())

   StationCode   Timestamp  Hour  WindDir  WindHour  WindSpeed  MaxWindSpeed  \
0          240  2019-01-01     1      260       7.0        6.0          10.0   
1          240  2019-01-01     2      260       7.0        7.0          10.0   
2          240  2019-01-01     3      250       7.0        7.0          11.0   
3          240  2019-01-01     4      250       7.0        8.0          11.0   
4          240  2019-01-01     5      260       9.0        9.0          12.0   

   Temperature  MinTemp10M  DewPointTemp  SunshineDur  Radiation  PrecipDur  \
0          8.5         NaN           5.7          0.0          0        0.0   
1          8.6         NaN           5.1          0.0          0        0.0   
2          8.5         NaN           5.1          0.0          0        0.0   
3          8.2         NaN           5.4          0.0          0        0.0   
4          8.7         NaN           5.8          0.0          0        0.0   

   PrecipHour  AirPressure  Visibility  Clou

In [120]:
def add_uic_code(date, destination, weather_station):
    if not pd.isnull(date):
        weather_key = f'{date.strftime("%Y-%m-%d")}-{int(date.hour) + 1}'
        if destination == 'Amsterdam Centraal':
            return weather_key + f'-{weather_station[0]}'
        if destination == '	Utrecht Centraal':
            return weather_key + f'-{weather_station[1]}'
    else:
        return None

In [121]:
# this function renames the weather columns with a specific prefix
def rename_weather(suffix, df):
    return df.rename(columns = {
        'StationCode' : f'{suffix}WeatherStationCode',
        'Timestamp' : f'{suffix}Timestamp',  # date (YYYY=year,MM=month,DD=day)
        'Hour' : f'{suffix}Hour' ,  # time (HH uur/hour, UT. 12 UT=13 MET, 14 MEZT. Hourly division 05 runs from 04.00 UT to 5.00 UT
        'WindDir' : f'{suffix}WindDir' ,  # Mean wind direction (in degrees) during the 10-minute period preceding the time of observation (360=north, 90=east, 180=south, 270=west, 0=calm 990=variable)
        'WindHour' : f'{suffix}WindHour' ,  # Hourly mean wind speed (in 0.1 m/s)
        'WindSpeed' : f'{suffix}WindSpeed' , # Mean wind speed (in 0.1 m/s) during the 10-minute period preceding the time of observation  
        'MaxWindSpeed' : f'{suffix}MaxWindSpeed' ,  # Maximum wind gust (in 0.1 m/s) during the hourly division
        'Temperature' : f'{suffix}Temperature' ,  # Temperature (in 0.1 degrees Celsius) at 1.50 m at the time of observation  
        'MinTemp10M' : f'{suffix}MinTemp10M' ,  # Minimum temperature (in 0.1 degrees Celsius) at 0.1 m in the preceding 6-hour period
        'DewPointTemp' : f'{suffix}DewPointTemp' ,  # Dew point temperature (in 0.1 degrees Celsius) at 1.50 m at the time of observation 
        'SunshineDur' : f'{suffix}SunshineDur' ,  # Sunshine duration (in 0.1 hour) during the hourly division, calculated from global radiation (-1 for <0.05 hour) 
        'Radiation' : f'{suffix}Radiation' ,  # Global radiation (in J/cm2) during the hourly division    
        'PrecipDur' : f'{suffix}PrecipDur',  # Precipitation duration (in 0.1 hour) during the hourly division
        'PrecipHour' : f'{suffix}PrecipHour',  # Hourly precipitation amount (in 0.1 mm) (-1 for <0.05 mm)
        'AirPressure' : f'{suffix}AirPressure',  # Air pressure (in 0.1 hPa) reduced to mean sea level, at the time of observation 
        'Visibility' : f'{suffix}Visibility',  # Horizontal visibility at the time of observation (0=less than 100m, 1=100-200m, 2=200-300m,..., 49=4900-5000m, 50=5-6km, 56=6-7km, 57=7-8km, ..., 79=29-30km, 80=30-35km, 81=35-40km,..., 89=more than 70km)
        'Cloudines' : f'{suffix}Cloudiness',  # Cloud cover (in octants), at the time of observation (9=sky invisible)
        'Humidity': f'{suffix}Humidity',  # Relative atmospheric humidity (in percents) at 1.50 m at the time of observation
        'WeatherCode' : f'{suffix}WeatherCode',  # Present weather code (00-99), description for the hourly division. (http://bibliotheek.knmi.nl/scholierenpdf/weercodes_Nederland)
        'WeatherCodeIndicator': f'{suffix}WeatherCodeIndicator',  # Indicator present weather code (1=manned and recorded (using code from visual observations), 2,3=manned and omitted (no significant weather phenomenon to report, not available), 4=automatically recorded (using code from visual observations), 5,6=automatically omitted (no significant weather phenomenon to report, not available), 7=automatically set (using code from automated observations) 
        'Fog' : f'{suffix}Fog',  # Fog 0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation
        'Rain' : f'{suffix}Rain',  # Rainfall 0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation
        'Snow' : f'{suffix}Snow',  # Snow 0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation
        'Thunder' : f'{suffix}Thunder',  # Thunder  0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation 
        'IceFormation' : f'{suffix}IceFormation'  # Ice formation 0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation
    })

In [122]:
# df['WeatherKey'] = df['PlannedArrivalTime'].dt.strftime('%Y-%m-%d') + '-' + (df['PlannedArrivalTime'].dt.hour.astype(int) + 1).astype(str)

In [123]:
df['WeatherKey'] = df.apply(lambda row: add_uic_code(row['PlannedDepartureTime'], row['DestinationStation'], [260, 240]), axis=1)

In [124]:
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-260
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00,2019-01-01-3-260
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00,2019-01-01-4-260


In [125]:
 df.sort_values('PlannedArrivalTime').head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-260
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,
200,2019-01-01,3091,3091,Amsterdam Centraal,00:24,0.0,Utrecht Centraal,00:52,0.0,4a,19,VIRM-6 8662,VIRM-6 8662,Amsterdam Amstel,,2019-01-01#3091,2019-01-01 00:24:00,2019-01-01 00:52:00,2019-01-01 00:52:00,2019-01-01 00:24:00,
199,2019-01-01,2986,2986,Utrecht Centraal,00:23,0.0,Amsterdam Centraal,00:52,0.0,7,10a,VIRM-4 9563,,Amsterdam Bijlmer ArenA;Amsterdam Amstel,,2019-01-01#2986,2019-01-01 00:23:00,2019-01-01 00:52:00,2019-01-01 00:52:00,2019-01-01 00:23:00,2019-01-01-1-260


In [126]:
# combine weather in one big weather dataset
result_weather = pd.concat([dfa_weather_A, dfa_weather_U])

In [127]:
result_weather.head()

Unnamed: 0,StationCode,Timestamp,Hour,WindDir,WindHour,WindSpeed,MaxWindSpeed,Temperature,MinTemp10M,DewPointTemp,SunshineDur,Radiation,PrecipDur,PrecipHour,AirPressure,Visibility,Cloudines,Humidity,WeatherCode,WeatherCodeIndicator,Fog,Rain,Snow,Thunder,IceFormation,WeatherKey
0,240,2019-01-01,1,260,7.0,6.0,10.0,8.5,,5.7,0.0,0,0.0,0.0,1030.6,69.0,8.0,82,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01-1-240
1,240,2019-01-01,2,260,7.0,7.0,10.0,8.6,,5.1,0.0,0,0.0,0.0,1030.1,75.0,8.0,78,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01-2-240
2,240,2019-01-01,3,250,7.0,7.0,11.0,8.5,,5.1,0.0,0,0.0,0.0,1029.5,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01-3-240
3,240,2019-01-01,4,250,7.0,8.0,11.0,8.2,,5.4,0.0,0,0.0,0.0,1029.0,70.0,8.0,82,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01-4-240
4,240,2019-01-01,5,260,9.0,9.0,12.0,8.7,,5.8,0.0,0,0.0,-0.1,1028.3,70.0,8.0,81,22.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-01-5-240


In [128]:
# merge weather with train data
df = pd.merge(df,result_weather, on = 'WeatherKey', how='left')
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,StationCode,Timestamp,Hour,WindDir,WindHour,WindSpeed,MaxWindSpeed,Temperature,MinTemp10M,DewPointTemp,SunshineDur,Radiation,PrecipDur,PrecipHour,AirPressure,Visibility,Cloudines,Humidity,WeatherCode,WeatherCodeIndicator,Fog,Rain,Snow,Thunder,IceFormation
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-260,,,,,,,,,,,,,,,,,,,,,,,,,
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00,2019-01-01-3-260,260.0,2019-01-01,3.0,250.0,4.0,4.0,9.0,8.4,,5.7,0.0,0.0,0.0,0.0,1030.1,57.0,8.0,83.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00,2019-01-01-4-260,260.0,2019-01-01,4.0,250.0,4.0,4.0,8.0,8.2,,5.6,0.0,0.0,0.0,0.0,1029.3,62.0,8.0,83.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [129]:
df.iloc[10].to_frame()

Unnamed: 0,10
Date,2019-01-01
RideId,1425
TrainId,1425
DepartureStation,Utrecht Centraal
DepartureTime,06:05
DepartureDelay,0
DestinationStation,Amsterdam Centraal
ArrivalTime,06:39
ArrivalDelay,0
DeparturePlatform,5


In [130]:
df = rename_weather('Departure', df)

In [131]:
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-260,,,,,,,,,,,,,,,,,,,,,,,,,
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00,2019-01-01-3-260,260.0,2019-01-01,3.0,250.0,4.0,4.0,9.0,8.4,,5.7,0.0,0.0,0.0,0.0,1030.1,57.0,8.0,83.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00,2019-01-01-4-260,260.0,2019-01-01,4.0,250.0,4.0,4.0,8.0,8.2,,5.6,0.0,0.0,0.0,0.0,1029.3,62.0,8.0,83.0,,5.0,0.0,0.0,0.0,0.0,0.0


# Create the departure time field

In [132]:
# create weather key for departure
df['WeatherKey'] = df.apply(lambda row: add_uic_code(row['PlannedArrivalTime'], row['DestinationStation'], [240, 260]), axis=1)

In [134]:
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-240,,,,,,,,,,,,,,,,,,,,,,,,,
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00,2019-01-01-3-240,260.0,2019-01-01,3.0,250.0,4.0,4.0,9.0,8.4,,5.7,0.0,0.0,0.0,0.0,1030.1,57.0,8.0,83.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00,2019-01-01-4-240,260.0,2019-01-01,4.0,250.0,4.0,4.0,8.0,8.2,,5.6,0.0,0.0,0.0,0.0,1029.3,62.0,8.0,83.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [136]:
df = pd.merge(df, result_weather, on = 'WeatherKey', how='left')
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,StationCode,Timestamp,Hour,WindDir,WindHour,WindSpeed,MaxWindSpeed,Temperature,MinTemp10M,DewPointTemp,SunshineDur,Radiation,PrecipDur,PrecipHour,AirPressure,Visibility,Cloudines,Humidity,WeatherCode,WeatherCodeIndicator,Fog,Rain,Snow,Thunder,IceFormation
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-240,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00,2019-01-01-3-240,260.0,2019-01-01,3.0,250.0,4.0,4.0,9.0,8.4,,5.7,0.0,0.0,0.0,0.0,1030.1,57.0,8.0,83.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-01-01,3.0,250.0,7.0,7.0,11.0,8.5,,5.1,0.0,0.0,0.0,0.0,1029.5,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00,2019-01-01-4-240,260.0,2019-01-01,4.0,250.0,4.0,4.0,8.0,8.2,,5.6,0.0,0.0,0.0,0.0,1029.3,62.0,8.0,83.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-01-01,4.0,250.0,7.0,8.0,11.0,8.2,,5.4,0.0,0.0,0.0,0.0,1029.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [137]:
df = rename_weather('Destination', df)

In [139]:
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-240,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00,2019-01-01-3-240,260.0,2019-01-01,3.0,250.0,4.0,4.0,9.0,8.4,,5.7,0.0,0.0,0.0,0.0,1030.1,57.0,8.0,83.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-01-01,3.0,250.0,7.0,7.0,11.0,8.5,,5.1,0.0,0.0,0.0,0.0,1029.5,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00,2019-01-01-4-240,260.0,2019-01-01,4.0,250.0,4.0,4.0,8.0,8.2,,5.6,0.0,0.0,0.0,0.0,1029.3,62.0,8.0,83.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-01-01,4.0,250.0,7.0,8.0,11.0,8.2,,5.4,0.0,0.0,0.0,0.0,1029.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [140]:
# save the dataset with the weate
df.to_csv('../assets/data/2019-UT-ASD-Full/2019-UT-ASD-scrapped.csv', index=None)

# Merge disruptions with train data

In [57]:
df = pd.read_csv('../assets/data/2019-UT-ASD-Full/2019-UT-ASD-scrapped.csv')
df_disruptions = pd.read_csv('../assets/original_dataset/disruptions-2019.csv', parse_dates=['start_time', 
                                                                                             'end_time'
                                                                                            ])

In [58]:
df_disruptions.shape

(5940, 14)

In [59]:
df_disruptions = df_disruptions.loc[df_disruptions['end_time'].notna()]

In [63]:
df_disruptions.fillna('not applicable', inplace=True)

In [64]:
# df_disruptions.fillna('not applicable', inplace=True)

In [65]:
df_disruptions = df_disruptions.loc[df_disruptions['rdt_lines'].str.contains('Amsterdam Centraal - Utrecht Centraal')]

In [278]:
df_disruptions.shape

(174, 14)

In [66]:
df_disruptions.head()

Unnamed: 0,rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes
104,25340,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Breukelen","AC, BKL",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-01-08 09:20:58,2019-01-08 09:21:35,1.0
106,25342,Utrecht Centraal,"'s-Hertogenbosch - Utrecht Centraal, Almere Oo...","40,44,51,133,134,136,137,142,143,147,149,150,1...",Utrecht Centraal,UT,inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-01-08 11:42:07,2019-01-08 11:45:17,3.0
123,25359,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Schipho...",136137,"Breukelen,Maarssen,Utrecht Centraal,Utrecht Zu...","BKL, MAS, UT, UTZL",gladde sporen,slippery railway tracks,gladde sporen,slippery railway tracks,weather,2019-01-09 06:29:40,2019-01-09 09:58:10,209.0
144,25380,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Schipho...",136137,"Breukelen,Maarssen,Utrecht Centraal,Utrecht Zu...","BKL, MAS, UT, UTZL",gladde sporen,slippery railway tracks,gladde sporen,slippery railway tracks,weather,2019-01-10 07:01:52,2019-01-10 09:14:39,133.0
155,25391,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Schipho...",136137,"Breukelen,Maarssen,Utrecht Centraal,Utrecht Zu...","BKL, MAS, UT, UTZL",gladde sporen,slippery railway tracks,gladde sporen,slippery railway tracks,weather,2019-01-10 16:13:02,2019-01-10 18:57:39,165.0


In [89]:
df['DisruptionKey'] = df['Date']

In [82]:
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,DisruptionKey
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-240,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018-12-31-01
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018-12-31-01
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00,2019-01-01-3-240,260.0,2019-01-01,3.0,250.0,4.0,4.0,9.0,8.4,,5.7,0.0,0.0,0.0,0.0,1030.1,57.0,8.0,83.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-01-01,3.0,250.0,7.0,7.0,11.0,8.5,,5.1,0.0,0.0,0.0,0.0,1029.5,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0,2019-01-01-02
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2018-12-31-02
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00,2019-01-01-4-240,260.0,2019-01-01,4.0,250.0,4.0,4.0,8.0,8.2,,5.6,0.0,0.0,0.0,0.0,1029.3,62.0,8.0,83.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-01-01,4.0,250.0,7.0,8.0,11.0,8.2,,5.4,0.0,0.0,0.0,0.0,1029.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,2019-01-01-03


In [80]:
df['DepartureTime'].str.slice(stop=2)

0        01
1        01
2        02
3        02
4        03
         ..
94958    19
94959    19
94960    19
94961    20
94962    20
Name: DepartureTime, Length: 94963, dtype: object

In [280]:
df_disruptions.loc[df_disruptions.start_time.dt.strftime('%Y-%m-%d') != df_disruptions.end_time.dt.strftime('%Y-%m-%d')]

Unnamed: 0,rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes
666,25902,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2019-02-05 23:56:54,2019-02-06 00:06:17,9.0
1673,26909,Amsterdam-Utrecht; Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Bijlmer ArenA,Amste...","ASA, ASB, ASD, ASDM, DVD",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-04-05 23:16:47,2019-04-06 00:42:18,86.0
2334,27570,Amsterdam-Utrecht; Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Bijlmer ArenA,Amste...","ASA, ASB, ASD, ASDM, DVD",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-05-17 22:13:25,2019-05-18 07:39:02,566.0
2787,28023,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-06-19 19:13:57,2019-06-20 01:11:31,358.0
3329,28565,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Bijlmer ArenA,Amste...","ASA, ASB, ASD, ASDM, DVD",herstelwerkzaamheden,repair works,herstelwerkzaamheden,repair works,engineering work,2019-07-23 22:51:13,2019-07-24 01:11:20,140.0
4689,29925,Amsterdam-Utrecht,Amsterdam Centraal - Utrecht Centraal,136,"Abcoude,Amsterdam Amstel,Amsterdam Bijlmer Are...","AC, ASA, ASB, ASD, ASDM, ASHD, BKL, DVD, MAS, ...",aanrijding met een voertuig,collision with a vehicle,aanrijding met een voertuig,collision with a vehicle,accidents,2019-10-16 23:33:36,2019-10-17 01:33:05,119.0
5374,30610,Amsterdam-Utrecht; Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Bijlmer ArenA,Amste...","ASA, ASB, ASD, ASDM, DVD",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2019-11-28 20:47:57,2019-11-29 00:50:40,243.0


In [281]:
df.head()

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation
0,2018-12-31,1405,1405,Utrecht Centraal,01:01,1.0,Amsterdam Centraal,01:29,1.5,7,2a,VIRM-4 9516;VIRM-6 8671,VIRM-4 9516;VIRM-6 8671,Amsterdam Bijlmer ArenA,,2018-12-31#1405,2018-12-31 01:01:00,2018-12-31 01:29:00,2018-12-31 01:30:30,2018-12-31 01:02:00,2018-12-31-2-240,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2018-12-31,1402,1402,Amsterdam Centraal,01:18,0.0,Utrecht Centraal,01:53,0.0,2,15,VIRM-6 8730;VIRM-6 8656,,Amsterdam Bijlmer ArenA,,2018-12-31#1402,2018-12-31 01:18:00,2018-12-31 01:53:00,2018-12-31 01:53:00,2018-12-31 01:18:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2019-01-01,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,15,2a,VIRM-6 8656;VIRM-6 8730,VIRM-6 8656;VIRM-6 8730,Amsterdam Bijlmer ArenA,,2019-01-01#1409,2019-01-01 02:17:00,2019-01-01 02:44:00,2019-01-01 02:45:00,2019-01-01 02:17:00,2019-01-01-3-240,260.0,2019-01-01,3.0,250.0,4.0,4.0,9.0,8.4,,5.7,0.0,0.0,0.0,0.0,1030.1,57.0,8.0,83.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-01-01,3.0,250.0,7.0,7.0,11.0,8.5,,5.1,0.0,0.0,0.0,0.0,1029.5,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0
3,2018-12-31,1406,1406,Amsterdam Centraal,02:19,1.5,Utrecht Centraal,02:45,0.0,2b,14,VIRM-6 8648;VIRM-4 9504,,,,2018-12-31#1406,2018-12-31 02:19:00,2018-12-31 02:45:00,2018-12-31 02:45:00,2018-12-31 02:20:30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2019-01-01,1413,1413,Utrecht Centraal,03:11,0.0,Amsterdam Centraal,03:44,0.0,14,2a,VIRM-4 9504;VIRM-6 8648,VIRM-4 9504;VIRM-6 8648,,,2019-01-01#1413,2019-01-01 03:11:00,2019-01-01 03:44:00,2019-01-01 03:44:00,2019-01-01 03:11:00,2019-01-01-4-240,260.0,2019-01-01,4.0,250.0,4.0,4.0,8.0,8.2,,5.6,0.0,0.0,0.0,0.0,1029.3,62.0,8.0,83.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-01-01,4.0,250.0,7.0,8.0,11.0,8.2,,5.4,0.0,0.0,0.0,0.0,1029.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [88]:
df_disruptions['DisruptionKey'] = df_disruptions['start_time'].dt.strftime('%Y-%m-%d')

In [68]:
df_disruptions.head()

Unnamed: 0,rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes,DisruptionKey
104,25340,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Breukelen","AC, BKL",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-01-08 09:20:58,2019-01-08 09:21:35,1.0,2019-01-08-09
106,25342,Utrecht Centraal,"'s-Hertogenbosch - Utrecht Centraal, Almere Oo...","40,44,51,133,134,136,137,142,143,147,149,150,1...",Utrecht Centraal,UT,inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-01-08 11:42:07,2019-01-08 11:45:17,3.0,2019-01-08-11
123,25359,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Schipho...",136137,"Breukelen,Maarssen,Utrecht Centraal,Utrecht Zu...","BKL, MAS, UT, UTZL",gladde sporen,slippery railway tracks,gladde sporen,slippery railway tracks,weather,2019-01-09 06:29:40,2019-01-09 09:58:10,209.0,2019-01-09-06
144,25380,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Schipho...",136137,"Breukelen,Maarssen,Utrecht Centraal,Utrecht Zu...","BKL, MAS, UT, UTZL",gladde sporen,slippery railway tracks,gladde sporen,slippery railway tracks,weather,2019-01-10 07:01:52,2019-01-10 09:14:39,133.0,2019-01-10-07
155,25391,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Schipho...",136137,"Breukelen,Maarssen,Utrecht Centraal,Utrecht Zu...","BKL, MAS, UT, UTZL",gladde sporen,slippery railway tracks,gladde sporen,slippery railway tracks,weather,2019-01-10 16:13:02,2019-01-10 18:57:39,165.0,2019-01-10-16


In [69]:
df_disruptions['Date'].dtypes

KeyError: 'Date'

In [90]:
df_temp = pd.merge(df, df_disruptions, on='DisruptionKey', how='left')


In [91]:
df_temp[50000:550000]

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,DisruptionKey,rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes
50000,2019-06-19,3091,3091,Amsterdam Centraal,00:24,0.0,Utrecht Centraal,00:52,0.0,5b,19,VIRM-4 9506,VIRM-4 9506,Amsterdam Amstel,,2019-06-19#3091,2019-06-19 00:24:00,2019-06-19 00:52:00,2019-06-19 00:52:00,2019-06-19 00:24:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019-06-19,28013.0,Amsterdam-Utrecht,"Amersfoort - Amsterdam Centraal, Amsterdam Cen...",135136141145,"Amsterdam Centraal,Amsterdam Muiderpoort","ASD, ASDM",seinstoring,signal failure,seinstoring,signal failure,infrastructure,2019-06-19 13:19:45,2019-06-19 13:56:07,36.0
50001,2019-06-19,3091,3091,Amsterdam Centraal,00:24,0.0,Utrecht Centraal,00:52,0.0,5b,19,VIRM-4 9506,VIRM-4 9506,Amsterdam Amstel,,2019-06-19#3091,2019-06-19 00:24:00,2019-06-19 00:52:00,2019-06-19 00:52:00,2019-06-19 00:24:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019-06-19,28023.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-06-19 19:13:57,2019-06-20 01:11:31,358.0
50002,2019-06-19,7384,7384,Utrecht Centraal,00:21,0.0,Amsterdam Centraal,01:06,0.0,14,5a,SLT-4 2419,SLT-4 2419,Utrecht Zuilen;Maarssen;Breukelen;Abcoude;Amst...,,2019-06-19#7384,2019-06-19 00:21:00,2019-06-19 01:06:00,2019-06-19 01:06:00,2019-06-19 00:21:00,2019-06-19-2-240,260.0,2019-06-19,1.0,70.0,2.0,2.0,3.0,17.0,,14.6,0.0,0.0,0.0,0.0,1009.5,70.0,0.0,85.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-06-19,2.0,90.0,4.0,3.0,6.0,17.3,,14.4,0.0,0.0,0.0,0.0,1008.5,70.0,4.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,2019-06-19,28000.0,Amsterdam-Utrecht; Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Bijlmer ArenA,Amste...","ASA, ASB, ASD, ASDM, DVD",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-06-19 06:27:22,2019-06-19 07:37:36,70.0
50003,2019-06-19,7384,7384,Utrecht Centraal,00:21,0.0,Amsterdam Centraal,01:06,0.0,14,5a,SLT-4 2419,SLT-4 2419,Utrecht Zuilen;Maarssen;Breukelen;Abcoude;Amst...,,2019-06-19#7384,2019-06-19 00:21:00,2019-06-19 01:06:00,2019-06-19 01:06:00,2019-06-19 00:21:00,2019-06-19-2-240,260.0,2019-06-19,1.0,70.0,2.0,2.0,3.0,17.0,,14.6,0.0,0.0,0.0,0.0,1009.5,70.0,0.0,85.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-06-19,2.0,90.0,4.0,3.0,6.0,17.3,,14.4,0.0,0.0,0.0,0.0,1008.5,70.0,4.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,2019-06-19,28001.0,Amsterdam-Köln-Frankfurt,"Amsterdam Centraal - Utrecht Centraal, Arnhem ...",133136,"Abcoude,Arnhem Centraal,Amsterdam Amstel,Amste...","AC, AH, ASA, ASB, ASD, ASDM, ASHD, BKL, BNK, D...",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-06-19 06:44:29,2019-06-19 06:44:29,0.0
50004,2019-06-19,7384,7384,Utrecht Centraal,00:21,0.0,Amsterdam Centraal,01:06,0.0,14,5a,SLT-4 2419,SLT-4 2419,Utrecht Zuilen;Maarssen;Breukelen;Abcoude;Amst...,,2019-06-19#7384,2019-06-19 00:21:00,2019-06-19 01:06:00,2019-06-19 01:06:00,2019-06-19 00:21:00,2019-06-19-2-240,260.0,2019-06-19,1.0,70.0,2.0,2.0,3.0,17.0,,14.6,0.0,0.0,0.0,0.0,1009.5,70.0,0.0,85.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-06-19,2.0,90.0,4.0,3.0,6.0,17.3,,14.4,0.0,0.0,0.0,0.0,1008.5,70.0,4.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,2019-06-19,28002.0,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Rhenen ...",136137166,"Breukelen,Bunnik,Driebergen-Zeist,Maarssen,Maa...","BKL, BNK, DB, MAS, MRN, UT, UTZL, VNDC, VNDW, ...",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-06-19 07:00:38,2019-06-19 07:00:38,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111263,2019-12-31,2973,2973,Amsterdam Centraal,19:40,0.0,Utrecht Centraal,20:07,2.0,4b,18,VIRM-4 9468,VIRM-4 9468,Amsterdam Amstel,,2019-12-31#2973,2019-12-31 19:40:00,2019-12-31 20:07:00,2019-12-31 20:09:00,2019-12-31 19:40:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019-12-31,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111264,2019-12-31,2968,2968,Utrecht Centraal,19:53,0.0,Amsterdam Centraal,20:18,1.0,7,8b,VIRM-6 8671,VIRM-6 8671,Amsterdam Amstel,,2019-12-31#2968,2019-12-31 19:53:00,2019-12-31 20:18:00,2019-12-31 20:19:00,2019-12-31 19:53:00,2019-12-31-21-240,260.0,2019-12-31,20.0,130.0,2.0,3.0,5.0,4.0,,3.7,0.0,0.0,0.0,0.0,1035.9,12.0,8.0,98.0,20.0,7.0,1.0,0.0,0.0,0.0,0.0,240.0,2019-12-31,21.0,130.0,2.0,3.0,4.0,2.7,,2.6,0.0,0.0,0.0,0.0,1035.3,1.0,9.0,99.0,32.0,7.0,1.0,0.0,0.0,0.0,0.0,2019-12-31,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111265,2019-12-31,3073,3073,Amsterdam Centraal,19:54,0.0,Utrecht Centraal,20:21,0.0,4b,19,VIRM-4 9576,VIRM-4 9576,Amsterdam Amstel,,2019-12-31#3073,2019-12-31 19:54:00,2019-12-31 20:21:00,2019-12-31 20:21:00,2019-12-31 19:54:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019-12-31,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111266,2019-12-31,122,122,Utrecht Centraal,20:03,0.0,Amsterdam Centraal,20:28,0.0,5,7b,ICE-3M,ICE-3M,,,2019-12-31#122,2019-12-31 20:03:00,2019-12-31 20:28:00,2019-12-31 20:28:00,2019-12-31 20:03:00,2019-12-31-21-240,260.0,2019-12-31,21.0,140.0,3.0,4.0,6.0,4.0,,3.5,0.0,0.0,0.0,0.0,1035.6,18.0,8.0,96.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-12-31,21.0,130.0,2.0,3.0,4.0,2.7,,2.6,0.0,0.0,0.0,0.0,1035.3,1.0,9.0,99.0,32.0,7.0,1.0,0.0,0.0,0.0,0.0,2019-12-31,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0


In [92]:
df = df_temp

In [289]:
df.tail(40)

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes
111228,2019-12-31,7463,7463,Amsterdam Centraal,17:08,0.0,Utrecht Centraal,17:54,0.0,5b,15,SGMM-3 2963,SGMM-3 2963,Amsterdam Muiderpoort;Amsterdam Amstel;Duivend...,,2019-12-31#7463,2019-12-31 17:08:00,2019-12-31 17:54:00,2019-12-31 17:54:00,2019-12-31 17:08:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111229,2019-12-31,858,858,Utrecht Centraal,17:29,0.0,Amsterdam Centraal,17:55,1.5,7,8a,VIRM-4 9547,VIRM-4 9547,Amsterdam Amstel,,2019-12-31#858,2019-12-31 17:29:00,2019-12-31 17:55:00,2019-12-31 17:56:30,2019-12-31 17:29:00,2019-12-31-18-240,260.0,2019-12-31,18.0,140.0,2.0,2.0,4.0,2.6,-1.1,2.5,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,99.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,240.0,2019-12-31,18.0,90.0,2.0,2.0,3.0,3.1,-1.9,2.4,0.0,0.0,0.0,0.0,1035.4,58.0,8.0,95.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111230,2019-12-31,865,865,Amsterdam Centraal,17:35,0.0,Utrecht Centraal,18:01,0.0,4,18,VIRM-6 8703;VIRM-4 9516,VIRM-6 8703;VIRM-4 9516,Amsterdam Amstel,,2019-12-31#865,2019-12-31 17:35:00,2019-12-31 18:01:00,2019-12-31 18:01:00,2019-12-31 17:35:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111231,2019-12-31,3058,3058,Utrecht Centraal,17:38,0.0,Amsterdam Centraal,18:05,2.5,5,8a,VIRM-6 8642,VIRM-6 8642,Amsterdam Amstel,,2019-12-31#3058,2019-12-31 17:38:00,2019-12-31 18:05:00,2019-12-31 18:07:30,2019-12-31 17:38:00,2019-12-31-19-240,260.0,2019-12-31,18.0,140.0,2.0,2.0,4.0,2.6,-1.1,2.5,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,99.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,240.0,2019-12-31,19.0,160.0,3.0,3.0,5.0,0.1,,-0.1,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,98.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111232,2019-12-31,3965,3965,Amsterdam Centraal,17:45,0.0,Utrecht Centraal,18:11,0.0,4,18,VIRM-6 8675;VIRM-4 9574,VIRM-6 8675;VIRM-4 9574,Amsterdam Amstel,,2019-12-31#3965,2019-12-31 17:45:00,2019-12-31 18:11:00,2019-12-31 18:11:00,2019-12-31 17:45:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111233,2019-12-31,3960,3960,Utrecht Centraal,17:48,0.0,Amsterdam Centraal,18:14,3.0,7,8a,VIRM-4 9416;VIRM-4 9508,VIRM-4 9416;VIRM-4 9508,Amsterdam Amstel,,2019-12-31#3960,2019-12-31 17:48:00,2019-12-31 18:14:00,2019-12-31 18:17:00,2019-12-31 17:48:00,2019-12-31-19-240,260.0,2019-12-31,18.0,140.0,2.0,2.0,4.0,2.6,-1.1,2.5,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,99.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,240.0,2019-12-31,19.0,160.0,3.0,3.0,5.0,0.1,,-0.1,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,98.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111234,2019-12-31,3065,3065,Amsterdam Centraal,17:54,6.5,Utrecht Centraal,18:21,4.0,4b,19,VIRM-4 9581,VIRM-4 9581,Amsterdam Amstel,,2019-12-31#3065,2019-12-31 17:54:00,2019-12-31 18:21:00,2019-12-31 18:25:00,2019-12-31 18:00:30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111235,2019-12-31,7458,7458,Utrecht Centraal,17:37,0.0,Amsterdam Centraal,18:21,0.0,14,7a,SGMM-3 2939,SGMM-3 2939,Utrecht Zuilen;Maarssen;Breukelen;Abcoude;Amst...,,2019-12-31#7458,2019-12-31 17:37:00,2019-12-31 18:21:00,2019-12-31 18:21:00,2019-12-31 17:37:00,2019-12-31-19-240,260.0,2019-12-31,18.0,140.0,2.0,2.0,4.0,2.6,-1.1,2.5,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,99.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,240.0,2019-12-31,19.0,160.0,3.0,3.0,5.0,0.1,,-0.1,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,98.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111236,2019-12-31,7465,7465,Amsterdam Centraal,17:39,0.0,Utrecht Centraal,18:22,3.0,5b,15,SGMM-3 2960,SGMM-3 2960,Amsterdam Muiderpoort;Amsterdam Amstel;Duivend...,,2019-12-31#7465,2019-12-31 17:39:00,2019-12-31 18:22:00,2019-12-31 18:25:00,2019-12-31 17:39:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0
111237,2019-12-31,860,860,Utrecht Centraal,17:59,0.0,Amsterdam Centraal,18:25,0.0,7,8a,VIRM-6 8733,VIRM-6 8733,Amsterdam Amstel,,2019-12-31#860,2019-12-31 17:59:00,2019-12-31 18:25:00,2019-12-31 18:25:00,2019-12-31 17:59:00,2019-12-31-19-240,260.0,2019-12-31,18.0,140.0,2.0,2.0,4.0,2.6,-1.1,2.5,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,99.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,240.0,2019-12-31,19.0,160.0,3.0,3.0,5.0,0.1,,-0.1,0.0,0.0,0.0,0.0,1035.9,0.0,9.0,98.0,34.0,7.0,1.0,0.0,0.0,0.0,0.0,31170.0,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Muiderpoort","ASA, ASDM",inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-12-31 07:28:45,2019-12-31 07:57:33,29.0


In [295]:
df_disruptions.loc[df_disruptions.duplicated('Date')].sort_values('Date', ascending=True)

Unnamed: 0,rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes,Date
106,25342,Utrecht Centraal,"'s-Hertogenbosch - Utrecht Centraal, Almere Oo...","40,44,51,133,134,136,137,142,143,147,149,150,1...",Utrecht Centraal,UT,inzet van hulpdiensten,an emergency call,inzet van hulpdiensten,an emergency call,external,2019-01-08 11:42:07,2019-01-08 11:45:17,3.0,2019-01-08
155,25391,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Schipho...",136137,"Breukelen,Maarssen,Utrecht Centraal,Utrecht Zu...","BKL, MAS, UT, UTZL",gladde sporen,slippery railway tracks,gladde sporen,slippery railway tracks,weather,2019-01-10 16:13:02,2019-01-10 18:57:39,165.0,2019-01-10
954,26190,Amsterdam-Utrecht,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Amsterdam Bijlmer ArenA,Amsterdam Holendrecht","ASB, ASHD",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-02-21 09:22:50,2019-02-21 09:43:32,21.0,2019-02-21
1178,26414,Amsterdam-Köln-Frankfurt,"Amsterdam Centraal - Utrecht Centraal, Arnhem ...",129133136,"Abcoude,Arnhem Centraal,Arnhem Velperpoort,Ams...","AC, AH, AHP, ASA, ASB, ASD, ASDM, ASHD, BKL, B...",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2019-03-08 16:36:12,2019-03-08 17:29:40,53.0,2019-03-08
1476,26712,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0,2019-03-26
1483,26719,Amsterdam-Utrecht; Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Amstel,Amsterdam Bijlmer Are...","AC, ASA, ASB, ASD, ASDM, ASHD, BKL, DVD, MAS",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-03-26 11:52:09,2019-03-26 14:15:13,143.0,2019-03-26
1528,26764,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Rhenen ...",136137166,"Breukelen,Bunnik,Driebergen-Zeist,Maarssen,Maa...","BKL, BNK, DB, MAS, MRN, UT, UTZL, VNDC, VNDW, ...",defect wissel,defective point,defect wissel,defective point,infrastructure,2019-03-29 05:48:09,2019-03-29 09:33:43,226.0,2019-03-29
1530,26766,Breukelen-Utrecht-Veenendaal,"Amsterdam Centraal - Utrecht Centraal, Rhenen ...",136137166,"Breukelen,Bunnik,Driebergen-Zeist,Maarssen,Maa...","BKL, BNK, DB, MAS, MRN, UT, UTZL, VNDC, VNDW, ...",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2019-03-29 06:01:30,2019-03-29 06:01:30,0.0,2019-03-29
1532,26768,Amsterdam-Köln-Frankfurt,"Amsterdam Centraal - Utrecht Centraal, Arnhem ...",133136,"Abcoude,Arnhem Centraal,Amsterdam Amstel,Amste...","AC, AH, ASA, ASB, ASD, ASDM, ASHD, BKL, BNK, D...",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2019-03-29 06:00:15,2019-03-29 07:06:19,66.0,2019-03-29
1673,26909,Amsterdam-Utrecht; Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136141,"Amsterdam Amstel,Amsterdam Bijlmer ArenA,Amste...","ASA, ASB, ASD, ASDM, DVD",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2019-04-05 23:16:47,2019-04-06 00:42:18,86.0,2019-04-05


In [297]:
df.loc[df['rdt_id'] == 26712]

Unnamed: 0,Date,RideId,TrainId,DepartureStation,DepartureTime,DepartureDelay,DestinationStation,ArrivalTime,ArrivalDelay,DeparturePlatform,ArrivalPlatform,DepartureMaterials,ArrivalMaterials,InbetweenStations,FaultMessages,RideInstance,PlannedDepartureTime,PlannedArrivalTime,ActualArrivalTime,ActualDepartureTime,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes
22793,2019-03-26,1406,1406,Amsterdam Centraal,02:19,0.0,Utrecht Centraal,02:44,1.5,7a,15,VIRM-4 9514,,,,2019-03-26#1406,2019-03-26 02:19:00,2019-03-26 02:44:00,2019-03-26 02:45:30,2019-03-26 02:19:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0
22796,2019-03-26,1409,1409,Utrecht Centraal,02:17,0.0,Amsterdam Centraal,02:44,1.0,18,7a,VIRM-4 9426,VIRM-4 9426,,,2019-03-26#1409,2019-03-26 02:17:00,2019-03-26 02:44:00,2019-03-26 02:45:00,2019-03-26 02:17:00,2019-03-26-3-240,260.0,2019-03-26,3.0,310.0,2.0,1.0,5.0,5.6,,1.1,0.0,0.0,0.0,0.0,1031.4,75.0,8.0,72.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-03-26,3.0,310.0,2.0,2.0,4.0,3.3,,0.6,0.0,0.0,0.0,0.0,1031.4,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0
22799,2019-03-26,1413,1413,Utrecht Centraal,03:17,0.0,Amsterdam Centraal,03:44,0.0,15,7a,VIRM-4 9514,VIRM-4 9514,,,2019-03-26#1413,2019-03-26 03:17:00,2019-03-26 03:44:00,2019-03-26 03:44:00,2019-03-26 03:17:00,2019-03-26-4-240,260.0,2019-03-26,4.0,280.0,1.0,1.0,3.0,2.6,,0.8,0.0,0.0,0.0,0.0,1031.3,70.0,8.0,88.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-03-26,4.0,300.0,2.0,2.0,3.0,2.8,,0.9,0.0,0.0,0.0,0.0,1031.4,69.0,8.0,87.0,,5.0,0.0,0.0,0.0,0.0,0.0,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0
22802,2019-03-26,1410,1410,Amsterdam Centraal,03:19,0.0,Utrecht Centraal,03:44,6.0,7a,15,VIRM-4 9571,,,,2019-03-26#1410,2019-03-26 03:19:00,2019-03-26 03:44:00,2019-03-26 03:50:00,2019-03-26 03:19:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0
22805,2019-03-26,1414,1414,Amsterdam Centraal,04:19,0.0,Utrecht Centraal,04:44,0.0,7a,15,VIRM-6 8610,,,,2019-03-26#1414,2019-03-26 04:19:00,2019-03-26 04:44:00,2019-03-26 04:44:00,2019-03-26 04:19:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23606,2019-03-26,7384,7384,Utrecht Centraal,00:21,0.0,Amsterdam Centraal,01:06,0.0,14,5a,SLT-4 2458,,Utrecht Zuilen;Maarssen;Breukelen;Abcoude;Amst...,,2019-03-26#7384,2019-03-26 00:21:00,2019-03-26 01:06:00,2019-03-26 01:06:00,2019-03-26 00:21:00,2019-03-26-2-240,260.0,2019-03-26,1.0,340.0,3.0,2.0,8.0,6.7,,1.7,0.0,0.0,0.0,0.0,1031.6,74.0,8.0,70.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-03-26,2.0,340.0,4.0,2.0,7.0,5.2,,0.9,0.0,0.0,0.0,0.0,1031.7,72.0,8.0,73.0,,5.0,0.0,0.0,0.0,0.0,0.0,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0
23609,2019-03-26,7393,7393,Amsterdam Centraal,00:28,0.0,Utrecht Centraal,01:10,0.0,4b,15,SLT-4 2442;SLT-6 2627,,Amsterdam Muiderpoort;Amsterdam Amstel;Duivend...,,2019-03-26#7393,2019-03-26 00:28:00,2019-03-26 01:10:00,2019-03-26 01:10:00,2019-03-26 00:28:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0
23612,2019-03-26,1405,1405,Utrecht Centraal,01:01,0.0,Amsterdam Centraal,01:28,0.0,7,4a,VIRM-6 8667,VIRM-6 8667,Amsterdam Bijlmer ArenA,,2019-03-26#1405,2019-03-26 01:01:00,2019-03-26 01:28:00,2019-03-26 01:28:00,2019-03-26 01:01:00,2019-03-26-2-240,260.0,2019-03-26,2.0,340.0,3.0,3.0,6.0,6.6,,1.1,0.0,0.0,0.0,0.0,1031.5,74.0,8.0,68.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-03-26,2.0,340.0,4.0,2.0,7.0,5.2,,0.9,0.0,0.0,0.0,0.0,1031.7,72.0,8.0,73.0,,5.0,0.0,0.0,0.0,0.0,0.0,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0
23615,2019-03-26,7395,7395,Amsterdam Centraal,00:57,0.0,Utrecht Centraal,01:40,0.0,2b,15,SGMM-2 2123;SGMM-3 2972,,Amsterdam Muiderpoort;Amsterdam Amstel;Duivend...,,2019-03-26#7395,2019-03-26 00:57:00,2019-03-26 01:40:00,2019-03-26 01:40:00,2019-03-26 00:57:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,26712.0,Amsterdam-Gouda,"Amsterdam Centraal - Utrecht Centraal, Amsterd...",136137141,"Abcoude,Amsterdam Bijlmer ArenA,Amsterdam Hole...","AC, ASB, ASHD, BKL",aanrijding met een persoon,person hit by a train,aanrijding met een persoon,person hit by a train,accidents,2019-03-26 09:11:52,2019-03-26 12:02:50,171.0


In [93]:
df.to_csv('../assets/data/2019-UT-ASD-Full/2019-UT-ASD-scrapped.csv')

In [56]:
%%time
count = 0
for i in range(df.shape[0]):
    print(i)
    train_time = pd.to_datetime(df.iloc[i]['Date'] + '-' + "{:02d}".format(int(df.iloc[i]['DepartureTime'][:2])))
    for j in range(df_disruptions.shape[0]):
        disruption_start = pd.to_datetime(df_disruptions.iloc[j]['start_time'].strftime('%Y-%m-%d-%H'))
        disruption_end = pd.to_datetime(df_disruptions.iloc[j]['end_time'].strftime('%Y-%m-%d-%H'))
        
        if (train_time >= disruption_start) and (train_time <= disruption_end):
            count += 1
            print(count)

0
1
2


KeyboardInterrupt: 

In [40]:
train_time = df.iloc[0]['Date'] + '-' + "{:02d}".format(int(df.iloc[50]['DepartureTime'][:2]) + 1)

In [41]:
train_time

'2018-12-31-12'

In [42]:
train_time = pd.to_datetime(train_time)

In [46]:
disruption_start = df_disruptions.iloc[0]['start_time'].strftime('%Y-%m-%d-%H')


In [48]:
disruption_start

Timestamp('2019-01-01 06:00:00')

In [47]:
disruption_start = pd.to_datetime(disruption_start)

In [18]:
train_time > disruption_start

False