In [2]:
import pandas as pd
import numpy as np
    
import xml.etree.ElementTree as ET

from functools import reduce
from datetime import datetime, timedelta

pd.set_option('display.max_columns', None)
%config IPCompleter.greedy=True

In [3]:
# load all data time chunks
dfa_1 = pd.read_csv('../assets/data/2019 UT-ASD/2019-01-01--2019-03-31 arrivals.csv')  
dfa_2 = pd.read_csv('../assets/data/2019 UT-ASD/2019-04-01--2019-06-30_arrivals.csv')  
dfa_3 = pd.read_csv('../assets/data/2019 UT-ASD/2019-07-01--2019-07-31_arrivals.csv')  
dfa_4 = pd.read_csv('../assets/data/2019 UT-ASD/2019-08-01--2019-08-31 arrivals.csv')  
dfa_5 = pd.read_csv('../assets/data/2019 UT-ASD/2019-09-01--2019-10-31_arrivals.csv') 
dfa_6 = pd.read_csv('../assets/data/2019 UT-ASD/2019-11-01--2019-11-30_arrivals.csv')  
dfa_7 = pd.read_csv('../assets/data/2019 UT-ASD/2019-12-01--2019-12-31-arrivals.csv')  

In [4]:
dfa_all = [dfa_1, dfa_2, dfa_3, dfa_4, dfa_5, dfa_6, dfa_7]
dfa_not_parsed = pd.concat(dfa_all)

In [5]:
dfa_not_parsed.shape

(29885, 3)

In [6]:
dfa_not_parsed.head()

Unnamed: 0,date,xml_obj,uuid
0,2019-01-01 00:19:01.914659+01:00,"<?xml version=""1.0"" encoding=""UTF-8""?><ns1:Put...",755ef744-0d52-11e9-b8b1-06550c001849
1,2019-01-01 00:43:02.633669+01:00,"<?xml version=""1.0"" encoding=""UTF-8""?><ns1:Put...",d01b3d84-0d55-11e9-b8b1-06550c001849
2,2019-01-01 01:20:59.756816+01:00,"<?xml version=""1.0"" encoding=""UTF-8""?><ns1:Put...",1d60a9bc-0d5b-11e9-b8b1-06550c001849
3,2019-01-01 01:28:35.627872+01:00,"<?xml version=""1.0"" encoding=""UTF-8""?><ns1:Put...",2d19105a-0d5c-11e9-b8b1-06550c001849
4,2019-01-01 01:30:22.532675+01:00,"<?xml version=""1.0"" encoding=""UTF-8""?><ns1:Put...",6cd1737c-0d5c-11e9-b8b1-06550c001849


# Parse into csv

In [7]:
def extract_station_info(data, node, prefix):
  if node:
    data[prefix + 'Code'].append(node.find('./ns2:StationCode', ns).text)
    data[prefix + 'UIC'].append(int(node.find('./ns2:UICCode', ns).text))
    data[prefix + 'Type'].append(int(node.find('./ns2:Type', ns).text))
  else:
    data[prefix + 'Code'].append(np.NaN)
    data[prefix + 'UIC'].append(np.NaN)
    data[prefix + 'Type'].append(np.NaN)

def parse_timestamp(date_string):
  return datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%S.%fZ')

def encode_list(nodes, sufix):
  UIC_codes = [node.find(sufix, ns).text for node in nodes]
  return ';'.join(UIC_codes)

In [8]:
station_amsterdam = b'<ns2:RitStation><ns2:StationCode>ASD</ns2:StationCode>'
station_utrecht = b'<ns2:RitStation><ns2:StationCode>UT</ns2:StationCode>'
source_amsterdam = b'<ns2:TreinHerkomst InfoStatus=""Gepland""><ns2:StationCode>ASD</ns2:StationCode'
source_utrecht = b'<ns2:TreinHerkomst InfoStatus=""Gepland""><ns2:StationCode>UT</ns2:StationCode>'
destination_amsterdam = b'<ns2:TreinEindBestemming InfoStatus=""Gepland""><ns2:StationCode>AMS</ns2:StationCode>'
destination_utrecht = b'<ns2:TreinEindBestemming InfoStatus=""Gepland""><ns2:StationCode>UT</ns2:StationCode>' 

In [9]:
# namespace of the xml object
ns={
    'ns1d': 'urn:ndov:cdm:trein:reisinformatie:messages:5',
    'ns1a': 'urn:ndov:cdm:trein:reisinformatie:messages:dynamischeaankomststaat:1',
    'ns2': 'urn:ndov:cdm:trein:reisinformatie:data:4',
}

# dictionary that will save the data while loading it in
def arrival_data_template():

    return {
        'ObservationTime': [],

        # Ride
        'RideId':[],
        'RideTime': [],

        # Departure station
        'DepartureStationCode': [],
        'DepartureStationUIC': [],
        'DepartureStationType': [],

        # Train
        'TrainId': [],
        'TrainType': [],
        'TrainOperator': [],

        # Actual destination
        'DestinationStationCode': [],
        'DestinationStationUIC': [],
        'DestinationStationType': [],

        # Arrival times
        'PlannedArrivalTime': [],
        'ActualArrivalTime': [],

        # Arrival platform
        'PlannedArrivalPlatform': [],
        'PlannedArrivalPlatformSuffix': [],
        'ActualArrivalPlatform': [],
        'ActualArrivalPlatformSuffix': [],

#         # Departure platforms
#         'PlannedDeparturePlatform': [],
#         'ActualDeparturePlatform': [],

#         # Stop stations
#         'PlannedStopStations': [],
#         'ActualStopStations': [],

#         # Matirial type
#         'MaterialType': [],
#         'MaterialDesignation': [],
#         'MaterialLength': [],

#         'ChangeType': [],

    }

def departure_data_template():
    return {
    'ObservationTime': [],

    # Ride
    'RideId':[],
    'RideTime': [],

    # Departure station
    'DepartureStationCode': [],
    'DepartureStationUIC': [],
    'DepartureStationType': [],

    # Train
    'TrainId': [],
    'TrainType': [],
    'TrainOperator': [],

    # Actual destination
    'DestinationStationCode': [],
    'DestinationStationUIC': [],
    'DestinationStationType': [],

    # Departure times
    'PlannedDepartureTime': [],
    'ActualDepartureTime': [],

    # Departure platform
    'PlannedDeparturePlatform': [],
    'PlannedDeparturePlatformSuffix': [],
    'ActualDeparturePlatform': [],
    'ActualDeparturePlatformSuffix': [],

    # Departure platforms
    'PlannedDeparturePlatform': [],
    'ActualDeparturePlatform': [],

    # Stop stations
    'PlannedStopStations': [],
    'ActualStopStations': [],

    # Matirial type
    'MaterialType': [],
    'MaterialDesignation': [],
    'MaterialLength': [],
    
    'HasChange': [],
    'ChangeType': [],

}

In [11]:
arrival_data = arrival_data_template()

# loop through every day

for root_text, observation_time in dfa_not_parsed[['xml_obj', 'date']].values:  
            
    arrival_data['ObservationTime'].append(observation_time)

    root = ET.fromstring(root_text)

    # Ride
    ride = root.find('./ns2:ReisInformatieProductDAS/ns2:DynamischeAankomstStaat', ns)

    arrival_data['RideId'].append(int(ride.find('./ns2:RitId', ns).text))
    arrival_data['RideTime'].append(root.find('./ns2:ReisInformatieProductDAS/ns2:RIPAdministratie/ns2:ReisInformatieTijdstip', ns).text)

    # Stations
    train = ride.find('./ns2:TreinAankomst', ns)

    destinationStation = ride.find('./ns2:RitStation', ns)
    departureStation = train.find('./ns2:TreinHerkomst[@InfoStatus="Gepland"]',ns)
    
    extract_station_info(arrival_data, departureStation, 'DepartureStation')
    extract_station_info(arrival_data, destinationStation, 'DestinationStation')

    # Arrival times
    arrival_data['ActualArrivalTime'].append(train.find('./ns2:AankomstTijd[@InfoStatus="Actueel"]', ns).text)
    arrival_data['PlannedArrivalTime'].append(train.find('./ns2:AankomstTijd[@InfoStatus="Gepland"]', ns).text)
    
    # Train
    arrival_data['TrainId'].append(train.find('./ns2:TreinNummer', ns).text)
    arrival_data['TrainType'].append(train.find('./ns2:TreinSoort', ns).text)
    arrival_data['TrainOperator'].append(train.find('./ns2:Vervoerder', ns).text)

    arrival_data['PlannedArrivalPlatform'].append(train.find('./ns2:TreinAankomstSpoor[@InfoStatus="Gepland"]/ns2:SpoorNummer', ns).text)
    suffix = train.find('./ns2:TreinAankomstSpoor[@InfoStatus="Gepland"]/ns2:SpoorFase', ns)
    arrival_data['PlannedArrivalPlatformSuffix'].append(None if suffix is None else suffix.text)

    arrival_data['ActualArrivalPlatform'].append(train.find('./ns2:TreinAankomstSpoor[@InfoStatus="Actueel"]/ns2:SpoorNummer', ns).text)
    suffix = train.find('./ns2:TreinAankomstSpoor[@InfoStatus="Actueel"]/ns2:SpoorFase', ns)
    arrival_data['ActualArrivalPlatformSuffix'].append(None if suffix is None else suffix.text)
    
# Convert the dictionary to a dataframe
dfa = pd.DataFrame(arrival_data)    
print(dfa.shape) # show the file size kinda

(29885, 18)


In [16]:
dfa.to_csv('../assets/data/2019 UT-ASD/2019-parsed-data.csv')

# Merge data with weather

In [17]:
dfa = pd.read_csv('../assets/data/2019 UT-ASD/2019-parsed-data.csv')

In [22]:
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-260,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-240,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-240,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-240,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-240,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [23]:
dfa.shape

(7571, 71)

In [24]:
if 'RideInstance' in dfa:
    del dfa['RideInstance']
dfa.insert(0, 'RideInstance', dfa.RideId.astype(str) + '#' + dfa.RideTime )

In [25]:
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-260,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-240,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-240,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-240,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-240,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [26]:
dfa.sort_values(['RideInstance', 'ActualArrivalTime'], ascending = True, inplace = True)
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-260,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-240,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-240,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-240,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-240,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [27]:
# drop near duplicate values
dfa.drop_duplicates('RideInstance', keep='last', inplace = True)

In [29]:
dfa['RideTime'].dtype
dfa['RideTime'] = pd.to_datetime(dfa['RideTime'])

In [30]:
dfa_weather_U = pd.read_csv('../assets/data/de_bilt_weather_2019.csv')  
dfa_weather_A = pd.read_csv('../assets/data/schiphol_weather_2019.csv') 

In [31]:
dfa_weather_A['WeatherKey'] = dfa_weather_A['Timestamp'].astype(str) + '-' + dfa_weather_A['Hour'].astype(str) + '-' + dfa_weather_A['StationCode'].astype(str)
dfa_weather_U['WeatherKey'] = dfa_weather_U['Timestamp'].astype(str) + '-' + dfa_weather_U['Hour'].astype(str)+ '-'+ dfa_weather_U['StationCode'].astype(str)

In [32]:
print(dfa_weather_A.head())
print(dfa_weather_U.head())

   StationCode   Timestamp  Hour  WindDir  WindHour  WindSpeed  MaxWindSpeed  \
0          240  2019-01-01     1      260       7.0        6.0          10.0   
1          240  2019-01-01     2      260       7.0        7.0          10.0   
2          240  2019-01-01     3      250       7.0        7.0          11.0   
3          240  2019-01-01     4      250       7.0        8.0          11.0   
4          240  2019-01-01     5      260       9.0        9.0          12.0   

   Temperature  MinTemp10M  DewPointTemp  SunshineDur  Radiation  PrecipDur  \
0          8.5         NaN           5.7          0.0          0        0.0   
1          8.6         NaN           5.1          0.0          0        0.0   
2          8.5         NaN           5.1          0.0          0        0.0   
3          8.2         NaN           5.4          0.0          0        0.0   
4          8.7         NaN           5.8          0.0          0        0.0   

   PrecipHour  AirPressure  Visibility  Clou

In [62]:
def add_uic_code(destination, weather, weather_station):
    if destination == 'ASD':
        return f'{weather[0:13]}-{weather_station[0]}'
    if destination == 'UT':
        return f'{weather[0:13]}-{weather_station[1]}'

In [63]:
# this function renames the weather columns with a specific prefix
def rename_weather(suffix, df):
    return df.rename(columns = {
        'StationCode' : f'{suffix}WeatherStationCode',
        'Timestamp' : f'{suffix}Timestamp',  # date (YYYY=year,MM=month,DD=day)
        'Hour' : f'{suffix}Hour' ,  # time (HH uur/hour, UT. 12 UT=13 MET, 14 MEZT. Hourly division 05 runs from 04.00 UT to 5.00 UT
        'WindDir' : f'{suffix}WindDir' ,  # Mean wind direction (in degrees) during the 10-minute period preceding the time of observation (360=north, 90=east, 180=south, 270=west, 0=calm 990=variable)
        'WindHour' : f'{suffix}WindHour' ,  # Hourly mean wind speed (in 0.1 m/s)
        'WindSpeed' : f'{suffix}WindSpeed' , # Mean wind speed (in 0.1 m/s) during the 10-minute period preceding the time of observation  
        'MaxWindSpeed' : f'{suffix}MaxWindSpeed' ,  # Maximum wind gust (in 0.1 m/s) during the hourly division
        'Temperature' : f'{suffix}Temperature' ,  # Temperature (in 0.1 degrees Celsius) at 1.50 m at the time of observation  
        'MinTemp10M' : f'{suffix}MinTemp10M' ,  # Minimum temperature (in 0.1 degrees Celsius) at 0.1 m in the preceding 6-hour period
        'DewPointTemp' : f'{suffix}DewPointTemp' ,  # Dew point temperature (in 0.1 degrees Celsius) at 1.50 m at the time of observation 
        'SunshineDur' : f'{suffix}SunshineDur' ,  # Sunshine duration (in 0.1 hour) during the hourly division, calculated from global radiation (-1 for <0.05 hour) 
        'Radiation' : f'{suffix}Radiation' ,  # Global radiation (in J/cm2) during the hourly division    
        'PrecipDur' : f'{suffix}PrecipDur',  # Precipitation duration (in 0.1 hour) during the hourly division
        'PrecipHour' : f'{suffix}PrecipHour',  # Hourly precipitation amount (in 0.1 mm) (-1 for <0.05 mm)
        'AirPressure' : f'{suffix}AirPressure',  # Air pressure (in 0.1 hPa) reduced to mean sea level, at the time of observation 
        'Visibility' : f'{suffix}Visibility',  # Horizontal visibility at the time of observation (0=less than 100m, 1=100-200m, 2=200-300m,..., 49=4900-5000m, 50=5-6km, 56=6-7km, 57=7-8km, ..., 79=29-30km, 80=30-35km, 81=35-40km,..., 89=more than 70km)
        'Cloudines' : f'{suffix}Cloudiness',  # Cloud cover (in octants), at the time of observation (9=sky invisible)
        'Humidity': f'{suffix}Humidity',  # Relative atmospheric humidity (in percents) at 1.50 m at the time of observation
        'WeatherCode' : f'{suffix}WeatherCode',  # Present weather code (00-99), description for the hourly division. (http://bibliotheek.knmi.nl/scholierenpdf/weercodes_Nederland)
        'WeatherCodeIndicator': f'{suffix}WeatherCodeIndicator',  # Indicator present weather code (1=manned and recorded (using code from visual observations), 2,3=manned and omitted (no significant weather phenomenon to report, not available), 4=automatically recorded (using code from visual observations), 5,6=automatically omitted (no significant weather phenomenon to report, not available), 7=automatically set (using code from automated observations) 
        'Fog' : f'{suffix}Fog',  # Fog 0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation
        'Rain' : f'{suffix}Rain',  # Rainfall 0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation
        'Snow' : f'{suffix}Snow',  # Snow 0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation
        'Thunder' : f'{suffix}Thunder',  # Thunder  0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation 
        'IceFormation' : f'{suffix}IceFormation'  # Ice formation 0=no occurrence, 1=occurred during the preceding hour and/or at the time of observation
    })

In [64]:
dfa['WeatherKey'] = dfa['RideTime'].dt.strftime('%Y-%m-%d') + '-' + (dfa['RideTime'].dt.hour.astype(int) + 1).astype(str)

In [65]:
dfa['WeatherKey'] = dfa.apply(lambda row: add_uic_code(row['DestinationStationCode'], row['WeatherKey'], [240, 260]), axis=1)

In [66]:
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,DestinationWeatherStationCode.1,DestinationTimestamp.1,DestinationHour.1,DestinationWindDir.1,DestinationWindHour.1,DestinationWindSpeed.1,DestinationMaxWindSpeed.1,DestinationTemperature.1,DestinationMinTemp10M.1,DestinationDewPointTemp.1,DestinationSunshineDur.1,DestinationRadiation.1,DestinationPrecipDur.1,DestinationPrecipHour.1,DestinationAirPressure.1,DestinationVisibility.1,DestinationCloudiness.1,DestinationHumidity.1,DestinationWeatherCode.1,DestinationWeatherCodeIndicator.1,DestinationFog.1,DestinationRain.1,DestinationSnow.1,DestinationThunder.1,DestinationIceFormation.1
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-240,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-06-22,20.0,50.0,6.0,7.0,10.0,18.3,,13.0,0.5,12.0,0.0,0.0,1020.5,75.0,0.0,71.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-260,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-02,8.0,340.0,4.0,4.0,10.0,4.9,,1.9,0.0,0.0,0.1,0.1,1036.8,70.0,8.0,81.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-260,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-03,8.0,310.0,2.0,1.0,3.0,2.6,,1.5,0.0,0.0,0.0,0.0,1039.9,65.0,8.0,92.0,,5.0,0.0,0.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-260,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-04,8.0,270.0,3.0,2.0,6.0,4.6,,2.4,0.0,0.0,0.0,0.0,1039.1,64.0,8.0,85.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-260,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-05,8.0,320.0,4.0,4.0,8.0,7.7,,4.4,0.0,0.0,0.0,0.0,1032.4,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [39]:
 dfa.sort_values('WeatherKey').head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation
2165,1405#2019-01-01 00:29:00+00:00,2019-01-01 01:20:59.756816+01:00,1405,2019-01-01 00:29:00+00:00,UT,8400621,6,1405,Intercity,NS,ASD,8400058,6,2019-01-01 00:29:00+00:00,2019-01-01T00:30:49.000Z,2,a,2,a,2019-01-01-1-240,240,2019-01-01,1,260,7.0,6.0,10.0,8.5,,5.7,0.0,0,0.0,0.0,1030.6,69.0,8.0,82,,5,0.0,0.0,0.0,0.0,0.0,2018-12-31 23:59:00+00:00,,,,,,,,,,,,,,,,,,,,,,,,,
1832,1402#2019-01-01 00:53:00+00:00,2019-01-01 01:51:40.442110+01:00,1402,2019-01-01 00:53:00+00:00,ASD,8400058,6,1402,Intercity,NS,UT,8400621,6,2019-01-01 00:53:00+00:00,2019-01-01T00:53:00.000Z,15,,15,,2019-01-01-1-260,260,2019-01-01,1,230,3.0,2.0,6.0,7.9,,6.3,0.0,0,0.0,0.0,1030.9,36.0,8.0,89,10.0,7,0.0,0.0,0.0,0.0,0.0,2019-01-01 00:23:00+00:00,240.0,2019-01-01,1.0,260.0,7.0,6.0,10.0,8.5,,5.7,0.0,0.0,0.0,0.0,1030.6,69.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0
582,123#2019-01-01 10:03:00+00:00,2019-01-01 11:02:25.017695+01:00,123,2019-01-01 10:03:00+00:00,ASD,8400058,6,123,ICE International,NS,UT,8400621,6,2019-01-01 10:03:00+00:00,2019-01-01T10:05:11.000Z,18,,18,,2019-01-01-11-260,260,2019-01-01,11,300,3.0,3.0,9.0,8.9,,7.5,0.0,24,0.0,-0.1,1028.2,63.0,8.0,90,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-01 09:33:00+00:00,240.0,2019-01-01,10.0,320.0,6.0,5.0,9.0,8.7,,6.5,0.0,10.0,0.4,0.1,1028.5,64.0,8.0,86.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0
892,125#2019-01-01 12:03:00+00:00,2019-01-01 13:07:58.375403+01:00,125,2019-01-01 12:03:00+00:00,ASD,8400058,6,125,ICE International,NS,UT,8400621,6,2019-01-01 12:03:00+00:00,2019-01-01T12:03:00.000Z,18,,18,,2019-01-01-13-260,260,2019-01-01,13,310,4.0,5.0,9.0,9.0,,4.5,0.2,46,0.0,0.0,1028.7,64.0,7.0,73,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01 11:33:00+00:00,240.0,2019-01-01,12.0,310.0,7.0,8.0,11.0,8.5,7.8,4.9,0.0,44.0,0.0,0.0,1028.5,62.0,8.0,77.0,2.0,7.0,0.0,0.0,0.0,0.0,0.0
1202,127#2019-01-01 14:03:00+00:00,2019-01-01 15:07:55.997611+01:00,127,2019-01-01 14:03:00+00:00,ASD,8400058,6,127,ICE International,NS,UT,8400621,6,2019-01-01 14:03:00+00:00,2019-01-01T14:03:00.000Z,18,,18,,2019-01-01-15-260,260,2019-01-01,15,320,5.0,5.0,11.0,7.9,,4.1,0.2,10,0.0,-0.1,1029.6,69.0,7.0,77,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-01 13:33:00+00:00,240.0,2019-01-01,14.0,320.0,8.0,9.0,14.0,8.4,,3.7,0.3,29.0,0.0,0.0,1029.5,68.0,8.0,72.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [40]:
# combine weather in one big weather dataset
result_weather = pd.concat([dfa_weather_A, dfa_weather_U])

In [41]:
result_weather

Unnamed: 0,StationCode,Timestamp,Hour,WindDir,WindHour,WindSpeed,MaxWindSpeed,Temperature,MinTemp10M,DewPointTemp,SunshineDur,Radiation,PrecipDur,PrecipHour,AirPressure,Visibility,Cloudines,Humidity,WeatherCode,WeatherCodeIndicator,Fog,Rain,Snow,Thunder,IceFormation,WeatherKey
0,240,2019-01-01,1,260,7.0,6.0,10.0,8.5,,5.7,0.0,0,0.0,0.0,1030.6,69.0,8.0,82,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01-1-240
1,240,2019-01-01,2,260,7.0,7.0,10.0,8.6,,5.1,0.0,0,0.0,0.0,1030.1,75.0,8.0,78,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01-2-240
2,240,2019-01-01,3,250,7.0,7.0,11.0,8.5,,5.1,0.0,0,0.0,0.0,1029.5,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01-3-240
3,240,2019-01-01,4,250,7.0,8.0,11.0,8.2,,5.4,0.0,0,0.0,0.0,1029.0,70.0,8.0,82,,5,0.0,0.0,0.0,0.0,0.0,2019-01-01-4-240
4,240,2019-01-01,5,260,9.0,9.0,12.0,8.7,,5.8,0.0,0,0.0,-0.1,1028.3,70.0,8.0,81,22.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-01-5-240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,260,2019-12-31,20,130,2.0,3.0,5.0,4.0,,3.7,0.0,0,0.0,0.0,1035.9,12.0,8.0,98,20.0,7,1.0,0.0,0.0,0.0,0.0,2019-12-31-20-260
8756,260,2019-12-31,21,140,3.0,4.0,6.0,4.0,,3.5,0.0,0,0.0,0.0,1035.6,18.0,8.0,96,10.0,7,0.0,0.0,0.0,0.0,0.0,2019-12-31-21-260
8757,260,2019-12-31,22,120,4.0,4.0,7.0,3.5,,3.2,0.0,0,0.0,0.0,1035.6,15.0,8.0,97,10.0,7,0.0,0.0,0.0,0.0,0.0,2019-12-31-22-260
8758,260,2019-12-31,23,130,2.0,2.0,5.0,1.6,,1.4,0.0,0,0.0,0.0,1035.2,7.0,1.0,98,34.0,7,1.0,0.0,0.0,0.0,0.0,2019-12-31-23-260


In [42]:
# merge weather with train data
result = pd.merge(dfa,result_weather, on = 'WeatherKey', how='left')
result

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,StationCode,Timestamp,Hour,WindDir,WindHour,WindSpeed,MaxWindSpeed,Temperature,MinTemp10M,DewPointTemp,SunshineDur,Radiation,PrecipDur,PrecipHour,AirPressure,Visibility,Cloudines,Humidity,WeatherCode,WeatherCodeIndicator,Fog,Rain,Snow,Thunder,IceFormation
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-240,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-260,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-260,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-260,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-260,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7566,871#2019-09-29 17:31:00+00:00,2019-09-29 19:33:54.061500+02:00,871,2019-09-29 17:31:00+00:00,ASD,8400058,6,871,Intercity,NS,UT,8400621,6,2019-09-29 17:31:00+00:00,2019-09-29T17:31:00.000Z,18,,18,,2019-09-29-18-260,260,2019-09-29,18,230,5.0,5.0,11.0,16.6,14.9,15.1,0.0,1,0.4,0.1,993.0,65.0,8.0,91,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-09-29 17:01:00+00:00,240.0,2019-09-29,18.0,240.0,9.0,8.0,15.0,17.1,15.2,15.2,0.0,1.0,0.2,0.2,992.3,61.0,8.0,88.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0,260,2019-09-29,18,230,5.0,5.0,11.0,16.6,14.9,15.1,0.0,1,0.4,0.1,993.0,65.0,8.0,91,23.0,7,0.0,1.0,0.0,0.0,0.0
7567,873#2019-06-30 18:01:00+00:00,2019-06-30 20:03:54.979467+02:00,873,2019-06-30 18:01:00+00:00,ASD,8400058,6,873,Intercity,NS,UT,8400621,6,2019-06-30 18:01:00+00:00,2019-06-30T18:01:00.000Z,18,,18,,2019-06-30-19-260,260,2019-06-30,19,280,3.0,2.0,6.0,21.1,,11.8,1.0,51,0.0,0.0,1016.2,83.0,0.0,55,,5,0.0,0.0,0.0,0.0,0.0,2019-06-30 17:31:00+00:00,240.0,2019-06-30,18.0,230.0,5.0,5.0,8.0,20.3,19.4,12.2,1.0,106.0,0.0,0.0,1016.3,82.0,0.0,59.0,,5.0,0.0,0.0,0.0,0.0,0.0,260,2019-06-30,19,280,3.0,2.0,6.0,21.1,,11.8,1.0,51,0.0,0.0,1016.2,83.0,0.0,55,,5,0.0,0.0,0.0,0.0,0.0
7568,873#2019-09-29 18:01:00+00:00,2019-09-29 20:04:15.945643+02:00,873,2019-09-29 18:01:00+00:00,ASD,8400058,6,873,Intercity,NS,UT,8400621,6,2019-09-29 18:01:00+00:00,2019-09-29T18:01:00.000Z,18,,18,,2019-09-29-19-260,260,2019-09-29,19,230,5.0,5.0,11.0,16.4,,14.3,0.0,0,0.0,0.0,992.9,68.0,8.0,87,,5,0.0,0.0,0.0,0.0,0.0,2019-09-29 17:31:00+00:00,240.0,2019-09-29,18.0,240.0,9.0,8.0,15.0,17.1,15.2,15.2,0.0,1.0,0.2,0.2,992.3,61.0,8.0,88.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0,260,2019-09-29,19,230,5.0,5.0,11.0,16.4,,14.3,0.0,0,0.0,0.0,992.9,68.0,8.0,87,,5,0.0,0.0,0.0,0.0,0.0
7569,875#2019-06-30 18:31:00+00:00,2019-06-30 20:34:50.259980+02:00,875,2019-06-30 18:31:00+00:00,ASD,8400058,6,875,Intercity,NS,UT,8400621,6,2019-06-30 18:31:00+00:00,2019-06-30T18:33:15.000Z,18,,18,,2019-06-30-19-260,260,2019-06-30,19,280,3.0,2.0,6.0,21.1,,11.8,1.0,51,0.0,0.0,1016.2,83.0,0.0,55,,5,0.0,0.0,0.0,0.0,0.0,2019-06-30 18:01:00+00:00,240.0,2019-06-30,19.0,270.0,4.0,4.0,7.0,19.2,,12.7,1.0,52.0,0.0,0.0,1016.1,80.0,0.0,66.0,,5.0,0.0,0.0,0.0,0.0,0.0,260,2019-06-30,19,280,3.0,2.0,6.0,21.1,,11.8,1.0,51,0.0,0.0,1016.2,83.0,0.0,55,,5,0.0,0.0,0.0,0.0,0.0


In [45]:
dfa.iloc[1].to_frame()

Unnamed: 0,1
RideInstance,105#2019-01-02 07:33:00+00:00
ObservationTime,2019-01-02 08:14:53.961020+01:00
RideId,105
RideTime,2019-01-02 07:33:00+00:00
DepartureStationCode,ASD
...,...
DestinationFog,0
DestinationRain,1
DestinationSnow,0
DestinationThunder,0


In [46]:
dfa = rename_weather('Departure', dfa)

In [47]:
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-240,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-260,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-260,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-260,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-260,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0


# Create the departure time field

In [48]:
dfa['PlannedArrivalTime'] = pd.to_datetime(dfa['PlannedArrivalTime'])

In [49]:
dfa['PlannedDepartureTime'] = dfa['PlannedArrivalTime'] - timedelta(minutes = 30)

In [50]:
dfa['PlannedDepartureTime']

0      2019-06-22 19:28:00+00:00
1      2019-01-02 07:03:00+00:00
2      2019-01-03 07:03:00+00:00
3      2019-01-04 07:03:00+00:00
4      2019-01-05 07:02:00+00:00
                  ...           
7566   2019-09-29 17:01:00+00:00
7567   2019-06-30 17:31:00+00:00
7568   2019-09-29 17:31:00+00:00
7569   2019-06-30 18:01:00+00:00
7570   2019-09-29 18:01:00+00:00
Name: PlannedDepartureTime, Length: 7571, dtype: datetime64[ns, UTC]

In [51]:
dfa['PlannedArrivalTime'] 

0      2019-06-22 19:58:00+00:00
1      2019-01-02 07:33:00+00:00
2      2019-01-03 07:33:00+00:00
3      2019-01-04 07:33:00+00:00
4      2019-01-05 07:32:00+00:00
                  ...           
7566   2019-09-29 17:31:00+00:00
7567   2019-06-30 18:01:00+00:00
7568   2019-09-29 18:01:00+00:00
7569   2019-06-30 18:31:00+00:00
7570   2019-09-29 18:31:00+00:00
Name: PlannedArrivalTime, Length: 7571, dtype: datetime64[ns, UTC]

In [52]:
# create weather key for departure
dfa['WeatherKey'] = dfa['PlannedDepartureTime'].dt.strftime('%Y-%m-%d') + '-' + (dfa['PlannedDepartureTime'].dt.hour.astype(int) + 1).astype(str)

In [53]:
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [68]:
dfa['WeatherKey'] = dfa.apply(lambda row: add_uic_code(row['DestinationStationCode'], row['WeatherKey'], [260, 240]), axis=1)

In [69]:
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,DestinationWeatherStationCode.1,DestinationTimestamp.1,DestinationHour.1,DestinationWindDir.1,DestinationWindHour.1,DestinationWindSpeed.1,DestinationMaxWindSpeed.1,DestinationTemperature.1,DestinationMinTemp10M.1,DestinationDewPointTemp.1,DestinationSunshineDur.1,DestinationRadiation.1,DestinationPrecipDur.1,DestinationPrecipHour.1,DestinationAirPressure.1,DestinationVisibility.1,DestinationCloudiness.1,DestinationHumidity.1,DestinationWeatherCode.1,DestinationWeatherCodeIndicator.1,DestinationFog.1,DestinationRain.1,DestinationSnow.1,DestinationThunder.1,DestinationIceFormation.1
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-260,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-06-22,20.0,50.0,6.0,7.0,10.0,18.3,,13.0,0.5,12.0,0.0,0.0,1020.5,75.0,0.0,71.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8--240,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-02,8.0,340.0,4.0,4.0,10.0,4.9,,1.9,0.0,0.0,0.1,0.1,1036.8,70.0,8.0,81.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8--240,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-03,8.0,310.0,2.0,1.0,3.0,2.6,,1.5,0.0,0.0,0.0,0.0,1039.9,65.0,8.0,92.0,,5.0,0.0,0.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8--240,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-04,8.0,270.0,3.0,2.0,6.0,4.6,,2.4,0.0,0.0,0.0,0.0,1039.1,64.0,8.0,85.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8--240,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-05,8.0,320.0,4.0,4.0,8.0,7.7,,4.4,0.0,0.0,0.0,0.0,1032.4,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [56]:
result = pd.merge(dfa,result_weather, on = 'WeatherKey', how='left')
result

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,StationCode,Timestamp,Hour,WindDir,WindHour,WindSpeed,MaxWindSpeed,Temperature,MinTemp10M,DewPointTemp,SunshineDur,Radiation,PrecipDur,PrecipHour,AirPressure,Visibility,Cloudines,Humidity,WeatherCode,WeatherCodeIndicator,Fog,Rain,Snow,Thunder,IceFormation
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-240,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-06-22,20.0,50.0,6.0,7.0,10.0,18.3,,13.0,0.5,12.0,0.0,0.0,1020.5,75.0,0.0,71.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-260,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-02,8.0,340.0,4.0,4.0,10.0,4.9,,1.9,0.0,0.0,0.1,0.1,1036.8,70.0,8.0,81.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-260,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-03,8.0,310.0,2.0,1.0,3.0,2.6,,1.5,0.0,0.0,0.0,0.0,1039.9,65.0,8.0,92.0,,5.0,0.0,0.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-260,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-04,8.0,270.0,3.0,2.0,6.0,4.6,,2.4,0.0,0.0,0.0,0.0,1039.1,64.0,8.0,85.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-260,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-05,8.0,320.0,4.0,4.0,8.0,7.7,,4.4,0.0,0.0,0.0,0.0,1032.4,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7566,871#2019-09-29 17:31:00+00:00,2019-09-29 19:33:54.061500+02:00,871,2019-09-29 17:31:00+00:00,ASD,8400058,6,871,Intercity,NS,UT,8400621,6,2019-09-29 17:31:00+00:00,2019-09-29T17:31:00.000Z,18,,18,,2019-09-29-18-260,260,2019-09-29,18,230,5.0,5.0,11.0,16.6,14.9,15.1,0.0,1,0.4,0.1,993.0,65.0,8.0,91,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-09-29 17:01:00+00:00,240.0,2019-09-29,18.0,240.0,9.0,8.0,15.0,17.1,15.2,15.2,0.0,1.0,0.2,0.2,992.3,61.0,8.0,88.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-09-29,18.0,230.0,5.0,5.0,11.0,16.6,14.9,15.1,0.0,1.0,0.4,0.1,993.0,65.0,8.0,91.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0
7567,873#2019-06-30 18:01:00+00:00,2019-06-30 20:03:54.979467+02:00,873,2019-06-30 18:01:00+00:00,ASD,8400058,6,873,Intercity,NS,UT,8400621,6,2019-06-30 18:01:00+00:00,2019-06-30T18:01:00.000Z,18,,18,,2019-06-30-18-260,260,2019-06-30,19,280,3.0,2.0,6.0,21.1,,11.8,1.0,51,0.0,0.0,1016.2,83.0,0.0,55,,5,0.0,0.0,0.0,0.0,0.0,2019-06-30 17:31:00+00:00,240.0,2019-06-30,18.0,230.0,5.0,5.0,8.0,20.3,19.4,12.2,1.0,106.0,0.0,0.0,1016.3,82.0,0.0,59.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-06-30,18.0,280.0,3.0,3.0,6.0,22.3,22.4,12.7,1.0,103.0,0.0,0.0,1016.3,82.0,0.0,54.0,,5.0,0.0,0.0,0.0,0.0,0.0
7568,873#2019-09-29 18:01:00+00:00,2019-09-29 20:04:15.945643+02:00,873,2019-09-29 18:01:00+00:00,ASD,8400058,6,873,Intercity,NS,UT,8400621,6,2019-09-29 18:01:00+00:00,2019-09-29T18:01:00.000Z,18,,18,,2019-09-29-18-260,260,2019-09-29,19,230,5.0,5.0,11.0,16.4,,14.3,0.0,0,0.0,0.0,992.9,68.0,8.0,87,,5,0.0,0.0,0.0,0.0,0.0,2019-09-29 17:31:00+00:00,240.0,2019-09-29,18.0,240.0,9.0,8.0,15.0,17.1,15.2,15.2,0.0,1.0,0.2,0.2,992.3,61.0,8.0,88.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-09-29,18.0,230.0,5.0,5.0,11.0,16.6,14.9,15.1,0.0,1.0,0.4,0.1,993.0,65.0,8.0,91.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0
7569,875#2019-06-30 18:31:00+00:00,2019-06-30 20:34:50.259980+02:00,875,2019-06-30 18:31:00+00:00,ASD,8400058,6,875,Intercity,NS,UT,8400621,6,2019-06-30 18:31:00+00:00,2019-06-30T18:33:15.000Z,18,,18,,2019-06-30-19-260,260,2019-06-30,19,280,3.0,2.0,6.0,21.1,,11.8,1.0,51,0.0,0.0,1016.2,83.0,0.0,55,,5,0.0,0.0,0.0,0.0,0.0,2019-06-30 18:01:00+00:00,240.0,2019-06-30,19.0,270.0,4.0,4.0,7.0,19.2,,12.7,1.0,52.0,0.0,0.0,1016.1,80.0,0.0,66.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-06-30,19.0,280.0,3.0,2.0,6.0,21.1,,11.8,1.0,51.0,0.0,0.0,1016.2,83.0,0.0,55.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [57]:
dfa = result

In [58]:
dfa = rename_weather('Destination', dfa)

In [59]:
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,DestinationWeatherStationCode.1,DestinationTimestamp.1,DestinationHour.1,DestinationWindDir.1,DestinationWindHour.1,DestinationWindSpeed.1,DestinationMaxWindSpeed.1,DestinationTemperature.1,DestinationMinTemp10M.1,DestinationDewPointTemp.1,DestinationSunshineDur.1,DestinationRadiation.1,DestinationPrecipDur.1,DestinationPrecipHour.1,DestinationAirPressure.1,DestinationVisibility.1,DestinationCloudiness.1,DestinationHumidity.1,DestinationWeatherCode.1,DestinationWeatherCodeIndicator.1,DestinationFog.1,DestinationRain.1,DestinationSnow.1,DestinationThunder.1,DestinationIceFormation.1
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-240,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-06-22,20.0,50.0,6.0,7.0,10.0,18.3,,13.0,0.5,12.0,0.0,0.0,1020.5,75.0,0.0,71.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-260,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-02,8.0,340.0,4.0,4.0,10.0,4.9,,1.9,0.0,0.0,0.1,0.1,1036.8,70.0,8.0,81.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-260,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-03,8.0,310.0,2.0,1.0,3.0,2.6,,1.5,0.0,0.0,0.0,0.0,1039.9,65.0,8.0,92.0,,5.0,0.0,0.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-260,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-04,8.0,270.0,3.0,2.0,6.0,4.6,,2.4,0.0,0.0,0.0,0.0,1039.1,64.0,8.0,85.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-260,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-05,8.0,320.0,4.0,4.0,8.0,7.7,,4.4,0.0,0.0,0.0,0.0,1032.4,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [61]:
dfa.head()

Unnamed: 0,RideInstance,ObservationTime,RideId,RideTime,DepartureStationCode,DepartureStationUIC,DepartureStationType,TrainId,TrainType,TrainOperator,DestinationStationCode,DestinationStationUIC,DestinationStationType,PlannedArrivalTime,ActualArrivalTime,PlannedArrivalPlatform,PlannedArrivalPlatformSuffix,ActualArrivalPlatform,ActualArrivalPlatformSuffix,WeatherKey,DepartureWeatherStationCode,DepartureTimestamp,DepartureHour,DepartureWindDir,DepartureWindHour,DepartureWindSpeed,DepartureMaxWindSpeed,DepartureTemperature,DepartureMinTemp10M,DepartureDewPointTemp,DepartureSunshineDur,DepartureRadiation,DeparturePrecipDur,DeparturePrecipHour,DepartureAirPressure,DepartureVisibility,DepartureCloudiness,DepartureHumidity,DepartureWeatherCode,DepartureWeatherCodeIndicator,DepartureFog,DepartureRain,DepartureSnow,DepartureThunder,DepartureIceFormation,PlannedDepartureTime,DestinationWeatherStationCode,DestinationTimestamp,DestinationHour,DestinationWindDir,DestinationWindHour,DestinationWindSpeed,DestinationMaxWindSpeed,DestinationTemperature,DestinationMinTemp10M,DestinationDewPointTemp,DestinationSunshineDur,DestinationRadiation,DestinationPrecipDur,DestinationPrecipHour,DestinationAirPressure,DestinationVisibility,DestinationCloudiness,DestinationHumidity,DestinationWeatherCode,DestinationWeatherCodeIndicator,DestinationFog,DestinationRain,DestinationSnow,DestinationThunder,DestinationIceFormation,DestinationWeatherStationCode.1,DestinationTimestamp.1,DestinationHour.1,DestinationWindDir.1,DestinationWindHour.1,DestinationWindSpeed.1,DestinationMaxWindSpeed.1,DestinationTemperature.1,DestinationMinTemp10M.1,DestinationDewPointTemp.1,DestinationSunshineDur.1,DestinationRadiation.1,DestinationPrecipDur.1,DestinationPrecipHour.1,DestinationAirPressure.1,DestinationVisibility.1,DestinationCloudiness.1,DestinationHumidity.1,DestinationWeatherCode.1,DestinationWeatherCodeIndicator.1,DestinationFog.1,DestinationRain.1,DestinationSnow.1,DestinationThunder.1,DestinationIceFormation.1
0,104#2019-06-22 19:58:00+00:00,2019-06-22 21:16:20.243615+02:00,104,2019-06-22 19:58:00+00:00,UT,8400621,6,300104,ICE International,NS,ASD,8400058,6,2019-06-22 19:58:00+00:00,2019-06-22T19:58:00.000Z,7,b,7,b,2019-06-22-20-240,240,2019-06-22,20,50,6.0,7.0,10.0,18.3,,13.0,0.5,12,0.0,0.0,1020.5,75.0,0.0,71,,5,0.0,0.0,0.0,0.0,0.0,2019-06-22 19:28:00+00:00,260.0,2019-06-22,20.0,20.0,4.0,4.0,7.0,19.0,,12.2,0.3,11.0,0.0,0.0,1020.2,83.0,0.0,64.0,,5.0,0.0,0.0,0.0,0.0,0.0,240.0,2019-06-22,20.0,50.0,6.0,7.0,10.0,18.3,,13.0,0.5,12.0,0.0,0.0,1020.5,75.0,0.0,71.0,,5.0,0.0,0.0,0.0,0.0,0.0
1,105#2019-01-02 07:33:00+00:00,2019-01-02 08:14:53.961020+01:00,105,2019-01-02 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-02 07:33:00+00:00,2019-01-02T07:34:02.000Z,18,,18,,2019-01-02-8-260,260,2019-01-02,8,340,4.0,4.0,10.0,4.9,,1.9,0.0,0,0.1,0.1,1036.8,70.0,8.0,81,23.0,7,0.0,1.0,0.0,0.0,0.0,2019-01-02 07:03:00+00:00,240.0,2019-01-02,8.0,350.0,6.0,6.0,11.0,5.0,,2.3,0.0,0.0,0.0,-0.1,1037.2,66.0,7.0,83.0,81.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-02,8.0,340.0,4.0,4.0,10.0,4.9,,1.9,0.0,0.0,0.1,0.1,1036.8,70.0,8.0,81.0,23.0,7.0,0.0,1.0,0.0,0.0,0.0
2,105#2019-01-03 07:33:00+00:00,2019-01-03 08:49:03.881832+01:00,105,2019-01-03 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-03 07:33:00+00:00,2019-01-03T07:33:00.000Z,18,,18,,2019-01-03-8-260,260,2019-01-03,8,310,2.0,1.0,3.0,2.6,,1.5,0.0,0,0.0,0.0,1039.9,65.0,8.0,92,,5,0.0,0.0,0.0,0.0,0.0,2019-01-03 07:03:00+00:00,240.0,2019-01-03,8.0,310.0,3.0,2.0,4.0,1.8,,0.9,0.0,0.0,0.0,-0.1,1040.0,65.0,5.0,93.0,22.0,7.0,0.0,1.0,0.0,0.0,0.0,260.0,2019-01-03,8.0,310.0,2.0,1.0,3.0,2.6,,1.5,0.0,0.0,0.0,0.0,1039.9,65.0,8.0,92.0,,5.0,0.0,0.0,0.0,0.0,0.0
3,105#2019-01-04 07:33:00+00:00,2019-01-04 08:37:38.178122+01:00,105,2019-01-04 07:33:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-04 07:33:00+00:00,2019-01-04T07:34:13.000Z,18,,18,,2019-01-04-8-260,260,2019-01-04,8,270,3.0,2.0,6.0,4.6,,2.4,0.0,0,0.0,0.0,1039.1,64.0,8.0,85,,5,0.0,0.0,0.0,0.0,0.0,2019-01-04 07:03:00+00:00,240.0,2019-01-04,8.0,290.0,6.0,5.0,8.0,5.0,,2.3,0.0,0.0,0.0,0.0,1039.0,70.0,8.0,82.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-04,8.0,270.0,3.0,2.0,6.0,4.6,,2.4,0.0,0.0,0.0,0.0,1039.1,64.0,8.0,85.0,,5.0,0.0,0.0,0.0,0.0,0.0
4,105#2019-01-05 07:32:00+00:00,2019-01-05 08:40:55.071092+01:00,105,2019-01-05 07:32:00+00:00,ASD,8400058,6,105,ICE International,NS,UT,8400621,6,2019-01-05 07:32:00+00:00,2019-01-05T07:38:37.000Z,19,,19,,2019-01-05-8-260,260,2019-01-05,8,320,4.0,4.0,8.0,7.7,,4.4,0.0,0,0.0,0.0,1032.4,75.0,8.0,79,,5,0.0,0.0,0.0,0.0,0.0,2019-01-05 07:02:00+00:00,240.0,2019-01-05,8.0,320.0,8.0,8.0,12.0,7.8,,4.4,0.0,1.0,0.0,0.0,1032.8,75.0,8.0,78.0,,5.0,0.0,0.0,0.0,0.0,0.0,260.0,2019-01-05,8.0,320.0,4.0,4.0,8.0,7.7,,4.4,0.0,0.0,0.0,0.0,1032.4,75.0,8.0,79.0,,5.0,0.0,0.0,0.0,0.0,0.0


In [428]:
# save the dataset with the weate
dfa.to_csv('../assets/data/2019 UT-ASD/2019-parsed-data.csv')