In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

import sys
# Extend the module search path with the parent directory before the two
# project imports below (presumably that is where they live -- TODO confirm).
sys.path.append("../")
import KNMIRequest as KR
import HolidayRequest as HR

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas parsing/deprecation warnings that may point at real problems.
warnings.filterwarnings('ignore')

In [2]:
# Load the training trips. `parse_dates=True` would only try to parse the
# index (an integer row id here), so the timestamp columns are named
# explicitly to get real datetime64 columns straight from the reader.
df_train = pd.read_csv(
    '../input/train.csv',
    index_col=0,
    parse_dates=['start_time', 'end_time', 'date'],
)
df_train.head()

Unnamed: 0,tripid,userid,bikeid,account,bikenumber,start_time,end_time,start_lat,start_lng,end_lat,end_lng,date
1,33838,13452,382,AT,6631000433,2019-01-01 06:46:03,2019-01-01 06:46:03,52.296065,4.787667,52.2962,4.787679,2019-01-01 06:46:00
2,33839,13452,812,AT,6631000146,2019-01-01 06:47:39,2019-01-01 06:47:39,52.295938,4.788336,52.29594,4.788557,2019-01-01 06:47:35
3,33851,13182,238,AT,6631000443,2019-01-01 14:15:36,2019-01-01 14:15:36,52.307337,4.807633,52.307336,4.80763,2019-01-01 14:15:32
4,33852,13182,238,AT,6631000443,2019-01-01 14:33:50,2019-01-01 14:33:50,52.308478,4.80997,52.308537,4.810099,2019-01-01 14:33:46
5,33867,12674,787,AI,6631000283,2019-01-01 20:11:05,2019-01-01 20:11:05,52.51034,4.725336,52.508565,4.726276,2019-01-01 20:11:01


In [3]:
# Load the test trips. `parse_dates=True` would only try to parse the
# index (an integer row id here), so the timestamp columns are named
# explicitly to get real datetime64 columns straight from the reader.
df_test = pd.read_csv(
    '../input/test.csv',
    index_col=0,
    parse_dates=['start_time', 'end_time', 'date'],
)
df_test.head()

Unnamed: 0,tripid,userid,bikeid,account,bikenumber,start_time,end_time,start_lat,start_lng,end_lat,end_lng,date
1,151899,18734,1221,AT,6631000941,2019-11-01 00:17:17,2019-11-01 00:39:04,52.285242,4.853709,52.283334,4.853159,2019-11-01 00:17:13
2,151901,14414,749,AV,6631000096,2019-11-01 01:35:09,2019-11-01 01:37:53,52.306448,4.800563,52.30646,4.800592,2019-11-01 01:35:05
3,151903,23773,758,AV,6631000273,2019-11-01 06:13:18,2019-11-01 06:16:54,52.307922,4.806427,52.307627,4.804996,2019-11-01 06:13:14
4,151905,15420,318,AT,6631000009,2019-11-01 06:39:14,2019-11-01 06:41:01,52.295016,4.790957,52.297227,4.787585,2019-11-01 06:39:10
5,151909,12504,498,AV,6631000702,2019-11-01 06:49:33,2019-11-01 06:51:17,52.294959,4.792023,52.295575,4.790326,2019-11-01 06:49:29


In [8]:
# KNMI data from the local KNMIRequest helper. The bounds look like
# YYYYMMDDHH stamps (2019-01-01 00h .. 2019-12-09 23h) -- TODO confirm.
df_KNMI = KR.get_KNMI_HH(start=2019010100, end=2019120923)

# Holiday lookup from the local HolidayRequest helper; df_prep uses it to
# build the 'School holiday' flag.
ser_holidays = HR.get_holiday_range()

# National holidays: read the CSV and add a parsed 'date' column derived
# from its 'Datum' column.
df_nat_hol = pd.read_csv('../input/Nationale Feestdagen.csv').assign(
    date=lambda frame: pd.to_datetime(frame['Datum'])
)

In [9]:
def df_prep(df, school_holidays=None, national_holidays=None, epoch=None):
    """Parse the trip timestamps of ``df`` and add calendar features in place.

    The columns ``start_time``, ``end_time`` and ``date`` are converted to
    datetimes, then these columns are added (in this order): ``weekday``,
    ``hour``, ``Friday``, ``Weekend``, ``School holiday``,
    ``National holiday`` and ``Days from epoch``.

    Parameters
    ----------
    df : pandas.DataFrame
        Trip table with ``start_time``, ``end_time`` and ``date`` columns.
        It is modified in place.
    school_holidays : collection of datetime.date, optional
        Dates flagged as school holidays. Defaults to the notebook-level
        ``ser_holidays``.
    national_holidays : collection of datetime.date, optional
        Dates flagged as national holidays. Defaults to the dates in the
        notebook-level ``df_nat_hol['date']``.
    epoch : datetime-like, optional
        Reference moment for ``Days from epoch``. Defaults to the earliest
        ``start_time`` in ``df``. Pass the same value for every frame that
        must share one scale, e.g. ``epoch=df_train['start_time'].min()``
        when preparing the test frame.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenience.
    """
    # Fall back to the notebook-level lookups so existing `df_prep(df)`
    # calls keep working unchanged.
    if school_holidays is None:
        school_holidays = ser_holidays
    if national_holidays is None:
        national_holidays = df_nat_hol['date'].dt.date

    for column in ('start_time', 'end_time', 'date'):
        df[column] = pd.to_datetime(df[column])

    # extracting extra features
    df['weekday'] = df['date'].dt.day_name()
    df['hour'] = df['date'].dt.hour

    df['Friday'] = df['weekday'] == 'Friday'
    df['Weekend'] = df['weekday'].isin(['Saturday', 'Sunday'])

    start_date = df['start_time'].dt.date
    df['School holiday'] = start_date.isin(school_holidays)
    df['National holiday'] = start_date.isin(national_holidays)

    # Count whole calendar days: both sides are truncated to midnight first.
    # Subtracting raw timestamps would count elapsed 24h periods from the
    # first trip's time of day, so trips on the same calendar day could end
    # up with different values.
    start_day = df['start_time'].dt.normalize()
    epoch_day = start_day.min() if epoch is None else pd.Timestamp(epoch).normalize()
    df['Days from epoch'] = (start_day - epoch_day).dt.days

    return df

In [10]:
# df_prep works in place: it converts the timestamp columns of df_train and
# appends the feature columns, so the frame shown by the load cell is stale
# from here on.
df_prep(df_train)
df_train.head()

Unnamed: 0,tripid,userid,bikeid,account,bikenumber,start_time,end_time,start_lat,start_lng,end_lat,end_lng,date,weekday,hour,Friday,Weekend,School holiday,National holiday,Days from epoch
1,33838,13452,382,AT,6631000433,2019-01-01 06:46:03,2019-01-01 06:46:03,52.296065,4.787667,52.2962,4.787679,2019-01-01 06:46:00,Tuesday,6,False,False,True,True,0
2,33839,13452,812,AT,6631000146,2019-01-01 06:47:39,2019-01-01 06:47:39,52.295938,4.788336,52.29594,4.788557,2019-01-01 06:47:35,Tuesday,6,False,False,True,True,0
3,33851,13182,238,AT,6631000443,2019-01-01 14:15:36,2019-01-01 14:15:36,52.307337,4.807633,52.307336,4.80763,2019-01-01 14:15:32,Tuesday,14,False,False,True,True,0
4,33852,13182,238,AT,6631000443,2019-01-01 14:33:50,2019-01-01 14:33:50,52.308478,4.80997,52.308537,4.810099,2019-01-01 14:33:46,Tuesday,14,False,False,True,True,0
5,33867,12674,787,AI,6631000283,2019-01-01 20:11:05,2019-01-01 20:11:05,52.51034,4.725336,52.508565,4.726276,2019-01-01 20:11:01,Tuesday,20,False,False,True,True,0


In [11]:
# Same in-place feature preparation for the test frame.
# NOTE(review): 'Days from epoch' is measured from this frame's own earliest
# start_time, so it restarts at 0 here and is not on the same scale as
# df_train -- confirm this is intended before using it as a model feature.
df_prep(df_test)
df_test.head()

Unnamed: 0,tripid,userid,bikeid,account,bikenumber,start_time,end_time,start_lat,start_lng,end_lat,end_lng,date,weekday,hour,Friday,Weekend,School holiday,National holiday,Days from epoch
1,151899,18734,1221,AT,6631000941,2019-11-01 00:17:17,2019-11-01 00:39:04,52.285242,4.853709,52.283334,4.853159,2019-11-01 00:17:13,Friday,0,True,False,False,False,0
2,151901,14414,749,AV,6631000096,2019-11-01 01:35:09,2019-11-01 01:37:53,52.306448,4.800563,52.30646,4.800592,2019-11-01 01:35:05,Friday,1,True,False,False,False,0
3,151903,23773,758,AV,6631000273,2019-11-01 06:13:18,2019-11-01 06:16:54,52.307922,4.806427,52.307627,4.804996,2019-11-01 06:13:14,Friday,6,True,False,False,False,0
4,151905,15420,318,AT,6631000009,2019-11-01 06:39:14,2019-11-01 06:41:01,52.295016,4.790957,52.297227,4.787585,2019-11-01 06:39:10,Friday,6,True,False,False,False,0
5,151909,12504,498,AV,6631000702,2019-11-01 06:49:33,2019-11-01 06:51:17,52.294959,4.792023,52.295575,4.790326,2019-11-01 06:49:29,Friday,6,True,False,False,False,0
