In [2]:
# Requires the third-party `holidays` package; if it is missing, run: %pip install holidays

In [3]:
import pandas as pd
import holidays
import datetime
import calendar

In [4]:
# Read the cleaned 2018 vehicles dataset into a DataFrame.
# NOTE(review): absolute, user-specific path - consider a configurable data directory.
vehicles_csv_path = '/home/faye/data/vehicles_2018_cleaned_dataset'
df = pd.read_csv(vehicles_csv_path)

In [5]:
# Parse the date columns into pandas datetime values (column-wise, in one step)
datetime_columns = ['DAYOFSERVICE', 'LASTUPDATE']
df[datetime_columns] = df[datetime_columns].apply(pd.to_datetime)

In [6]:
# Collect Ireland's 2018 public holidays (bank holidays included) as date strings
# https://towardsdatascience.com/5-minute-guide-to-detecting-holidays-in-python-c270f8479387
irish_holidays_2018 = [
    str(holiday_date)
    for holiday_date in holidays.Ireland(years=2018).keys()
]

In [7]:
# Display the collected holiday dates (stored as strings) to verify the list
irish_holidays_2018

['2018-01-01',
 '2018-12-25',
 '2018-03-17',
 '2018-03-19',
 '2018-04-02',
 '2018-05-07',
 '2018-06-04',
 '2018-08-06',
 '2018-10-29',
 '2018-12-26']

In [8]:
# Derive calendar features from DAYOFSERVICE using vectorised pandas operations
# instead of per-row Python loops; missing dates (NaT) yield 0 / NaN rather than raising.
service_day = df['DAYOFSERVICE']

# 1 when the service date is an Irish public holiday, else 0.
# irish_holidays_2018 holds 'YYYY-MM-DD' strings, so compare against the same format.
df['Is_HOLIDAY'] = service_day.dt.strftime('%Y-%m-%d').isin(irish_holidays_2018).astype('int64')
# 1 for Monday-Friday (weekday 0-4), 0 for Saturday/Sunday
df['IS_WEEKDAY'] = (service_day.dt.weekday < 5).astype('int64')
# Full day name, e.g. 'Saturday'
df['DAYOFWEEK'] = service_day.dt.day_name()
# Full month name, e.g. 'December'
df['MONTHOFSERVICE'] = service_day.dt.strftime('%B')

In [9]:
# Preview the last 10 rows to check the Is_HOLIDAY, IS_WEEKDAY, DAYOFWEEK and MONTHOFSERVICE columns
df.tail(10)

Unnamed: 0,DAYOFSERVICE,VEHICLEID,DISTANCE,MINUTES,LASTUPDATE,Is_HOLIDAY,IS_WEEKDAY,DAYOFWEEK,MONTHOFSERVICE
272612,2018-12-29,3393871,18770,0 days 00:09:56,2019-01-16 18:00:42,0,0,Saturday,December
272613,2018-12-29,3393873,85857,0 days 04:41:24,2019-01-16 18:00:42,0,0,Saturday,December
272614,2018-12-29,3393874,154631,0 days 12:13:12,2019-01-16 18:00:42,0,0,Saturday,December
272615,2018-12-29,3393875,210361,0 days 13:44:30,2019-01-16 18:00:42,0,0,Saturday,December
272616,2018-12-29,3393877,234756,0 days 16:49:18,2019-01-16 18:00:42,0,0,Saturday,December
272617,2018-12-29,3393878,264237,0 days 17:18:40,2019-01-16 18:00:42,0,0,Saturday,December
272618,2018-12-29,3394105,250335,0 days 14:30:54,2019-01-16 18:00:42,0,0,Saturday,December
272619,2018-12-29,3394109,172539,0 days 12:19:09,2019-01-16 18:00:42,0,0,Saturday,December
272620,2018-12-29,3394130,188057,0 days 10:49:08,2019-01-16 18:00:42,0,0,Saturday,December
272621,2018-12-29,3394131,291697,0 days 17:41:17,2019-01-16 18:00:42,0,0,Saturday,December


### Calculating how long it took to get to the next stop
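Each TRIPID identifies a trip that runs at a certain time of day; the same TRIPID recurs on different days, so a single journey is identified by TRIPID together with DAYOFSERVICE. To get the travel time between two consecutive stops, we subtract the departure time at the current stop from the arrival time at the next stop: **actualtime_arr at the next stop - actualtime_dep at the current stop**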

* Load the leavetimes dataset
* Group the table by TRIPID
* Sort them by PROGRNUMBER
* for id_ in tripid:
    * for stopnum in progrnumber:
        * get actualtime_arr of the next PROGRNUMBER and subtract the current actualtime_dep from it
        * add that answer to a list

In [10]:
# Read the cleaned leave-times table from its Feather file.
# NOTE(review): absolute, user-specific path - consider a configurable data directory.
leavetimes_feather_path = '/home/faye/data/leavetimes_cleaned_1.feather'
leavetimes = pd.read_feather(leavetimes_feather_path)

In [48]:
# Parse the leave-times date columns into pandas datetime values (column-wise, in one step)
datetime_columns = ['DAYOFSERVICE', 'LASTUPDATE']
leavetimes[datetime_columns] = leavetimes[datetime_columns].apply(pd.to_datetime)

In [11]:
# Order the rows by stop sequence number, then bucket them per trip id.
# NOTE(review): the same TRIPID appears on several DAYOFSERVICE dates, so grouping on
# TRIPID alone puts different days of a trip in one group - confirm this is intended.
leavetimes = leavetimes.sort_values('PROGRNUMBER')
grouped_leavetimes = leavetimes.groupby('TRIPID')

In [36]:
# Distinct trip ids and distinct service dates found in leavetimes.
# NOTE(review): each array is wrapped in its own one-element list, so `tripid` has
# exactly two items (not one item per trip) - confirm this nesting is what consumers expect.
unique_trip_ids = leavetimes['TRIPID'].unique()
unique_service_days = leavetimes['DAYOFSERVICE'].unique()
tripid = [[unique_trip_ids], [unique_service_days]]

In [28]:
# Inspect one trip: print every row recorded for the first TRIPID in the table.
# A scalar TRIPID never compares equal to the nested lists stored in `tripid`, and
# filtering the whole frame inside a per-row loop is needlessly expensive, so the
# rows are selected directly with a single boolean mask instead.
if not leavetimes.empty:
    first_trip_id = leavetimes['TRIPID'].iloc[0]
    print(leavetimes.loc[leavetimes['TRIPID'] == first_trip_id])

         DAYOFSERVICE          LASTUPDATE   TRIPID PROGRNUMBER STOPPOINTID  \
63548038   2018-07-16 2018-07-24 14:51:47  7177678           1        4606   
65966591   2018-07-24 2018-08-16 18:56:29  7177678           1        4606   
65659776   2018-07-23 2018-08-16 14:11:40  7177678           1        4606   
66792235   2018-07-27 2018-08-17 03:39:00  7177678           1        4606   
66685868   2018-07-26 2018-08-17 03:29:11  7177678           1        4606   
...               ...                 ...      ...         ...         ...   
66690561   2018-07-26 2018-08-17 03:29:11  7177678          51        2278   
68761599   2018-07-17 2018-07-25 14:14:46  7177678          51        2278   
64175180   2018-07-18 2018-07-27 22:42:56  7177678          51        2278   
64584647   2018-07-19 2018-07-28 10:09:47  7177678          51        2278   
64859246   2018-07-20 2018-08-02 16:04:37  7177678          51        2278   

         VEHICLEID JUSTIFICATIONID  SUPPRESSED  PLANNEDTIME_ARR

In [53]:
# Select all stop rows of trip 8089172 on service day 2018-09-15.
# Both TRIPID and DAYOFSERVICE are needed because a TRIPID repeats across days.
is_trip = leavetimes['TRIPID'] == 8089172
is_service_day = leavetimes['DAYOFSERVICE'] == '2018-09-15 00:00:00'
v = leavetimes.loc[is_trip & is_service_day]
# Display the selection itself; indexing with v[''] looks up a column named '' and raises KeyError.
v

Unnamed: 0,DAYOFSERVICE,LASTUPDATE,TRIPID,PROGRNUMBER,STOPPOINTID,VEHICLEID,JUSTIFICATIONID,SUPPRESSED,PLANNEDTIME_ARR,ACTUALTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_DEP
87534874,2018-09-15,2018-11-02 11:43:16,8089172,1,7391,2406868,,,38700,38729,38700,38729
87534875,2018-09-15,2018-11-02 11:43:16,8089172,2,493,2406868,,,38798,38809,38798,38809
87534876,2018-09-15,2018-11-02 11:43:16,8089172,3,494,2406868,,,38873,38931,38873,38931
87534877,2018-09-15,2018-11-02 11:43:16,8089172,4,495,2406868,,,38964,39006,38964,39021
88729483,2018-09-15,2018-11-02 11:43:16,8089172,5,400,2406868,,,39038,39056,39038,39056
87534878,2018-09-15,2018-11-02 11:43:16,8089172,6,346,2406868,,,39091,39101,39091,39341
87534879,2018-09-15,2018-11-02 11:43:16,8089172,7,317,2406868,,,39183,39387,39183,39537
87534880,2018-09-15,2018-11-02 11:43:16,8089172,8,312,2406868,,,39378,39674,39378,39700
87534881,2018-09-15,2018-11-02 11:43:16,8089172,9,1444,2406868,,,39462,39751,39462,39841
87534882,2018-09-15,2018-11-02 11:43:16,8089172,10,1445,2406868,,,39514,39938,39514,39938


In [30]:
# Preview the first 5 rows of leavetimes
leavetimes.head(5)

Unnamed: 0,DAYOFSERVICE,LASTUPDATE,TRIPID,PROGRNUMBER,STOPPOINTID,VEHICLEID,JUSTIFICATIONID,SUPPRESSED,PLANNEDTIME_ARR,ACTUALTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_DEP
63548038,2018-07-16,2018-07-24 14:51:47,7177678,1,4606,1932308,,,28800,28808,28800,28808
87534874,2018-09-15,2018-11-02 11:43:16,8089172,1,7391,2406868,,,38700,38729,38700,38729
58526528,2018-06-12,2018-06-21 08:29:43,6873220,1,4381,1001119,,,53400,53358,53400,53358
15215882,2018-02-13,2018-02-28 13:29:11,6247318,1,4843,1001238,,,82800,82753,82800,82753
87534944,2018-09-15,2018-11-02 11:43:16,8091976,1,3980,2868318,,,77700,77782,77700,77782
