In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.tree import export_graphviz
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
import datetime

In [2]:
filename = "models_SVR/models.pkl"
stopfractionsname = "stopfractions.pkl"
models = pickle.load(open(filename, "rb"))

In [3]:
def get_prediction(route,direction,datestring,stopA,stopB):
    datetime_departure = datetime.datetime.strptime(datestring,'%Y-%m-%d %H:%M:%S')
            
    forecast = find_forecast(datetime_departure)        
    
    prediction = predict_travel_time(route,direction,datetime_departure,forecast['feels_like'],forecast['weather_main'],stopA,stopB)
    
    return int(prediction)

def find_forecast(datetime_departure):
    forecast = get_5day_forcast()
    for i,f in enumerate(forecast['list']):
        if datetime.datetime.fromtimestamp(f['dt']) > datetime_departure:
            f_out = {'feels_like':f['main']['feels_like']-273.15,'weather_main':f['weather'][0]['main']}
            return f_out
    avg_monthly_weather = pickle.load(open('avg_monthly_weather.pkl','rb'))
    f_out = {
        'feels_like':avg_monthly_weather['feels_like'].iloc[datetime_departure.month],
        'weather_main':avg_monthly_weather['weather_main'].iloc[datetime_departure.month]
    }
    return f_out
    
def predict_travel_time(route,direction,datetime_departure,feels_like,weather_main,stopA,stopB):
    departure_times = pickle.load(open('departure_times.pkl','rb'))
    
    stopfractionsname = "stopfractions.pkl"
    stopfractions = pickle.load(open(stopfractionsname,'rb'))

    total_triptime = predict_total_triptime(route,direction,datetime_departure,feels_like,weather_main)
    
    stopA_frac = stopfractions[stopfractions['LINEID']==route][stopfractions['DIRECTION']==direction][stopfractions['STOPPOINTID']==stopA]['TRIP_FRAC'].iloc[0]
    stopB_frac = stopfractions[stopfractions['LINEID']==route][stopfractions['DIRECTION']==direction][stopfractions['STOPPOINTID']==stopB]['TRIP_FRAC'].iloc[0]
    fraction = stopB_frac - stopA_frac
    
    schedule = [datetime_departure.replace(hour=0, minute=0, second=0, microsecond=0) + datetime.timedelta(seconds=int(t)) for t in departure_times[route][direction][datetime_departure.weekday()]]

    predictions = predict_total_triptimes(route,direction,schedule,feels_like,weather_main)
    for i, start_time in enumerate(schedule):
        if start_time + datetime.timedelta(seconds=predictions[i]*stopA_frac) > datetime_departure:
            prediction = predictions[i]*fraction
            return prediction
            
def predict_fractional_triptime(route,direction,datetime_departure,feels_like,weather_main,stopA,stopB):
    stopfractionsname = "stopfractions.pkl"
    stopfractions = pickle.load(open(stopfractionsname,'rb'))

    total_triptime = predict_total_triptime(route,direction,datetime_departure,feels_like,weather_main)
    
    stopA_frac = stopfractions[stopfractions['LINEID']==route][stopfractions['DIRECTION']==direction][stopfractions['STOPPOINTID']==stopA]['TRIP_FRAC'].iloc[0]
    stopB_frac = stopfractions[stopfractions['LINEID']==route][stopfractions['DIRECTION']==direction][stopfractions['STOPPOINTID']==stopB]['TRIP_FRAC'].iloc[0]
    
    fraction = stopB_frac - stopA_frac
    return total_triptime*fraction
    
def predict_total_triptime(route,direction,datetime_departure,feels_like,weather_main):
    model = find_model(route,direction)
    
    feels_like = normalize(feels_like,model['max_feels_like'],model['min_feels_like'])
    
    time_in_seconds = datetime_departure.hour*60*60 + datetime_departure.minute*60 + datetime_departure.second
    plannedtime_dep_cos = np.cos(2*np.pi*time_in_seconds/(60*60*24))
    plannedtime_dep_sin = np.sin(2*np.pi*time_in_seconds/(60*60*24))
    month = datetime_departure.month
    weekday = datetime_departure.weekday()
    weekend = True if weekday in [5,6] else False
    rush_hour = (25200 < time_in_seconds < 32400) or (57600 < time_in_seconds < 68400)
    
    params = {
        'RUSH_HOUR_' + str(rush_hour):1,
        'PLANNEDTIME_DEP_COS':plannedtime_dep_cos,
        'PLANNEDTIME_DEP_SIN':plannedtime_dep_sin,
        'feels_like':feels_like,
        'MONTH_' + str(month):1,
        'WEEKDAY_' + str(weekday):1,
        'WEEKEND_' + str(weekend):1,
        'weather_main_' + str(weather_main):1
    }

    all_params = [feature for feature in model['columns']]
    for feature in all_params:
        if feature not in params:
            params[feature] = 0
    
    d = pd.DataFrame(params,index=[0],columns=model['columns'])
    
#     prediction = unnormalize(model['model'].predict(d), model['max_trip'], model['min_trip'])
    prediction = model['model'].predict(d)
    return prediction

def predict_total_triptimes(route,direction,datetimes_departure,feels_like,weather_main):
    model = find_model(route,direction)
    
    feels_like = normalize(feels_like,model['max_feels_like'],model['min_feels_like'])
    
    params = {feature:[0]*len(datetimes_departure) for feature in model['columns']}
    
    for i, dt in enumerate(datetimes_departure):
        time_in_seconds = dt.hour*60*60 + dt.minute*60 + dt.second
        plannedtime_dep_cos = np.cos(2*np.pi*time_in_seconds/(60*60*24))
        plannedtime_dep_sin = np.sin(2*np.pi*time_in_seconds/(60*60*24))
        month = dt.month
        weekday = dt.weekday()
        weekend = True if weekday in [5,6] else False
        rush_hour = (25200 < time_in_seconds < 32400) or (57600 < time_in_seconds < 68400)
    
        params['RUSH_HOUR_' + str(rush_hour)][i] = 1
        params['PLANNEDTIME_DEP_COS'][i] = plannedtime_dep_cos
        params['PLANNEDTIME_DEP_SIN'][i] = plannedtime_dep_sin
        params['feels_like'][i] = 1
        params['MONTH_' + str(month)][i] = 1
        params['WEEKDAY_' + str(weekday)][i] = 1
        params['WEEKEND_' + str(weekend)][i] = 1
        params['weather_main_' + str(weather_main)][i] = 1
    
    d = pd.DataFrame(params,index=[i for i in range(len(datetimes_departure))],columns=model['columns'])
    
    predictions = unnormalize(model['model'].predict(d), model['max_trip'], model['min_trip'])
    
    return predictions

def unnormalize(x_norm,x_max,x_min):
    return x_norm*(x_max - x_min) + x_min

def normalize(x,x_max,x_min):
    return (x - x_min)/(x_max - x_min)

def find_model(route,direction):
    filename = "models_SVR/models.pkl"
    models = pickle.load(open(filename, "rb"))
    for model in models:
        if model['route'] == route and model['direction'] == direction:
            return model
        
def get_5day_forcast():
    import requests

    """Scrapes weather data from openweathermap.org"""
    
    API_KEY = "16fb93e92d3bd8aefd9b647c1a8f6acf"
    URL = "http://api.openweathermap.org/data/2.5/forecast?q=Dublin,ie&appid=" + API_KEY
    
#     time = get_datetime()
    try:
        r = requests.get(url = URL)
    except: 
        print("Scraping error: data not collected.")
        return None
    
    weather = r.json()
    return weather

In [9]:
d = predict_total_triptime('46A',1,today,0,'Drizzle')

In [5]:
d

NameError: name 'd' is not defined

In [8]:
import datetime

today = datetime.datetime.now()

In [7]:
weekday = today.weekday()

In [242]:
weekday

3

In [243]:
weekend = True if weekday in [5,6] else False

In [244]:
weekend

False

In [245]:
str(True)

'True'

In [9]:
# predict_fractional_triptime('1',1,today,0,'Drizzle',48,44)

predict_travel_time('1',1,today,20,'Drizzle',48,44)



325.8525642888964

In [94]:
dftrips = pd.read_csv('../../data/rt_trips_DB_2018.txt',sep=';')

dftrips.head()

Unnamed: 0,DATASOURCE,DAYOFSERVICE,TRIPID,LINEID,ROUTEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,ACTUALTIME_ARR,ACTUALTIME_DEP,BASIN,TENDERLOT,SUPPRESSED,JUSTIFICATIONID,LASTUPDATE,NOTE
0,DB,07-FEB-18 00:00:00,6253783,68,68_80,1,87245,84600,87524.0,84600.0,BasDef,,,,28-FEB-18 12:05:11,",2967409,"
1,DB,07-FEB-18 00:00:00,6262138,25B,25B_271,2,30517,26460,32752.0,,BasDef,,,,28-FEB-18 12:05:11,",2580260,"
2,DB,07-FEB-18 00:00:00,6254942,45A,45A_70,2,35512,32100,36329.0,32082.0,BasDef,,,,28-FEB-18 12:05:11,",2448968,"
3,DB,07-FEB-18 00:00:00,6259460,25A,25A_273,1,57261,54420,58463.0,54443.0,BasDef,,,,28-FEB-18 12:05:11,",3094242,"
4,DB,07-FEB-18 00:00:00,6253175,14,14_15,1,85383,81600,84682.0,81608.0,BasDef,,,,28-FEB-18 12:05:11,",2526331,"


In [96]:
dftrips.drop(['BASIN','TENDERLOT','SUPPRESSED','JUSTIFICATIONID','LASTUPDATE','NOTE','ACTUALTIME_ARR','ACTUALTIME_DEP','ROUTEID','TRIPID','DATASOURCE'],axis=1,inplace=True)

In [101]:
dftrips['DAYOFSERVICE'] = dftrips['DAYOFSERVICE'].astype('datetime64')
dftrips['LINEID'] = dftrips['LINEID'].astype('category')
dftrips['DIRECTION'] = dftrips['DIRECTION'].astype('category')

dftrips['WEEKDAY'] = dftrips['DAYOFSERVICE'].dt.weekday

In [102]:
dftrips.head()

Unnamed: 0,DAYOFSERVICE,LINEID,DIRECTION,PLANNEDTIME_ARR,PLANNEDTIME_DEP,WEEKDAY
0,2018-02-07,68,1,87245,84600,2
1,2018-02-07,25B,2,30517,26460,2
2,2018-02-07,45A,2,35512,32100,2
3,2018-02-07,25A,1,57261,54420,2
4,2018-02-07,14,1,85383,81600,2


In [115]:
departure_times = dict()
for route in dftrips['LINEID'].unique():
    departure_times[route] = dict()
    for direction in dftrips[dftrips['LINEID'] == route]['DIRECTION'].unique():
        departure_times[route][direction] = dict()
        for weekday in dftrips[dftrips['LINEID'] == route][dftrips['DIRECTION'] == direction]['WEEKDAY'].unique():
            departure_times[route][direction][weekday] = list()
            for plannedtime_dep in dftrips[dftrips['LINEID'] == route][dftrips['DIRECTION'] == direction][dftrips['WEEKDAY'] == weekday]['PLANNEDTIME_DEP'].unique():
                departure_times[route][direction][weekday] += [plannedtime_dep]
            departure_times[route][direction][weekday] = sorted(departure_times[route][direction][weekday])
            


  
  


In [323]:
pickle.dump(departure_times,open('departure_times.pkl','wb'))

In [122]:
print(departure_times['46A'][1][0])

[18900, 21960, 22080, 22500, 22560, 23040, 23520, 23580, 24000, 24120, 24480, 24660, 24960, 25200, 25440, 25740, 25920, 26100, 26280, 26400, 26820, 26880, 27000, 27360, 27600, 27840, 27900, 28320, 28440, 28800, 28980, 29280, 29400, 29520, 29760, 30000, 30060, 30240, 30600, 30720, 31140, 31200, 31680, 31800, 32100, 32160, 32220, 32400, 32640, 32760, 33000, 33120, 33300, 33600, 33840, 34080, 34200, 34380, 34560, 34800, 34920, 35040, 35100, 35400, 35460, 35520, 36000, 36480, 36540, 36600, 36900, 36960, 37080, 37200, 37440, 37620, 37800, 37920, 38160, 38400, 38700, 38880, 39000, 39240, 39360, 39600, 39780, 39840, 40200, 40320, 40500, 40800, 40860, 41280, 41400, 41760, 41940, 42000, 42240, 42300, 42480, 42600, 42720, 43020, 43200, 43560, 43680, 43800, 44100, 44160, 44400, 44640, 45000, 45120, 45180, 45600, 45720, 46080, 46200, 46260, 46560, 46800, 47040, 47340, 47400, 47520, 47880, 48000, 48420, 48480, 48600, 48960, 49200, 49440, 49500, 49800, 49920, 50040, 50400, 50580, 50880, 51000, 51120

In [247]:
dt = datetime.datetime(2020,12,12,14,30)

In [249]:
forecast = get_5day_forcast()

In [250]:
forecast

{'cod': '200',
 'message': 0,
 'cnt': 40,
 'list': [{'dt': 1595505600,
   'main': {'temp': 290.35,
    'feels_like': 288.9,
    'temp_min': 290.35,
    'temp_max': 291.06,
    'pressure': 1012,
    'sea_level': 1013,
    'grnd_level': 1011,
    'humidity': 86,
    'temp_kf': -0.71},
   'weather': [{'id': 804,
     'main': 'Clouds',
     'description': 'overcast clouds',
     'icon': '04d'}],
   'clouds': {'all': 87},
   'wind': {'speed': 4.3, 'deg': 266},
   'visibility': 10000,
   'pop': 0.79,
   'sys': {'pod': 'd'},
   'dt_txt': '2020-07-23 12:00:00'},
  {'dt': 1595516400,
   'main': {'temp': 290.96,
    'feels_like': 289.72,
    'temp_min': 290.96,
    'temp_max': 291.31,
    'pressure': 1012,
    'sea_level': 1012,
    'grnd_level': 1010,
    'humidity': 81,
    'temp_kf': -0.35},
   'weather': [{'id': 500,
     'main': 'Rain',
     'description': 'light rain',
     'icon': '10d'}],
   'clouds': {'all': 90},
   'wind': {'speed': 3.83, 'deg': 281},
   'visibility': 10000,
   'pop': 

In [258]:
print(forecast['list'][0]['dt'])
print(forecast['list'][0]['main']['feels_like']-273.15)
print(forecast['list'][0]['weather'][0]['main'])

1595505600
15.75
Clouds


In [271]:
find_forecast(today)

{'feels_like': 16.230000000000018, 'weather_main': 'Clouds'}

In [272]:
today

datetime.datetime(2020, 7, 23, 11, 16, 33, 536202)

In [273]:
other_day = datetime.datetime(2020,7,28,11,16,33,536202)

In [13]:
get_prediction('1',1,datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),48,44)



322

In [303]:
datestring = "2020-7-4 10:10:10"
datetime_departure = datetime.datetime.strptime(datestring,'%Y-%m-%d %H:%M:%S')

In [299]:
datetime_departure

datetime.datetime(2020, 7, 4, 10, 10, 10)

In [297]:
get_prediction('46')

In [11]:
datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

'2020-07-27 15:01:50'

In [30]:
get_prediction('46A',2,datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),792,768)



1066

In [18]:
stopfractionsname = "stopfractions.pkl"
stopfractions = pickle.load(open(stopfractionsname,'rb'))

In [29]:
for stop in stopfractions[stopfractions['LINEID']=='46A'][stopfractions['DIRECTION'] == 1]['STOPPOINTID'].unique():
    print(stop)
print()
for stop in stopfractions[stopfractions['LINEID']=='46A'][stopfractions['DIRECTION'] == 2]['STOPPOINTID'].unique():
    print(stop)

81
264
334
406
435
747
756
757
758
759
760
761
762
763
767
807
808
809
810
811
812
813
814
817
818
819
842
845
846
847
848
2007
2008
2009
2010
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2031
2032
2033
2034
2035
2036
2039
2795
4565
4566
4567
4571
4636
4962
6059
7353

2
192
278
320
461
768
769
770
771
772
773
774
775
776
777
786
792
795
796
797
798
799
800
801
802
803
804
805
806
807
906
907
908
909
2039
2040
2041
2042
2043
2044
2045
2046
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2068
2069
2070
2084
4568
4569
4570
4727
4728
4962
7491
7513
7658
7689


  """Entry point for launching an IPython kernel.
  after removing the cwd from sys.path.


In [35]:
get_prediction('39A',2,datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),793,767)



IndexError: single positional indexer is out-of-bounds

In [36]:
get_prediction('145', 2, "2020-07-27 15:33:46", 792, 768)



1002