In [200]:
import pandas as pd

In [201]:
# Connect to database

from sqlalchemy import create_engine

URI="localhost"
PORT="5433"
DB = "jetaDb"
USER = "postgres"
PASSWORD = "00001234"
  
engine = create_engine("postgresql://{}:{}@{}:{}/{}".format(USER, PASSWORD, URI, PORT, DB), echo=True)

In [203]:
# Variables from query

start = "765"
destination = "1035"
lineid = "17"
start_time = "1530008000"

# http://localhost:8000/main/journeytime?source=767&destination=1914&lineid=39A&time=1530008000

# Rain should be gotten from api or else table

rain = 0.5

In [204]:
# Transform time into variables required from model

import time
from datetime import datetime, timedelta
from pytz import timezone

# Get Irish timezone (utc + daylight saving time (DST))
irish_time = timezone('Europe/Dublin')
print("Irish Standard Time: ", irish_time)

# Get unixtime as datetime object
dt_time = datetime.fromtimestamp(int(start_time), irish_time)
print("Datetime: ", dt_time)

# Get day of week -> Mon: 0, Sun: 6
weekday = dt_time.weekday()
print("Weekday: ", weekday)

# Create list with desired weekday filled.
week_dummies = [0] * 7
week_dummies[weekday] = 1
del week_dummies[2] # Delete wednesday - not included in model due to dummy var trap
print("Week dummies: ", week_dummies)

# Get arrivaltime in seconds
date = dt_time.date()
date_unixtime = time.mktime(date.timetuple())
seconds_since_midnight = int(time.mktime((dt_time - timedelta(seconds = date_unixtime)).timetuple()))
print("Seconds since midnight (arrival time): ", seconds_since_midnight)


Irish Standard Time:  Europe/Dublin
Datetime:  2018-06-26 11:13:20+01:00
Weekday:  1
Week dummies:  [0, 1, 0, 0, 0, 0]
Seconds since midnight (arrival time):  40400


In [205]:
# Model inputs

model_inputs = [seconds_since_midnight, rain] + week_dummies
model_inputs

[40400, 0.5, 0, 1, 0, 0, 0, 0]

In [206]:
# Get stop lists associated with this lineid, start stop and end stop

sql = """

SELECT * 
FROM main_routes 
WHERE routeid IN (
    SELECT UNNEST(routes) 
    FROM main_lines 
    WHERE main_lines.lineid = '{0}'
) 
AND '{1}' = ANY(main_routes.stopids) 
AND '{2}' = ANY(main_routes.stopids) 
;

""".format(lineid, start, destination)

routes = pd.read_sql(sql, engine)

# Lines.objects.all
# Routes.objects.all(routeid )

2018-07-09 13:55:02,793 INFO sqlalchemy.engine.base.Engine select version()
2018-07-09 13:55:02,796 INFO sqlalchemy.engine.base.Engine {}
2018-07-09 13:55:02,809 INFO sqlalchemy.engine.base.Engine select current_schema()
2018-07-09 13:55:02,811 INFO sqlalchemy.engine.base.Engine {}
2018-07-09 13:55:02,818 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2018-07-09 13:55:02,820 INFO sqlalchemy.engine.base.Engine {}
2018-07-09 13:55:02,824 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2018-07-09 13:55:02,826 INFO sqlalchemy.engine.base.Engine {}
2018-07-09 13:55:02,832 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2018-07-09 13:55:02,834 INFO sqlalchemy.engine.base.Engine {}
2018-07-09 13:55:02,844 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
20

In [207]:
routes

Unnamed: 0,routeid,direction,stopids
0,17_15,2,"[3085, 3082, 3083, 430, 431, 432, 433, 434, 20..."
1,17_17,2,"[4391, 765, 867, 869, 870, 873, 874, 875, 876,..."
2,17_18,2,"[4391, 765, 867, 869, 870, 873, 874, 875, 876,..."


In [208]:
if routes.shape[0] > 1:
    print("Error: multiple possible routes.")
    print(routes)

Error: multiple possible routes.
  routeid  direction                                            stopids
0   17_15          2  [3085, 3082, 3083, 430, 431, 432, 433, 434, 20...
1   17_17          2  [4391, 765, 867, 869, 870, 873, 874, 875, 876,...
2   17_18          2  [4391, 765, 867, 869, 870, 873, 874, 875, 876,...


In [209]:
# Convert list of stopids to list

stop_list = routes['stopids'].tolist()[0]
print(stop_list)

[3085, 3082, 3083, 430, 431, 432, 433, 434, 2068, 2069, 2070, 2084, 4391, 765, 867, 869, 870, 873, 874, 875, 876, 3011, 2820, 2822, 2823, 5032, 2867, 1035, 1314, 1316, 1315, 1317, 1318, 1319, 1281, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1086, 1087, 1088, 1089, 1090, 2413, 2414, 2415, 2327, 2464, 2465, 2466, 3360, 1392, 1394, 1395, 3361, 3362, 1436, 3952, 2190, 1366, 1367]


In [210]:
# Slice list by start and destination stop

journey_stops = stop_list[stop_list.index(int(start)):(stop_list.index(int(destination))+1)]
print(journey_stops)

[765, 867, 869, 870, 873, 874, 875, 876, 3011, 2820, 2822, 2823, 5032, 2867, 1035]


In [211]:
# Change each stopid into string
stringified = list(map(str, journey_stops))

# Zip ['0', .. 'n'] and ['1', .., 'n'] into list [('0', '1'), .., ('n - 1', 'n')]
# Join tuples to make ['1_2', .., 'n-1_n']
journey_segments = [ '_'.join(x) for x in zip(stringified[0:], stringified[1:])]
print(journey_segments)

['765_867', '867_869', '869_870', '870_873', '873_874', '874_875', '875_876', '876_3011', '3011_2820', '2820_2822', '2822_2823', '2823_5032', '5032_2867', '2867_1035']


In [212]:
# Select coefficient rows with these segment ids

sql2 = """

SELECT *
FROM main_coefficients
WHERE segment = ANY(ARRAY{0})

""".format(journey_segments)

coefficients = pd.read_sql(sql2, engine)

2018-07-09 13:55:46,722 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2018-07-09 13:55:46,723 INFO sqlalchemy.engine.base.Engine {'name': "\n\nSELECT *\nFROM main_coefficients\nWHERE segment = ANY(ARRAY['765_867', '867_869', '869_870', '870_873', '873_874', '874_875', '875_876', '876_3011', '3011_2820', '2820_2822', '2822_2823', '2823_5032', '5032_2867', '2867_1035'])\n\n"}
2018-07-09 13:55:46,735 INFO sqlalchemy.engine.base.Engine 

SELECT *
FROM main_coefficients
WHERE segment = ANY(ARRAY['765_867', '867_869', '869_870', '870_873', '873_874', '874_875', '875_876', '876_3011', '3011_2820', '2820_2822', '2822_2823', '2823_5032', '5032_2867', '2867_1035'])


2018-07-09 13:55:46,736 INFO sqlalchemy.engine.base.Engine {}


In [213]:
coefficients.head(5)

Unnamed: 0,segment,intercept,arrivaltime,rain,dayofweek_Friday,dayofweek_Monday,dayofweek_Saturday,dayofweek_Sunday,dayofweek_Thursday,dayofweek_Tuesday
0,5032_2867,147.715267,-0.000223,1.381925,0.100805,-1.800103,-14.525151,-15.889108,-3.611345,-0.004672
1,870_873,30.550306,-4.7e-05,0.258867,-1.686536,0.070722,-3.08397,-1.879405,0.007767,-0.377671
2,874_875,78.153434,-0.000397,2.809119,-1.631592,-0.13098,-20.270857,-18.347582,-1.072653,-7.381753
3,867_869,43.829241,3.1e-05,-0.536599,-5.05922,-0.482129,-8.83753,-4.597139,-2.126799,-1.798628
4,2823_5032,30.768614,0.000134,1.20518,-1.167154,-3.805998,-12.43413,-13.66708,-3.629655,-2.244374


In [214]:
# Sort values by journey_segment segmentid

coefficients['segment'] = coefficients['segment'].astype("category")
coefficients['segment'].cat.set_categories(journey_segments, inplace=True)
coefficients = coefficients.sort_values(["segment"])
coefficients.head(5)

Unnamed: 0,segment,intercept,arrivaltime,rain,dayofweek_Friday,dayofweek_Monday,dayofweek_Saturday,dayofweek_Sunday,dayofweek_Thursday,dayofweek_Tuesday
9,765_867,156.686976,-0.000134,-2.035997,-0.152052,-1.243961,-14.716208,-11.77352,0.277127,0.20235
3,867_869,43.829241,3.1e-05,-0.536599,-5.05922,-0.482129,-8.83753,-4.597139,-2.126799,-1.798628
10,869_870,28.661684,4e-06,0.131681,-0.660243,-0.22372,-2.750362,-4.323592,1.036157,-1.407403
1,870_873,30.550306,-4.7e-05,0.258867,-1.686536,0.070722,-3.08397,-1.879405,0.007767,-0.377671
8,873_874,76.985275,-0.000387,4.333352,-5.395845,-4.786822,-14.946799,-10.594033,-4.262079,-6.799442


In [215]:
# Rearrange columns and set segment id as index

coefficients = coefficients[["segment", "intercept", "arrivaltime", "rain", "dayofweek_Monday", "dayofweek_Tuesday", 
                        "dayofweek_Thursday", "dayofweek_Friday", "dayofweek_Saturday", "dayofweek_Sunday"]]
coefficients = coefficients.set_index('segment')
coefficients.head(5)

Unnamed: 0_level_0,intercept,arrivaltime,rain,dayofweek_Monday,dayofweek_Tuesday,dayofweek_Thursday,dayofweek_Friday,dayofweek_Saturday,dayofweek_Sunday
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
765_867,156.686976,-0.000134,-2.035997,-1.243961,0.20235,0.277127,-0.152052,-14.716208,-11.77352
867_869,43.829241,3.1e-05,-0.536599,-0.482129,-1.798628,-2.126799,-5.05922,-8.83753,-4.597139
869_870,28.661684,4e-06,0.131681,-0.22372,-1.407403,1.036157,-0.660243,-2.750362,-4.323592
870_873,30.550306,-4.7e-05,0.258867,0.070722,-0.377671,0.007767,-1.686536,-3.08397,-1.879405
873_874,76.985275,-0.000387,4.333352,-4.786822,-6.799442,-4.262079,-5.395845,-14.946799,-10.594033


In [216]:
arrivaltime = model_inputs[0]
totaltraveltime = 0
segment_times = []

for i, rows in coefficients.iterrows():
    traveltime = (rows['intercept']
                  +(rows['arrivaltime']*arrivaltime)
                  +(rows['rain']*model_inputs[1])
                  +(rows['dayofweek_Friday']*model_inputs[2])
                  +(rows['dayofweek_Monday']*model_inputs[3])
                  +(rows['dayofweek_Saturday']*model_inputs[4])
                  +(rows['dayofweek_Sunday']*model_inputs[5])
                  +(rows['dayofweek_Thursday']*model_inputs[6])
                  +(rows['dayofweek_Tuesday']*model_inputs[7]))
    
    segment_times.append((i, round(traveltime)))
    totaltraveltime += traveltime
    arrivaltime = model_inputs[0] + totaltraveltime # initial start time + sum of previous segment times
        
print('Arrival Time:', int(arrivaltime))
print('Total Travel Time:', int(totaltraveltime))
#print('Segment Times:', segment_times)
segment_times

Arrival Time: 41261
Total Travel Time: 861


[('765_867', 149.0),
 ('867_869', 44.0),
 ('869_870', 29.0),
 ('870_873', 29.0),
 ('873_874', 59.0),
 ('874_875', 63.0),
 ('875_876', 55.0),
 ('876_3011', 68.0),
 ('3011_2820', 44.0),
 ('2820_2822', 39.0),
 ('2822_2823', 51.0),
 ('2823_5032', 33.0),
 ('5032_2867', 137.0),
 ('2867_1035', 61.0)]

In [217]:
total = 0
for i in segment_times:
    total += i[1]
    
print(total)

861.0


In [218]:
# Construct json

json_dict = {}
json_dict['arrivaltime'] = round(arrivaltime)
json_dict['totaltraveltime'] = round(totaltraveltime)
json_dict['segment_times'] = {i[0]:i[1] for i in segment_times}

# for i in segment_times:
#     json_dict['segment_times'][i[0]] = i[1]

In [219]:
json.dumps(json_dict)

'{"arrivaltime": 41261.0, "totaltraveltime": 861.0, "segment_times": {"765_867": 149.0, "867_869": 44.0, "869_870": 29.0, "870_873": 29.0, "873_874": 59.0, "874_875": 63.0, "875_876": 55.0, "876_3011": 68.0, "3011_2820": 44.0, "2820_2822": 39.0, "2822_2823": 51.0, "2823_5032": 33.0, "5032_2867": 137.0, "2867_1035": 61.0}}'