In [1]:
import pandas as pd

In [2]:
# Connect to database

from sqlalchemy import create_engine

URI="localhost"
PORT="5433"
DB = "jetaDb"
USER = "postgres"
PASSWORD = "00001234"
  
engine = create_engine("postgresql://{}:{}@{}:{}/{}".format(USER, PASSWORD, URI, PORT, DB), echo=True)

  """)


In [12]:
# Variables from query

start = "1037"
destination = "669"
lineid = "14"
start_time = "1530008000"

# http://localhost:8000/main/journeytime?source=767&destination=1914&lineid=39A&time=1530008000

# Rain should be gotten from api or else table

rain = 0.5

In [13]:
# Transform time into variables required from model

import time
from datetime import datetime, timedelta
from pytz import timezone

# Get Irish timezone (utc + daylight saving time (DST))
irish_time = timezone('Europe/Dublin')
print("Irish Standard Time: ", irish_time)

# Get unixtime as datetime object
dt_time = datetime.fromtimestamp(int(start_time), irish_time)
print("Datetime: ", dt_time)

# Get day of week -> Mon: 0, Sun: 6
weekday = dt_time.weekday()
print("Weekday: ", weekday)

# Create list with desired weekday filled.
week_dummies = [0] * 7
week_dummies[weekday] = 1
del week_dummies[2] # Delete wednesday - not included in model due to dummy var trap
print("Week dummies: ", week_dummies)

# Get arrivaltime in seconds
date = dt_time.date()
date_unixtime = time.mktime(date.timetuple())
seconds_since_midnight = int(time.mktime((dt_time - timedelta(seconds = date_unixtime)).timetuple()))
print("Seconds since midnight (arrival time): ", seconds_since_midnight)


Irish Standard Time:  Europe/Dublin
Datetime:  2018-06-26 11:13:20+01:00
Weekday:  1
Week dummies:  [0, 1, 0, 0, 0, 0]
Seconds since midnight (arrival time):  40400


In [14]:
# Model inputs

model_inputs = [seconds_since_midnight, rain] + week_dummies
model_inputs

[40400, 0.5, 0, 1, 0, 0, 0, 0]

In [15]:
# Get stop lists associated with this lineid, start stop and end stop

sql = """

SELECT * 
FROM main_routes 
WHERE routeid IN (
    SELECT UNNEST(routes) 
    FROM main_lines 
    WHERE main_lines.lineid = '{0}'
) 
AND '{1}' = ANY(main_routes.stopids) 
AND '{2}' = ANY(main_routes.stopids) 
;

""".format(lineid, start, destination)

routes = pd.read_sql(sql, engine)

# Lines.objects.all
# Routes.objects.all(routeid )

2018-07-19 16:17:28,242 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2018-07-19 16:17:28,244 INFO sqlalchemy.engine.base.Engine {'name': "\n\nSELECT * \nFROM main_routes \nWHERE routeid IN (\n    SELECT UNNEST(routes) \n    FROM main_lines \n    WHERE main_lines.lineid = '14'\n) \nAND '1037' = ANY(main_routes.stopids) \nAND '669' = ANY(main_routes.stopids) \n;\n\n"}
2018-07-19 16:17:28,251 INFO sqlalchemy.engine.base.Engine 

SELECT * 
FROM main_routes 
WHERE routeid IN (
    SELECT UNNEST(routes) 
    FROM main_lines 
    WHERE main_lines.lineid = '14'
) 
AND '1037' = ANY(main_routes.stopids) 
AND '669' = ANY(main_routes.stopids) 
;


2018-07-19 16:17:28,253 INFO sqlalchemy.engine.base.Engine {}


In [16]:
routes

Unnamed: 0,routeid,direction,stopids,lineid
0,14_16,2,"[6041, 2826, 2842, 2843, 2844, 2845, 2846, 284...",14


In [17]:
if routes.shape[0] > 1:
    print("Error: multiple possible routes.")
    print(routes)

In [18]:
# Convert list of stopids to list

stop_list = routes['stopids'].tolist()[0]
print(stop_list)

[6041, 2826, 2842, 2843, 2844, 2845, 2846, 2847, 4397, 2848, 2849, 2850, 2851, 2852, 4707, 4708, 4336, 1051, 1052, 1053, 1054, 1036, 4709, 1037, 1038, 2998, 2999, 3000, 3001, 3002, 1166, 1167, 1168, 1169, 1170, 1069, 1070, 1071, 4528, 1072, 7577, 1353, 1354, 7589, 7578, 7582, 297, 496, 7490, 497, 497, 515, 515, 516, 4384, 516, 4384, 519, 519, 521, 522, 521, 523, 7659, 522, 523, 669, 7659, 670, 523, 669, 669, 670, 671, 672, 671, 670, 4382, 672, 671, 4382, 672, 1185, 1185, 4382, 1186, 1185, 1186, 1187, 1187, 1186, 1188, 216, 1187, 1188, 1189, 1188, 217, 216, 216, 217, 242, 217, 242, 243, 242, 244, 243, 245, 244, 243, 246, 244, 245, 245, 248, 246, 246, 248, 248]


In [19]:
# Slice list by start and destination stop

journey_stops = stop_list[stop_list.index(int(start)):(stop_list.index(int(destination))+1)]
print(journey_stops)

[1037, 1038, 2998, 2999, 3000, 3001, 3002, 1166, 1167, 1168, 1169, 1170, 1069, 1070, 1071, 4528, 1072, 7577, 1353, 1354, 7589, 7578, 7582, 297, 496, 7490, 497, 497, 515, 515, 516, 4384, 516, 4384, 519, 519, 521, 522, 521, 523, 7659, 522, 523, 669]


In [20]:
# Change each stopid into string
stringified = list(map(str, journey_stops))

# Zip ['0', .. 'n'] and ['1', .., 'n'] into list [('0', '1'), .., ('n - 1', 'n')]
# Join tuples to make ['1_2', .., 'n-1_n']
journey_segments = [ '_'.join(x) for x in zip(stringified[0:], stringified[1:])]
print(journey_segments)

['1037_1038', '1038_2998', '2998_2999', '2999_3000', '3000_3001', '3001_3002', '3002_1166', '1166_1167', '1167_1168', '1168_1169', '1169_1170', '1170_1069', '1069_1070', '1070_1071', '1071_4528', '4528_1072', '1072_7577', '7577_1353', '1353_1354', '1354_7589', '7589_7578', '7578_7582', '7582_297', '297_496', '496_7490', '7490_497', '497_497', '497_515', '515_515', '515_516', '516_4384', '4384_516', '516_4384', '4384_519', '519_519', '519_521', '521_522', '522_521', '521_523', '523_7659', '7659_522', '522_523', '523_669']


In [21]:
# Select coefficient rows with these segment ids

sql2 = """

SELECT *
FROM main_coefficients
WHERE segment = ANY(ARRAY{0})

""".format(journey_segments)

coefficients = pd.read_sql(sql2, engine)

2018-07-19 16:17:37,246 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2018-07-19 16:17:37,248 INFO sqlalchemy.engine.base.Engine {'name': "\n\nSELECT *\nFROM main_coefficients\nWHERE segment = ANY(ARRAY['1037_1038', '1038_2998', '2998_2999', '2999_3000', '3000_3001', '3001_3002', '3002_1 ... (296 characters truncated) ... _4384', '4384_516', '516_4384', '4384_519', '519_519', '519_521', '521_522', '522_521', '521_523', '523_7659', '7659_522', '522_523', '523_669'])\n\n"}
2018-07-19 16:17:37,304 INFO sqlalchemy.engine.base.Engine 

SELECT *
FROM main_coefficients
WHERE segment = ANY(ARRAY['1037_1038', '1038_2998', '2998_2999', '2999_3000', '3000_3001', '3001_3002', '3002_1166', '1166_1167', '1167_1168', '1168_1169', '1169_1170', '1170_1069', '1069_1070', '1070_1071', '1071_4528', '4528_1072', '1072_7577', '7577_1353', '1353_1354', '1354_7589', '7589_7578', '75

In [24]:
coefficients

Unnamed: 0,segment,intercept,arrivaltime,rain,fri,mon,sat,sun,thu,tue
0,496_7490,31.970619,0.000222,-1.472018,-1.100677,-1.056602,-3.639345,-5.263058,-0.914075,1.125332
1,297_496,146.259796,0.000106,-0.242413,-1.734347,-0.876709,-2.227975,-5.359356,0.169455,-0.847426
2,3000_3001,70.008387,-0.000301,0.58243,2.288158,-1.269739,-8.016978,-7.546336,1.48027,0.69847
3,497_515,89.164583,0.00024,1.125525,-1.723923,-0.842273,-17.046423,-15.62677,1.404663,1.981157
4,516_4384,51.985758,2.6e-05,-0.34324,-0.663003,-2.120607,-8.416413,-8.662695,-1.060889,-0.454478
5,4528_1072,121.303684,-0.000738,1.83224,-1.868188,-4.485772,-12.464389,-11.97774,-1.902826,-0.304012
6,1170_1069,93.934436,-0.000502,1.566209,-0.089142,-5.004358,-6.669353,-7.658335,-2.374778,-2.066961
7,7577_1353,65.096152,2.8e-05,1.463132,2.961382,-5.443738,-11.141825,-17.47453,-3.435599,-1.493497
8,1168_1169,43.555511,-0.000196,0.580813,0.149645,-1.781629,-0.790685,-0.8480952,-1.343932,-0.510376
9,7490_497,26.819233,-2e-06,1.600397,0.170671,1.398619,-1.451024,-2.795865,0.078767,-0.312558


In [23]:
# Sort values by journey_segment segmentid

coefficients['segment'] = coefficients['segment'].astype("category")
coefficients['segment'].cat.set_categories(journey_segments, inplace=True)
coefficients = coefficients.sort_values(["segment"])
coefficients.head(5)

ValueError: Categorical categories must be unique

In [215]:
# Rearrange columns and set segment id as index

coefficients = coefficients[["segment", "intercept", "arrivaltime", "rain", "dayofweek_Monday", "dayofweek_Tuesday", 
                        "dayofweek_Thursday", "dayofweek_Friday", "dayofweek_Saturday", "dayofweek_Sunday"]]
coefficients = coefficients.set_index('segment')
coefficients.head(5)

Unnamed: 0_level_0,intercept,arrivaltime,rain,dayofweek_Monday,dayofweek_Tuesday,dayofweek_Thursday,dayofweek_Friday,dayofweek_Saturday,dayofweek_Sunday
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
765_867,156.686976,-0.000134,-2.035997,-1.243961,0.20235,0.277127,-0.152052,-14.716208,-11.77352
867_869,43.829241,3.1e-05,-0.536599,-0.482129,-1.798628,-2.126799,-5.05922,-8.83753,-4.597139
869_870,28.661684,4e-06,0.131681,-0.22372,-1.407403,1.036157,-0.660243,-2.750362,-4.323592
870_873,30.550306,-4.7e-05,0.258867,0.070722,-0.377671,0.007767,-1.686536,-3.08397,-1.879405
873_874,76.985275,-0.000387,4.333352,-4.786822,-6.799442,-4.262079,-5.395845,-14.946799,-10.594033


In [216]:
arrivaltime = model_inputs[0]
totaltraveltime = 0
segment_times = []

for i, rows in coefficients.iterrows():
    traveltime = (rows['intercept']
                  +(rows['arrivaltime']*arrivaltime)
                  +(rows['rain']*model_inputs[1])
                  +(rows['dayofweek_Friday']*model_inputs[2])
                  +(rows['dayofweek_Monday']*model_inputs[3])
                  +(rows['dayofweek_Saturday']*model_inputs[4])
                  +(rows['dayofweek_Sunday']*model_inputs[5])
                  +(rows['dayofweek_Thursday']*model_inputs[6])
                  +(rows['dayofweek_Tuesday']*model_inputs[7]))
    
    segment_times.append((i, round(traveltime)))
    totaltraveltime += traveltime
    arrivaltime = model_inputs[0] + totaltraveltime # initial start time + sum of previous segment times
        
print('Arrival Time:', int(arrivaltime))
print('Total Travel Time:', int(totaltraveltime))
#print('Segment Times:', segment_times)
segment_times

Arrival Time: 41261
Total Travel Time: 861


[('765_867', 149.0),
 ('867_869', 44.0),
 ('869_870', 29.0),
 ('870_873', 29.0),
 ('873_874', 59.0),
 ('874_875', 63.0),
 ('875_876', 55.0),
 ('876_3011', 68.0),
 ('3011_2820', 44.0),
 ('2820_2822', 39.0),
 ('2822_2823', 51.0),
 ('2823_5032', 33.0),
 ('5032_2867', 137.0),
 ('2867_1035', 61.0)]

In [217]:
total = 0
for i in segment_times:
    total += i[1]
    
print(total)

861.0


In [218]:
# Construct json

json_dict = {}
json_dict['arrivaltime'] = round(arrivaltime)
json_dict['totaltraveltime'] = round(totaltraveltime)
json_dict['segment_times'] = {i[0]:i[1] for i in segment_times}

# for i in segment_times:
#     json_dict['segment_times'][i[0]] = i[1]

In [219]:
json.dumps(json_dict)

'{"arrivaltime": 41261.0, "totaltraveltime": 861.0, "segment_times": {"765_867": 149.0, "867_869": 44.0, "869_870": 29.0, "870_873": 29.0, "873_874": 59.0, "874_875": 63.0, "875_876": 55.0, "876_3011": 68.0, "3011_2820": 44.0, "2820_2822": 39.0, "2822_2823": 51.0, "2823_5032": 33.0, "5032_2867": 137.0, "2867_1035": 61.0}}'