In [1]:
import pandas as pd
import json

In [2]:
# Connect to database

from sqlalchemy import create_engine

URI="localhost"
PORT="5433"
DB = "jetaDb"
USER = "postgres"
PASSWORD = "00001234"
  
engine = create_engine("postgresql://{}:{}@{}:{}/{}".format(USER, PASSWORD, URI, PORT, DB), echo=True)

  """)


In [3]:
# Variables from query

start = "767"
destination = "770"
lineid = "39A"
start_time = "1532018880"

# http://localhost:8000/main/journeytime?source=767&destination=1914&lineid=39A&time=1530008000
# http://localhost:8000/main/journeytime?source=719&destination=603&lineid=31B&time=1532018880
# Rain should be gotten from api or else table

rain = 0.5

In [4]:
# Transform time into variables required from model

import time
from datetime import datetime, timedelta
from pytz import timezone

# Get Irish timezone (utc + daylight saving time (DST))
irish_time = timezone('Europe/Dublin')
print("Irish Standard Time: ", irish_time)

# Get unixtime as datetime object
dt_time = datetime.fromtimestamp(int(start_time), irish_time)
print("Datetime: ", dt_time)

# Get day of week -> Mon: 0, Sun: 6
weekday = dt_time.weekday()
print("Weekday: ", weekday)

# Create list with desired weekday filled.
week_dummies = [0] * 7
week_dummies[weekday] = 1
del week_dummies[2] # Delete wednesday - not included in model due to dummy var trap
print("Week dummies: ", week_dummies)

# Get arrivaltime in seconds
date = dt_time.date()
date_unixtime = time.mktime(date.timetuple())
seconds_since_midnight = int(time.mktime((dt_time - timedelta(seconds = date_unixtime)).timetuple()))
print("Seconds since midnight (arrival time): ", seconds_since_midnight)


Irish Standard Time:  Europe/Dublin
Datetime:  2018-07-19 17:48:00+01:00
Weekday:  3
Week dummies:  [0, 0, 1, 0, 0, 0]
Seconds since midnight (arrival time):  64080


In [5]:
# Model inputs

model_inputs = [seconds_since_midnight, rain] + week_dummies
model_inputs

[64080, 0.5, 0, 0, 1, 0, 0, 0]

In [6]:
# Get stop lists associated with this lineid, start stop and end stop

sql = """

SELECT * 
FROM main_routes 
WHERE routeid IN (
    SELECT UNNEST(routes) 
    FROM main_lines 
    WHERE main_lines.lineid = '{0}'
) 
AND '{1}' = ANY(main_routes.stopids) 
AND '{2}' = ANY(main_routes.stopids) 
;

""".format(lineid, start, destination)

routes = pd.read_sql(sql, engine)

# Lines.objects.all
# Routes.objects.all(routeid )

2018-07-22 15:07:32,095 INFO sqlalchemy.engine.base.Engine select version()
2018-07-22 15:07:32,098 INFO sqlalchemy.engine.base.Engine {}
2018-07-22 15:07:32,195 INFO sqlalchemy.engine.base.Engine select current_schema()
2018-07-22 15:07:32,197 INFO sqlalchemy.engine.base.Engine {}
2018-07-22 15:07:32,292 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2018-07-22 15:07:32,294 INFO sqlalchemy.engine.base.Engine {}
2018-07-22 15:07:32,352 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2018-07-22 15:07:32,354 INFO sqlalchemy.engine.base.Engine {}
2018-07-22 15:07:32,455 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2018-07-22 15:07:32,457 INFO sqlalchemy.engine.base.Engine {}
2018-07-22 15:07:32,602 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
20

In [7]:
routes

Unnamed: 0,routeid,direction,stopids,lineid
0,39A_40,1,"[767, 768, 769, 770, 771, 772, 773, 774, 775, ...",39A
1,39A_41,1,"[767, 768, 769, 770, 771, 772, 773, 774, 775, ...",39A


In [8]:
if routes.shape[0] > 1:
    print("Error: multiple possible routes.")
    print(routes)

Error: multiple possible routes.
  routeid  direction                                            stopids lineid
0  39A_40          1  [767, 768, 769, 770, 771, 772, 773, 774, 775, ...    39A
1  39A_41          1  [767, 768, 769, 770, 771, 772, 773, 774, 775, ...    39A


In [9]:
# Convert list of stopids to list

stop_list = routes['stopids'].tolist()[0]
print(stop_list)

[767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 779, 780, 781, 782, 783, 784, 785, 786, 793, 7576, 7586, 7587, 7588, 328, 1443, 1444, 1445, 1647, 1648, 1649, 1911, 1913, 1914, 1805, 1806, 1660, 1661, 1662, 1664, 1665, 1666, 1807, 7167, 1808, 7389, 7025, 4464, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1899, 6107, 6108, 6109, 6110, 7020, 7029, 7038, 7011, 2171, 7160, 7047, 7161, 7162]


In [10]:
# Slice list by start and destination stop

journey_stops = stop_list[stop_list.index(int(start)):(stop_list.index(int(destination))+1)]
print(journey_stops)

[767, 768, 769, 770]


In [11]:
# Remove duplicate values from list, while maintaining stop order.

from more_itertools import unique_everseen

journey_stops = list(unique_everseen(journey_stops))
print(journey_stops)

[767, 768, 769, 770]


In [12]:
# Change each stopid into string
stringified = list(map(str, journey_stops))

# Zip ['0', .. 'n'] and ['1', .., 'n'] into list [('0', '1'), .., ('n - 1', 'n')]
# Join tuples to make ['1_2', .., 'n-1_n']
journey_segments = [ '_'.join(x) for x in zip(stringified[0:], stringified[1:])]
print(journey_segments)

['767_768', '768_769', '769_770']


In [13]:
journey_segments

['767_768', '768_769', '769_770']

In [14]:
# Select coefficient rows with these segment ids

sql2 = """

SELECT *
FROM main_coefficients
WHERE segment = ANY(ARRAY{0})

""".format(journey_segments)

coefficients = pd.read_sql(sql2, engine)

2018-07-22 15:07:56,800 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2018-07-22 15:07:56,802 INFO sqlalchemy.engine.base.Engine {'name': "\n\nSELECT *\nFROM main_coefficients\nWHERE segment = ANY(ARRAY['767_768', '768_769', '769_770'])\n\n"}
2018-07-22 15:07:57,542 INFO sqlalchemy.engine.base.Engine 

SELECT *
FROM main_coefficients
WHERE segment = ANY(ARRAY['767_768', '768_769', '769_770'])


2018-07-22 15:07:57,546 INFO sqlalchemy.engine.base.Engine {}


In [15]:
coefficients

Unnamed: 0,segment,intercept,arrivaltime,rain,fri,mon,sat,sun,thu,tue
0,767_768,123.285491,-0.000179,1.266745,-2.018558,0.834109,-10.095867,-10.386478,-1.594662,-2.440293
1,769_770,70.868285,-0.000202,0.290486,-1.482873,-2.142149,-10.079057,-11.424488,-2.1654,-1.492595
2,768_769,70.940947,6e-06,0.853329,-0.247459,-4.777141,-12.717419,-19.586873,-3.682586,-2.967836


In [16]:
# Sort values by journey_segment segmentid

coefficients['segment'] = coefficients['segment'].astype("category")
coefficients['segment'].cat.set_categories(journey_segments, inplace=True)
coefficients = coefficients.sort_values(["segment"])
coefficients

Unnamed: 0,segment,intercept,arrivaltime,rain,fri,mon,sat,sun,thu,tue
0,767_768,123.285491,-0.000179,1.266745,-2.018558,0.834109,-10.095867,-10.386478,-1.594662,-2.440293
2,768_769,70.940947,6e-06,0.853329,-0.247459,-4.777141,-12.717419,-19.586873,-3.682586,-2.967836
1,769_770,70.868285,-0.000202,0.290486,-1.482873,-2.142149,-10.079057,-11.424488,-2.1654,-1.492595


In [17]:
# Rearrange columns and set segment id as index

coefficients = coefficients[["segment", "intercept", "arrivaltime", "rain", "mon", "tue", "thu", "fri", "sat", "sun"]]
coefficients = coefficients.set_index('segment')
coefficients.head(5)

Unnamed: 0_level_0,intercept,arrivaltime,rain,mon,tue,thu,fri,sat,sun
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
767_768,123.285491,-0.000179,1.266745,0.834109,-2.440293,-1.594662,-2.018558,-10.095867,-10.386478
768_769,70.940947,6e-06,0.853329,-4.777141,-2.967836,-3.682586,-0.247459,-12.717419,-19.586873
769_770,70.868285,-0.000202,0.290486,-2.142149,-1.492595,-2.1654,-1.482873,-10.079057,-11.424488


In [18]:
arrivaltime = model_inputs[0]
totaltraveltime = 0
segment_times = []

for i, rows in coefficients.iterrows():
    traveltime = (rows['intercept']
                  +(rows['arrivaltime']*arrivaltime)
                  +(rows['rain']*model_inputs[1])
                  +(rows['fri']*model_inputs[2])
                  +(rows['mon']*model_inputs[3])
                  +(rows['sat']*model_inputs[4])
                  +(rows['sun']*model_inputs[5])
                  +(rows['thu']*model_inputs[6])
                  +(rows['fri']*model_inputs[7]))
    
    segment_times.append((i, round(traveltime)))
    totaltraveltime += traveltime
    arrivaltime = model_inputs[0] + totaltraveltime # initial start time + sum of previous segment times
        
print('Arrival Time:', int(arrivaltime))
print('Total Travel Time:', int(totaltraveltime))
#print('Segment Times:', segment_times)
segment_times

Arrival Time: 64289
Total Travel Time: 209


[('767_768', 102.0), ('768_769', 59.0), ('769_770', 48.0)]

In [19]:
total = 0
for i in segment_times:
    total += i[1]
    
print(total)

209.0


In [20]:
# Construct json

json_dict = {}
json_dict['arrivaltime'] = round(arrivaltime)
json_dict['totaltraveltime'] = round(totaltraveltime)
json_dict['segment_times'] = {i[0]:i[1] for i in segment_times}

# for i in segment_times:
#     json_dict['segment_times'][i[0]] = i[1]

In [26]:
json.dumps(json_dict)

'{"arrivaltime": 64939.0, "totaltraveltime": 859.0, "segment_times": {"719_720": 35.0, "720_721": 32.0, "721_693": 40.0, "693_585": 82.0, "585_586": 52.0, "586_587": 60.0, "587_588": 32.0, "588_589": 41.0, "589_590": 24.0, "590_591": 62.0, "591_592": 35.0, "592_593": 38.0, "593_594": 33.0, "594_595": 25.0, "595_596": 24.0, "596_597": 32.0, "597_598": 31.0, "598_599": 29.0, "599_600": 52.0, "600_601": 47.0, "601_602": 25.0, "602_603": 29.0}}'