In [21]:
import numpy as np
import pandas as pd
import datetime as dt

### Input

In [22]:
# Query inputs.
# `time` is a Unix timestamp in seconds: 1531686783 (the value shown in row 1
# of the schedule preview) plus 600 seconds, i.e. ten minutes later.
time = 1531686783 + 600
# Name of the station of interest; must match a station name used in the data.
station = 'Baclaran'

### Source DataFrames

In [23]:
# Train arrival schedule, used to answer "when is the next train coming?".
# One column per station; the values are compared against Unix timestamps
# (seconds) and formatted as clock times by query().
df_schedule = pd.read_csv('train_schedules.csv')
df_schedule.head(2)

Unnamed: 0,5th Avenue,Abad Santos,Baclaran,Balintawak,Bambang,Blumentritt,Carriedo,Central Terminal,Doroteo Jose,EDSA,Gil Puyat,Libertad,Monumento,Pedro Gil,Quirino,R Papa,Roosevelt,Tayuman,United Nations,Vito Cruz
0,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600,1531686600
1,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783,1531686783


In [24]:
# Distance and travel time between station pairs, one row per
# (start_station, end_station). query() reports `travel_time` as minutes.
# NOTE(review): the unit of `distance` is not stated here - presumably km; confirm.
df_distances = pd.read_csv('df_distances.csv')
df_distances.head(3)

Unnamed: 0,start_station,end_station,distance,travel_time
0,Baclaran,EDSA,0.588,1.764
1,Baclaran,Libertad,1.598,4.794
2,EDSA,Libertad,1.01,3.03


In [25]:
# Number of passengers waiting per minute per station.
# `card_num` is the count that query() sums up; `timestamp` is the Unix time
# (seconds) of the minute shown as text in `per_minute`.
df_queue = pd.read_csv('df_passengers_per_min_per_station.csv')
df_queue.head(3)

Unnamed: 0,station,per_minute,card_num,timestamp
0,5th Avenue,2018-07-16 01:00PM,6,1531717200
1,5th Avenue,2018-07-16 01:02PM,4,1531717320
2,5th Avenue,2018-07-16 01:03PM,5,1531717380


4

In [53]:
# check load factor: light, moderate, heavy
def check_load_factor(time, is_raining=False, is_weekday=True):
    """Classify how loaded trains are expected to be at a given time.

    The classification depends only on the hour of day of ``time``,
    interpreted in the local timezone of the running kernel.

    Parameters
    ----------
    time : int or float
        Unix timestamp in seconds.
    is_raining : bool, optional
        Currently unused; accepted so callers can already pass it.
    is_weekday : bool, optional
        Currently unused; accepted so callers can already pass it.

    Returns
    -------
    str
        An adverb that reads correctly in "... expected to be <adverb>
        loaded": ``'lightly'`` for 11:00-14:59 and from 21:00 on,
        ``'moderately'`` before 07:00 and for 09:00-10:59, 15:00-15:59 and
        19:00-20:59, and ``'heavily'`` for the remaining hours
        (07:00-08:59 and 16:00-18:59).
    """
    h = dt.datetime.fromtimestamp(time).hour

    if 11 <= h < 15 or h >= 21:
        return 'lightly'
    if h < 7 or 9 <= h < 11 or h == 15 or 19 <= h < 21:
        return 'moderately'
    # Adverb form, consistent with the other two labels, so that the sentence
    # built by the caller is grammatical ("heavily loaded").
    return 'heavily'

In [57]:
def query(time, station1, station2):
    """Build a commuter advisory for a trip from ``station1`` to ``station2``.

    Reads the module-level frames ``df_schedule``, ``df_distances`` and
    ``df_queue`` and calls ``check_load_factor``. Everything station-specific
    is derived from the arguments, not from any notebook-level variable.

    Parameters
    ----------
    time : int or float
        Unix timestamp (seconds) of the query.
    station1 : str
        Origin station; must be a column of ``df_schedule``.
    station2 : str
        Destination station.

    Returns
    -------
    list of str
        The advisory text split on ``'. '``: upcoming arrival times at
        ``station1`` (formatted in the kernel's local timezone), the
        estimated waiting time, the expected train load, the travel time
        to ``station2``, and a closing greeting.

    Raises
    ------
    KeyError
        If ``station1`` is not a column of ``df_schedule``.
    IndexError
        If no train is scheduled at ``station1`` at or after ``time``, or
        ``df_distances`` has no row for the (``station1``, ``station2``)
        pair. The exception type matches what the bare indexing raised
        before, so existing handlers keep working.
    """
    # Up to five arrivals at the origin at or after the query time, taken in
    # the row order of df_schedule.
    arrivals = df_schedule.loc[df_schedule[station1] >= time,
                               station1].iloc[:5]
    if arrivals.empty:
        raise IndexError(
            f'no trains scheduled at {station1} at or after {time}')
    scheds = [dt.datetime.fromtimestamp(ts).strftime('%I:%M%p')
              for ts in arrivals.tolist()]

    route = df_distances.loc[(df_distances.start_station == station1)
                             & (df_distances.end_station == station2),
                             'travel_time']
    if route.empty:
        raise IndexError(
            f'no travel time recorded from {station1} to {station2}')
    travel_time = route.iloc[0]

    # Passengers in the next 15 per-minute records at the origin station.
    # Filter on `station1` so the estimate always matches the station named
    # in the message.
    queue_people = (df_queue.loc[(df_queue.station == station1)
                                 & (df_queue.timestamp >= time)]
                    .sort_values('timestamp')
                    .head(15)
                    .card_num.sum())

    # Whole minutes of waiting.
    # NOTE(review): presumably 150 passengers board per train and trains run
    # every 3 minutes - confirm these constants.
    queue = int((queue_people / 150) * 3 // 1)

    load_factor = check_load_factor(time)

    # With a single upcoming train there is nothing to join with ", and".
    if len(scheds) > 1:
        arrival_text = ', '.join(scheds[:-1]) + f', and {scheds[-1]}'
    else:
        arrival_text = scheds[0]

    msg = 'Arrival schedules of the next '
    msg += f'five trains in {station1} station: '
    msg += f'{arrival_text}. '
    msg += f'Expected waiting time from station is {queue} minutes. '
    msg += f'Incoming trains are expected to be {load_factor} loaded. '
    msg += f'Total travel time from {station1} to {station2} '
    msg += f'is {int(np.round(travel_time))} minutes. '
    msg += 'Ingat po sa byahe!'

    return msg.split('. ')

In [58]:
# Example: advisory for a trip from Baclaran to Roosevelt as of `time`.
query(time, 'Baclaran', 'Roosevelt')

['Arrival schedules of the next five trains in Baclaran station: 04:45AM, 04:48AM, 04:51AM, 04:54AM, and 04:57AM',
 'Expected waiting time from station is 7 minutes',
 'Incoming trains are expected to be moderately loaded',
 'Total travel time from Baclaran to Roosevelt is 54 minutes',
 'Ingat po sa byahe!']