-
Notifications
You must be signed in to change notification settings - Fork 0
/
stop_frequency_counter.py
157 lines (136 loc) · 5.51 KB
/
stop_frequency_counter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""
CIF_timetable_converter.py
"""
# GLOBAL PARAMETERS
paths = {
# Path to folder with timetable data.
'timetable':
'..\\TRACC Data\PT Timetable Data',
# Name of folder in timetable data to store stop frequencies.
'output': 'stop_frequency'
}
# Day of operation - from monday to sunday.
DAY = 'tuesday'
# Starting/ending point.
START_HOUR = 8
START_MINUTES = 0
END_HOUR = 9
END_MINUTES = 0
MODES = {
'ferry': 'ATCO_Ferry_timetable.csv',
'bus': 'Bus_2_timetable.csv',
'coach': 'Coach_1_timetable.csv',
'metro': 'Metro_3_timetable.csv',
'rail': 'NationalRail_4_timetable.csv',
'tram': 'Tram_5_timetable.csv',
}
# Do not edit below this point!
# ===================================================================================================
import os
import time
import pandas as pd
import datetime
def get_stop_frequency(df_timetable, day, start_hour, end_hour, group_by_routes=False, group_by_departure=True,
start_minute=0, end_minute=0):
"""
Given a timetable produced from a .cif file with CIF-timetable-converter.py, analyze stop frequency in a given
time period.
:param df_timetable: timetable dataframe
:param day: day of week
:param start_hour: starting hour
:param end_hour: ending hour
:param group_by_routes: True if you want to get frequencies of particular routes on a stop
:param group_by_departure: True if you want to analyze frequency by departure, not arrival time
:param start_minute: starting minute
:param end_minute: ending minute
:return: dataframe with stops and frequencies in a given time period.
"""
start = datetime.time(hour=start_hour, minute=start_minute)
end = datetime.time(hour=end_hour, minute=end_minute)
day = 'operates_on_' + day.lower() + 's'
if group_by_routes:
group_by_cols = ['location', 'route_number_(identifier)']
output_columns = ['location', 'route_number_(identifier)', day]
else:
group_by_cols = ['location']
output_columns = ['location', day]
if group_by_departure:
timestamp_column = 'published_departure_time'
else:
timestamp_column = 'published_arrival_time'
# TOTAL
# Get records within the timeframe.
total = df_timetable.loc[
(df_timetable[timestamp_column] >= start)
& (df_timetable[timestamp_column] <= end)
& (df_timetable[day] == 1)] \
.groupby(group_by_cols) \
.sum() \
.reset_index() \
[output_columns]
# INBOUND
inbound = df_timetable.loc[
(df_timetable[timestamp_column] >= start)
& (df_timetable[timestamp_column] <= end)
& (df_timetable[day] == 1)
# Add condition for taking only records heading in inbound direction.
& (df_timetable['route_direction'] == 'I')] \
.groupby(group_by_cols) \
.sum() \
.reset_index() \
[output_columns]
# OUTBOUND
outbound = df_timetable.loc[
(df_timetable[timestamp_column] >= start)
& (df_timetable[timestamp_column] <= end)
& (df_timetable[day] == 1)
# Add condition for taking only records heading in outbound direction.
& (df_timetable['route_direction'] == 'O')] \
.groupby(group_by_cols) \
.sum() \
.reset_index() \
[output_columns]
# Merge all dataframes.
frequency = total \
.merge(inbound, how='left', on=group_by_cols) \
.merge(outbound, how='left', on=group_by_cols) \
.rename(columns={
day + '_x': 'frequency',
day + '_y': 'inbound_frequency',
day: 'outbound_frequency'}) \
.sort_values(by='frequency', ascending=False)
# Add prefix for easier joining with shapefiles.
frequency['location'] = frequency['location'].apply(lambda x: 'QLN' + str(x).strip())
return frequency
def main():
"""
Given variables: DAY / START_HOUR / END_HOUR, this function will analyze timetables listed in MODES dictionary, perform
a stop frequency calculation for all stops within the timetable and return a .csv with stop frequency in given period.
:return:
"""
# Clock starts.
START = time.time()
print("Frequency calculation commencing.")
# Loop through all MODES of travels and their associated timetables.
for i, mode in enumerate(MODES.keys()):
print("{}/{} - Analyzing {} timetable from {}.".format(i + 1, len(MODES.keys()), mode, MODES[mode]))
# Cast column dtypes.
df_timetable = pd.read_csv(os.path.join(paths['timetable'], MODES[mode]), dtype={
'unique_identifier': str,
'route_number_(identifier)': str,
'location': str
})
# Cast time columns to datetime.time objects.
time_cols = ['published_arrival_time', 'published_departure_time', 'next_stop_arrival_time']
for col in time_cols:
df_timetable[col] = pd.to_datetime(df_timetable[col]).dt.time
# Calculate frequency.
print('\tDay: {}\n\tTimeframe: {}-{}\n\tCalculating frequency...'.format(DAY, START_HOUR, END_HOUR))
frequency = get_stop_frequency(df_timetable, DAY, START_HOUR, END_HOUR, group_by_departure=True)
# Save to .csv
print('\tSaving.')
output = "{}_{}_{}_to_{}.csv".format(mode, DAY, str(START_HOUR), str(END_HOUR))
frequency.to_csv(os.path.join(paths['timetable'], paths['output'], output), index=False)
print("\nFinished.\nTotal runtime: {0:.7}".format(str(time.time() - START)))
if __name__ == '__main__':
main()