# Split the days and hours
- Split requests into hours to read on demand
- Convert the request times to whole seconds elapsed since the earliest request

In [1]:
import pandas as pd
# load the raw requests; start_dt is parsed as a datetime so the .dt
# accessors in the following cell can be used on it
reqs = pd.read_csv(
    'requests.csv',
    parse_dates=['start_dt'],
)
reqs.head()

Unnamed: 0,start_dt,from_node,to_node
0,2013-05-05,1167,1956
1,2013-05-05,1888,2354
2,2013-05-05,711,1371
3,2013-05-05,1684,1267
4,2013-05-05,1385,1497


In [2]:
# get the day, hour and convert times to seconds
# pandas dayofweek is Monday=0 .. Sunday=6; shifting by one and wrapping
# maps the days to sunday -> saturday as 0 -> 6
start_dt = reqs['start_dt']
# 'time' is produced directly by assign() instead of being created as
# 'seconds' and renamed afterwards: re-running this cell now overwrites the
# derived columns rather than leaving two columns both labelled 'time'
# (which made the sort below fail on the ambiguous label)
reqs = reqs.assign(
    day=(start_dt.dt.dayofweek + 1) % 7,
    hour=start_dt.dt.hour,
    # whole seconds elapsed since the earliest request in the frame
    time=(start_dt - start_dt.min()).dt.total_seconds().round(0).astype(int),
).sort_values(['time'])

In [3]:
# drop requests whose origin and destination are the same node
is_same_node = reqs['from_node'].eq(reqs['to_node'])
same_node = reqs.loc[is_same_node]

print(f'Will be removing {len(same_node)} requests that start and finish at the same node')

# remove the flagged rows by their index labels
clean = reqs.drop(index=same_node.index)
clean.head()

Will be removing 27663 requests that start and finish at the same node


Unnamed: 0,start_dt,from_node,to_node,day,hour,time
0,2013-05-05,1167,1956,0,0,0
136,2013-05-05,1669,1243,0,0,0
137,2013-05-05,1609,1632,0,0,0
138,2013-05-05,1092,910,0,0,0
139,2013-05-05,1278,867,0,0,0


In [4]:
# write only the columns needed downstream to a new csv, without the index
clean.to_csv(
    'ride_requests.csv',
    columns=['time', 'from_node', 'to_node'],
    index=False,
)