In [11]:

"""
This file can be used to generate inflow and outflow at BOTH cluster-level (H = 60, d = 0.5) and station-level (H = 10, d = 0.01)

cluster parameters:
columns=["start_cluster"] #for cluster demand
H,d = 60,0.5 

station parameters:
columns=["start_station"] #for station demand
H,d = 10,0.01 

"""

'\nThis file can be used to generate inflfow and outflow at BOTH cluster-level (H=60 ,d = 0.5) and station-level (H=10 ,d = 0.01)\n\ncluster parameters:\ncolumns=["start_cluster"] #for cluster demand\nH,d = 60,0.5 \n\nstation parameters:\ncolumns=["start_station"] #for station demand\nH,d = 10,0.01 \n\n'

In [12]:
import datetime
import cvxpy as cp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from haversine import haversine_vector, Unit
import os

In [13]:
def temporal_clustering(H,demand):
    """Add integer time-slot columns to ``demand`` in place.

    Every ``'%H:%M:%S'`` string in ``demand['start_time']`` and
    ``demand['end_time']`` is converted to minutes since midnight (the seconds
    field is ignored) and divided by ``H``, rounding down. The results are
    inserted as ``'start_timeslot'`` at column position 3 and
    ``'end_timeslot'`` at column position 6.

    Parameters
    ----------
    H : int or float
        Time-slot duration in minutes. Must be positive.
    demand : pandas.DataFrame
        Frame holding ``'start_time'`` and ``'end_time'`` string columns.
        It is modified in place.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``H`` is not positive, if a time string does not match
        ``'%H:%M:%S'``, or (from ``DataFrame.insert``) if a time-slot column
        already exists, e.g. when the function is run twice on one frame.
    """
    if H <= 0:
        raise ValueError("H (timeslot duration in minutes) must be positive, got %r" % (H,))

    def to_timeslot(time_str):
        # Parse each string once and reuse the result for both hour and minute.
        parsed = datetime.datetime.strptime(time_str, '%H:%M:%S')
        return int(np.floor((60*parsed.hour + parsed.minute)/H))

    # temporal clustering
    demand.insert(3, 'start_timeslot', demand['start_time'].apply(to_timeslot))
    demand.insert(6, 'end_timeslot', demand['end_time'].apply(to_timeslot))


In [14]:
def spatial_clustering(d,demand,stations_dir=None):
    """Group stations into clusters of radius ``d`` and label ``demand`` with them.

    Reads ``stations.csv`` from ``stations_dir``, selects a minimum set of
    cluster-center stations such that every station has a center closer than
    ``d`` km (boolean program solved with cvxpy), and assigns each station to
    its nearest center.

    Side effects:
      * the process working directory is changed to ``stations_dir``;
      * ``demand`` gains ``'start_cluster'`` and ``'end_cluster'`` columns
        (modified in place).

    Parameters
    ----------
    d : float
        Clustering radius in kilometres. Two stations are considered neighbours
        when their haversine distance is strictly less than ``d``.
    demand : pandas.DataFrame
        Frame with ``'start_station'`` and ``'end_station'`` columns whose
        values appear in the index of ``stations.csv``.
    stations_dir : str, optional
        Directory containing ``stations.csv``. Defaults to the project path
        that was previously hard-coded here.

    Returns
    -------
    station_to_cluster_map : dict
        Station index value -> positional id of its cluster center.
    stations : pandas.DataFrame
        The stations table with two added columns: ``'cc'`` (1.0 for cluster
        centers, else 0.0) and ``'cc_id'`` (assigned cluster id).

    Raises
    ------
    RuntimeError
        If the solver returns no solution (e.g. the problem is infeasible
        because ``d`` is not positive).
    KeyError
        If ``demand`` references a station that is not in ``stations.csv``.
    """
    if stations_dir is None:
        # Raw string: the plain literal only worked because sequences such as
        # '\W' and '\D' are not recognised escapes, which newer Python versions
        # warn about.
        stations_dir = r'C:\Work\WORK_PACKAGE\Demand_forecasting\github\DeepAR-pytorch\My_model\Rental_Fleet_Demand_Forecast\data\station_names_n_cluster_center_id'
    os.chdir(stations_dir)
    stations = pd.read_csv('stations.csv',index_col=0)

    # haversine expects (lat, lon) pairs; presumably y is latitude and x is
    # longitude in stations.csv -- confirm against the data.
    stn_coords = list(zip(stations.y, stations.x))

    # Pairwise distances in km; R[i, j] == 1 when stations i and j are closer
    # than d (the diagonal is always 1 for d > 0, so each station covers itself).
    D = haversine_vector(stn_coords, stn_coords, Unit.KILOMETERS, comb=True)
    R = 1*(D < d)

    # ilp to select cluster centers: fewest centers such that every station is
    # covered by at least one of them.
    x = cp.Variable((len(stations),),boolean=True)
    prob = cp.Problem(cp.Minimize(cp.sum(x)), [x.T@R>=1])
    prob.solve()
    if x.value is None:
        raise RuntimeError(
            "cluster-center selection failed (solver status: %s)" % (prob.status,)
        )

    # map each station to a cluster
    # Solvers may return 0.9999999 / 1e-9 instead of exact 1 / 0, so threshold
    # rather than comparing the raw floats with == 1.
    stations['cc'] = (np.asarray(x.value) > 0.5).astype(float)
    cluster_centers = stations.loc[stations.cc==1]
    cc_coords = list(zip(cluster_centers.y, cluster_centers.x))
    # One row per station, one column per center; argmin picks the nearest center.
    stations['cc_id'] = np.argmin(haversine_vector(cc_coords, stn_coords, Unit.KILOMETERS, comb=True), axis=1)

    station_to_cluster_map = dict(zip(stations.index, stations.cc_id))

    # spatial clustering
    demand['start_cluster'] = demand['start_station'].apply(lambda x: station_to_cluster_map[x])
    demand['end_cluster'] = demand['end_station'].apply(lambda x: station_to_cluster_map[x])

    return station_to_cluster_map, stations

In [15]:
if __name__ == "__main__":
    # Raw string: the original plain literals only worked because sequences
    # such as '\W' and '\D' are not recognised escapes (and '\r' had to be
    # escaped by hand); newer Python versions warn about that.
    DATA_DIR = r'C:\Work\WORK_PACKAGE\Demand_forecasting\github\DeepAR-pytorch\My_model\Rental_Fleet_Demand_Forecast\data'

    os.chdir(os.path.join(DATA_DIR, 'raw_data'))
    demand = pd.read_csv('transaction_logs.csv',index_col=0)

## start
# DATAFRAME T: station-level outflow, IN: station-level inflow

    H,d = 10,0.01 # H:Timeslot duration in minutes, d = radius for clustering in km
    #temporal clustering (adds start_timeslot / end_timeslot to demand in place)
    temporal_clustering(H,demand)

    #spatial clustering (adds start_cluster / end_cluster to demand in place)
    station_to_cluster_map, stations = spatial_clustering(d,demand)
    print(station_to_cluster_map)
    #print(stations['region'])
    os.chdir(os.path.join(DATA_DIR, 'station_names_n_cluster_center_id'))
    stations.to_csv('station_cc_id_'+str(H)+'_'+str(d)+'.csv', index=False)

    #aggregated demand table: number of distinct cars per (date, timeslot) row
    #and station column
    T = pd.pivot_table(demand, values = 'car', index=["start_date","start_timeslot"], columns=["start_station"], aggfunc=lambda x: len(x.unique()))
    IN = pd.pivot_table(demand, values = 'car', index=["end_date","end_timeslot"], columns=["end_station"], aggfunc=lambda x: len(x.unique()))
    IN = IN.fillna(0)
    T = T.fillna(0)

    # Presumably needed for cluster-level runs, where columns are integer
    # cluster ids -- confirm before re-enabling:
    # missing_clusters = [x for x in range(T.columns[-1]) if x not in T.columns]
    # missing_clusters.sort()

    # for ms in missing_clusters:
    #     T.insert(ms, ms, 0)

    # Ensure that the index of T and IN are of the same length and aligned.
    # Trimming to the common range alone is not enough: a slot with outflow
    # but no inflow (or vice versa) would exist in only one table, and since
    # the CSVs are written without their index the rows would silently shift.
    # Both tables are therefore reindexed onto the union of their slots, with
    # missing rows filled with 0. Slots with no activity in either table are
    # still absent.
    start_idx = max(IN.index[0],T.index[0])
    end_idx = min(IN.index[-1],T.index[-1])
    T = T.loc[start_idx:end_idx]
    IN = IN.loc[start_idx:end_idx]

    outflow_index_names = list(T.index.names)
    inflow_index_names = list(IN.index.names)
    shared_index = T.index.union(IN.index)
    T = T.reindex(shared_index, fill_value=0).rename_axis(index=outflow_index_names)
    IN = IN.reindex(shared_index, fill_value=0).rename_axis(index=inflow_index_names)

    print(T)
    print(len(T.columns))
    os.chdir(os.path.join(DATA_DIR, 'station_level', 'outflow_data'))
    T.to_csv('station_outflow_'+str(H)+'_'+str(d)+'_'+str(start_idx)+'_to_'+str(end_idx)+'.csv', index=False)

    # Presumably needed for cluster-level runs only -- confirm before re-enabling:
    # missing_clusters = [x for x in range(IN.columns[-1]) if x not in IN.columns]
    # missing_clusters.sort()

    # for ms in missing_clusters:
    #     IN.insert(ms, ms, 0)

    print(IN)
    print(len(IN.columns))
    os.chdir(os.path.join(DATA_DIR, 'station_level', 'inflow_data'))
    IN.to_csv('station_inflow_'+str(H)+'_'+str(d)+'_'+str(start_idx)+'_to_'+str(end_idx)+'.csv', index=False)
    
## stop

    #raw_data = T.to_numpy()


{34: 0, 20: 1, 298: 2, 140: 3, 329: 4, 285: 5, 272: 6, 286: 7, 203: 8, 294: 9, 328: 10, 195: 11, 46: 12, 119: 13, 355: 14, 77: 15, 93: 16, 192: 17, 35: 18, 348: 19, 163: 20, 310: 21, 202: 22, 318: 23, 245: 24, 206: 25, 352: 26, 190: 27, 178: 28, 129: 29, 166: 30, 345: 31, 370: 32, 29: 33, 18: 34, 331: 35, 218: 36, 251: 37, 56: 38, 249: 39, 149: 40, 154: 41, 326: 42, 210: 43, 191: 44, 32: 45, 239: 46, 388: 47, 278: 48, 306: 49, 186: 50, 170: 51, 94: 52, 139: 53, 141: 54, 377: 55, 116: 56, 209: 57, 376: 58, 151: 59, 385: 60, 132: 61, 78: 62, 366: 63, 150: 64, 320: 65, 9: 66, 155: 67, 389: 68, 288: 69, 58: 70, 323: 71, 162: 72, 43: 73, 330: 74, 325: 75, 118: 76, 110: 77, 15: 78, 341: 79, 307: 80, 84: 81, 304: 82, 138: 83, 296: 84, 147: 85, 126: 86, 228: 87, 263: 88, 113: 89, 243: 90, 81: 91, 74: 92, 257: 93, 368: 94, 82: 95, 5: 96, 98: 97, 47: 98, 17: 99, 37: 100, 168: 101, 41: 102, 40: 103, 16: 104, 28: 105, 26: 106, 30: 107, 344: 108, 73: 109, 64: 110, 181: 111, 128: 112, 378: 113, 281: