In [1]:
import pandas as pd
import numpy as np
import os
import copy

In [2]:
# Root folder of the simulation run to validate. It can be overridden without
# editing the notebook by setting the SA_SCENARIO_OUTPUT_DIR environment
# variable; the fallback is the path that was hard-coded here originally.
output_directory = os.environ.get(
    'SA_SCENARIO_OUTPUT_DIR',
    '/Users/francescmartiescofet/Documents/ETH/IVT/eqasim-java/switzerland/SA_scenario_output3',
)
# Index of the last iteration to load (iterations 0..last_iter are read).
last_iter = 5

In [3]:
# Per-iteration tables: element k of each list is the CSV written by iteration k.
drt_legs = []
global_avg = []
global_moving_avg = []
global_successive_avg = []
zonal_avg = []
zonal_moving_avg = []
zonal_successive_avg = []

# File-name suffix of every per-iteration CSV, paired with the list it feeds.
# The files are read in the order listed here.
iteration_tables = (
    ('DrtWaitTimeGlobalAvg.csv', global_avg),
    ('DrtWaitTimeGlobalMovingAvg.csv', global_moving_avg),
    ('DrtWaitTimeGlobalSuccessiveAvg.csv', global_successive_avg),
    ('DrtWaitTimesZonalAvg.csv', zonal_avg),
    ('DrtWaitTimesZonalMovingAvg.csv', zonal_moving_avg),
    ('DrtWaitTimesZonalSuccessiveAvg.csv', zonal_successive_avg),
    ('drt_legs_drt.csv', drt_legs),
)

scenario_dir = os.path.join(output_directory, 'testDrtZones')
for i in range(last_iter + 1):
    # Files live in ITERS/it.<i>/ and are named "<i>.<suffix>".
    iter_path = os.path.join(scenario_dir, 'ITERS', 'it.' + str(i)) + '/' + str(i) + '.'
    for suffix, table_list in iteration_tables:
        table_list.append(pd.read_csv(iter_path + suffix, sep=';'))

# Link -> zone lookup table, indexed by link id.
link2zones_csv = pd.read_csv(
    os.path.join(scenario_dir, 'drt_WayneCountyLink2Zones.csv'), sep=';'
).set_index('link_id')

In [4]:
# Plain dict mapping each link id to its zone; .item() converts the value
# read from the row into a native Python scalar.
link2zones = {
    link: row.zone.item()
    for link, row in link2zones_csv.iterrows()
}

In [5]:
def check_global_avg(global_avg, drt_legs):
    """Print, per iteration, the reported global average next to the leg mean.

    Args:
        global_avg: list of DataFrames, one per iteration; the value at
            label 0 of the 'avg' column is the reported average.
        drt_legs: list of DataFrames, one per iteration, with a 'waitTime'
            column.

    Returns:
        None. The comparison is printed so it can be inspected by eye.
    """
    for it, reported in enumerate(global_avg):
        # Stop where the shorter of the two lists ends, as zip() would.
        if it >= len(drt_legs):
            break
        legs = drt_legs[it]
        print(f'Iteration {it!s}')
        print(f"Global avg: {reported['avg'][0]!s}")
        print(f'Average from legs: {legs.waitTime.mean()!s}')
        print('\n')

In [6]:
# Compare the reported per-iteration global average with the mean leg wait time.
check_global_avg(global_avg, drt_legs)

Iteration 0
Global avg: nan
Average from legs: nan


Iteration 1
Global avg: 70.625
Average from legs: 70.625


Iteration 2
Global avg: 70.55
Average from legs: 70.55


Iteration 3
Global avg: 70.71428571428571
Average from legs: 70.71428571428571


Iteration 4
Global avg: 71.77777777777777
Average from legs: 71.77777777777777


Iteration 5
Global avg: 77.08333333333333
Average from legs: 77.08333333333333




In [7]:
def check_global_moving_window_avg(global_moving_avg, drt_legs, moving_window):
    """Print the reported global moving average next to one rebuilt from legs.

    For iteration `it` the legs of the last `moving_window` iterations
    (fewer at the start of the run) are pooled and their mean wait time is
    printed beside the reported value.

    Args:
        global_moving_avg: list of DataFrames, one per iteration; the value
            at label 0 of the 'avg' column is the reported moving average.
        drt_legs: list of DataFrames, one per iteration, with a 'waitTime'
            column.
        moving_window: number of iterations in the window.

    Returns:
        None. The comparison is printed.
    """
    for it, reported in enumerate(global_moving_avg):
        # The window only starts sliding once `moving_window` iterations exist.
        first = it - moving_window + 1 if it >= moving_window else 0
        pooled_legs = pd.concat([drt_legs[k] for k in range(first, it + 1)])
        print(f'Iteration {it!s}')
        print(f"Global moving avg: {reported['avg'][0]!s}")
        print(f'Average from legs: {pooled_legs.waitTime.mean()!s}')
        print('\n')
        

In [8]:
# Window of 2 iterations -- presumably the value configured for the simulation run; confirm.
check_global_moving_window_avg(global_moving_avg, drt_legs, moving_window=2)

Iteration 0
Global moving avg: nan
Average from legs: nan


Iteration 1
Global moving avg: 70.625
Average from legs: 70.625


Iteration 2
Global moving avg: 70.578125
Average from legs: 70.578125


Iteration 3
Global moving avg: 70.64044943820225
Average from legs: 70.64044943820225


Iteration 4
Global moving avg: 71.3125
Average from legs: 71.3125


Iteration 5
Global moving avg: 74.60740740740741
Average from legs: 74.60740740740741




In [9]:
def check_global_successive_avg(global_successive_avg, drt_legs, weight):
    """Print the reported global successive average next to a rebuilt one.

    The rebuilt series starts from the first iteration's mean wait time and
    is then updated as ``(1 - weight) * previous + weight * current``. While
    the previous value is NaN the current mean is taken as is.

    Args:
        global_successive_avg: list of DataFrames, one per iteration; the
            value at label 0 of the 'avg' column is the reported average.
        drt_legs: list of DataFrames, one per iteration, with a 'waitTime'
            column.
        weight: weight given to the current iteration's mean.

    Returns:
        None. The per-iteration means and the comparison are printed.
    """
    per_iteration_mean = [legs.waitTime.mean() for legs in drt_legs]
    print(per_iteration_mean)

    rebuilt = []
    for current in per_iteration_mean:
        if not rebuilt or np.isnan(rebuilt[-1]):
            # Nothing usable to blend with yet: restart from the current mean.
            rebuilt.append(current)
        else:
            rebuilt.append((1-weight) * rebuilt[-1] + weight * current)

    for it, reported in enumerate(global_successive_avg):
        print(f'Iteration {it!s}')
        print(f"Global successive avg: {reported['avg'][0]!s}")
        print(f'Average from legs: {rebuilt[it]!s}')
        print('\n')

In [10]:
# Weight 0.5 for the current iteration -- presumably the value configured for the simulation run; confirm.
check_global_successive_avg(global_successive_avg, drt_legs, weight=0.5)

[nan, 70.625, 70.55, 70.71428571428571, 71.77777777777777, 77.08333333333333]
Iteration 0
Global successive avg: nan
Average from legs: nan


Iteration 1
Global successive avg: 70.625
Average from legs: 70.625


Iteration 2
Global successive avg: 70.5875
Average from legs: 70.5875


Iteration 3
Global successive avg: 70.65089285714285
Average from legs: 70.65089285714285


Iteration 4
Global successive avg: 71.21433531746031
Average from legs: 71.21433531746031


Iteration 5
Global successive avg: 74.14883432539682
Average from legs: 74.14883432539682




In [11]:
def check_zonal_avg(zonal_avg, drt_legs, link2zones, iter_to_check, time_bins = 30):
    """Report every (zone, hour) cell where the zonal CSV disagrees with the legs.

    The legs of one iteration are binned into 3600-second intervals by
    departure time, mapped to zones through `link2zones`, and averaged per
    (zone, bin). Each cell is then compared with the zonal table.

    Args:
        zonal_avg: list of DataFrames, one per iteration, with a 'zone'
            column and one column per bin named '0', '1', ...
        drt_legs: list of DataFrames, one per iteration, with
            'departureTime', 'fromLinkId' and 'waitTime' columns.
        link2zones: dict mapping link id to zone.
        iter_to_check: index of the iteration to compare.
        time_bins: number of 3600-second bins.

    Returns:
        None. One line is printed per mismatching cell; a cell mismatches
        when exactly one side is NaN or when both values are present and
        differ.
    """
    reported = zonal_avg[iter_to_check].copy().set_index('zone')
    legs = drt_legs[iter_to_check].copy()

    bin_edges = [3600 * k for k in range(time_bins + 1)]
    # Each leg is labelled with the left edge (in seconds) of its interval.
    legs['time_bin'] = pd.cut(legs.departureTime, bin_edges).map(lambda interval: interval.left)
    legs['zone'] = legs['fromLinkId'].map(link2zones)

    mean_from_legs = legs.groupby(['zone','time_bin']).waitTime.mean()
    for zone in legs['zone'].unique():
        for hour in range(time_bins):
            leg_value = mean_from_legs.loc[zone, hour*3600]
            leg_missing = np.isnan(leg_value)
            csv_value = reported.loc[zone][str(hour)]
            csv_missing = np.isnan(csv_value)

            if leg_missing or csv_missing:
                # Only a mismatch when one side has a value and the other does not.
                differs = not (leg_missing and csv_missing)
            else:
                differs = leg_value != csv_value
            if differs:
                print("There's a different value in zone", zone, "at time", hour)
    # Note from the original author: some random zones were checked by hand
    # and their average times matched.

In [12]:
def create_df_from_legs(it_drt_legs, link2zones, time_bins = 30):
    """Build a zone x hour table of mean wait times from a legs table.

    Legs are binned into 3600-second intervals by departure time, mapped to
    zones through `link2zones`, and averaged per (zone, bin). Every value is
    then multiplied by 1000 and truncated with int(); NaN cells stay NaN.
    (Presumably this makes the comparison insensitive to differences beyond
    the third decimal -- confirm.)

    Args:
        it_drt_legs: DataFrame with 'departureTime', 'fromLinkId' and
            'waitTime' columns. It is not modified.
        link2zones: dict mapping link id to zone.
        time_bins: number of 3600-second bins; the result has the columns
            0..time_bins-1.

    Returns:
        DataFrame indexed by 'zone' (sorted) with one column per bin.
    """
    hour_edges = [hour * 3600 for hour in range(time_bins + 1)]
    # assign() works on a copy, so the caller's frame does not silently gain
    # 'time_bin' and 'zone' columns.
    legs = it_drt_legs.assign(
        time_bin=pd.cut(it_drt_legs.departureTime, hour_edges).map(lambda x: int(x.left/3600)),
        zone=it_drt_legs['fromLinkId'].map(link2zones),
    )
    # observed=False keeps bins without any leg (as NaN), which the column
    # relabelling below relies on; it is passed explicitly because pandas has
    # deprecated the implicit default.
    grouped = legs.groupby(['zone','time_bin'], observed=False).waitTime.mean().reset_index()
    df = pd.pivot(grouped, index='zone', columns='time_bin').sort_index()
    df.columns = list(range(time_bins)) if not df.empty else []
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and is
    # deprecated since then; use whichever this pandas version provides.
    elementwise = df.map if hasattr(pd.DataFrame, 'map') else df.applymap
    return elementwise(lambda x: int(x*1000) if not np.isnan(x) else np.nan)


In [13]:
def check_zonal_average(zonal_avg, drt_legs, link2zones, time_bins = 30):
    """Assert that each iteration's zonal table equals the one rebuilt from legs.

    Iteration 0 is skipped. For every later iteration the reported table is
    indexed by zone, sorted, stripped of all-NaN rows, scaled by 1000 and
    truncated with int(), and relabelled to integer columns before it is
    compared with the output of `create_df_from_legs`.

    Args:
        zonal_avg: list of DataFrames, one per iteration, with a 'zone'
            column and one column per bin.
        drt_legs: list of DataFrames with the legs of each iteration.
        link2zones: dict mapping link id to zone.
        time_bins: number of bins.

    Raises:
        AssertionError: from pandas when the two tables differ.
    """
    def scale_and_truncate(value):
        return int(value*1000) if not np.isnan(value) else np.nan

    for it in range(1, len(zonal_avg)):
        reported = zonal_avg[it].copy().set_index('zone').sort_index()
        # Zones without a single value are not expected in the rebuilt table.
        has_values = ~reported.isnull().all(axis=1)
        reported = reported[has_values].applymap(scale_and_truncate)
        reported.columns = list(range(time_bins)) if not reported.empty else []

        rebuilt = create_df_from_legs(drt_legs[it].copy(), link2zones, time_bins = time_bins)
        pd.testing.assert_frame_equal(reported, rebuilt)
        print('Iteration', it, reported.equals(rebuilt))
    

In [14]:
# Raises an AssertionError on the first iteration whose zonal table does not match the legs.
check_zonal_average(zonal_avg, drt_legs, link2zones, time_bins = 30)

Iteration 1 True
Iteration 2 True
Iteration 3 True
Iteration 4 True
Iteration 5 True


In [15]:
def check_zonal_moving_average(zonal_moving_avg, drt_legs, link2zones, window, time_bins = 30):
    """Assert that each zonal moving-average table equals one rebuilt from legs.

    Iteration 0 is skipped. For every later iteration `it` the legs of the
    last `window` iterations (fewer at the start of the run) are pooled and
    turned into a zone x bin table with `create_df_from_legs`. The reported
    table is indexed by zone, sorted, stripped of all-NaN rows, scaled by
    1000 and truncated with int(), and relabelled to integer columns first.

    Args:
        zonal_moving_avg: list of DataFrames, one per iteration, with a
            'zone' column and one column per bin.
        drt_legs: list of DataFrames with the legs of each iteration.
        link2zones: dict mapping link id to zone.
        window: number of iterations in the moving window.
        time_bins: number of bins.

    Raises:
        AssertionError: from pandas when the two tables differ.
    """
    n_iters = len(zonal_moving_avg)
    for it in range(1,n_iters):
        it_zonal_moving_avg = zonal_moving_avg[it].copy()
        it_zonal_moving_avg = it_zonal_moving_avg.set_index('zone').sort_index()
        it_zonal_moving_avg = it_zonal_moving_avg[~it_zonal_moving_avg.isnull().all(axis=1)].applymap(lambda x: int(x*1000) if not np.isnan(x) else np.nan)
        it_zonal_moving_avg.columns = [i for i in range(time_bins)] if not it_zonal_moving_avg.empty else []

        # The window covers iterations it-window+1 .. it, clamped at 0. The
        # previous `it > window` test pooled window+1 iterations when
        # it == window, unlike check_global_moving_window_avg.
        start_window = max(0, it - window + 1)
        it_drt_legs_list = [drt_legs[j].copy() for j in range(start_window,it+1)]
        it_drt_legs = pd.concat(it_drt_legs_list)

        df_from_legs = create_df_from_legs(it_drt_legs, link2zones, time_bins = time_bins)
        pd.testing.assert_frame_equal(it_zonal_moving_avg,df_from_legs)
        print('Iteration', it, it_zonal_moving_avg.equals(df_from_legs))
    

In [16]:
# Window of 2 iterations, the same value used for the global moving-average check.
check_zonal_moving_average(zonal_moving_avg, drt_legs, link2zones, window=2, time_bins=30)

Iteration 1 True
Iteration 2 True
Iteration 3 True
Iteration 4 True
Iteration 5 True


In [17]:
# Zones present in the table rebuilt from the legs of iteration 1.
list(create_df_from_legs(drt_legs[1].copy(), link2zones, time_bins = 30).index.values)

[106823,
 106824,
 107268,
 109043,
 110372,
 110820,
 111255,
 111700,
 111703,
 112143,
 112593,
 113481,
 113482,
 113934,
 114367,
 114812,
 119256]

In [18]:
def combine_df_successive_avg(previous_succ_avg, it_df_from_legs, weight, time_bins=30):
    """Blend the previous successive-average table with the current iteration.

    Only zones present in `it_df_from_legs` appear in the result. For each of
    them and each bin:
      * if the zone is missing from `previous_succ_avg`, or its previous
        value is NaN, the current value is taken as is;
      * otherwise the result is
        ``(1 - weight) * previous + weight * current``, which is NaN when
        the current value is NaN.

    Args:
        previous_succ_avg: DataFrame indexed by zone with the columns
            0..time_bins-1 (the previous iteration's result).
        it_df_from_legs: DataFrame indexed by zone with the columns
            0..time_bins-1 (the current iteration's values).
        weight: weight given to the current iteration.
        time_bins: number of bin columns.

    Returns:
        DataFrame indexed by 'zone' with the columns 0..time_bins-1.
    """
    zones_dict = {}
    for zone in list(it_df_from_legs.index.values):
        zone_known = zone in previous_succ_avg.index
        zone_bins = []
        for i in range(time_bins):
            current = it_df_from_legs.at[zone, i]
            previous = previous_succ_avg.at[zone, i] if zone_known else np.nan
            if np.isnan(previous):
                # No usable history for this cell: start from the current value.
                zone_bins.append(current)
            else:
                zone_bins.append((1-weight) * previous + weight * current)
        zones_dict[zone] = zone_bins

    # Use time_bins for the column labels; they were hard-coded to 30 before,
    # which made every other value of time_bins fail.
    new_df = pd.DataFrame.from_dict(data=zones_dict, orient='index', columns=list(range(time_bins)))
    new_df.index.name = 'zone'
    return new_df


def compute_successive_averages(drt_legs, weight, link2zones, time_bins=30):
    """Rebuild the zonal successive-average table of every iteration from legs.

    Entry 0 is an empty table (no rows, columns 0..time_bins-1, index named
    'zone'). Each later entry combines the previous entry with the table
    built from that iteration's legs.

    Args:
        drt_legs: list of DataFrames with the legs of each iteration.
        weight: weight given to the current iteration when blending.
        link2zones: dict mapping link id to zone.
        time_bins: number of bins.

    Returns:
        List of DataFrames, one per entry of `drt_legs` (at least one).
    """
    # Take the iteration count from the argument; it was previously read from
    # the notebook-global `zonal_moving_avg`.
    n_iters = len(drt_legs)
    successive_avg = [
        pd.DataFrame(columns=['zone'] + [i for i in range(time_bins)]).set_index(['zone'])
    ]

    for it in range(1,n_iters):
        it_drt_legs = drt_legs[it].copy()
        it_df_from_legs = create_df_from_legs(it_drt_legs, link2zones, time_bins = time_bins)
        succ_avg_it = combine_df_successive_avg(successive_avg[it-1], it_df_from_legs, weight=weight, time_bins=time_bins)
        successive_avg.append(succ_avg_it)

    return successive_avg

In [19]:
# Rebuild the zonal successive averages of all loaded iterations with weight 0.5.
successive_avg = compute_successive_averages(drt_legs, weight=0.5, link2zones=link2zones, time_bins=30)

In [20]:
# Show the rebuilt successive-average table of iteration 1.
successive_avg[1]

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
106823,,,,,,,,,,,...,,,,,,,,,,
106824,,,,,,,,,61000.0,,...,,,,,,,,,,
107268,,,,,,,,,61000.0,,...,,,,,,,,,,
109043,,,,,,,,,61000.0,,...,,,,,,,,,,
110372,,,,,,,,,,,...,,,,,,,,,,
110820,,,,,,,,,,,...,,,,,,,,,,
111255,,,,,,,,89000.0,,,...,,,,,,,,,,
111700,,,,,,,,,,,...,,,,,,,,,,
111703,,,,,,,61000.0,,,,...,,,,,,,,,,
112143,,,,,,,,,,,...,,,,,,,,,,


In [21]:
def check_zonal_successive_average(zonal_successive_avg, drt_legs, link2zones, weight, time_bins = 30):
    """Assert that each zonal successive-average table equals the rebuilt one.

    The expected tables come from `compute_successive_averages`. Iteration 0
    is skipped. Every later reported table is indexed by zone, sorted,
    stripped of all-NaN rows, scaled by 1000 and truncated with int(), and
    relabelled to integer columns before the comparison.

    Args:
        zonal_successive_avg: list of DataFrames, one per iteration, with a
            'zone' column and one column per bin.
        drt_legs: list of DataFrames with the legs of each iteration.
        link2zones: dict mapping link id to zone.
        weight: weight given to the current iteration when blending.
        time_bins: number of bins.

    Raises:
        AssertionError: from pandas when the two tables differ.
    """
    expected_tables = compute_successive_averages(drt_legs, weight, link2zones, time_bins=time_bins)

    def scale_and_truncate(value):
        return int(value*1000) if not np.isnan(value) else np.nan

    for it in range(1, len(zonal_successive_avg)):
        reported = zonal_successive_avg[it].copy().set_index('zone').sort_index()
        # Zones without a single value are not expected in the rebuilt table.
        has_values = ~reported.isnull().all(axis=1)
        reported = reported[has_values].applymap(scale_and_truncate)
        reported.columns = list(range(time_bins)) if not reported.empty else []

        expected = expected_tables[it]
        pd.testing.assert_frame_equal(reported, expected)
        print('Iteration', it, reported.equals(expected))

In [22]:
# Weight 0.5, the same value used for the global successive-average check.
check_zonal_successive_average(zonal_successive_avg, drt_legs, link2zones, weight=0.5, time_bins=30)

Iteration 1 True
Iteration 2 True
Iteration 3 True
Iteration 4 True
Iteration 5 True
