In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import random
from math import sqrt
from tqdm import tqdm
from copy import deepcopy

In [2]:
import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_colwidth', None)
pd.set_option('display.max_columns', None)

In [3]:
# Hour-of-day bounds for the calibration window (not referenced by the cells visible here).
start_hour, end_hour = 5, 10

In [4]:
# Traffic-count calibration inputs: one table per study, one with per-hour bins.
meta = pd.read_csv(
    'calibration_data/Traffic_Counts_by_Study_20240122.csv'
)
hourly_data = pd.read_csv(
    'calibration_data/Traffic_Count_Studies_by_Hour_Bins_20240122.csv'
)

# TAZ calibration



In [5]:
# Trip file that serves as the starting point for this calibration round.
demand_file_name = (
    'alaskanway_od2trips_psrc_vehicle_trips_converted_taz_calibrated_V3.rou.xml'
)

In [6]:
# Flatten every top-level <trip> element of the demand file into one row
# (id, departure time in seconds as float, origin TAZ, destination TAZ).
demand_xml = ET.parse(demand_file_name).getroot().findall('trip')
demand_df = pd.DataFrame([
    {
        'id': vehicle.get('id'),
        'depart': float(vehicle.get('depart')),
        'fromTaz': vehicle.get('fromTaz'),
        'toTaz': vehicle.get('toTaz'),
    }
    for vehicle in tqdm(demand_xml)
])

100%|██████████████████████████████████████████████████████████████████████| 205937/205937 [00:00<00:00, 604233.73it/s]


### ALWAYS RE-RUN FROM THIS CELL BEFORE MAKING A NEW DEMAND FILE (it reloads the unmodified demand tree).
 

In [19]:
# Reload a fresh, unmodified copy of the demand tree. The path comes from
# `demand_file_name` so the tree that gets edited is always the same file that
# `demand_df` was built from; if the two diverged, sampled trip ids would not be
# found in the tree and those trips would be skipped without any error.
demand_tree = ET.parse(demand_file_name)
demand_root = demand_tree.getroot()
len(demand_root.findall('trip'))

205937

In [20]:
# Short location label -> network edge id (presumably SUMO edge ids; this mapping
# is not referenced by the functions visible in this notebook section).
name_edge_dict = {
    'denny_e1': '332473706#0',
    'denny_e2': '428087113#0',
    'denny_e3': '436163947#1',
    'denny_w1': '-436163947#2',
    'denny_w2': '-428087113#1',
    'denny_w3': '-332473706#1',
    'yesler_e1': 'gneE50',
    'yesler_e2': 'gneE355',
    'yesler_w1': 'gneE354',
    'yesler_w2': 'gneE49',
    'boren_se1': '240455063#5',
    'boren_nw3': '-240455063#5',
    'boren_se2': '428231010#0',
    'boren_nw2': '-428231010#1',
    'boren_se3': '428230982#0',
    'boren_nw1': '-428230982#0',
    'pine_e1': '-37551507#1',
    'pine_e2': '428246441#1',
    'pine_w2': '37551507#0',
    'pine_w1': '-428246441#1',
    'pine_w3': '8111110#0.68',
    'stewart_w1': '371267131#2',
    'seven_n1': '52840083.93',
    'seven_s1': '367069126',
    'first_se1': '-351500650#4',
    'first_se2': '-428243129#2',
    'first_se3': '-610573957#1',
    'first_nw1': '610573957#1',
    'first_nw2': '428243129#0',
    'first_nw3': '351500650#4',
}

# TODO: consider taking an absolute sample count instead of a percentage.
def get_sample(tazID, originTrips:bool, hour, percentage, replacement=True, avoid_tazs=None):
    """Randomly draw trips touching one TAZ during one hour from ``demand_df``.

    Parameters
    ----------
    tazID : value compared against the ``fromTaz`` / ``toTaz`` column.
    originTrips : if True match trips that start in ``tazID``, otherwise trips
        that end there.
    hour : hour index; a trip qualifies when ``depart // 3600 == hour``.
    percentage : share of the qualifying trips to draw (truncated to an int count).
    replacement : whether the same trip may be drawn more than once.
    avoid_tazs : optional iterable of TAZ ids; trips whose *other* end is one of
        these are excluded before sampling.

    Returns
    -------
    DataFrame with the drawn rows and a fresh 0..n-1 index.
    """
    # Column holding the TAZ being matched, and the column for the opposite trip end.
    if originTrips:
        own_col, other_col = 'fromTaz', 'toTaz'
    else:
        own_col, other_col = 'toTaz', 'fromTaz'

    candidates = demand_df[demand_df[own_col] == tazID]

    if avoid_tazs is not None:
        for excluded_taz in avoid_tazs:
            candidates = candidates[candidates[other_col] != excluded_taz]

    candidates = candidates[candidates['depart']//3600 == hour]
    n_draws = int(percentage/100*len(candidates))
    return candidates.sample(n_draws, replace=replacement).reset_index(drop=True)


# Running batch number; each get_new_ids() call stamps its ids with the current
# value and then increments it.
counter=1
def get_new_ids(df):
    """Give every row of ``df`` a new id derived from its original one.

    The original id is kept in a new ``org_id`` column. The new id is
    ``<org_id>t<counter>:``; rows whose new id occurs more than once in ``df``
    (the same trip sampled repeatedly) additionally get ``1``, ``2``, ... appended
    in row order. ``df`` is modified in place and also returned, and the global
    ``counter`` is incremented once per call.

    Ids are assigned by row position, so the result is correct even when ``df``
    carries repeated index labels (as a frame sampled with replacement does
    before its index is reset). A label-based ``df.loc[label, 'id'] = ...``
    would overwrite every row sharing that label with the same id.
    """
    global counter
    df['org_id'] = df['id']
    df['id'] = df['id']+f't{counter}:'
    counter +=1

    repeated_flags = df.duplicated(subset=['id'], keep=False).to_numpy()
    occurrences = {}
    final_ids = []
    for tagged_id, is_repeated in zip(df['id'].tolist(), repeated_flags):
        if is_repeated:
            occurrences[tagged_id] = occurrences.get(tagged_id, 0) + 1
            tagged_id = f"{tagged_id}{occurrences[tagged_id]}"
        final_ids.append(tagged_id)
    df['id'] = final_ids
    return df


def make_trip(element_to_copy, synthesizedRow):
    """Return a deep copy of ``element_to_copy`` with new ``id`` and ``depart``.

    ``synthesizedRow`` supplies the values under its ``'id'`` and ``'depart'``
    keys; the departure is written as ``str(...)``. All other attributes and any
    children are copied unchanged, and the source element is not modified.
    """
    clone = deepcopy(element_to_copy)
    overrides = (
        ('id', synthesizedRow['id']),
        ('depart', str(synthesizedRow['depart'])),
    )
    for attribute, value in overrides:
        clone.set(attribute, value)
    return clone


def is_valid(trip_element):
    """Tell whether a trip element may be used as a template for a new trip.

    Returns False when ``trip_element`` is None or when its ``fromTaz`` or
    ``toTaz`` attribute is one of the excluded TAZ ids; True otherwise.
    """
    # The element must be tested with `is None`: an Element's truth value is not
    # a presence check.
    if trip_element is None:
        return False

    # Excluded TAZ ids (named "highways" in the original code). A longer list was
    # used previously:
    # '5000', '5001', '5002', '5003', '5004', '5005', '5008', '5009', '5010', '5011'
    excluded_tazs = ('5004',)

    endpoints = (trip_element.get('fromTaz'), trip_element.get('toTaz'))
    return not any(taz in excluded_tazs for taz in endpoints)



def taz_synthesis(taz_list, originTrips:bool, hour, percentage, avoid_tazs=None):
    """Add synthesized copies of existing trips to ``demand_root``.

    For each TAZ in ``taz_list`` a sample of trips is drawn with replacement via
    ``get_sample``. Each sampled trip's departure is shifted by a random offset,
    it is given a new id via ``get_new_ids``, and a copy of the original
    ``<trip>`` element carrying the new id and departure is inserted into
    ``demand_root``.

    Parameters
    ----------
    taz_list : iterable of TAZ ids to process.
    originTrips : True to duplicate trips starting in each TAZ, False for trips
        ending there.
    hour : hour index passed to ``get_sample``.
    percentage : share of matching trips to duplicate, passed to ``get_sample``.
    avoid_tazs : optional TAZ ids passed to ``get_sample`` for exclusion.

    Notes
    -----
    * A new trip is inserted directly before the first top-level ``<trip>`` whose
      departure is strictly greater than the new departure, or at the end of the
      root if there is none. The position is computed on the root's actual
      children, so it is correct whether or not non-trip elements precede the
      trips.
    * Source elements are looked up through an id index built once per call
      (first occurrence in document order). Only original ids are looked up, so
      elements added during the call do not need to be indexed.
    * Within one TAZ the new trips are processed in ascending departure order,
      so the scan for the insert position resumes where the previous one stopped
      instead of restarting from the beginning of the file.
    """
    global counter
    added_trips = 0

    trips_by_id = {}
    for existing in demand_root.iter('trip'):
        trips_by_id.setdefault(existing.get('id'), existing)

    for taz in taz_list:
        sample = get_sample(taz, originTrips, hour, percentage, replacement=True, avoid_tazs=avoid_tazs).copy()

        ### adjust time with a normal distribution (95% CI: -10 minutes to +10 minutes)
        time_offset = np.random.normal(loc=0.0, scale=5.1, size=len(sample))  # offsets in minutes
        sample['org_depart'] = sample['depart']
        sample['depart'] = round(sample['depart'] + time_offset*60, 1)

        ### making new IDs and sort based on depart time
        sample = get_new_ids(sample)
        sample.sort_values(by='depart', inplace=True)

        # adding the new demand to the tree
        cursor = 0  # child position in demand_root; only moves forward within one TAZ
        for idx, trip in sample.iterrows():
            element_to_copy = trips_by_id.get(trip['org_id'])
            if not is_valid(element_to_copy):  # unknown id or excluded TAZ
                continue

            new_trip_element = make_trip(element_to_copy, trip)
            new_depart = float(new_trip_element.get('depart'))

            # Advance to the first trip departing later than the new one.
            while cursor < len(demand_root):
                child = demand_root[cursor]
                if child.tag == 'trip' and float(child.get('depart')) > new_depart:
                    break
                cursor += 1

            demand_root.insert(cursor, new_trip_element)
            cursor += 1  # step past the element just inserted
            added_trips += 1
    print(f'{counter-1}){taz_list}, {hour}, {percentage}, avoiding:-{avoid_tazs}- done. Added {added_trips} trips.')



def reduction(edge_name, hour, percentage, avoid_names=None, originTrips=True):
    """Remove a random share of trips from ``demand_root``.

    Trips are selected with ``get_sample`` (without replacement) and the matching
    top-level ``<trip>`` elements are removed from the tree.

    Parameters
    ----------
    edge_name : identifier passed to ``get_sample`` as its ``tazID``; since
        ``get_sample`` filters on the ``fromTaz`` / ``toTaz`` columns this must be
        a TAZ id despite the parameter name.
    hour : hour index passed to ``get_sample``.
    percentage : share of the matching trips to remove.
    avoid_names : optional ids passed to ``get_sample`` as ``avoid_tazs``.
    originTrips : True (default) to select trips starting in the TAZ, False for
        trips ending there. Added because ``get_sample`` requires it; the previous
        call omitted it and used a non-existent ``avoid_names`` keyword, so it
        always raised ``TypeError``.
    """
    sample = get_sample(edge_name, originTrips, hour, percentage, replacement=False, avoid_tazs=avoid_names).copy()
    # reducing the sample from original demand file
    sample = sample.sort_values(by='depart')

    # Index the removable (top-level) trips once instead of searching per id.
    trips_by_id = {}
    for existing in demand_root.findall('trip'):
        trips_by_id.setdefault(existing.get('id'), existing)

    removed = 0
    for tripID in list(sample['id']):
        element_to_delete = trips_by_id.pop(tripID, None)
        if element_to_delete is not None:
            demand_root.remove(element_to_delete)
            removed += 1
    # Report what was actually removed, not how many rows were sampled.
    print(f"reduction {edge_name}, hour {hour}, {percentage}%, avoiding:{avoid_names} done. removed {removed} trips")


In [21]:
# Seed NumPy's global RNG so the synthesized demand is reproducible on re-run.
# It drives both the pandas sampling in get_sample() (no random_state is passed)
# and the departure offsets in taz_synthesis().
SYNTHESIS_SEED = 42
np.random.seed(SYNTHESIS_SEED)

northtazs = ['430', '431', '432', '433', '434', '435']
midwesttazs = ['450', '451', '452', '453', '454', '457', '458', '459', '460', '461', '467', '468', '469', '470']
east_southtazs = ['604', '605', '606', '607', '608', '609', '615']
chinatown = ['628', '629', '630', '634', '635']

# (TAZ group, hours to process). For every hour the group is processed first for
# origin trips and then for destination trips, each at 100 % (i.e. one extra copy
# of every matching trip on average). Call order matches the original cell.
synthesis_plan = [
    (northtazs, (8, 7, 9)),
    (midwesttazs, (8, 7, 9)),
    (east_southtazs, (8, 7, 9)),
    (chinatown, (8, 7, 9)),
    (['426', '427', '428', '429', '550', '553'], (8, 7)),
    (['547', '548', '549', '551', '552'], (8, 7)),
    (['443', '442','441', '440', '438', '436'], (8, 7, 9)),
]

for taz_group, synthesis_hours in synthesis_plan:
    for synthesis_hour in synthesis_hours:
        for as_origin in (True, False):
            taz_synthesis(taz_group, as_origin, synthesis_hour, 100)

6)['430', '431', '432', '433', '434', '435'], 8, 100, avoiding:-None- done. Added 2693 trips.
12)['430', '431', '432', '433', '434', '435'], 8, 100, avoiding:-None- done. Added 4969 trips.
18)['430', '431', '432', '433', '434', '435'], 7, 100, avoiding:-None- done. Added 1944 trips.
24)['430', '431', '432', '433', '434', '435'], 7, 100, avoiding:-None- done. Added 4449 trips.
30)['430', '431', '432', '433', '434', '435'], 9, 100, avoiding:-None- done. Added 2787 trips.
36)['430', '431', '432', '433', '434', '435'], 9, 100, avoiding:-None- done. Added 4202 trips.
50)['450', '451', '452', '453', '454', '457', '458', '459', '460', '461', '467', '468', '469', '470'], 8, 100, avoiding:-None- done. Added 998 trips.
64)['450', '451', '452', '453', '454', '457', '458', '459', '460', '461', '467', '468', '469', '470'], 8, 100, avoiding:-None- done. Added 1187 trips.
78)['450', '451', '452', '453', '454', '457', '458', '459', '460', '461', '467', '468', '469', '470'], 7, 100, avoiding:-None- don

In [22]:
# Number of top-level <trip> elements now in the tree.
sum(1 for _trip in demand_root.iterfind('trip'))

255147

In [17]:
# Print id and departure of every top-level trip whose id contains the letter 'c',
# then display how many such trips there are.
marked_trips = [trip for trip in demand_root.findall('trip') if 'c' in trip.get('id')]
for trip in marked_trips:
    print(trip.get('id'), trip.get('depart'))
len(marked_trips)

2121c46: 17923.1
2083c46: 17931.2
2269c46: 18021.3
89c46: 18107.0
1758c13:2 18178.0
93c46: 18208.1
2310c46:2 18309.4
2122c46: 18322.1
2306c29:3 18344.9
2310c46:1 18346.7
1815c9:2 18350.8
2084c46: 18373.1
71c46: 18403.8
2084c29:2 18531.9
5122c46: 18566.8
1815c9:1 18606.5
2270c46: 18654.8
5182c29:2 18689.6
2264c46: 18700.0
2306c29:1 18720.4
2263c46: 18738.7
4984c46:2 18793.3
5185c4: 18796.6
4985c46:1 18826.1
5182c29:1 18856.9
2619c4:4 18866.4
1772c24:1 18867.5
5186c46: 18867.7
5124c46: 18939.3
2730c13:2 18941.9
1772c24:3 19010.5
2306c29:2 19037.1
5121c46: 19083.1
2616c46:2 19111.2
2306c29:4 19120.7
1772c24:2 19130.7
2084c29:1 19134.3
167c13: 19137.2
2619c4:1 19168.8
1758c13:1 19229.3
2730c13:1 19253.3
4984c46:1 19279.3
4983c46: 19293.3
4987c46:2 19297.8
2616c46:1 19302.5
4986c46:1 19315.7
2603c46:2 19320.7
2603c46:1 19340.1
4986c46:2 19378.1
5185c46: 19404.3
4862c9:1 19433.2
2670c29:1 19481.1
2534c13: 19507.2
8401c46:1 19541.4
4862c9:3 19542.6
5151c46: 19544.3
4985c46:2 19552.5
2619c4:2 

5280

In [23]:
# Save the modified tree as the next version of the demand file.
demand_tree.write(
    'alaskanway_od2trips_psrc_vehicle_trips_converted_taz_calibrated_V4.rou.xml',
    encoding='utf-8',
    xml_declaration=True,
)
