# Fremont Traffic Congestion Analysis (Navigation App Usage)

## Table of Contents
TODO: Clean up the notebook
- [Imports](#imports)
- [Global Functions](#global-functions)
- [Data Preprocessing](#data-preprocessing)
- [Section Plots](#section-plots)
- [Data Copying](#data-copying)
- [Percentage of Congested Links](#percentage-of-congested-links)
- [Percentage of Congested Links by Road Type](#percentage-of-congested-links-by-road-type)
- [Visualize the congested Area on the Map at Different SRC](#visualize-the-congested-area-on-the-map-at-different-src)
- [Macroscopic Fundamental Diagram](#macroscopic-fundamental-diagram)

# Imports

In [None]:
import sqlite3
import copy
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
import contextily as cx
from matplotlib_scalebar.scalebar import ScaleBar

# Global Functions

In [None]:
def plot_experiment(gdf_list, col_name):
    """
    Draw one curve per experiment on the current matplotlib figure.

    Keyword arguments:
    gdf_list --- a list of GeoDataFrame that contains the processed data
                 merged from sections.shp and sqlite output from aimsun;
                 one curve is drawn per element, in list order
    col_name --- the name of the column to be plotted
    """
    for experiment_frame in gdf_list:
        # Convert to a plain array so the curve is drawn against 0..n-1
        # rather than against the frame's own index.
        column_values = np.array(experiment_frame[col_name])
        plt.plot(column_values)

# Data Preprocessing

In [None]:
# GLOBAL VARIABLES/CONSTANTS

# SQLite file path template. The connection cell calls
# .format(number=i) on this string, so it needs a "{number}" placeholder
# if every experiment is stored in its own database file.
__SQLITE_PATH_FORMAT = "<PATH_TO_DATABASE>.sqlite"

# sections.shp file path
__SECTION_SHP = "<PATH_TO_SECTIONS_FILE>.shp"

# Number of experiments
__NUM_EXP = 11

# Output file directory (prepended as-is to every saved figure name)
__OUTPUT = "output/"

# SQL queries to be executed against the different tables
__SQL_EXTRACT_MISECT_QUERY = 'SELECT * FROM MISECT'
__SQL_EXTRACT_MILANE_QUERY = 'SELECT * FROM MILANE'

# Columns to keep from each table
__MISECT_COLUMNS = [
    'ent', 'eid', 'sid',
    'flow_capacity', 'speed', 'travel', 'traveltime',
    'density', 'flow', 'dtime',
]
__MILANE_COLUMNS = [
    'ent', 'eid', 'sid', 'lane',
    'flow', 'speed', 'density', 'input_flow',
]

# Clock-time label for each time step (one per 15 minutes, 14:15 to 20:00)
__TIME_REAL = [
    '14:15', '14:30', '14:45', '15:00',
    '15:15', '15:30', '15:45', '16:00',
    '16:15', '16:30', '16:45', '17:00',
    '17:15', '17:30', '17:45', '18:00',
    '18:15', '18:30', '18:45', '19:00',
    '19:15', '19:30', '19:45', '20:00',
]

# Thresholds for a section to be considered congested (0.1, 0.2, ..., 0.9)
rho = np.linspace(0.1, 0.9, 9)

In [None]:
# Open one SQLite connection per experiment.
# con[k] is the connection for experiment k; its path is
# __SQLITE_PATH_FORMAT formatted with number=k.
con = [
    sqlite3.connect(__SQLITE_PATH_FORMAT.format(number=exp_idx))
    for exp_idx in range(__NUM_EXP)
]

In [None]:
# Load the MISECT and MILANE tables of every experiment.
# df[k] / df_milane[k] hold the selected columns for experiment k.
df, df_milane = [], []
for exp_idx in range(__NUM_EXP):
    connection = con[exp_idx]

    # MISECT table, restricted to __MISECT_COLUMNS
    misect_raw = pd.read_sql(__SQL_EXTRACT_MISECT_QUERY, connection)
    df.append(pd.DataFrame(misect_raw, columns=__MISECT_COLUMNS))

    # MILANE table, restricted to __MILANE_COLUMNS
    milane_raw = pd.read_sql(__SQL_EXTRACT_MILANE_QUERY, connection)
    df_milane.append(pd.DataFrame(milane_raw, columns=__MILANE_COLUMNS))

In [None]:
# Read the sections.shp shapefile (path taken from __SECTION_SHP)
sections = gpd.read_file(__SECTION_SHP)
# Display the coordinate reference system attached to the loaded sections
sections.crs

In [None]:
# Keep untouched deep copies of the loaded data so later sections can
# restore from them instead of re-running the loading cells above.
df_copy, df_milane_copy, sections_copy = (
    copy.deepcopy(loaded) for loaded in (df, df_milane, sections)
)

In [None]:
sections.head()

# Section Plots

In [None]:
# Sections whose 'name' is exactly 'Paseo Padre Parkway'
is_ppp = sections['name'] == 'Paseo Padre Parkway'
sections_ppp = sections[is_ppp]
sections_ppp.head()

In [None]:
# Sections whose 'name' is exactly 'Mission Boulevard'
is_mission = sections['name'] == 'Mission Boulevard'
sections_mission = sections[is_mission]
sections_mission.head()

In [None]:
# Functional class 1 sections that are not named 'Mission Boulevard';
# the rest of this notebook treats this subset as I-680.
is_class_one = sections['func_class'] == 1
not_mission = sections['name'] != 'Mission Boulevard'
sections_i680 = sections[is_class_one & not_mission]
sections_i680.head()

In [None]:
# Overview map of every road section with a scale bar, a north arrow and a basemap
ax = sections.plot(figsize=(15, 15))

# Scale bar: placed relative to the whole plot, label/scale relative to the bar line
scale_bar = ScaleBar(dx=1, location='upper left', label_loc='left', scale_loc='bottom')
ax.add_artist(scale_bar)

# North arrow, positioned in axes-fraction coordinates
x, y, arrow_length = 0.05, 0.95, 0.07
north_arrow_style = {'facecolor': 'black', 'width': 5, 'headwidth': 15}
ax.annotate(
    'N',
    xy=(x, y),
    xytext=(x, y - arrow_length),
    arrowprops=north_arrow_style,
    ha='center',
    va='center',
    fontsize=20,
    xycoords=ax.transAxes,
)

# Hide the projected-coordinate tick labels
ax.xaxis.set_tick_params(labelbottom=False)
ax.yaxis.set_tick_params(labelleft=False)

cx.add_basemap(ax, crs='EPSG:32610', source=cx.providers.CartoDB.Voyager) #4326
plt.title('Road Sections')

In [None]:
# Map of the sections_i680 subset on its own
ax = sections_i680.plot(figsize=(15, 15))
# Add CartoDB Voyager basemap tiles; the data CRS is passed explicitly as EPSG:32610
cx.add_basemap(ax, crs='EPSG:32610', source=cx.providers.CartoDB.Voyager) #4326
plt.title('I-680')

In [None]:
# Map of the sections_mission subset on its own
ax = sections_mission.plot(figsize=(15, 15))
# Add CartoDB Voyager basemap tiles; the data CRS is passed explicitly as EPSG:32610
cx.add_basemap(ax, crs='EPSG:32610', source=cx.providers.CartoDB.Voyager) #4326
plt.title('Mission Boulevard')

# Data Copying

In [None]:
# Start this section from the untouched backups.
# The shapefile's 'speed' column is renamed to 'speed_limit' so it does not
# clash with the simulated 'speed' column when the two tables are merged.
sections = copy.deepcopy(sections_copy).rename(columns={'speed': 'speed_limit'})
df = copy.deepcopy(df_copy)

In [None]:
# Keep only rows with a non-negative average speed, then split each
# experiment by 'sid' (0 -> df_total, 1 -> df_local, 2 -> df_throu).
# Shapes are printed before and after so the effect of the filter is visible.
df_total, df_local, df_throu = [], [], []
print([df[k].shape for k in range(__NUM_EXP)])
for k in range(__NUM_EXP):
    valid_rows = df[k][df[k]['speed'] >= 0.0]
    df[k] = valid_rows
    for sid_value, bucket in ((0, df_total), (1, df_local), (2, df_throu)):
        bucket.append(copy.deepcopy(valid_rows[valid_rows['sid'] == sid_value]))
for frames in (df, df_total, df_local, df_throu):
    print([frames[k].shape for k in range(__NUM_EXP)])

In [None]:
# Preprocess dataframe to merge with sections

# Rows are grouped on (time step, section id) ...
group_cols = ['ent', 'eid']
# ... and these columns are replaced by their per-group mean.
metric_cols = ['flow_capacity']

# aggs[k]: per-group means for experiment k, indexed by group_cols
aggs = []
for k in range(__NUM_EXP):
    group_means = df_total[k].groupby(group_cols)[metric_cols].mean()
    aggs.append(group_means)

    # Remove the raw metric columns, then bring the means back in.
    # how='right' keeps one output row per matching input row for every
    # group present in the means (the de-duplication step is left disabled).
    without_metrics = df_total[k].drop(metric_cols, axis=1)
    df_total[k] = without_metrics.merge(
        right=group_means, right_index=True, left_on=group_cols, how='right'
    )

In [None]:
# Attach the shapefile attributes to every simulation row (left join on 'eid')
sections_m = [
    df_total[k].merge(sections, how='left', left_on='eid', right_on='eid')
    for k in range(__NUM_EXP)
]

In [None]:
# Wrap each merged table in a GeoDataFrame and replace missing
# flow_capacity values with 0
gdf = [
    gpd.GeoDataFrame(sections_m[k], geometry='geometry')
    for k in range(__NUM_EXP)
]
for merged_frame in gdf:
    merged_frame['flow_capacity'] = merged_frame['flow_capacity'].fillna(0)

In [None]:
# Group each GeoDataFrame on timestep and aggregate by mean
# Remove the first row as it is the average of the rest
#
# mean(numeric_only=True) is used instead of agg(np.mean): the merged frames
# also carry text and geometry columns from the shapefile, which cannot be
# averaged and make agg(np.mean) fail on recent pandas versions.
gdf_agg = []

for i in range(__NUM_EXP):
    per_timestep_mean = gdf[i].groupby('ent').mean(numeric_only=True)
    gdf_agg.append(per_timestep_mean.iloc[1:, :])

In [None]:
# Plot the mean flow-capacity ratio per time step, one curve per experiment,
# and save the figure to the output directory
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plot_experiment(gdf_agg, 'flow_capacity')

# Legend entries follow the experiment order: 0%, 10%, ..., 100% SRC
src_labels = (
    ["0% SRC(All SUE)"]
    + [f"{pct}% SRC" for pct in range(10, 100, 10)]
    + ["100% All SRC (No SUE)"]
)
plt.legend(src_labels)

# Label the x axis with clock times instead of step numbers
plt.xticks(range(len(__TIME_REAL)), __TIME_REAL, fontsize='15', rotation=45)
plt.yticks(fontsize='15')
plt.title('Mean Flow-Capacity Ratio')
plt.savefig(__OUTPUT + 'Mean Flow-Capacity Ratio.png')
plt.show()

In [None]:
# Extract the data of I-680 from GeoDataframe, aggregate(mean) them, and plot them
# similar to the above process
# (I-680 here = functional class 1 rows not named "Mission Boulevard")
gdf_i680 = []
gdf_i680_agg = []

for i in range(__NUM_EXP):
    gdf_i680.append(gdf[i][(gdf[i]['func_class'] == 1) & (gdf[i]['name'] != "Mission Boulevard")])

# mean(numeric_only=True) skips the text/geometry columns, which cannot be
# averaged and make agg(np.mean) fail on recent pandas versions.
# The first row is removed as it is the average of the rest.
for i in range(__NUM_EXP):
    gdf_i680_agg.append(gdf_i680[i].groupby('ent').mean(numeric_only=True).iloc[1:, :])

In [None]:
# Plot the mean flow-capacity ratio per time step on I-680,
# one curve per experiment
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plot_experiment(gdf_i680_agg, 'flow_capacity')

# Legend entries follow the experiment order: 0%, 10%, ..., 100% SRC
src_labels = (
    ["0% SRC(All SUE)"]
    + [f"{pct}% SRC" for pct in range(10, 100, 10)]
    + ["100% All SRC (No SUE)"]
)
plt.legend(src_labels)
plt.title('I-680 Volume-to-Capacity Ratio')

# Label the x axis with clock times instead of step numbers
plt.xticks(range(len(__TIME_REAL)), __TIME_REAL, fontsize='15', rotation=45)
plt.yticks(fontsize='15')
plt.show()

In [None]:
# Extract the data of Mission Boulevard from GeoDataframe, aggregate(mean) them,
# and plot them similar to the above process
gdf_mission = []
gdf_mission_agg = []

for i in range(__NUM_EXP):
    gdf_mission.append(gdf[i][gdf[i]['name'] == "Mission Boulevard"])

# mean(numeric_only=True) skips the text/geometry columns, which cannot be
# averaged and make agg(np.mean) fail on recent pandas versions.
# The first row is removed as it is the average of the rest.
for i in range(__NUM_EXP):
    gdf_mission_agg.append(gdf_mission[i].groupby('ent').mean(numeric_only=True).iloc[1:, :])

In [None]:
# Plot the mean flow-capacity ratio per time step on Mission Boulevard,
# one curve per experiment
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plot_experiment(gdf_mission_agg, 'flow_capacity')

# Legend entries follow the experiment order: 0%, 10%, ..., 100% SRC
src_labels = (
    ["0% SRC(All SUE)"]
    + [f"{pct}% SRC" for pct in range(10, 100, 10)]
    + ["100% All SRC (No SUE)"]
)
plt.legend(src_labels)
plt.title('Mission Boulevard Volume-to-Capacity Ratio')

# Label the x axis with clock times instead of step numbers
plt.xticks(range(len(__TIME_REAL)), __TIME_REAL, fontsize='15', rotation=45)
plt.yticks(fontsize='15')
plt.show()

In [None]:
# Extract the data of Paseo Padre Parkway from GeoDataframe, aggregate(mean) them, and plot them
# similar to the above process
gdf_ppp = []
gdf_ppp_agg = []

for i in range(__NUM_EXP):
    gdf_ppp.append(gdf[i][gdf[i]['name'] == "Paseo Padre Parkway"])

# mean(numeric_only=True) skips the text/geometry columns, which cannot be
# averaged and make agg(np.mean) fail on recent pandas versions.
# The first row is removed as it is the average of the rest.
for i in range(__NUM_EXP):
    gdf_ppp_agg.append(gdf_ppp[i].groupby('ent').mean(numeric_only=True).iloc[1:, :])

In [None]:
# Plot the aggregated(mean) flow-capacity ratio at each timestep for each experiment on Paseo Padre Parkway
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
# Plot the Paseo Padre Parkway aggregates (this cell previously re-plotted the
# Mission Boulevard data under the Paseo Padre Parkway title).
plot_experiment(gdf_ppp_agg, 'flow_capacity')
# Legend entries follow the experiment order: 0%, 10%, ..., 100% SRC
plt.legend(["0% SRC(All SUE)",
                     "10% SRC",
                     "20% SRC",
                     "30% SRC",
                     "40% SRC",
                     "50% SRC",
                     "60% SRC",
                     "70% SRC",
                     "80% SRC",
                     "90% SRC",
                     "100% All SRC (No SUE)"])
# Label the x axis with clock times instead of step numbers
default_x_ticks = range(len(__TIME_REAL))
plt.xticks(default_x_ticks, __TIME_REAL, fontsize='15', rotation=45)
plt.yticks(fontsize='15')
plt.title('Paseo Padre Parkway Volume-to-Capacity Ratio')
plt.show()

# Percentage of Congested Links

In [None]:
# Start this section from the untouched backups.
# The shapefile's 'speed' column is renamed to 'speed_limit' so it does not
# clash with the simulated 'speed' column when the two tables are merged.
sections = copy.deepcopy(sections_copy).rename(columns={'speed': 'speed_limit'})
df = copy.deepcopy(df_copy)

In [None]:
total_length = sections['geometry'].length.sum()
total_length

In [None]:
# Keep only rows with a non-negative average speed, then split each
# experiment by 'sid' (0 -> df_total, 1 -> df_local, 2 -> df_throu).
# Shapes are printed before and after so the effect of the filter is visible.
df_total, df_local, df_throu = [], [], []
print([df[k].shape for k in range(__NUM_EXP)])
for k in range(__NUM_EXP):
    valid_rows = df[k][df[k]['speed'] >= 0.0]
    df[k] = valid_rows
    for sid_value, bucket in ((0, df_total), (1, df_local), (2, df_throu)):
        bucket.append(copy.deepcopy(valid_rows[valid_rows['sid'] == sid_value]))
for frames in (df, df_total, df_local, df_throu):
    print([frames[k].shape for k in range(__NUM_EXP)])

In [None]:
# Preprocess dataframe to merge with sections

# Rows are grouped on (time step, section id) ...
group_cols = ['ent', 'eid']
# ... and these columns are replaced by their per-group mean.
metric_cols = ['speed']

# aggs[k]: per-group means for experiment k, indexed by group_cols
aggs = []
for k in range(__NUM_EXP):
    group_means = df_total[k].groupby(group_cols)[metric_cols].mean()
    aggs.append(group_means)

    # Remove the raw metric columns, then bring the means back in.
    # how='right' keeps one output row per matching input row for every
    # group present in the means (no de-duplication for congestion).
    without_metrics = df_total[k].drop(metric_cols, axis=1)
    df_total[k] = without_metrics.merge(
        right=group_means, right_index=True, left_on=group_cols, how='right'
    )

In [None]:
# Attach the shapefile attributes to every simulation row (left join on 'eid')
sections_cong = [
    df_total[k].merge(sections, how='left', left_on='eid', right_on='eid')
    for k in range(__NUM_EXP)
]

In [None]:
# Keep only the columns the congestion analysis needs
congestion_cols = ['ent', 'eid', 'speed', 'speed_limit', 'geometry']
for k in range(__NUM_EXP):
    sections_cong[k] = sections_cong[k].loc[:, congestion_cols]

In [None]:
# Convert the merged sections into GeoDataFrame and drop null values
gdf = []

for i in range(__NUM_EXP):
    frame = gpd.GeoDataFrame(sections_cong[i], geometry='geometry')
    # Drop rows without a speed. Assigning `frame['speed'].dropna()` back to
    # the column re-aligns on the index and restores the NaNs, so the rows
    # have to be removed from the frame itself.
    frame = frame.dropna(subset=['speed']).copy()
    # Section length, in the units of the geometry's coordinate system
    frame['length'] = frame['geometry'].length
    gdf.append(frame)

In [None]:
# For every threshold add a 0/1 column 'congested at rho = <threshold>':
# 1 when speed / speed_limit is below the threshold, else 0
# (a ratio that is NaN compares False and is therefore flagged 0).
rho = np.linspace(0.1, 0.9, 9)
for threshold in rho:
    flag_col = 'congested at rho = ' + str(round(threshold, 1))
    for k in range(__NUM_EXP):
        ratio = gdf[k]['speed'] / gdf[k]['speed_limit']
        gdf[k][flag_col] = (ratio < threshold).astype(int).tolist()

In [None]:
# next steps: multiply the congested at rho column 
# with section length to create a congestion weight column
# that can be summed to get our desired result

In [None]:
# For every threshold add 'weight at rho = <threshold>':
# the section length when the section is flagged congested, else 0
for threshold in rho:
    tag = str(round(threshold, 1))
    for k in range(__NUM_EXP):
        frame = gdf[k]
        frame['weight at rho = ' + tag] = frame['length'] * frame['congested at rho = ' + tag]

In [None]:
# Group each GeoDataFrame on timestep and aggregate by sum
# Remove the first row as it is the average of the rest
#
# sum(numeric_only=True) is used instead of agg(np.sum): the frames contain a
# geometry column that cannot be summed, and passing NumPy callables to agg
# is deprecated in recent pandas versions.
gdf_agg = []

for i in range(__NUM_EXP):
    per_timestep_sum = gdf[i].groupby('ent').sum(numeric_only=True)
    gdf_agg.append(per_timestep_sum.iloc[1:, :])

In [None]:
# Congestion ratio per time step = congested length / total network length
for threshold in rho:
    tag = str(round(threshold, 1))
    for k in range(__NUM_EXP):
        congested_length = gdf_agg[k]['weight at rho = ' + tag]
        gdf_agg[k]['congestion ratio at rho = ' + tag] = congested_length / total_length

In [None]:
len(gdf_agg)

In [None]:
# For every threshold, plot the length-weighted congestion ratio per time
# step (one curve per experiment) and save the figure under its title
src_labels = (
    ["0% SRC(All SUE)"]
    + [f"{pct}% SRC" for pct in range(10, 100, 10)]
    + ["100% All SRC (No SUE)"]
)
for threshold in rho:
    tag = str(round(threshold, 1))

    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5)
    plot_experiment(gdf_agg, 'congestion ratio at rho = ' + tag)
    plt.legend(src_labels)

    title = f'Weighted Congestion Ratio over Time Step (Threshold = {tag})'
    plt.title(title)

    # Label the x axis with clock times instead of step numbers
    plt.xticks(range(len(__TIME_REAL)), __TIME_REAL, fontsize='15', rotation=45)
    plt.yticks(fontsize='15')
    plt.savefig(__OUTPUT + f'{title}.png')
    plt.show()

In [None]:
def plot_congested_links(gdf, sections, rho):
    """
    Plot, save and show one map of congested sections per experiment.

    A section is drawn when it is flagged congested in at least one row
    (any 'ent') of that experiment's frame. Neither input is modified, so
    no defensive copies are taken.

    Keyword arguments:
    gdf      --- list of per-experiment GeoDataFrames holding 'ent', 'eid' and
                 a 'congested at rho = <rho>' column (1 = congested)
    sections --- GeoDataFrame of the road sections with an 'eid' column
    rho      --- threshold whose flag column is read; also written into the
                 output file name
    """
    flag_col = 'congested at rho = {number}'.format(number=rho)

    for i in range(__NUM_EXP):  # Experiment with smaller range
        # Rows flagged congested at this threshold
        flagged = gdf[i][gdf[i][flag_col] == 1]
        flagged = flagged[['ent', 'eid', flag_col]]

        # Map the congested sections to the sections.shp
        congested_sections = sections[sections['eid'].isin(flagged['eid'])]

        ax = congested_sections.plot(figsize=(15, 15), color='red')
        print(congested_sections.crs)
        # Fixed extent so the maps of all experiments are directly comparable
        plt.xlim([591900, 597200])
        plt.ylim([4.148 * 1e6, 4.158 * 1e6])

        scale_bar = ScaleBar(
            dx=1,
            location='upper left',  # in relation to the whole plot
            label_loc='left', scale_loc='bottom'  # in relation to the line
        )
        ax.add_artist(scale_bar)

        # North arrow, positioned in axes-fraction coordinates
        x, y, arrow_length = 0.05, 0.95, 0.07
        ax.annotate('N', xy=(x, y), xytext=(x, y-arrow_length),
                    arrowprops=dict(facecolor='black', width=5, headwidth=15),
                    ha='center', va='center', fontsize=20,
                    xycoords=ax.transAxes)
        ax.xaxis.set_tick_params(labelbottom=False)
        ax.yaxis.set_tick_params(labelleft=False)

        # Experiment i corresponds to an SRC share of i * 10 percent
        plt.savefig(__OUTPUT + f'Congested_Links_Visualized_SRC-{i * 10}%_Rho-{rho}.png', transparent=True)
        plt.show()

In [None]:
# need the gdf from the above section
# congested_gdf is a snapshot of the per-experiment frames with their
# 'congested at rho = ...' flag columns.
congested_gdf = copy.deepcopy(gdf)
congested = copy.deepcopy(congested_gdf)
sections_congested = []

# Filter out the congested sections (threshold fixed at 0.7 here).
# NOTE(review): `congested` and `sections_congested` are not read by the
# plotting loop at the end of this cell, which only receives congested_gdf
# and sections; they look like leftovers from interactive exploration.
for i in range(__NUM_EXP):
    congested[i] = congested[i][congested[i]['congested at rho = {number}'.format(number=0.7)] == 1]
    congested[i] = congested[i][['ent', 'eid', 'congested at rho = {number}'.format(number=0.7)]]

# Map the congested sections to the sections.shp
for i in range(__NUM_EXP):
    sections_congested.append(sections[sections['eid'].isin(congested[i]['eid'])])

# Draw and save the congestion maps of every experiment at every threshold
for threshold in rho:
    plot_congested_links(congested_gdf, sections, round(threshold, 1))

## Percentage of Congested Links by Road Type

In [None]:
# Start this section from the untouched backups.
# The shapefile's 'speed' column is renamed to 'speed_limit' so it does not
# clash with the simulated 'speed' column when the two tables are merged.
sections = copy.deepcopy(sections_copy).rename(columns={'speed': 'speed_limit'})
df = copy.deepcopy(df_copy)

In [None]:
# Keep only rows with a non-negative average speed, then split each
# experiment by 'sid' (0 -> df_total, 1 -> df_local, 2 -> df_throu).
# Shapes are printed before and after so the effect of the filter is visible.
df_total, df_local, df_throu = [], [], []
print([df[k].shape for k in range(__NUM_EXP)])
for k in range(__NUM_EXP):
    valid_rows = df[k][df[k]['speed'] >= 0.0]
    df[k] = valid_rows
    for sid_value, bucket in ((0, df_total), (1, df_local), (2, df_throu)):
        bucket.append(copy.deepcopy(valid_rows[valid_rows['sid'] == sid_value]))
for frames in (df, df_total, df_local, df_throu):
    print([frames[k].shape for k in range(__NUM_EXP)])

In [None]:
total_length = sections['geometry'].length.sum()
total_length

In [None]:
total_num_sections = len(sections['eid'].unique())
total_num_sections

In [None]:
# sections_by_road_type[k] holds the sections with func_class == k + 1 (classes 1..5)
sections_by_road_type = [
    sections[sections['func_class'] == road_class]
    for road_class in range(1, 6)
]

In [None]:
def get_congested_gdf_agg(sections_gdf):
    """
    Build per-time-step congestion aggregates for every experiment.

    Reads the notebook globals rho, total_num_sections, total_length and
    __NUM_EXP.

    Keyword arguments:
    sections_gdf --- list of per-experiment merged tables with the columns
                     'ent', 'speed', 'speed_limit' and 'geometry'

    Returns a list with one DataFrame per experiment, indexed by 'ent' (first
    group removed). For every threshold in rho it contains the summed
    'congested at rho = <t>' (congested count / total_num_sections),
    'weight at rho = <t>' (congested length) and
    'congestion ratio at rho = <t>' (congested length / total_length).
    """
    # Convert the merged sections into GeoDataFrame and drop null values
    gdf = []
    for i in range(__NUM_EXP):
        frame = gpd.GeoDataFrame(sections_gdf[i], geometry='geometry')
        # Assigning `frame['speed'].dropna()` back to the column re-aligns on
        # the index and restores the NaNs, so drop the rows themselves.
        frame = frame.dropna(subset=['speed']).copy()
        frame['length'] = frame['geometry'].length
        gdf.append(frame)

    for threshold in rho:
        tag = str(round(threshold, 1))
        for frame in gdf:
            speed_ratio = frame['speed'] / frame['speed_limit']
            # Flag (1 = congested) pre-divided by the number of sections, so
            # that summing over a time step yields the congested share.
            flags = np.array([int(r < threshold) for r in speed_ratio])
            frame['congested at rho = ' + tag] = list(flags / total_num_sections)
            # Undo the normalisation to recover the congested length.
            frame['weight at rho = ' + tag] = (
                frame['length'] * frame['congested at rho = ' + tag] * total_num_sections
            )

    # Group each GeoDataFrame on timestep and aggregate by sum
    # Remove the first row as it is the average of the rest
    # numeric_only=True skips the geometry column, which cannot be summed.
    gdf_agg = [
        frame.groupby('ent').sum(numeric_only=True).iloc[1:, :]
        for frame in gdf
    ]

    for threshold in rho:
        tag = str(round(threshold, 1))
        for agg in gdf_agg:
            agg['congestion ratio at rho = ' + tag] = agg['weight at rho = ' + tag] / total_length

    # The aggregates are freshly built here, so they can be returned directly.
    return gdf_agg

In [None]:
# Preprocess dataframe to merge with sections

# Rows are grouped on (time step, section id) ...
group_cols = ['ent', 'eid']
# ... and these columns are replaced by their per-group mean.
metric_cols = ['speed']

# aggs[k]: per-group means for experiment k, indexed by group_cols
aggs = []
for k in range(__NUM_EXP):
    group_means = df_total[k].groupby(group_cols)[metric_cols].mean()
    aggs.append(group_means)

    # Remove the raw metric columns, then bring the means back in.
    # how='right' keeps one output row per matching input row for every
    # group present in the means (no de-duplication for congestion).
    without_metrics = df_total[k].drop(metric_cols, axis=1)
    df_total[k] = without_metrics.merge(
        right=group_means, right_index=True, left_on=group_cols, how='right'
    )

In [None]:
def plot_congestion_percentage_by_road_type(road_type, isWeighted=True, savefig=True):
    """
    Plot the congestion ratio over time for one or several road types.

    One figure is produced per threshold in the global rho, with one curve
    per experiment. Reads the notebook globals sections, df_total, rho,
    __NUM_EXP, __TIME_REAL and __OUTPUT.

    Keyword arguments:
    road_type  --- a single func_class value (int) or a list/tuple/set of them
    isWeighted --- True plots 'congestion ratio at rho = <t>' (length
                   weighted); False plots the summed 'congested at rho = <t>'
                   column
    savefig    --- when True, each figure is saved to __OUTPUT under its title

    Raises TypeError when road_type is neither an int nor a list/tuple/set.
    """
    if isinstance(road_type, (list, tuple, set)):
        sections_by_type = sections[sections['func_class'].isin(road_type)]
    elif isinstance(road_type, int):
        sections_by_type = sections[sections['func_class'] == road_type]
    else:
        raise TypeError("Invalid road type (function class)!")

    # Merge datasets: sections and dataframe. With a left join, rows of other
    # road types keep their simulation values but get no shapefile attributes.
    sections_cong = []
    for i in range(__NUM_EXP):
        sections_cong.append(pd.merge(df_total[i], sections_by_type, how='left', left_on='eid', right_on='eid'))

    gdf_agg = get_congested_gdf_agg(sections_cong)

    # Column prefix and title prefix depend only on the weighting mode
    if isWeighted:
        column_prefix, title_prefix = 'congestion ratio at rho = ', 'Weighted'
    else:
        column_prefix, title_prefix = 'congested at rho = ', 'Unweighted'

    # Plot the congestion ratio at each timestep for each experiment
    for threshold in rho:
        tag = str(round(threshold, 1))
        fig = plt.gcf()
        fig.set_size_inches(18.5, 10.5)
        plot_experiment(gdf_agg, column_prefix + tag)
        title = '{kind} Congestion Ratio over Time Step (Threshold = {threshold}) for Road Type {rt}'.format(
            kind=title_prefix, threshold=tag, rt=road_type)

        # Legend entries follow the experiment order: 0%, 10%, ..., 100% SRC
        plt.legend(["0% SRC(All SUE)",
                             "10% SRC",
                             "20% SRC",
                             "30% SRC",
                             "40% SRC",
                             "50% SRC",
                             "60% SRC",
                             "70% SRC",
                             "80% SRC",
                             "90% SRC",
                             "100% All SRC (No SUE)"])

        plt.title(title)
        # Label the x axis with clock times instead of step numbers
        default_x_ticks = range(len(__TIME_REAL))
        plt.xticks(default_x_ticks, __TIME_REAL, fontsize='15', rotation=45)
        plt.yticks(fontsize='15')
        if savefig:
            plt.savefig(__OUTPUT + f'{title}.png')
        plt.show()

### Road Type Plots

In [None]:
plot_congestion_percentage_by_road_type(1)

In [None]:
plot_congestion_percentage_by_road_type(2)

In [None]:
plot_congestion_percentage_by_road_type(3)

In [None]:
plot_congestion_percentage_by_road_type(4)

In [None]:
plot_congestion_percentage_by_road_type(5)

In [None]:
plot_congestion_percentage_by_road_type([1, 2, 3])

In [None]:
plot_congestion_percentage_by_road_type([1, 2, 3, 5])

In [None]:
plot_congestion_percentage_by_road_type(1, isWeighted=False)

In [None]:
plot_congestion_percentage_by_road_type(2, isWeighted=False)

In [None]:
plot_congestion_percentage_by_road_type(3, isWeighted=False)

In [None]:
plot_congestion_percentage_by_road_type(5, isWeighted=False)

In [None]:
plot_congestion_percentage_by_road_type([1, 2, 3], isWeighted=False)

In [None]:
plot_congestion_percentage_by_road_type([1, 2, 3, 5], isWeighted=False)

In [None]:
def plot_roads_by_type(func_class, savefig=True):
    """
    Map the road sections belonging to the given functional class(es).

    Keyword arguments:
    func_class --- a single func_class value (int) or a list of them
    savefig    --- when True, the map is saved to __OUTPUT under its title
    """
    # A single class is handled as a one-element list
    classes = [func_class] if isinstance(func_class, int) else func_class
    selected_sections = sections[sections['func_class'].isin(classes)]
    selected_sections.plot(figsize=(15, 15))

    # Fixed extent so maps of different classes are directly comparable
    plt.xlim([591900, 597200])
    plt.ylim([4.148 * 1e6, 4.158 * 1e6])

    title = f'Road Sections of Type {classes}'
    plt.title(title)
    if savefig:
        plt.savefig(__OUTPUT + f'{title}.png', transparent=True)
    plt.show()
    

In [None]:
plot_roads_by_type([1])

In [None]:
plot_roads_by_type([2, 3])

In [None]:
plot_roads_by_type([2])

In [None]:
plot_roads_by_type([3])

In [None]:
plot_roads_by_type([4])

In [None]:
plot_roads_by_type([5])

## Plotting for different timesteps for an experiment

In [None]:
def plot_congestion_experiment(gdf, sections, rho):
    """
    Save one titled map of congested sections per experiment.

    A section is drawn when it is flagged congested in at least one row
    (any 'ent') of that experiment's frame. Figures are saved and closed
    without being shown. Neither input is modified, so no defensive copies
    are taken.

    Keyword arguments:
    gdf      --- list of per-experiment GeoDataFrames holding 'ent', 'eid' and
                 a 'congested at rho = <rho>' column (1 = congested)
    sections --- GeoDataFrame of the road sections with an 'eid' column
    rho      --- threshold whose flag column is read; also written into the
                 title and the output file name
    """
    flag_col = 'congested at rho = {number}'.format(number=rho)

    for i in range(__NUM_EXP):  # Experiment with smaller range
        # Rows flagged congested at this threshold
        flagged = gdf[i][gdf[i][flag_col] == 1]
        flagged = flagged[['ent', 'eid', flag_col]]

        # Map the congested sections to the sections.shp
        congested_sections = sections[sections['eid'].isin(flagged['eid'])]

        congested_sections.plot(figsize=(15, 15), color='red')
        # Fixed extent so the maps of all experiments are directly comparable
        plt.xlim([591900, 597200])
        plt.ylim([4.148 * 1e6, 4.158 * 1e6])
        # Experiment i corresponds to an SRC share of i * 10 percent
        plt.title('Congestion Points when Threshold = {number} where SRC = {percent}%'.format(number=rho, percent=i*10))
        plt.savefig(__OUTPUT + f'Congestion_Visualized_SRC-{i * 10}%_Rho-{rho}.png')
        plt.close()

In [None]:
# need the gdf from the above section
congested_gdf = copy.deepcopy(gdf)

In [None]:
for threshold in rho:
    plot_congestion_experiment(congested_gdf, sections, round(threshold, 1))

In [None]:
def plot_congestion_experiment_step(gdf, sections, rho, ent, savefig=True):
    """
    Draw one map of the sections congested at a single time step, per experiment.

    Figures are closed without being shown. Neither input is modified, so no
    defensive copies are taken (this function is called once per time step
    and threshold, which made the former deep copies expensive).

    Keyword arguments:
    gdf      --- list of per-experiment GeoDataFrames holding 'ent', 'eid' and
                 a 'congested at rho = <rho>' column (1 = congested)
    sections --- GeoDataFrame of the road sections with an 'eid' column
    rho      --- threshold whose flag column is read; also written into the
                 output file name
    ent      --- value of the 'ent' column to select; also used as the index
                 into __TIME_REAL for the file name
    savefig  --- when True, each map is saved to __OUTPUT

    NOTE(review): other cells of this notebook drop the first 'ent' group as
    "the average of the rest". If that group is ent == 0, then ent == 0 is not
    a time step and __TIME_REAL[ent] is shifted by one interval - confirm
    against the simulation output.
    """
    flag_col = 'congested at rho = {number}'.format(number=rho)

    for i in range(__NUM_EXP):  # Experiment with smaller range
        # Rows flagged congested at this threshold and time step
        frame = gdf[i]
        flagged = frame[(frame[flag_col] == 1) & (frame['ent'] == ent)]
        flagged = flagged[['ent', 'eid', flag_col]]

        # Map the congested sections to the sections.shp
        congested_sections = sections[sections['eid'].isin(flagged['eid'])]

        ax = congested_sections.plot(figsize=(15, 15), color='red')
        # Fixed extent so the maps of all experiments are directly comparable
        plt.xlim([591900, 597200])
        plt.ylim([4.148 * 1e6, 4.158 * 1e6])

        scale_bar = ScaleBar(
            dx=1,
            location='upper left',  # in relation to the whole plot
            label_loc='left', scale_loc='bottom'  # in relation to the line
        )
        ax.add_artist(scale_bar)

        # North arrow, positioned in axes-fraction coordinates
        x, y, arrow_length = 0.05, 0.95, 0.07
        ax.annotate('N', xy=(x, y), xytext=(x, y-arrow_length),
                    arrowprops=dict(facecolor='black', width=5, headwidth=15),
                    ha='center', va='center', fontsize=20,
                    xycoords=ax.transAxes)
        ax.xaxis.set_tick_params(labelbottom=False)
        ax.yaxis.set_tick_params(labelleft=False)

        # The title is only used as the file name; it is not drawn on the map
        title = 'Congestion Points when Threshold = {number} where SRC = {percent}% at {time}'.format(number=rho, percent=i*10, time=__TIME_REAL[ent])

        if savefig:
            plt.savefig(__OUTPUT + f'{title}.png', transparent=True)

        plt.close()

In [None]:
# need the gdf from the above section
congested_gdf = copy.deepcopy(gdf)

In [None]:
# One map per (time step, threshold, experiment).
# The step count is taken from __TIME_REAL instead of a hard-coded 24, because
# plot_congestion_experiment_step indexes __TIME_REAL with `ent`.
# NOTE(review): other cells drop the first 'ent' group as "the average of the
# rest"; if that group is ent == 0, this range should start at 1 and the time
# lookup in plot_congestion_experiment_step should be shifted - confirm.
for ent in range(len(__TIME_REAL)):
    for threshold in rho:
        plot_congestion_experiment_step(congested_gdf, sections, round(threshold, 1), ent)

## Comparing Vehicle Kilometers Traveled (VKT) and Delay Time under Different SRC User Percentages

In [None]:
# Start this section from the untouched backups.
# The shapefile's 'speed' column is renamed to 'speed_limit' so it does not
# clash with the simulated 'speed' column when the two tables are merged.
sections = copy.deepcopy(sections_copy).rename(columns={'speed': 'speed_limit'})
df = copy.deepcopy(df_copy)

In [None]:
# Keep only rows with a non-negative average speed, then split each
# experiment by 'sid' (0 -> df_total, 1 -> df_local, 2 -> df_throu).
# Shapes are printed before and after so the effect of the filter is visible.
df_total, df_local, df_throu = [], [], []
print([df[k].shape for k in range(__NUM_EXP)])
for k in range(__NUM_EXP):
    valid_rows = df[k][df[k]['speed'] >= 0.0]
    df[k] = valid_rows
    for sid_value, bucket in ((0, df_total), (1, df_local), (2, df_throu)):
        bucket.append(copy.deepcopy(valid_rows[valid_rows['sid'] == sid_value]))
for frames in (df, df_total, df_local, df_throu):
    print([frames[k].shape for k in range(__NUM_EXP)])

In [None]:
# Per-section totals: sum every column over all rows sharing an 'eid',
# separately for the total / local / through splits.
# NOTE(review): no 'ent' filter is applied here, while other cells drop the
# first 'ent' group as "the average of the rest" - confirm that including it
# does not double count.
vkt_df_total = copy.deepcopy(df_total)
vkt_df_local = copy.deepcopy(df_local)
vkt_df_throu = copy.deepcopy(df_throu)
for per_class_frames in (vkt_df_total, vkt_df_local, vkt_df_throu):
    for k in range(__NUM_EXP):
        per_class_frames[k] = per_class_frames[k].groupby('eid').agg('sum')

In [None]:
# Per-experiment summaries of the per-section totals:
#   vkt_*   - sum of 'travel' over all sections
#   delay_* - mean of 'dtime' over all sections
#   ttime_* - mean of 'traveltime' over all sections
vkt_total = [np.sum(vkt_df_total[k]['travel']) for k in range(__NUM_EXP)]
vkt_local = [np.sum(vkt_df_local[k]['travel']) for k in range(__NUM_EXP)]
vkt_throu = [np.sum(vkt_df_throu[k]['travel']) for k in range(__NUM_EXP)]

delay_total = [np.mean(vkt_df_total[k]['dtime']) for k in range(__NUM_EXP)]
delay_local = [np.mean(vkt_df_local[k]['dtime']) for k in range(__NUM_EXP)]
delay_throu = [np.mean(vkt_df_throu[k]['dtime']) for k in range(__NUM_EXP)]

ttime_total = [np.mean(vkt_df_total[k]['traveltime']) for k in range(__NUM_EXP)]
ttime_local = [np.mean(vkt_df_local[k]['traveltime']) for k in range(__NUM_EXP)]
ttime_throu = [np.mean(vkt_df_throu[k]['traveltime']) for k in range(__NUM_EXP)]

for vkt_values in (vkt_total, vkt_local, vkt_throu):
    print(vkt_values)

In [None]:
def plot_vkt(vkt, vkt_type='Total', savefig=True):
    """
    Plot the vehicle kilometers traveled (VKT) of every experiment.

    Keyword arguments:
    vkt --- a sequence with one VKT value per experiment, ordered by
            increasing SRC percentage
    vkt_type --- the traffic group named in the plot title and file name
    savefig --- whether to save the figure as a PNG under __OUTPUT
    """
    labels = ["0% SRC(All SUE)",
              "10% SRC",
              "20% SRC",
              "30% SRC",
              "40% SRC",
              "50% SRC",
              "60% SRC",
              "70% SRC",
              "80% SRC",
              "90% SRC",
              "100% All SRC (No SUE)"]

    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5)

    # One x position per value instead of a hard-coded 11, so a different
    # number of experiments does not fail with a length mismatch.
    x = range(len(vkt))
    plt.plot(x, vkt)
    plt.ylabel('Kilometers')
    # Label only the positions that have both a data point and a label.
    plt.xticks(x[:len(labels)], labels[:len(x)], rotation='vertical')
    plt.margins(0.2)
    plt.subplots_adjust(bottom=0.15)

    title = f'{vkt_type} VKT Under Different SRC'
    plt.title(title)
    if savefig:
        plt.savefig(__OUTPUT + f'{title}.png')
    plt.show()

In [None]:
# VKT curve built from the vkt_total list
plot_vkt(vkt_total, vkt_type='Total')

In [None]:
# VKT curve built from the vkt_local list, titled 'Resident'
plot_vkt(vkt_local, vkt_type='Resident')

In [None]:
# VKT curve built from the vkt_throu list, titled 'Traveler'
plot_vkt(vkt_throu, vkt_type='Traveler')

In [None]:
def plot_delay(delay, delay_type='Total', savefig=True):
    """
    Plot the mean delay of every experiment on a fixed 0-150 y-axis.

    Keyword arguments:
    delay --- a sequence with one mean delay value per experiment, ordered
              by increasing SRC percentage
    delay_type --- the traffic group named in the plot title and file name
    savefig --- whether to save the figure as a PNG under __OUTPUT
    """
    labels = ["0% SRC(All SUE)",
              "10% SRC",
              "20% SRC",
              "30% SRC",
              "40% SRC",
              "50% SRC",
              "60% SRC",
              "70% SRC",
              "80% SRC",
              "90% SRC",
              "100% All SRC (No SUE)"]

    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5)

    # One x position per value instead of a hard-coded 11, so a different
    # number of experiments does not fail with a length mismatch.
    x = range(len(delay))
    plt.plot(x, delay)
    plt.ylabel('seconds/vehicle')
    # Label only the positions that have both a data point and a label.
    plt.xticks(x[:len(labels)], labels[:len(x)], rotation='vertical')
    plt.ylim([0, 150])
    plt.margins(0.2)
    plt.subplots_adjust(bottom=0.15)

    title = f'{delay_type} Mean Delay Under Different SRC'
    plt.title(title)
    if savefig:
        plt.savefig(__OUTPUT + f'{title}.png')
    plt.show()

In [None]:
# Mean delay curve built from the delay_total list
plot_delay(delay_total, delay_type='Total')

In [None]:
# Mean delay curve built from the delay_local list, titled 'Resident'
plot_delay(delay_local, delay_type='Resident')

In [None]:
# Mean delay curve built from the delay_throu list, titled 'Traveler'
plot_delay(delay_throu, delay_type='Traveler')

In [None]:
def plot_travel_time(ttime, delay_type='Total', savefig=True):
    """
    Plot the mean travel time of every experiment.

    Keyword arguments:
    ttime --- a sequence with one mean travel time value per experiment,
              ordered by increasing SRC percentage
    delay_type --- the traffic group named in the plot title and file name
                   (the parameter name is kept for existing callers)
    savefig --- whether to save the figure as a PNG under __OUTPUT
    """
    labels = ["0% SRC(All SUE)",
              "10% SRC",
              "20% SRC",
              "30% SRC",
              "40% SRC",
              "50% SRC",
              "60% SRC",
              "70% SRC",
              "80% SRC",
              "90% SRC",
              "100% All SRC (No SUE)"]

    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5)

    # One x position per value instead of a hard-coded 11, so a different
    # number of experiments does not fail with a length mismatch.
    x = range(len(ttime))
    # plt.yscale("log")
    plt.plot(x, ttime)
    plt.ylabel('seconds/vehicle')
    # Label only the positions that have both a data point and a label.
    plt.xticks(x[:len(labels)], labels[:len(x)], rotation='vertical')
    # plt.ylim([5000, 40000])
    plt.margins(0.2)
    plt.subplots_adjust(bottom=0.15)

    title = f'{delay_type} Mean Travel Time Under Different SRC'
    plt.title(title)
    if savefig:
        plt.savefig(__OUTPUT + f'{title}.png')
    plt.show()

In [None]:
# Mean travel time curve built from the ttime_total list
plot_travel_time(ttime_total, delay_type='Total')

In [None]:
# Mean travel time curve built from the ttime_local list, titled 'Resident'
plot_travel_time(ttime_local, delay_type='Resident')

In [None]:
# Mean travel time curve built from the ttime_throu list, titled 'Traveler'
plot_travel_time(ttime_throu, delay_type='Traveler')

# Visualize the Congested Area on the Map at Different SRC

In [None]:
# need the gdf from the above section
# Work on a deep copy so that later cells cannot modify the original gdf
congested_gdf = copy.deepcopy(gdf)
# Bare expression: the notebook displays the shape of the first experiment
congested_gdf[0].shape

In [None]:
def plot_congestion_experiment(gdf, sections, rho):
    """
    Plot, for every experiment, the sections flagged as congested at rho.

    Keyword arguments:
    gdf --- a list of per-experiment frames that contain an 'eid' column
            and a 'congested at rho = <rho>' column
    sections --- the GeoDataFrame of road sections, with an 'eid' column
    rho --- the threshold value that is part of the congestion column name
    """
    flag_col = 'congested at rho = {number}'.format(number=rho)

    # Select the sections whose 'eid' is flagged (== 1) in each experiment.
    # Boolean indexing and isin() build new frames, so the inputs are never
    # modified and no deep copy of gdf or sections is required.
    sections_congested = []
    for i in range(__NUM_EXP):
        congested_eids = gdf[i].loc[gdf[i][flag_col] == 1, 'eid']
        sections_congested.append(sections[sections['eid'].isin(congested_eids)])

    # Plot
    for i in range(__NUM_EXP): # Experiment with smaller range
        ax = sections_congested[i].plot(figsize=(15, 15))
        print(sections_congested[i].crs)
        # cx.add_basemap(ax, crs='EPSG:32610', source=cx.providers.CartoDB.Voyager) #4326
        # plt.xlim([591900, 597200])
        # plt.ylim([4.148 * 1e6, 4.158 * 1e6])
        # The SRC percentage in the title is derived as experiment index * 10
        plt.title('Congestion Points when rho = {number} where SRC = {percent}%'.format(number=rho, percent=i*10))
        # plt.savefig(__OUTPUT + f'Congestion_Visualized_SRC-{i * 10}%_Rho-{rho}.png')
        plt.show()

In [None]:
# Draw the congestion maps of all experiments once for every value in rho
for rho_value in rho:
    plot_congestion_experiment(gdf=congested_gdf, sections=sections, rho=rho_value)

# Macroscopic Fundamental Diagram

In [None]:
# Reset the working data from the pristine copies so that modifications made
# in earlier cells do not carry over into the MFD analysis.
df_milane = copy.deepcopy(df_milane_copy)
df = copy.deepcopy(df_copy)
sections = copy.deepcopy(sections_copy).rename(columns={'speed': 'speed_limit'})

In [None]:
# Drop the rows with a missing (negative) average speed from both tables; the
# lane table additionally keeps only rows with a density of at least 0.5.
# Every experiment is then split into independent copies by its 'sid' value.
# NOTE(review): sid 0 / 1 / 2 presumably stand for all / resident / through
# traffic, judging by the list names -- confirm against the simulation output.
df_total, df_local, df_throu = [], [], []
df_milane_total, df_milane_local, df_milane_throu = [], [], []
sid_targets = (
    (0, df_total, df_milane_total),
    (1, df_local, df_milane_local),
    (2, df_throu, df_milane_throu),
)

print([df[i].shape for i in range(__NUM_EXP)])  # shapes before filtering
for i in range(__NUM_EXP):
    section_ok = df[i]['speed'] >= 0.0
    df[i] = df[i][section_ok]

    lane_ok = (df_milane[i]['speed'] >= 0.0) & (df_milane[i]['density'] >= 0.5)
    df_milane[i] = df_milane[i][lane_ok]

    for sid_value, section_list, lane_list in sid_targets:
        section_list.append(copy.deepcopy(df[i][df[i]['sid'] == sid_value]))
        lane_list.append(copy.deepcopy(df_milane[i][df_milane[i]['sid'] == sid_value]))

# Shapes after filtering: section tables first, then lane tables
for frames in (df, df_total, df_local, df_throu,
               df_milane, df_milane_total, df_milane_local, df_milane_throu):
    print([frames[i].shape for i in range(__NUM_EXP)])

In [None]:
def plot_MFD(avg=False, savefig=True):
    """
    Scatter density against input flow for every experiment with a cubic fit.

    The data is read from the global df_milane_total list.

    Keyword arguments:
    avg --- whether to use the "avg" axis labels and title and to limit the
            x-axis to [0, 100]
    savefig --- whether to save each figure as a PNG under __OUTPUT
    """
    if avg:
        x_label = 'avg density (veh/km per lane)'
        y_label = 'avg flow (veh/h per lane)'
        title_format = 'Avg MFD for Experiment {}'
    else:
        x_label = 'density (veh/km per lane)'
        y_label = 'flow (veh/h per lane)'
        title_format = 'MFD for Experiment {}'

    for exp_id in range(__NUM_EXP):
        lanes = df_milane_total[exp_id]
        densities = lanes['density'].tolist()
        flows = lanes['input_flow'].tolist()

        # Evaluate a degree-3 polynomial fit from 0 to the largest density
        grid = np.linspace(0, max(densities), 1000)
        fit = np.poly1d(np.polyfit(densities, flows, 3))

        plt.scatter(densities, flows, alpha=0.2)
        plt.plot(grid, fit(grid), '-', color='orange')
        plt.ylim(bottom=0)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        if avg:
            plt.xlim([0, 100])

        title = title_format.format(exp_id)
        plt.title(title)
        if savefig:
            plt.savefig(__OUTPUT + f'{title}.png')
        plt.show()

In [None]:
# MFD of the lane-level rows as they are, before any averaging
plot_MFD(avg=False)

In [None]:
# Collapse the lane-level rows of every experiment to one row per section
# ('eid'): the metric columns become their mean over the rows with positive
# flow, every other column keeps the value of the section's last row.
group_cols = ['eid']
# identify the columns which we want to average
metric_cols = ['density', 'input_flow', 'flow']

aggs_milane = []
for i in range(__NUM_EXP):
    # Only rows with a positive flow take part in the averages
    with_flow = df_milane_total[i][df_milane_total[i]['flow'] > 0.0]

    # Mean of each metric column, indexed by group_cols
    means = with_flow.groupby(group_cols)[metric_cols].mean()
    aggs_milane.append(means)

    # Remove the raw metrics and dedupe so one row per section remains. New
    # frames are built instead of using inplace=True, which mutated a
    # boolean-filtered frame and can raise SettingWithCopyWarning.
    one_per_section = (
        with_flow
        .drop(columns=metric_cols)
        .drop_duplicates(subset=group_cols, keep='last')  # dedupe for plotting aggregated data
    )

    # add the mean columns back onto the deduplicated rows
    df_milane_total[i] = one_per_section.merge(
        right=means, right_index=True, left_on=group_cols, how='right')

In [None]:
# MFD of the per-section averages (w/ dedupe)
plot_MFD(avg=True, savefig=True)

# Generate Base Map

In [None]:
# Save a basemap-only picture of the study area. The sections are drawn with
# color='None', presumably so that nothing but the tiles is visible.
sections = copy.deepcopy(sections_copy)
ax = sections.plot(color='None', figsize=(15, 15))
cx.add_basemap(ax, source=cx.providers.CartoDB.Voyager, crs='EPSG:32610') #4326

# Hide the tick marks and fix the plotted window
ax.set_xticks([])
ax.set_yticks([])
plt.xlim([591900, 597200])
plt.ylim([4.148 * 1e6, 4.158 * 1e6])

plt.savefig(__OUTPUT + 'basemap.png')
plt.show()

# Draw Sections without Recorded Data

In [None]:
# Restore the sections file in case of modification
sections = copy.deepcopy(sections_copy)
sections = sections.rename(columns={'speed': 'speed_limit'})
df = copy.deepcopy(df_copy)

In [None]:
# Keep only the rows whose speed is negative, i.e. the opposite of the
# "speed >= 0.0" filter applied in the analyses above.
for i in range(__NUM_EXP):
    no_speed = df[i]['speed'] < 0.0
    df[i] = df[i].loc[no_speed]

In [None]:
# Merge datasets: sections and dataframe
# pd.merge() with a plain DataFrame on the left returns a plain DataFrame,
# whose .plot() draws line charts instead of a map. Every merge result is
# therefore wrapped in a GeoDataFrame that reuses the geometry column and the
# CRS of sections.
sections_null = []

for i in range(__NUM_EXP):
    merged = pd.merge(df[i], sections, how='left', left_on='eid', right_on='eid')
    sections_null.append(
        gpd.GeoDataFrame(merged, geometry=sections.geometry.name, crs=sections.crs)
    )

# Map of the rows of the first experiment (index 0)
ax = sections_null[0].plot(figsize=(15, 15))
cx.add_basemap(ax, crs='EPSG:32610', source=cx.providers.CartoDB.Voyager) #4326
plt.title('Road Sections')