# Fix pathing

In [1]:
import sys


sys.path.append("../..")


In [2]:
import constants

import os


constants.PROJECT_DIRECTORY_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(constants.PROJECT_DIRECTORY_PATH))))


# Imports

In [3]:
import json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import IPython.display


# Constants

In [4]:
FOLDER_NAMES = [
    "ex_3_least_GA", "ex_3_least_MA", "ex_3_least_NSGA2", "ex_3_least_MemeticNSGA2",
    "ex_3_avg_GA", "ex_3_avg_MA", "ex_3_avg_NSGA2", "ex_3_avg_MemeticNSGA2",
    "ex_3_most_GA", "ex_3_most_MA", "ex_3_most_NSGA2", "ex_3_most_MemeticNSGA2",
]
FILE_INFO = {
    "ex_3_least_GA": ("GA", "Lowest Activity", "green"),
    "ex_3_least_MA": ("MA", "Lowest Activity", "green"),
    "ex_3_least_NSGA2": ("NSGA-II", "Lowest Activity", "green"),
    "ex_3_least_MemeticNSGA2": ("Memetic NSGA-II", "Lowest Activity", "green"),

    "ex_3_avg_GA": ("GA", "Average Activity", "yellow"),
    "ex_3_avg_MA": ("MA", "Average Activity", "yellow"),
    "ex_3_avg_NSGA2": ("NSGA-II", "Average Activity", "yellow"),
    "ex_3_avg_MemeticNSGA2": ("Memetic NSGA-II", "Average Activity", "yellow"),

    "ex_3_most_GA": ("GA", "Highest Activity", "red"),
    "ex_3_most_MA": ("MA", "Highest Activity", "red"),
    "ex_3_most_NSGA2": ("NSGA-II", "Highest Activity", "red"),
    "ex_3_most_MemeticNSGA2": ("Memetic NSGA-II", "Highest Activity", "red"),
}

FOLDER_PATH = os.path.join(os.path.dirname(constants.PROJECT_DIRECTORY_PATH), "Simulator", "data")

SEEDS = list(range(10))


# Methods

In [5]:
def load_json():
    dataset = {}

    for folder_name in FOLDER_NAMES:
        for seed in SEEDS:
            heuristic, day, color = FILE_INFO[folder_name]

            with open(os.path.join(FOLDER_PATH, folder_name, f"heuristic_seed={seed}.json"), 'r') as file:
                dataset[(seed, heuristic, day, color)] = json.load(file)

    return dataset


In [6]:
def compute_metrics(dataset):
    dataframe = pd.DataFrame()

    dataframe["day"] = "None"
    dataframe["heuristic"] = "None"
    dataframe["seed"] = 0
    dataframe["generation"] = 0
    dataframe["individual"] = 0
    dataframe["diversity"] = 0
    dataframe["fitness"] = np.nan
    dataframe["front_number"] = np.nan
    dataframe["crowding_distance"] = np.nan
    dataframe["percentage_violations"] = 0
    dataframe["percentage_violations_urban"] = 0
    dataframe["percentage_violations_rural"] = 0
    dataframe["avg_response_time_urban_a"] = 0
    dataframe["avg_response_time_urban_h"] = 0
    dataframe["avg_response_time_urban_v1"] = 0
    dataframe["avg_response_time_rural_a"] = 0
    dataframe["avg_response_time_rural_h"] = 0
    dataframe["avg_response_time_rural_v1"] = 0

    for (seed, heuristic, day, _), data in dataset.items():
        generations = list(range(len(data['diversity'])))

        for generation in generations:
            individuals = list(range(len(data['percentage_violations'][generation])))

            diversity = data['diversity'][generation][0]

            for individual in individuals:
                objectives = []
                objective_keys = [
                    "fitness",
                    "front_number",
                    "crowding_distance",
                    "percentage_violations",
                    "percentage_violations_urban",
                    "percentage_violations_rural",
                    "avg_response_time_urban_a",
                    "avg_response_time_urban_h",
                    "avg_response_time_urban_v1",
                    "avg_response_time_rural_a",
                    "avg_response_time_rural_h",
                    "avg_response_time_rural_v1",
                ]

                for objective_key in objective_keys:
                    if objective_key in data:
                        objective = data[objective_key][generation][individual]
                    else:
                        objective = np.nan
                    
                    objectives.append(objective)

                # add row
                dataframe.loc[len(dataframe.index)] = [
                    day,
                    heuristic,
                    seed,
                    generation,
                    individual,
                    diversity,
                    *objectives
                ]
    
    dataframe.drop_duplicates(
        subset=[
            "day",
            "heuristic",
            "seed",
            "generation",
            "diversity",
            "fitness",
            "front_number",
            "percentage_violations",
            "percentage_violations_urban",
            "percentage_violations_rural",
            "avg_response_time_urban_a",
            "avg_response_time_urban_h",
            "avg_response_time_urban_v1",
            "avg_response_time_rural_a",
            "avg_response_time_rural_h",
            "avg_response_time_rural_v1",
        ],
        inplace=True
    )
    
    return dataframe


# Main

In [7]:
dataset = load_json()

for key, value in dataset.items():
    if key != (0, 'GA', 'Highest Activity', 'red') and key != (0, 'NSGA-II', 'Highest Activity', 'red'):
        continue

    print(key)
    for key, value in value.items():
        print(key)


(0, 'GA', 'Highest Activity', 'red')
avg_response_time_rural_a
avg_response_time_rural_h
avg_response_time_rural_v1
avg_response_time_urban_a
avg_response_time_urban_h
avg_response_time_urban_v1
diversity
fitness
percentage_violations
percentage_violations_rural
percentage_violations_urban
(0, 'NSGA-II', 'Highest Activity', 'red')
avg_response_time_rural_a
avg_response_time_rural_h
avg_response_time_rural_v1
avg_response_time_urban_a
avg_response_time_urban_h
avg_response_time_urban_v1
crowding_distance
diversity
front_number
percentage_violations
percentage_violations_rural
percentage_violations_urban


In [8]:
dataframe = compute_metrics(dataset)

dataframe.info()


<class 'pandas.core.frame.DataFrame'>
Index: 92970 entries, 0 to 208738
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   day                          92970 non-null  object 
 1   heuristic                    92970 non-null  object 
 2   seed                         92970 non-null  int64  
 3   generation                   92970 non-null  int64  
 4   individual                   92970 non-null  int64  
 5   diversity                    92970 non-null  float64
 6   fitness                      36286 non-null  float64
 7   front_number                 56684 non-null  float64
 8   crowding_distance            56684 non-null  float64
 9   percentage_violations        92970 non-null  float64
 10  percentage_violations_urban  92970 non-null  float64
 11  percentage_violations_rural  92970 non-null  float64
 12  avg_response_time_urban_a    92970 non-null  float64
 13  avg_response_time_ur

In [9]:
filtered_df = dataframe[(dataframe["day"] == "Average Activity") & (dataframe["heuristic"] == "MA")]

max_generation_per_seed = filtered_df.groupby("seed")["generation"].max()
best_individuals = []
for seed, max_gen in max_generation_per_seed.items():
    best_individual = filtered_df[(filtered_df["seed"] == seed) & (filtered_df["generation"] == max_gen)].iloc[0]
    best_individuals.append(best_individual)

# dataFrame of best individuals from each seed
best_individuals_df = pd.DataFrame(best_individuals)

# find the overall best individual across all seeds
overall_best_individual = best_individuals_df.loc[best_individuals_df["fitness"].idxmin()]
overall_best_seed = overall_best_individual["seed"]
overall_best_fitness = overall_best_individual["fitness"]
overall_best_vio_urban = overall_best_individual["percentage_violations_urban"]
overall_best_vio_rural = overall_best_individual["percentage_violations_rural"]
overall_best_individual_generations = overall_best_individual["generation"]

# total amount of generations
total_generations = filtered_df["generation"].nunique()
std_dev_fitness = best_individuals_df["fitness"].std()

# average fitness and average number of generations per seed
average_fitness_per_seed = best_individuals_df["fitness"].mean()
average_generations_per_seed = best_individuals_df["generation"].mean()

print("Overall Best: Seed:", overall_best_seed)
print("Overall Best: Fitness:", overall_best_fitness)
print("Overall Best: Vio Urban:", overall_best_vio_urban)
print("Overall Best: Vio Rural:", overall_best_vio_rural)
print("Overall Best: Total Generations:", total_generations)
print("Average Fitness per Seed:", average_fitness_per_seed)
print("Average Generations per Seed:", average_generations_per_seed)
print("Standard Deviation of Fitness:", std_dev_fitness)


Overall Best: Seed: 4
Overall Best: Fitness: 0.0792683
Overall Best: Vio Urban: 0.0833333
Overall Best: Vio Rural: 0.0
Overall Best: Total Generations: 53
Average Fitness per Seed: 0.09085376999999999
Average Generations per Seed: 39.0
Standard Deviation of Fitness: 0.008355756805413993


In [10]:
day = "Highest Activity"
seed = 0

heuristic = "NSGA-II"

filtered_df = dataframe[
    (dataframe["day"] == day) &
    (dataframe["heuristic"] == heuristic) &
    (dataframe["seed"] == seed) & 
    (dataframe["generation"] == dataframe[(dataframe["day"] == day) & (dataframe["heuristic"] == heuristic) & (dataframe["seed"] == seed)]["generation"].max()) &
    (dataframe["front_number"] == 0)
]
IPython.display.display(filtered_df)


Unnamed: 0,day,heuristic,seed,generation,individual,diversity,fitness,front_number,crowding_distance,percentage_violations,percentage_violations_urban,percentage_violations_rural,avg_response_time_urban_a,avg_response_time_urban_h,avg_response_time_urban_v1,avg_response_time_rural_a,avg_response_time_rural_h,avg_response_time_rural_v1
177140,Highest Activity,NSGA-II,0,110,0,0.15,,0.0,1.79769e+308,0.169399,0.18125,0.086957,641.514,1229.86,3736.78,1000.6,1696.25,3959.5
177142,Highest Activity,NSGA-II,0,110,2,0.15,,0.0,1.79769e+308,0.174863,0.2,0.0,637.946,1291.51,3819.87,944.867,1644.5,4509.88
177144,Highest Activity,NSGA-II,0,110,4,0.15,,0.0,1.35556,0.169399,0.1875,0.043478,650.108,1243.56,3794.39,988.667,1670.5,4785.0


In [11]:
best_nsga_lowest = [23102, 24640, 30060, 35602, 41062, 46642, 52122, 57518, 60460, 64180]
best_nsga_average = [109540, 115240, 116900, 118660, 125340, 126920, 130820, 133160, 136120, 137740]


In [13]:
# dataFrame of best individuals from each seed
best_individuals_df = dataframe.loc[best_nsga_lowest]

IPython.display.display(best_individuals_df)

# find the overall best individual across all seeds
overall_best_individual = best_individuals_df.loc[23102]
overall_best_seed = overall_best_individual["seed"]
overall_best_fitness = overall_best_individual["percentage_violations"]
overall_best_vio_urban = overall_best_individual["percentage_violations_urban"]
overall_best_vio_rural = overall_best_individual["percentage_violations_rural"]
overall_best_individual_generations = overall_best_individual["generation"]

# total amount of generations
total_generations = filtered_df["generation"].nunique()
std_dev_fitness = best_individuals_df["percentage_violations"].std()

# average fitness and average number of generations per seed
average_fitness_per_seed = best_individuals_df["percentage_violations"].mean()
average_generations_per_seed = best_individuals_df["generation"].mean()

print("Overall Best: Seed:", overall_best_seed)
print("Overall Best: Fitness:", overall_best_fitness)
print("Overall Best: Vio Urban:", overall_best_vio_urban)
print("Overall Best: Vio Rural:", overall_best_vio_rural)
print("Overall Best: Total Generations:", total_generations)
print("Average Fitness per Seed:", average_fitness_per_seed)
print("Average Generations per Seed:", average_generations_per_seed)
print("Standard Deviation of Fitness:", std_dev_fitness)


Unnamed: 0,day,heuristic,seed,generation,individual,diversity,fitness,front_number,crowding_distance,percentage_violations,percentage_violations_urban,percentage_violations_rural,avg_response_time_urban_a,avg_response_time_urban_h,avg_response_time_urban_v1,avg_response_time_rural_a,avg_response_time_rural_h,avg_response_time_rural_v1
23102,Lowest Activity,NSGA-II,0,270,2,0.1,,0.0,1.79769e+308,0.060403,0.057971,0.090909,541.481,988.321,2734.26,1119.4,1742.83,3982.25
24640,Lowest Activity,NSGA-II,1,76,0,0.05,,0.0,1.79769e+308,0.067114,0.065217,0.090909,534.463,1075.06,2824.43,1044.4,1428.5,3964.75
30060,Lowest Activity,NSGA-II,2,270,0,0.25,,0.0,1.79769e+308,0.073826,0.065217,0.181818,517.741,1067.79,2762.75,1061.4,1577.83,3756.5
35602,Lowest Activity,NSGA-II,3,276,2,0.1,,0.0,1.79769e+308,0.067114,0.065217,0.090909,530.815,1032.1,2763.01,1051.0,1532.0,3750.75
41062,Lowest Activity,NSGA-II,4,272,2,0.2,,0.0,1.79769e+308,0.060403,0.057971,0.090909,510.463,1017.5,2805.36,1044.4,1520.5,3970.0
46642,Lowest Activity,NSGA-II,5,278,2,0.1,,0.0,1.79769e+308,0.080537,0.07971,0.090909,526.426,1037.38,2832.12,953.8,1447.33,3971.5
52122,Lowest Activity,NSGA-II,6,273,2,0.15,,0.0,1.79769e+308,0.073826,0.072464,0.090909,543.37,1075.7,2790.49,1037.8,1389.67,3980.25
57518,Lowest Activity,NSGA-II,7,268,18,0.1,,0.0,1.79769e+308,0.080537,0.07971,0.090909,528.815,1066.98,2689.68,934.8,1459.83,3981.25
60460,Lowest Activity,NSGA-II,8,147,0,0.05,,0.0,1.79769e+308,0.060403,0.050725,0.181818,526.333,1078.04,2754.74,1058.8,1508.0,3757.25
64180,Lowest Activity,NSGA-II,9,185,0,0.05,,0.0,1.79769e+308,0.04698,0.043478,0.090909,513.037,1016.13,2754.29,1153.8,1474.83,3983.5


Overall Best: Seed: 0
Overall Best: Fitness: 0.0604027
Overall Best: Vio Urban: 0.057971
Overall Best: Vio Rural: 0.0909091
Overall Best: Total Generations: 1
Average Fitness per Seed: 0.0671141
Average Generations per Seed: 231.5
Standard Deviation of Fitness: 0.010493085443492988


In [14]:
day = "Highest Activity"
seed = 0

heuristic = "GA"
filtered_df = dataframe[
    (dataframe["day"] == day) & 
    (dataframe["heuristic"] == heuristic) &
    (dataframe["seed"] == seed) & 
    (dataframe["generation"] == dataframe[(dataframe["day"] == day) & (dataframe["heuristic"] == heuristic) & (dataframe["seed"] == seed)]["generation"].max()) &
    (dataframe["individual"] == 0)
]
IPython.display.display(filtered_df)

heuristic = "MA"
filtered_df = dataframe[
    (dataframe["day"] == day) &
    (dataframe["heuristic"] == heuristic) &
    (dataframe["seed"] == seed) & 
    (dataframe["generation"] == dataframe[(dataframe["day"] == day) & (dataframe["heuristic"] == heuristic) & (dataframe["seed"] == seed)]["generation"].max()) &
    (dataframe["individual"] == 0)
]
IPython.display.display(filtered_df)

heuristic = "NSGA-II"
filtered_df = dataframe[
    (dataframe["day"] == day) &
    (dataframe["heuristic"] == heuristic) &
    (dataframe["seed"] == seed) & 
    (dataframe["generation"] == dataframe[(dataframe["day"] == day) & (dataframe["heuristic"] == heuristic) & (dataframe["seed"] == seed)]["generation"].max()) &
    (dataframe["front_number"] == 0)
]
IPython.display.display(filtered_df)

heuristic = "Memetic NSGA-II"
filtered_df = dataframe[
    (dataframe["day"] == day) &
    (dataframe["heuristic"] == heuristic) &
    (dataframe["seed"] == seed) & 
    (dataframe["generation"] == dataframe[(dataframe["day"] == day) & (dataframe["heuristic"] == heuristic) & (dataframe["seed"] == seed)]["generation"].max()) &
    (dataframe["front_number"] == 0)
]
IPython.display.display(filtered_df)


Unnamed: 0,day,heuristic,seed,generation,individual,diversity,fitness,front_number,crowding_distance,percentage_violations,percentage_violations_urban,percentage_violations_rural,avg_response_time_urban_a,avg_response_time_urban_h,avg_response_time_urban_v1,avg_response_time_rural_a,avg_response_time_rural_h,avg_response_time_rural_v1
160700,Highest Activity,GA,0,51,0,0.7,0.163934,,,0.163934,0.18125,0.043478,616.149,1234.88,3778.39,952.6,1424.88,5067.12


Unnamed: 0,day,heuristic,seed,generation,individual,diversity,fitness,front_number,crowding_distance,percentage_violations,percentage_violations_urban,percentage_violations_rural,avg_response_time_urban_a,avg_response_time_urban_h,avg_response_time_urban_v1,avg_response_time_rural_a,avg_response_time_rural_h,avg_response_time_rural_v1
168600,Highest Activity,MA,0,50,0,0.55,0.147541,,,0.147541,0.16875,0.0,624.459,1174.62,3572.12,864.8,1574.5,4542.62


Unnamed: 0,day,heuristic,seed,generation,individual,diversity,fitness,front_number,crowding_distance,percentage_violations,percentage_violations_urban,percentage_violations_rural,avg_response_time_urban_a,avg_response_time_urban_h,avg_response_time_urban_v1,avg_response_time_rural_a,avg_response_time_rural_h,avg_response_time_rural_v1
177140,Highest Activity,NSGA-II,0,110,0,0.15,,0.0,1.79769e+308,0.169399,0.18125,0.086957,641.514,1229.86,3736.78,1000.6,1696.25,3959.5
177142,Highest Activity,NSGA-II,0,110,2,0.15,,0.0,1.79769e+308,0.174863,0.2,0.0,637.946,1291.51,3819.87,944.867,1644.5,4509.88
177144,Highest Activity,NSGA-II,0,110,4,0.15,,0.0,1.35556,0.169399,0.1875,0.043478,650.108,1243.56,3794.39,988.667,1670.5,4785.0


Unnamed: 0,day,heuristic,seed,generation,individual,diversity,fitness,front_number,crowding_distance,percentage_violations,percentage_violations_urban,percentage_violations_rural,avg_response_time_urban_a,avg_response_time_urban_h,avg_response_time_urban_v1,avg_response_time_rural_a,avg_response_time_rural_h,avg_response_time_rural_v1
197280,Highest Activity,Memetic NSGA-II,0,65,0,0.2,,0.0,1.79769e+308,0.185792,0.18125,0.217391,612.365,1174.01,3664.85,1253.67,1647.75,3205.62
197282,Highest Activity,Memetic NSGA-II,0,65,2,0.2,,0.0,1.79769e+308,0.202186,0.23125,0.0,669.459,1215.63,3649.41,901.6,1355.88,4742.0
197284,Highest Activity,Memetic NSGA-II,0,65,4,0.2,,0.0,1.52598,0.191257,0.2125,0.043478,667.095,1310.44,4032.77,1028.73,1458.5,5732.75
197285,Highest Activity,Memetic NSGA-II,0,65,5,0.2,,0.0,0.680147,0.174863,0.1875,0.086957,654.108,1164.98,3528.71,1048.6,1360.88,5171.62
