# Fix pathing

In [1]:
import sys


# Put the directory two levels above the working directory on the import path.
# The membership check keeps re-running this cell from appending the same
# entry over and over.
if "../.." not in sys.path:
    sys.path.append("../..")


In [2]:
import constants

import os


# Re-point constants.PROJECT_DIRECTORY_PATH four directory levels above the
# value the module was imported with.
#
# The pristine value is remembered on the module the first time this cell runs.
# Without it, every re-run of the cell would start from the already shortened
# path and climb another four levels.
if not hasattr(constants, "_ORIGINAL_PROJECT_DIRECTORY_PATH"):
    constants._ORIGINAL_PROJECT_DIRECTORY_PATH = constants.PROJECT_DIRECTORY_PATH

constants.PROJECT_DIRECTORY_PATH = os.path.dirname(
    os.path.dirname(
        os.path.dirname(
            os.path.dirname(constants._ORIGINAL_PROJECT_DIRECTORY_PATH)
        )
    )
)


# Imports

In [3]:
import json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import IPython.display


# Constants

In [4]:
# (folder-name suffix, display name) of every algorithm, in folder order.
_ALGORITHMS = (
    ("GA", "GA"),
    ("MA", "MA"),
    ("NSGA2", "NSGA-II"),
    ("MemeticNSGA2", "Memetic NSGA-II"),
)

# (folder-name infix, display name, colour) of every activity level, in folder order.
_ACTIVITY_LEVELS = (
    ("least", "Lowest Activity", "green"),
    ("avg", "Average Activity", "yellow"),
    ("most", "Highest Activity", "red"),
)

# Maps each experiment folder to its (heuristic, day, colour) labels.
FILE_INFO = {
    f"ex_3_{level_key}_{algorithm_key}": (algorithm_label, level_label, level_colour)
    for level_key, level_label, level_colour in _ACTIVITY_LEVELS
    for algorithm_key, algorithm_label in _ALGORITHMS
}

# Experiment folders in load order; derived from FILE_INFO so the two never drift apart.
FOLDER_NAMES = list(FILE_INFO)

# Experiment folders live in <parent of PROJECT_DIRECTORY_PATH>/Simulator/data.
FOLDER_PATH = os.path.join(
    os.path.dirname(constants.PROJECT_DIRECTORY_PATH),
    "Simulator",
    "data",
)

# Seeds 0..9; load_json() reads one result file per seed from every folder.
SEEDS = [*range(10)]


# Methods

In [5]:
def load_json():
    """Read every per-seed result file of every experiment folder.

    For each folder in FOLDER_NAMES and each seed in SEEDS the file
    ``<FOLDER_PATH>/<folder>/heuristic_seed=<seed>.json`` is parsed.

    Returns:
        dict: ``(seed, heuristic, day, color)`` -> parsed JSON content, where
        the three labels come from FILE_INFO for the folder.
    """
    results = {}

    for folder in FOLDER_NAMES:
        # The labels depend on the folder only, so resolve them once per folder.
        heuristic, day, color = FILE_INFO[folder]

        for seed in SEEDS:
            json_path = os.path.join(FOLDER_PATH, folder, f"heuristic_seed={seed}.json")

            with open(json_path, 'r') as handle:
                results[(seed, heuristic, day, color)] = json.load(handle)

    return results


In [6]:
def compute_metrics(dataset):
    """Flatten the loaded runs into one table with a row per individual.

    Args:
        dataset: mapping ``(seed, heuristic, day, color)`` -> dict of metrics.
            Every metric is read as ``data[key][generation][individual]``;
            ``data['diversity'][generation][0]`` is used as the diversity of
            the whole generation, and ``data['percentage_violations']``
            determines how many individuals a generation has.

    Returns:
        pandas.DataFrame with the columns ``day, heuristic, seed, generation,
        individual, diversity`` followed by the twelve objective columns.
        Objectives that a run does not provide are NaN. Rows that agree on
        every column except ``individual`` and ``crowding_distance`` are
        dropped (the first occurrence is kept). Surviving rows keep their
        original row labels, i.e. the index is NOT renumbered.
    """
    id_columns = ["day", "heuristic", "seed", "generation", "individual", "diversity"]
    objective_keys = [
        "fitness",
        "front_number",
        "crowding_distance",
        "percentage_violations",
        "percentage_violations_urban",
        "percentage_violations_rural",
        "avg_response_time_urban_a",
        "avg_response_time_urban_h",
        "avg_response_time_urban_v1",
        "avg_response_time_rural_a",
        "avg_response_time_rural_h",
        "avg_response_time_rural_v1",
    ]

    # Collect plain Python rows and build the frame once at the end. Appending
    # with `dataframe.loc[len(dataframe)] = ...` reallocates the frame for
    # every row, which is quadratic in the number of individuals.
    rows = []

    for (seed, heuristic, day, _), data in dataset.items():
        for generation in range(len(data['diversity'])):
            population_size = len(data['percentage_violations'][generation])
            diversity = data['diversity'][generation][0]

            for individual in range(population_size):
                objectives = [
                    data[key][generation][individual] if key in data else np.nan
                    for key in objective_keys
                ]
                rows.append([day, heuristic, seed, generation, individual, diversity, *objectives])

    dataframe = pd.DataFrame(rows, columns=id_columns + objective_keys)

    # "individual" and "crowding_distance" are deliberately left out of the
    # comparison: two individuals of the same generation with identical
    # remaining values are treated as one.
    duplicate_subset = [
        column
        for column in dataframe.columns
        if column not in ("individual", "crowding_distance")
    ]

    # No reset_index(): the original row numbers stay in place as labels.
    return dataframe.drop_duplicates(subset=duplicate_subset)


# Main

In [7]:
dataset = load_json()

# Print the metric names stored for two sample runs (seed 0, "Highest Activity").
# The runs are looked up directly instead of scanning every entry, and the
# inner loop uses its own variable name so it no longer rebinds the outer
# loop's `key`/`value` while that loop is still running.
sample_run_keys = [
    (0, 'GA', 'Highest Activity', 'red'),
    (0, 'NSGA-II', 'Highest Activity', 'red'),
]

for run_key in sample_run_keys:
    if run_key not in dataset:
        continue

    print(run_key)
    for metric_name in dataset[run_key]:
        print(metric_name)


(0, 'GA', 'Highest Activity', 'red')
avg_response_time_rural_a
avg_response_time_rural_h
avg_response_time_rural_v1
avg_response_time_urban_a
avg_response_time_urban_h
avg_response_time_urban_v1
diversity
fitness
percentage_violations
percentage_violations_rural
percentage_violations_urban
(0, 'NSGA-II', 'Highest Activity', 'red')
avg_response_time_rural_a
avg_response_time_rural_h
avg_response_time_rural_v1
avg_response_time_urban_a
avg_response_time_urban_h
avg_response_time_urban_v1
crowding_distance
diversity
front_number
percentage_violations
percentage_violations_rural
percentage_violations_urban


In [8]:
# Flatten every loaded run into one table. compute_metrics() drops duplicate
# rows without renumbering the index, so the row labels have gaps.
dataframe = compute_metrics(dataset)

# Print the column dtypes and non-null counts of the flattened table.
dataframe.info()


<class 'pandas.core.frame.DataFrame'>
Index: 92970 entries, 0 to 208738
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   day                          92970 non-null  object 
 1   heuristic                    92970 non-null  object 
 2   seed                         92970 non-null  int64  
 3   generation                   92970 non-null  int64  
 4   individual                   92970 non-null  int64  
 5   diversity                    92970 non-null  float64
 6   fitness                      36286 non-null  float64
 7   front_number                 56684 non-null  float64
 8   crowding_distance            56684 non-null  float64
 9   percentage_violations        92970 non-null  float64
 10  percentage_violations_urban  92970 non-null  float64
 11  percentage_violations_rural  92970 non-null  float64
 12  avg_response_time_urban_a    92970 non-null  float64
 13  avg_response_time_ur

In [80]:
# Summary of the MA runs on the "Average Activity" day.
filtered_df = dataframe[(dataframe["day"] == "Average Activity") & (dataframe["heuristic"] == "MA")]

# Last recorded generation of every seed, broadcast onto each row of that seed.
# This replaces the per-seed Python loop that re-filtered the frame for every
# seed and overwrote the notebook-level `seed` variable as a side effect.
last_generation_of_seed = filtered_df.groupby("seed")["generation"].transform("max")
final_population_df = filtered_df[filtered_df["generation"] == last_generation_of_seed]

# DataFrame of best individuals from each seed: the first row of each seed's
# final generation (row labels of `dataframe` are preserved).
# NOTE(review): this presumes the population is stored best-first - confirm.
best_individuals_df = final_population_df.groupby("seed").head(1)

# Find the overall best individual across all seeds (lowest fitness; ties -> first seed)
overall_best_individual = best_individuals_df.loc[best_individuals_df["fitness"].idxmin()]
overall_best_seed = overall_best_individual["seed"]
overall_best_fitness = overall_best_individual["fitness"]
overall_best_vio_urban = overall_best_individual["percentage_violations_urban"]
overall_best_vio_rural = overall_best_individual["percentage_violations_rural"]
# Not printed below; kept so it can be inspected interactively.
overall_best_individual_generations = overall_best_individual["generation"]

# Total amount of generations: count all unique generation numbers across all seeds
total_generations = filtered_df["generation"].nunique()
std_dev_fitness = best_individuals_df["fitness"].std()

# Average fitness and average number of generations per seed
average_fitness_per_seed = best_individuals_df["fitness"].mean()
average_generations_per_seed = best_individuals_df["generation"].mean()

print("Overall Best: Seed:", overall_best_seed)
print("Overall Best: Fitness:", overall_best_fitness)
print("Overall Best: Vio Urban:", overall_best_vio_urban)
print("Overall Best: Vio Rural:", overall_best_vio_rural)
print("Overall Best: Total Generations:", total_generations)
print("Average Fitness per Seed:", average_fitness_per_seed)
print("Average Generations per Seed:", average_generations_per_seed)
print("Standard Deviation of Fitness:", std_dev_fitness)


Overall Best: Seed: 4
Overall Best: Fitness: 0.0792683
Overall Best: Vio Urban: 0.0833333
Overall Best: Vio Rural: 0.0
Overall Best: Total Generations: 53
Average Fitness per Seed: 0.09085376999999999
Average Generations per Seed: 39.0
Standard Deviation of Fitness: 0.008355756805413993


In [103]:
day = "Average Activity"
seed = 9

heuristic = "Memetic NSGA-II"

# All rows of the selected run (one day / heuristic / seed combination).
run_df = dataframe[
    (dataframe["day"] == day)
    & (dataframe["heuristic"] == heuristic)
    & (dataframe["seed"] == seed)
]

# Keep the rows with front_number 0 from the run's last recorded generation.
in_last_generation = run_df["generation"] == run_df["generation"].max()
in_front_zero = run_df["front_number"] == 0
filtered_df = run_df[in_last_generation & in_front_zero]
IPython.display.display(filtered_df)


Unnamed: 0,day,heuristic,seed,generation,individual,diversity,fitness,front_number,crowding_distance,percentage_violations,percentage_violations_urban,percentage_violations_rural,avg_response_time_urban_a,avg_response_time_urban_h,avg_response_time_urban_v1,avg_response_time_rural_a,avg_response_time_rural_h,avg_response_time_rural_v1
159660,Average Activity,Memetic NSGA-II,9,204,0,0.2,,0.0,1.79769e+308,0.067073,0.070513,0.0,522.069,1018.26,2876.9,929.5,1514.0,2936.75
159661,Average Activity,Memetic NSGA-II,9,204,1,0.2,,0.0,1.79769e+308,0.067073,0.070513,0.0,522.986,1009.63,2875.32,934.25,1513.0,2940.25
159665,Average Activity,Memetic NSGA-II,9,204,5,0.2,,0.0,0.0,0.067073,0.070513,0.0,521.792,1017.93,2882.34,926.0,1503.0,2940.25
159667,Average Activity,Memetic NSGA-II,9,204,7,0.2,,0.0,0.0,0.067073,0.070513,0.0,527.208,1019.12,2903.41,931.75,1501.5,2936.5


In [None]:
# Scratch note (this cell has no execution count): row labels of `dataframe`,
# presumably copied by hand from the first displayed row of the cell above
# while stepping `seed` from 0 to 9. They are labels of this particular
# dataset, not computed values, so they go stale if the data changes.
[138980, 141000, 143920, 145080, 146540, 149920, 152800, 153780, 155560, 159660]

In [105]:
# DataFrame of best individuals from each seed
best_individuals_df = dataframe.loc[[138980, 141000, 143920, 145080, 146540, 149920, 152800, 153780, 155560, 159660]]

IPython.display.display(best_individuals_df)

# Find the overall best individual across all seeds
overall_best_individual = best_individuals_df.loc[149920]
overall_best_seed = overall_best_individual["seed"]
overall_best_fitness = overall_best_individual["percentage_violations"]
overall_best_vio_urban = overall_best_individual["percentage_violations_urban"]
overall_best_vio_rural = overall_best_individual["percentage_violations_rural"]
overall_best_individual_generations = overall_best_individual["generation"]

# Total amount of generations: count all unique generation numbers across all seeds
total_generations = filtered_df["generation"].nunique()
std_dev_fitness = best_individuals_df["percentage_violations"].std()

# Average fitness and average number of generations per seed
average_fitness_per_seed = best_individuals_df["percentage_violations"].mean()
average_generations_per_seed = best_individuals_df["generation"].mean()

print("Overall Best: Seed:", overall_best_seed)
print("Overall Best: Fitness:", overall_best_fitness)
print("Overall Best: Vio Urban:", overall_best_vio_urban)
print("Overall Best: Vio Rural:", overall_best_vio_rural)
print("Overall Best: Total Generations:", total_generations)
print("Average Fitness per Seed:", average_fitness_per_seed)
print("Average Generations per Seed:", average_generations_per_seed)
print("Standard Deviation of Fitness:", std_dev_fitness)


Unnamed: 0,day,heuristic,seed,generation,individual,diversity,fitness,front_number,crowding_distance,percentage_violations,percentage_violations_urban,percentage_violations_rural,avg_response_time_urban_a,avg_response_time_urban_h,avg_response_time_urban_v1,avg_response_time_rural_a,avg_response_time_rural_h,avg_response_time_rural_v1
138980,Average Activity,Memetic NSGA-II,0,61,0,0.05,,0.0,1.79769e+308,0.079268,0.083333,0.0,533.264,1050.23,2847.78,1111.0,1042.0,2596.25
141000,Average Activity,Memetic NSGA-II,1,100,0,0.05,,0.0,1.79769e+308,0.079268,0.083333,0.0,556.486,1019.31,2915.9,917.5,1058.75,2939.75
143920,Average Activity,Memetic NSGA-II,2,145,0,0.35,,0.0,1.79769e+308,0.097561,0.102564,0.0,551.833,1076.11,3024.66,941.75,1075.75,2931.5
145080,Average Activity,Memetic NSGA-II,3,57,0,0.05,,0.0,1.79769e+308,0.085366,0.089744,0.0,526.278,1012.5,2932.0,935.0,1064.5,2611.75
146540,Average Activity,Memetic NSGA-II,4,72,0,0.05,,0.0,1.79769e+308,0.085366,0.089744,0.0,543.514,1014.45,2736.37,925.0,1076.75,3258.5
149920,Average Activity,Memetic NSGA-II,5,168,0,0.15,,0.0,1.79769e+308,0.067073,0.070513,0.0,536.139,1038.99,2830.85,908.25,1100.75,2599.0
152800,Average Activity,Memetic NSGA-II,6,143,0,0.05,,0.0,1.79769e+308,0.067073,0.070513,0.0,531.806,968.393,2792.88,922.0,1543.25,3343.25
153780,Average Activity,Memetic NSGA-II,7,48,0,0.05,,0.0,1.79769e+308,0.085366,0.089744,0.0,555.597,986.869,2780.34,925.5,1115.5,2941.0
155560,Average Activity,Memetic NSGA-II,8,88,0,0.05,,0.0,1.79769e+308,0.073171,0.076923,0.0,530.056,1062.82,2890.49,957.5,1054.0,2955.25
159660,Average Activity,Memetic NSGA-II,9,204,0,0.2,,0.0,1.79769e+308,0.067073,0.070513,0.0,522.069,1018.26,2876.9,929.5,1514.0,2936.75


Overall Best: Seed: 5
Overall Best: Fitness: 0.0670732
Overall Best: Vio Urban: 0.0705128
Overall Best: Vio Rural: 0.0
Overall Best: Total Generations: 1
Average Fitness per Seed: 0.07865856
Average Generations per Seed: 108.6
Standard Deviation of Fitness: 0.01014226218292108


In [None]:
day = "Highest Activity"
seed = 0


def select_final_generation(frame, day, heuristic, seed):
    """Return the rows of one run that belong to its last recorded generation.

    Args:
        frame: table with at least the columns day, heuristic, seed, generation.
        day: value to match in the "day" column.
        heuristic: value to match in the "heuristic" column.
        seed: value to match in the "seed" column.

    Returns:
        The matching rows of `frame` (original row labels kept); empty when
        no row matches the day / heuristic / seed combination.
    """
    run_df = frame[
        (frame["day"] == day)
        & (frame["heuristic"] == heuristic)
        & (frame["seed"] == seed)
    ]
    return run_df[run_df["generation"] == run_df["generation"].max()]


# For every heuristic, the column whose value 0 marks the rows to display:
# individual 0 for GA and MA, front_number 0 for the NSGA-II variants.
DISPLAY_SELECTORS = [
    ("GA", "individual"),
    ("MA", "individual"),
    ("NSGA-II", "front_number"),
    ("Memetic NSGA-II", "front_number"),
]

for heuristic, selector_column in DISPLAY_SELECTORS:
    final_generation_df = select_final_generation(dataframe, day, heuristic, seed)
    filtered_df = final_generation_df[final_generation_df[selector_column] == 0]
    IPython.display.display(filtered_df)
