### FUNCTIONS / LIBRARIES

In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# local Optimization module are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [4]:
# Optimization functions

from Optimization import prepare_vehicles_with_stats
from Optimization import spatial_optimization_pipeline
from Optimization import temporal_optimization_pipeline
from Optimization import run_fairness_pipeline
from Optimization import run_max_coverage_pipeline
from Optimization import combine_optimized_dfs
from Optimization import compute_combined_optimization_scores
from Optimization import select_random_vehicles
from Optimization import vehicle_optimization_stats_pipeline
from Optimization import extract_string_lists # Done (call if needed)
from Optimization import plot_vehicles_by_group # Done (call if needed)
from Optimization import prepare_selected_vehicles_from_combined 
from Optimization import master_function_analysis
from Optimization import visualization_master_function
from Optimization import pipeline_plot_frequency

import warnings
import geopandas as gpd
import pandas as pd

In [5]:
# Set the warning filter to ignore SettingWithCopyWarning
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)


### RAW DATA / PARAMETERS 

In [6]:
# --- Geospatial layers ---
# Full CBS layer
cbs_full = gpd.read_file('data/temp/full_cbs.gpkg')
# GVB layer grouped by points
points_gdf = gpd.read_file('data/temp/grouped_by_points_GVB.gpkg')
# City boundary
city_geo = gpd.read_file('data/Gemeente2.geojson')

# --- Tabular inputs ---
# City statistics (Amsterdam)
city_stats = pd.read_csv('data/city_stats_amsterdam.csv')

# --- Parameters ---
City = 'Amsterdam'


### Set N, the number of vehicles to select

In [7]:
# N is the number of vehicles requested from each selection step
# (passed to the pipelines below as top_n=N / n=N).
N = 100 # values noted by the author: 50 and 100

### PROCESS

In [8]:
# Create the vehicles-with-stats table from the grouped points layer and the CBS layer.
vehicles_stats = prepare_vehicles_with_stats(points_gdf, cbs_full)

In [9]:
# Export Vehicle Stats
# vehicles_stats.to_file('data/temp/vehicles_stats_AMS_50buffer_1503.gpkg', driver='GPKG')

In [10]:
# Select vehicles for spatial optimization.
# coverage_threshold=0 is deliberate: per the original note it keeps the selection
# going instead of stopping early (TODO confirm against spatial_optimization_pipeline).
optimized_spatial, filtered_vehicles, max_space_vehicles = spatial_optimization_pipeline(
    points_gdf,
    cbs_full,
    vehicles_stats,
    coverage_threshold=0,
    top_n=N,
)

In [11]:
# Select vehicles for temporal optimization.
optimized_temporal, filtered_vehicles_temp, max_temp_vehicles = temporal_optimization_pipeline(
    vehicles_stats,
    top_n=N,
)

In [12]:
# Select vehicles for fairness optimization.
(
    closest_simple,
    closest_relative,
    closest_absolute,
    df_area_stats,
    df_opts,
    fair_vehicles,
) = run_fairness_pipeline(vehicles_stats, city_stats, n=N)

  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)


In [13]:
# Select vehicles for max-coverage optimization.
tops, cbs_lists, max_number, gdf_filtered, max_pop_vehicles = run_max_coverage_pipeline(
    vehicles_stats,
    cbs_full,
    n=N,
)

# Pull each entry out of `tops` under a readable name.
# The variable names and the keys are listed in the same order.
(
    max_A_inhab,
    max_A_old,
    max_A_young,
    max_A_dutch,
    max_A_non_western,
    max_P_old,
    max_P_non_western,
    max_point_count,
) = (
    tops[key]
    for key in (
        'max_A_inhab',
        'max_A_65+',
        'max_A_0_15',
        'max_A_nederlan',
        'max_A_n_west_m',
        'max_P_65+',
        'max_P_n_west_m',
        'max_count',
    )
)

In [14]:
# Inspect the vehicles selected by the spatial optimization.
max_space_vehicles

Unnamed: 0,max_spatial
0,1921_GVB
1,309_GVB
2,2268_GVB
3,2021_GVB
4,1467_GVB
...,...
95,3040_GVB
96,3032_GVB
97,2115_GVB
98,1470_GVB


In [15]:
# Create the combined table (first iteration): one column per selection strategy.
combined_df = combine_optimized_dfs(
    max_space_vehicles,
    max_temp_vehicles,
    max_pop_vehicles,
    fair_vehicles,
)

In [16]:
# Inspect the combined selections (one column per optimization strategy).
combined_df

Unnamed: 0,max_spatial,max_temporal,max_A_inhab,max_A_65+,max_A_0_15,max_A_nederlan,max_A_n_west_m,max_P_65+,max_P_n_west_m,max_count,closest_absolute,closest_relative,closest_simple
0,1921_GVB,1416_GVB,1921_GVB,1921_GVB,2277_GVB,2268_GVB,2277_GVB,1423_GVB,1466_GVB,1416_GVB,1462_GVB,1462_GVB,1462_GVB
1,309_GVB,1607_GVB,2268_GVB,2268_GVB,2268_GVB,1921_GVB,1921_GVB,322_GVB,1104_GVB,2121_GVB,1917_GVB,1917_GVB,1917_GVB
2,2268_GVB,330_GVB,1906_GVB,1403_GVB,2257_GVB,1906_GVB,2257_GVB,1608_GVB,1118_GVB,2041_GVB,2019_GVB,2028_GVB,2089_GVB
3,2021_GVB,1606_GVB,2277_GVB,320_GVB,1921_GVB,1910_GVB,2265_GVB,1609_GVB,1138_GVB,1141_GVB,2089_GVB,2057_GVB,2057_GVB
4,1467_GVB,1413_GVB,1907_GVB,2277_GVB,2265_GVB,2277_GVB,1906_GVB,1604_GVB,367_GVB,315_GVB,2057_GVB,1419_GVB,2020_GVB
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,3040_GVB,1455_GVB,2019_GVB,3044_GVB,1957_GVB,2020_GVB,1124_GVB,1461_GVB,2103_GVB,1408_GVB,1168_GVB,1454_GVB,3065_GVB
96,3032_GVB,1125_GVB,2060_GVB,2121_GVB,3048_GVB,2019_GVB,1413_GVB,2064_GVB,2072_GVB,3055_GVB,1132_GVB,2098_GVB,3045_GVB
97,2115_GVB,1141_GVB,2057_GVB,2039_GVB,3044_GVB,2060_GVB,1953_GVB,1453_GVB,2124_GVB,1461_GVB,2147_GVB,1913_GVB,3044_GVB
98,1470_GVB,1117_GVB,1414_GVB,2049_GVB,3037_GVB,2057_GVB,1958_GVB,2037_GVB,2114_GVB,1907_GVB,3037_GVB,2037_GVB,3048_GVB


In [17]:
# Combined selection (based on chosen columns of combined_df).
# Not included in the final analysis; may be used for the discussion in the paper.
scores_combined_df, top_combined_final = compute_combined_optimization_scores(combined_df)

In [18]:
# Create a random selection of N vehicles.
# NOTE(review): no seed is passed here; confirm select_random_vehicles is seeded
# internally if this selection has to be reproducible across runs.
random_vehicles = select_random_vehicles(vehicles_stats, n=N)

In [19]:
# List the selection columns available in combined_df.
print(combined_df.columns.tolist())


['max_spatial', 'max_temporal', 'max_A_inhab', 'max_A_65+', 'max_A_0_15', 'max_A_nederlan', 'max_A_n_west_m', 'max_P_65+', 'max_P_n_west_m', 'max_count', 'closest_absolute', 'closest_relative', 'closest_simple']


In [20]:
# Build a one-column frame holding every vehicle id from vehicles_stats['uni_id']
# (as strings), re-indexed from 0 so it no longer carries the vehicles_stats index.
all_vehicles = (
    vehicles_stats['uni_id']
    .astype(str)
    .rename('all_vehicles')
    .reset_index(drop=True)
    .to_frame()
)

In [21]:
# Optionally take a random subsample of all_vehicles (disabled)
#all_vehicles = all_vehicles.sample(n=50, random_state=1)


In [22]:
# Create the stats table for all vehicle selections.
# The displayed result has one column per selection plus a city column.
# Arguments are positional, so their order must match the pipeline's signature.
final_df_cells_test = vehicle_optimization_stats_pipeline(
    vehicles_stats,
    cbs_full,
    city_stats,
    max_space_vehicles,
    max_temp_vehicles,
    max_pop_vehicles,
    fair_vehicles,
    top_combined_final, # combined selection; not included in the final analysis
    random_vehicles,
    all_vehicles
)

In [23]:

# Display-only setting: render every float with no decimals and with thousands
# separators. The underlying values are not changed.
pd.options.display.float_format = '{:,.0f}'.format

In [24]:
# Manual corrections to the 'Amsterdam' column (applied in place).
# The pipeline does not report the right value for the city's cell count,
# so overwrite it with the number of rows in cbs_full.
final_df_cells_test.at['cells_unique', 'Amsterdam'] = len(cbs_full)
# Store '-' as a placeholder for the city column
# (presumably this average is not meaningful at city level; TODO confirm).
final_df_cells_test.at['avg_points_per_cell', 'Amsterdam'] = '-'

In [25]:
# Inspect the stats table for all selections.
# Note on the temporal selection: two variants exist. One starts from the vehicles
# with the most points; the other starts from the highest avg_points_per_cell
# (maybe better for the graph).
final_df_cells_test

Unnamed: 0,max_spatial,max_temporal,max_A_inhab,max_A_65+,max_A_0_15,max_A_nederlan,max_A_n_west_m,max_P_65+,max_P_n_west_m,max_count,fairest_absolute,fairest_relative,fairest_simple,combined_opt,random,all_vehicles,Amsterdam
A_inhab,3369775,4635185,5045025,5021630,4930190,5039050,4780770,2308995,3320540,2986315,3397900,3400640,3385360,560085,3069285,8471845,-
A_0_15,420950,608230,620601,621540,630057,618333,614422,298811,459940,350595,400668,417038,395739,65810,385638,1050147,-
A_65+,419060,573086,607038,609915,597364,606981,581577,342503,386646,379313,416346,421130,411584,71683,380293,1047752,-
A_nederlan,1451698,1912586,2176984,2167270,2094060,2181063,1952566,1030095,1240193,1349936,1496207,1477286,1516747,250612,1347408,3656741,-
A_n_west_m,1207541,1810292,1800426,1793378,1819808,1786645,1870848,792077,1473611,975172,1156796,1201149,1117794,186587,1065478,3006099,-
count,790896,757990,808717,801812,753076,815152,744584,763655,617002,1099286,815111,815873,818951,101663,796550,2127870,-
A_inhab_uniq,403240,328640,382665,385145,382340,385310,381360,274380,291420,363965,371570,374800,360410,284865,388385,403240,870375
A_0_15_uniq,52298,43007,48976,49288,49575,49334,49439,35862,40060,46655,47787,48601,46522,36270,50210,52298,122388
A_65+_uniq,51191,42147,48722,49317,48355,49317,48164,38353,36081,45956,46700,46991,44834,35958,49222,51191,112084
A_nederlan_uniq,178541,143714,169905,171374,167504,171452,167004,123237,120087,159417,163218,165473,157539,124810,171231,178541,382106


### Manual column selection and export

In [26]:
# Keep only the exported strategies and tag each column name with the fleet size.
n = N
export_columns = ["max_spatial", "max_temporal", "fairest_relative"]
subset_df = final_df_cells_test[export_columns].set_axis(
    [f"{column}_{n}" for column in export_columns],
    axis=1,
)


In [27]:
# Name the row index for display, then export.
# index_label=False leaves the index name out of the CSV header row, so
# "indicator" is not written to the file.
subset_df = subset_df.rename_axis("indicator")
subset_df.to_csv(f'data/temp/optimzed_vehicles_N{N}_test.csv', index_label=False)

In [28]:
# Inspect the subset that was exported.
subset_df

Unnamed: 0_level_0,max_spatial_100,max_temporal_100,fairest_relative_100
indicator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A_inhab,3369775,4635185,3400640
A_0_15,420950,608230,417038
A_65+,419060,573086,421130
A_nederlan,1451698,1912586,1477286
A_n_west_m,1207541,1810292,1201149
count,790896,757990,815873
A_inhab_uniq,403240,328640,374800
A_0_15_uniq,52298,43007,48601
A_65+_uniq,51191,42147,46991
A_nederlan_uniq,178541,143714,165473


In [29]:
# Read the export back to check the written file (first column is used as the index).
pd.read_csv(f'data/temp/optimzed_vehicles_N{N}_test.csv', index_col=0)

Unnamed: 0,max_spatial_100,max_temporal_100,fairest_relative_100
A_inhab,3369775,4635185,3400640
A_0_15,420950,608230,417038
A_65+,419060,573086,421130
A_nederlan,1451698,1912586,1477286
A_n_west_m,1207541,1810292,1201149
count,790896,757990,815873
A_inhab_uniq,403240,328640,374800
A_0_15_uniq,52298,43007,48601
A_65+_uniq,51191,42147,46991
A_nederlan_uniq,178541,143714,165473


### EXPORT ALL VEHICLES AND CITY

In [36]:
# Export the 'all_vehicles' and 'Amsterdam' columns.
# The row index is named "indicator" on the frame; index_label=False leaves that
# name out of the CSV header row.
subset_df2 = final_df_cells_test[["all_vehicles", "Amsterdam"]].rename_axis("indicator")
subset_df2.to_csv('data/temp/all_vehicles_Amsterdam_test.csv', index_label=False)

In [37]:
# Read the export back to check the written file (first column is used as the index).
pd.read_csv('data/temp/all_vehicles_Amsterdam_test.csv', index_col=0)

Unnamed: 0,all_vehicles,Amsterdam
A_inhab,8471845,-
A_0_15,1050147,-
A_65+,1047752,-
A_nederlan,3656741,-
A_n_west_m,3006099,-
count,2127870,-
A_inhab_uniq,403240,870375.0
A_0_15_uniq,52298,122388
A_65+_uniq,51191,112084
A_nederlan_uniq,178541,382106


In [30]:
# THEN GO SEE THE TASKS TO DO FOR EXPORTS
# GITHUB REPO Update + Share 
# TITUS | 

In [31]:
# Create a CSV with the selected vehicles
# combined_df.to_csv('data/temp/combined_df_10_AMS_1503.csv', index=False)

In [32]:
# # Quick visualization usage (only if needed!)

# lists_dict = extract_string_lists(combined_df)
# lists_dict.keys()
# plot_vehicles_by_group(vehicles_stats, lists_dict, city_geo) 

In [31]:
# # Create Optimized Vehicles GDFs
# max_space_gdf = prepare_selected_vehicles_from_combined(vehicles_stats, combined_df, column='max_temporal')
# # Create Main Analysis 
# sums_df, cbs_gdf, joined_gdf, average_stats = master_function_analysis(
#     max_space_gdf, cbs_full,50)
# # Create Main Visualization 
# fig1, fig2, fig3, fig4 = visualization_master_function(
#     max_space_gdf, cbs_full, joined_gdf, city_geo, 50, sums_df, average_stats)