# Interpreting the results of the EA-based TSP algorithm

In [1]:
import pandas as pd
import numpy as np
import folium
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import os
import sys
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import multiprocessing as mp

In [2]:
from EA import *

## Load the data and visualise the goal

In [3]:
# Read the city table from the local JSON file next to the notebook.
# The trailing bare expression makes the notebook render the frame.
french_cities = pd.read_json('./fr.json')
french_cities

Unnamed: 0,city,lat,lng,country,iso2,admin_name,capital,population,population_proper
0,Paris,48.8567,2.3522,France,FR,Île-de-France,primary,11060000,2148271
1,Marseille,43.2964,5.3700,France,FR,Provence-Alpes-Côte d’Azur,admin,870731,870731
2,Lyon,45.7600,4.8400,France,FR,Auvergne-Rhône-Alpes,admin,522969,522969
3,Toulouse,43.6045,1.4440,France,FR,Occitanie,admin,493465,493465
4,Nice,43.7034,7.2663,France,FR,Provence-Alpes-Côte d’Azur,minor,342669,342669
...,...,...,...,...,...,...,...,...,...
629,Saint-Laurent-sur-Saône,46.3053,4.8394,France,FR,Auvergne-Rhône-Alpes,,1689,1688
630,La Neuvillette-lès-Reims,49.2890,4.0058,France,FR,Grand Est,,1567,1567
631,Laleu,46.1689,-1.1994,France,FR,Nouvelle-Aquitaine,,1149,1149
632,La Walck,48.8497,7.6108,France,FR,Grand Est,,1042,1042


For now we analyze all cities.

In [4]:
# Pairwise distance matrix between all cities.
# NOTE(review): compute_spherical_D is not defined in this notebook; presumably it
# comes from the `from EA import *` cell and returns great-circle distances
# (units look like metres, judging by the displayed values) - confirm in EA.
D = compute_spherical_D(french_cities)

100%|██████████| 634/634 [00:05<00:00, 116.96it/s]


In [5]:
# Wrap the raw distance matrix in a DataFrame labelled by city name on both
# axes, so a distance can be looked up as distances_df.loc[city_a, city_b].
distances_df = pd.DataFrame(D, columns=french_cities['city'].tolist())
distances_df = distances_df.set_axis(distances_df.columns, axis='index')
distances_df

Unnamed: 0,Paris,Marseille,Lyon,Toulouse,Nice,Nantes,Montpellier,Strasbourg,Bordeaux,Lille,...,Hesdin,Les Roches-de-Condrieu,Neuf-Brisach,Cravanche,Lannoy,Saint-Laurent-sur-Saône,La Neuvillette-lès-Reims,Laleu,La Walck,Châtelaudren
Paris,0.000000,660504.604572,392057.419081,588168.854817,685941.788291,342707.692931,594838.827655,396754.777545,499113.729472,203365.713098,...,170261.907713,420178.916838,392980.024919,357577.289544,210427.853034,339500.806781,129688.100464,400500.540475,384670.766286,392155.725593
Marseille,660504.604572,0.000000,277140.981572,318738.625990,159502.510488,695079.110192,125498.568092,615829.237888,505263.654656,833763.734889,...,826628.378648,244541.607691,551112.796212,497743.736348,835586.872429,337181.018112,674507.977579,609053.074980,641157.541333,868885.583699
Lyon,392057.419081,277140.981572,0.000000,359849.114635,298349.843005,515389.039219,250686.859139,383084.411330,435992.483871,557107.857218,...,553644.419304,34584.808454,323654.226822,259654.691836,558694.975236,60634.611190,397367.504866,468891.666608,402030.251181,666338.624905
Toulouse,588168.854817,318738.625990,359849.114635,0.000000,468448.462688,464863.066184,195899.592131,736122.383326,211852.744967,790397.659292,...,754090.594620,334127.735675,680193.790686,614767.954557,796439.559513,401883.998727,661754.513444,353052.354342,751279.247892,645843.900999
Nice,685941.788291,159502.510488,298349.843005,468448.462688,0.000000,790410.398038,272813.796510,543872.940187,637104.306140,832742.017208,...,840480.824165,277485.145294,480200.683100,440564.368769,832353.034918,346530.801576,669194.582862,720092.398354,572852.878542,953726.982162
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Saint-Laurent-sur-Saône,339500.806781,337181.018112,60634.611190,401883.998727,346530.801576,497221.853503,308906.645432,334513.197714,452083.944105,498215.866075,...,497518.705961,94922.262358,278538.316038,212941.205857,499503.003127,0.000000,337558.705418,464584.090140,351035.105640,637686.483546
La Neuvillette-lès-Reims,129688.100464,674507.977579,397367.504866,661754.513444,669194.582862,471416.301222,631341.323663,284196.171482,604215.266446,163568.644293,...,185751.227466,430368.837581,294783.489678,276519.870684,163341.098262,337558.705418,0.000000,521252.972903,267096.768328,516353.959469
Laleu,400500.540475,609053.074980,468891.666608,353052.354342,720092.398354,119738.394120,490528.778425,724544.336712,155449.954578,586827.515334,...,525252.473211,469173.849091,691510.879775,631726.596210,596403.629231,464584.090140,521252.972903,0.000000,725191.549351,295705.641855
La Walck,384670.766286,641157.541333,402030.251181,751279.247892,572852.878542,704607.081855,649172.742043,31234.204337,765378.293699,382156.462069,...,435782.942265,434510.174025,92669.780452,144914.718805,375007.664088,351035.105640,267096.768328,725191.549351,0.000000,776780.477318


In [6]:
# Plot what the distances look like: one box per city for the first 12
# columns, using every row except the last one.
fig = px.box(distances_df.iloc[:-1, :12])
fig

In [7]:
# Plotting copy of the city table in which `population` is replaced by its
# square root; french_cities itself is left untouched.
temp = french_cities.assign(
    population=french_cities['population'].apply(lambda pop: pop ** 0.5)
)

In [8]:
# Bubble map of the cities: marker colour encodes the `capital` category and
# marker size the (square-rooted) population stored in `temp`.
fig = px.scatter_mapbox(
    temp,
    lat='lat',
    lon='lng',
    color='capital',
    size='population',
    mapbox_style="carto-positron",
    zoom=5,
)

# Tight margins around the map on a black page background.
fig.update_layout(
    margin={'l': 5, 'r': 20, 'b': 5, 't': 5},
    paper_bgcolor="Black",
)

fig

In [9]:
# Density heatmap of the cities, weighted by the `population` column of `temp`.
fig = px.density_mapbox(
    temp,
    lat='lat',
    lon='lng',
    z='population',
    mapbox_style="carto-positron",
    zoom=5,
)

# Tight margins around the map on a black page background.
fig.update_layout(
    margin={'l': 5, 'r': 20, 'b': 5, 't': 5},
    paper_bgcolor="Black",
)

fig

In [10]:
# Density heatmap weighted by row position: reset_index() exposes the frame's
# index as an `index` column, which is then used as the heatmap weight `z`.
fig = px.density_mapbox(
    temp.reset_index(),
    lat='lat',
    lon='lng',
    z='index',
    mapbox_style="carto-positron",
    zoom=5,
)

# Tight margins around the map on a black page background.
fig.update_layout(
    margin={'l': 5, 'r': 20, 'b': 5, 't': 5},
    paper_bgcolor="Black",
)

fig

Here we have a problem: the distribution of points is not uniform compared to the distribution of cities.

## Results analysis

In [19]:
import os
import pandas as pd
import multiprocessing as mp

def _parse_bracketed_list(text, cast):
    """Turn a string such as ``"[1, 2, 3]"`` into ``[cast("1"), cast(" 2"), ...]``.

    Blank items are skipped, so an empty list (``"[]"``) yields ``[]`` instead
    of raising on ``cast("")``.
    """
    inner = text.strip()[1:-1]  # drop the surrounding brackets
    return [cast(item) for item in inner.split(',') if item.strip()]


def load_clean_df(args):
    """Load one results CSV, decode its list columns and register it in ``all_dfs``.

    Parameters
    ----------
    args : tuple
        ``(full_path, name)``: path of the CSV file to read and the key under
        which the cleaned frame is stored in the module-level ``all_dfs`` dict.
        A single tuple is kept as the argument for backward compatibility.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame. The same object is also stored as ``all_dfs[name]``.

    Notes
    -----
    The ``'Best Individual'`` and ``'All Scores'`` columns are stored in the CSV
    as bracketed, comma-separated strings; they are converted to lists of
    ``int`` and ``float`` respectively.
    """
    full_path, name = args
    df = pd.read_csv(full_path, index_col='Unnamed: 0')
    df['Best Individual'] = df['Best Individual'].apply(
        lambda text: _parse_bracketed_list(text, int)
    )
    df['All Scores'] = df['All Scores'].apply(
        lambda text: _parse_bracketed_list(text, float)
    )
    # Item assignment on the module-level dict needs no `global` declaration.
    all_dfs[name] = df
    return df

# Collect the result files to load.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the insertion order of all_dfs (and anything that takes "the first"
# frame) reproducible. Hidden entries and sub-directories (for example
# .ipynb_checkpoints) are skipped because they are not result CSVs.
all_results = sorted(
    name
    for name in os.listdir('./results/')
    if not name.startswith('.')
    and os.path.isfile(os.path.join('./results/', name))
)

# Maps result file name -> cleaned DataFrame; filled by load_clean_df.
all_dfs = {}

full_paths = [os.path.join('./results/', name) for name in all_results]
names = all_results

# Load sequentially with a progress bar.
# NOTE(review): tqdm is not imported explicitly in this notebook; presumably it
# is provided by `from EA import *` - confirm.
for fp, n in tqdm(list(zip(full_paths, names))):
    load_clean_df((fp, n))

100%|██████████| 679/679 [04:07<00:00,  2.74it/s]


In [None]:
# Sanity check: display the first loaded results frame (first value of
# all_dfs in insertion order).
list(all_dfs.values())[0]

Unnamed: 0,Iteration,Best Individual,Number of Same Individuals,Number of Shared Patterns,Score,All Scores,alpha,beta,gamma
0,0,"[89, 54, 97, 41, 50, 57, 73, 51, 75, 47, 81, 4...",144,36060,23.566657,"[27.31482790852093, 29.657353976543217, 31.187...",1.000000,0.01,0.400000
1,1,"[31, 56, 75, 2, 65, 36, 78, 35, 91, 40, 73, 26...",181,46360,23.966352,"[25.641472193267298, 24.572111133622013, 26.20...",0.990000,0.01,0.396000
2,2,"[31, 56, 75, 2, 65, 36, 78, 35, 91, 40, 73, 26...",200,51093,23.966352,"[30.113559957823213, 30.216805090422625, 29.93...",0.980100,0.01,0.392040
3,3,"[73, 1, 96, 77, 31, 2, 89, 81, 51, 68, 41, 24,...",208,54534,24.303876,"[30.430551148029757, 29.393651577489404, 29.18...",0.970299,0.01,0.388120
4,4,"[91, 39, 94, 55, 14, 31, 76, 74, 37, 97, 1, 58...",194,55124,24.323050,"[30.124698704296712, 28.749032197137197, 28.12...",0.960596,0.01,0.384238
...,...,...,...,...,...,...,...,...,...
825,825,"[7, 65, 88, 48, 64, 30, 86, 33, 74, 46, 76, 9,...",954,95950,17.573787,"[24.719417332622967, 22.29113537657576, 21.134...",0.100000,0.01,0.010214
826,826,"[7, 65, 88, 48, 64, 30, 86, 33, 74, 46, 76, 9,...",958,96149,17.573787,"[20.339354125114056, 19.82469679683991, 21.337...",0.100000,0.01,0.010112
827,827,"[7, 65, 88, 48, 64, 30, 86, 33, 74, 46, 76, 9,...",956,96031,17.573787,"[18.483192391335894, 23.26276150468684, 21.604...",0.100000,0.01,0.010011
828,828,"[98, 77, 42, 51, 72, 44, 81, 41, 95, 35, 78, 6...",865,95146,17.102993,"[17.10299298855589, 19.722765307402383, 18.166...",0.100000,0.01,0.100000


### Best of each iteration