In [1]:
import pandas as pd 
import matplotlib.pyplot as pyplot
import seaborn as sns 
import os
from os import path

In [2]:
# Root directory that holds one sub-folder of simulation results per instance.
mainfolder = path.join(
    '..', '..', 'ppgi-stpg-gpx', 'data', 'sim_primRST_mutations'
)

# The instance folder names could be discovered with os.listdir(mainfolder),
# but that gives no ordering guarantee; generating the names directly
# yields them in natural order B1, B2, ..., B18.
folders = [f'B{number}' for number in range(1, 19)]

folders

['B1',
 'B2',
 'B3',
 'B4',
 'B5',
 'B6',
 'B7',
 'B8',
 'B9',
 'B10',
 'B11',
 'B12',
 'B13',
 'B14',
 'B15',
 'B16',
 'B17',
 'B18']

In [3]:
# Read every trial's CSV (trials 1..50 for each instance folder) into a
# list of DataFrames, in folder order and then trial order.
data = []
for folder in folders:
    # Directory with all trial files for this instance.
    instance_dir = path.join(mainfolder, folder)
    for trial in range(1, 51):
        fname = path.join(instance_dir, f'trial_{trial}_simulation.csv')
        df = pd.read_csv(fname)
        data.append(df)
            

In [4]:
# Stack all per-trial frames row-wise into one table, discarding the
# individual indexes in favour of a fresh 0..N-1 range index.
frame = pd.concat(objs=data, axis=0, ignore_index=True)

In [5]:
# Preview the first five rows of the combined results.
frame.head(n=5)

Unnamed: 0,nro_trial,instance_problem,nro_nodes,nro_edges,nro_terminals,global_optimum,tx_mutation,population_size,max_generation,best_cost,best_fitness,iterations,run_time,max_last_improvement,stopped_by
0,1,B1,50,63,9,82,0.2,100,4000,82,118,5,0.275482,4,BestKnowReached
1,2,B1,50,63,9,82,0.2,100,4000,82,37,17,0.821649,16,BestKnowReached
2,3,B1,50,63,9,82,0.2,100,4000,82,21,26,1.108921,25,BestKnowReached
3,4,B1,50,63,9,82,0.2,100,4000,82,16,39,1.57318,38,BestKnowReached
4,5,B1,50,63,9,82,0.2,100,4000,82,111,7,0.37783,6,BestKnowReached


In [6]:
# Summarise the combined frame: column names, non-null counts and dtypes.
frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   nro_trial             900 non-null    int64  
 1   instance_problem      900 non-null    object 
 2   nro_nodes             900 non-null    int64  
 3   nro_edges             900 non-null    int64  
 4   nro_terminals         900 non-null    int64  
 5   global_optimum        900 non-null    int64  
 6   tx_mutation           900 non-null    float64
 7   population_size       900 non-null    int64  
 8   max_generation        900 non-null    int64  
 9   best_cost             900 non-null    int64  
 10  best_fitness          900 non-null    int64  
 11  iterations            900 non-null    int64  
 12  run_time              900 non-null    float64
 13  max_last_improvement  900 non-null    int64  
 14  stopped_by            900 non-null    object 
dtypes: float64(2), int64(11

In [18]:
# Sanity check on the load step: each instance folder contributes 50 trials,
# so the combined frame must hold len(folders) * 50 rows (18 * 50 = 900).
# Deriving the count from `folders` and asserting it against the data makes
# the check fail loudly if a trial file is missing or read twice.
expected_rows = len(folders) * 50
assert len(frame) == expected_rows, (
    f'expected {expected_rows} rows, found {len(frame)}'
)
expected_rows

900

In [7]:
# Keep only the instance label plus the three metrics to summarise, then
# group the rows by instance.
group = (
    frame
    .loc[:, ['instance_problem', 'best_cost', 'iterations', 'max_last_improvement']]
    .groupby(by='instance_problem')
)

In [8]:
# Per-instance mean and standard deviation of each metric, with rows put
# back in B1..B18 order (groupby sorts labels as strings) and values
# rounded to two decimals.
result = (
    group
    .agg(['mean', 'std'])
    .reindex(index=folders)
    .round(decimals=2)
)

In [9]:
# Display the per-instance mean/std summary table.
result

Unnamed: 0_level_0,best_cost,best_cost,iterations,iterations,max_last_improvement,max_last_improvement
Unnamed: 0_level_1,mean,std,mean,std,mean,std
instance_problem,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
B1,82.1,0.3,59.18,149.34,8.18,8.69
B2,83.0,0.0,9.0,6.44,8.0,6.44
B3,138.0,0.0,3.2,1.12,2.2,1.12
B4,59.3,0.84,90.98,173.63,19.98,19.71
B5,61.0,0.0,47.04,65.0,46.04,65.0
B6,123.44,0.86,502.34,180.86,121.34,190.74
B7,111.0,0.0,3.66,1.1,2.66,1.1
B8,104.06,0.42,17.2,70.26,6.2,6.55
B9,220.0,0.0,4.08,1.56,3.08,1.56
B10,86.0,0.0,8.24,5.04,7.24,5.04


In [25]:
# result.to_clipboard()

In [10]:
# Print the mean/std summary table as LaTeX tabular source.
print(result.to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{2}{l}{best\_cost} & \multicolumn{2}{l}{iterations} & \multicolumn{2}{l}{max\_last\_improvement} \\
{} &      mean &   std &       mean &     std &                 mean &     std \\
instance\_problem &           &       &            &         &                      &         \\
\midrule
B1               &     82.10 &  0.30 &      59.18 &  149.34 &                 8.18 &    8.69 \\
B2               &     83.00 &  0.00 &       9.00 &    6.44 &                 8.00 &    6.44 \\
B3               &    138.00 &  0.00 &       3.20 &    1.12 &                 2.20 &    1.12 \\
B4               &     59.30 &  0.84 &      90.98 &  173.63 &                19.98 &   19.71 \\
B5               &     61.00 &  0.00 &      47.04 &   65.00 &                46.04 &   65.00 \\
B6               &    123.44 &  0.86 &     502.34 &  180.86 &               121.34 &  190.74 \\
B7               &    111.00 &  0.00 &       3.66 &    1.10 &                 2.66 & 

In [11]:
# Count, for each instance, how many trials ended with each stop criterion;
# rows are then arranged in B1..B18 order.
result2 = (
    pd.crosstab(index=frame['instance_problem'], columns=frame['stopped_by'])
    .reindex(index=folders)
)

In [12]:
# Display the instance-by-stop-criterion count table.
result2

stopped_by,BestKnowReached,Stagnation
instance_problem,Unnamed: 1_level_1,Unnamed: 2_level_1
B1,45,5
B2,50,0
B3,50,0
B4,43,7
B5,50,0
B6,12,38
B7,50,0
B8,49,1
B9,50,0
B10,50,0


In [13]:
# Print the stop-criterion count table as LaTeX tabular source.
print(result2.to_latex())

\begin{tabular}{lrr}
\toprule
stopped\_by &  BestKnowReached &  Stagnation \\
instance\_problem &                  &             \\
\midrule
B1               &               45 &           5 \\
B2               &               50 &           0 \\
B3               &               50 &           0 \\
B4               &               43 &           7 \\
B5               &               50 &           0 \\
B6               &               12 &          38 \\
B7               &               50 &           0 \\
B8               &               49 &           1 \\
B9               &               50 &           0 \\
B10              &               50 &           0 \\
B11              &               47 &           3 \\
B12              &                1 &          49 \\
B13              &               20 &          30 \\
B14              &                0 &          50 \\
B15              &                0 &          50 \\
B16              &                0 &          50 \\
B17        