In [1]:
import os
import pandas as pd

# Bibliotecas para visualização dos dados
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
from myutil import read_simulation, read_simulation_dataset

In [3]:
%%time

SIMULATION_FOLDER = "20200422_simplest"
filetemplate = "trial_{0}_simulation.csv"

raw_data = read_simulation(SIMULATION_FOLDER, filetemplate, mintrial=0, maxtrial=29)

Wall time: 6.8 s


In [4]:
# Overview of the loaded simulation results: columns, dtypes and non-null counts.
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   nro_trial             180 non-null    int64  
 1   instance_problem      180 non-null    object 
 2   nro_nodes             180 non-null    int64  
 3   nro_edges             180 non-null    int64  
 4   nro_terminals         180 non-null    int64  
 5   tx_crossover          180 non-null    float64
 6   tx_mutation           180 non-null    float64
 7   global_optimum        180 non-null    int64  
 8   best_cost             180 non-null    int64  
 9   best_fitness          180 non-null    int64  
 10  population_size       180 non-null    int64  
 11  max_generation        180 non-null    int64  
 12  iterations            180 non-null    int64  
 13  run_time              180 non-null    float64
 14  max_last_improvement  180 non-null    int64  
 15  why_stopped           1

In [5]:
# Distinct values recorded in the `why_stopped` column across all loaded runs.
raw_data['why_stopped'].unique()

array(['stagnation'], dtype=object)

In [6]:
# Rows whose reported best cost is lower than the instance's global optimum.
# Presumably a sanity check that is expected to come back empty — TODO confirm.
beats_optimum = raw_data["best_cost"] < raw_data["global_optimum"]
df = raw_data.loc[beats_optimum]

df

Unnamed: 0,nro_trial,instance_problem,nro_nodes,nro_edges,nro_terminals,tx_crossover,tx_mutation,global_optimum,best_cost,best_fitness,population_size,max_generation,iterations,run_time,max_last_improvement,why_stopped


In [7]:
# Instance labels "B13" .. "B18", in the order used to sort the summary tables.
categorical_order = list(map("B{0}".format, range(13, 19)))

In [8]:
# Columns that describe each problem instance (they do not vary between trials
# of the same instance — presumably; drop_duplicates relies on that).
selected_columns = [
    "instance_problem",
    "nro_nodes",
    "nro_edges",
    "nro_terminals",
    "global_optimum",
]

# One row per instance, indexed by instance name and sorted as categorical_order.
frame = (
    raw_data.loc[:, selected_columns]
    .drop_duplicates()
    .set_index("instance_problem")
    .reindex(index=categorical_order)
)

In [9]:
# Statistics computed for every metric, and the per-instance grouping of the
# raw results that they are applied to.
basic_stats = ["mean", "std"]
grouped = raw_data.groupby(by="instance_problem")

In [10]:
# Put the graph-description columns under a top-level "graph" label so they
# line up with the two-level (metric, statistic) columns produced by .agg().
# The guard keeps the cell idempotent: without it, running the cell a second
# time would wrap the already-labelled columns again and yield labels such as
# ("graph", ("graph", "nro_nodes")).
if not isinstance(frame.columns, pd.MultiIndex):
    frame.columns = pd.MultiIndex.from_tuples([("graph", col) for col in frame.columns])

# Mean and standard deviation of each outcome metric per instance, ordered
# like `frame` so the join below matches row for row.
run_stats = (
    grouped[["best_cost", "iterations", "run_time"]]
    .agg(basic_stats)
    .reindex(categorical_order)
)

# Summary table: instance description next to the aggregated outcomes,
# rounded to two decimals for display.
frame2 = frame.join(run_stats).round(2)

frame2

Unnamed: 0_level_0,graph,graph,graph,graph,best_cost,best_cost,iterations,iterations,run_time,run_time
Unnamed: 0_level_1,nro_nodes,nro_edges,nro_terminals,global_optimum,mean,std,mean,std,mean,std
instance_problem,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
B13,100,125,17,165,512.47,12.07,501.07,0.25,127.58,14.45
B14,100,125,25,235,504.33,9.37,501.03,0.18,146.77,12.66
B15,100,125,50,318,502.9,7.32,501.0,0.0,147.34,16.41
B16,100,200,17,127,480.3,19.06,503.0,1.76,176.24,17.19
B17,100,200,25,131,448.2,14.68,502.7,2.25,187.13,22.35
B18,100,200,50,218,498.47,12.31,502.43,1.72,192.39,19.64


In [11]:
# Show only the `iterations` column group of the summary table (mean and std per instance).
frame2["iterations"]

Unnamed: 0_level_0,mean,std
instance_problem,Unnamed: 1_level_1,Unnamed: 2_level_1
B13,501.07,0.25
B14,501.03,0.18
B15,501.0,0.0
B16,503.0,1.76
B17,502.7,2.25
B18,502.43,1.72


In [12]:
# Optional export of the summary table to disk; currently disabled.
# frame2.to_csv(os.path.join("..", "outputdata", "processed", "ga_simpliestcrossover.csv"))

In [18]:
# Copy the summary table to the system clipboard.
# NOTE(review): this cell's execution count (18) is out of sequence with its
# neighbours, so it was run after the cells below it. It also overwrites the
# user's clipboard and needs a clipboard backend, so it may fail on a headless
# "Restart & Run All" — confirm it is still wanted here.
frame2.to_clipboard()

In [13]:
# Same check as the earlier `why_stopped` cell: distinct values of that column.
# NOTE(review): duplicate of the earlier cell — consider keeping only one.
raw_data["why_stopped"].unique()

array(['stagnation'], dtype=object)

In [14]:
# Instance whose best-fitness history is inspected, and the per-trial file name
# pattern of those logs (presumably "{0}" is filled with the trial number).
DATASET = "B13"
file = "trial_{0}_best_fitness.csv"

# Load the best-fitness logs of the requested trials (0 to 29) for that instance.
dfbest_fit = read_simulation_dataset(
    DATASET,
    SIMULATION_FOLDER,
    file,
    mintrial=0,
    maxtrial=29,
)

In [15]:
# Structure of the best-fitness log loaded for the selected instance.
# NOTE(review): the recorded output reports an index running "0 to 0" over 32
# rows, i.e. row labels are not unique (presumably each trial's file keeps its
# own 0-based index) — confirm before relying on label-based lookups.
dfbest_fit.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32 entries, 0 to 0
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   iteration  32 non-null     int64 
 1   cost       32 non-null     int64 
 2   fitness    32 non-null     int64 
 3   trial      32 non-null     object
dtypes: int64(3), object(1)
memory usage: 1.2+ KB


In [16]:
# Distinct values of the `iteration` column in the best-fitness log.
# NOTE(review): in the recorded run only 0 and 1 appear. Presumably each row
# marks an iteration where the best individual changed, which would mean the
# search stops improving almost immediately — this is likely what the original
# remark "it's a bad thing" referred to; confirm against the logging code.
dfbest_fit['iteration'].unique()

array([0, 1], dtype=int64)

In [17]:
# Preview the first ten rows of the best-fitness log.
dfbest_fit.head(10)

Unnamed: 0,iteration,cost,fitness,trial
0,0,504,1453,0
0,0,503,1523,1
0,0,501,1510,2
0,0,514,1197,3
0,0,527,1254,4
0,0,523,915,5
1,1,518,1094,5
0,0,499,1645,6
0,0,506,1788,7
0,0,488,1402,8
