## Importing libraries

In [6]:
import pandas as pd
import numpy as np
import os
from copy import copy

## Setting up experiment-related variables

#### column names

In [105]:
# Names of the leading logger columns, in the order they are assigned to the
# log file's columns by the helper functions in this notebook.
columns = [
    "algo",
    "experiment_id",
    "dataset",
    "seed",
    "generation",
    "training_fitness",
    "timing",
    "pop_node_count",
]

## Defining helper functions

In [106]:
def get_column_names(log_level = 1, base_cols = None):
    """Build the full list of logger column names for a given log level.

    Parameters
    ----------
    log_level : int, default 1
        Log level the rows were written with. Levels 1, 2 and 3 each add
        their own extra columns; any other value selects the widest layout.
    base_cols : iterable of str, optional
        Leading column names shared by every log level. When omitted (or
        ``None``), the notebook-level ``columns`` list is used, looked up at
        call time so that re-running the ``columns`` cell is picked up.

    Returns
    -------
    list of str
        A new list: ``base_cols`` followed by the level-specific names, always
        ending with ``"log_level"``. The input list is never mutated.
    """
    if base_cols is None:
        base_cols = columns

    # Work on a fresh list so the caller's (or the module-level) list is untouched.
    names = list(base_cols)

    if log_level == 1:
        # extend, not append: append() accepts a single element and raised TypeError here.
        extras = ["test_fitness", "log_level"]
    elif log_level == 2:
        extras = ["test_fitness", "genotypic_diversity", "phenotipic_diversity", "log_level"]
    elif log_level == 3:
        extras = ["test_fitness", "pop_nodes", "pop_fitnesses", "log_level"]
    else:
        extras = ["test_fitness", "genotypic_diversity", "phenotipic_diversity", "pop_nodes", "pop_fitnesses",
                  "log_level"]

    names.extend(extras)
    return names

In [145]:
def get_experiment_results(experiment_id=None, logger_name="logger_checking.csv", base_cols = columns, experiment_id_index=1):
    """Load logger rows from the CSV log file and label their columns.

    Parameters
    ----------
    experiment_id : None, -1, scalar, or list/tuple/set, default None
        * ``None``  - keep every row of the logger file.
        * ``-1``    - keep only the rows whose id equals the id found in the
          last row of the file.
        * list/tuple/set - keep the rows whose id is in the collection.
        * any other value (e.g. an id string) - keep the rows whose id equals it.
    logger_name : str
        File name of the logger, looked up under ``<prefix>/main/log`` where
        ``<prefix>`` is the part of the current working directory that
        precedes the first occurrence of ``"utils"``.
    base_cols : list of str
        Leading column names handed to ``get_column_names``.
    experiment_id_index : int
        Positional label of the column holding the experiment id (the file
        is read without a header, so columns are labelled 0, 1, 2, ...).

    Returns
    -------
    pandas.DataFrame
        The selected rows with named columns and the ``log_level`` column removed.

    Raises
    ------
    IndexError
        If an ``experiment_id`` filter matches no rows.
    ValueError
        Raised by pandas when the number of inferred column names does not
        match the number of columns in the selected data.
    """
    # getting the path to the logger file
    logger = os.path.join(os.getcwd().split("utils")[0], "main", "log", logger_name)

    # loading logger data into a pandas dataframe (the file has no header row)
    results = pd.read_csv(logger, header=None, index_col=None)

    if experiment_id is not None:
        id_column = results[experiment_id_index]

        if isinstance(experiment_id, (list, tuple, set)):
            # a collection of experiment ids was given
            mask = id_column.isin(experiment_id)
        else:
            if experiment_id == -1:
                # -1 is a sentinel for "the experiment of the last logged row"
                experiment_id = id_column.iloc[-1]
            mask = id_column == experiment_id

        if not mask.any():
            # Fail with a readable message instead of the out-of-bounds error
            # that the log-level lookup below would raise on an empty frame.
            raise IndexError(
                "no rows found in {!r} for experiment_id {!r}".format(logger_name, experiment_id)
            )

        # Columns containing NaN in the selected rows are dropped, so the
        # remaining columns line up with the names of the inferred log level.
        results = results[mask].dropna(axis=1)

    # the log level is read from the last cell of the last row
    log_level = results.iloc[-1].iloc[-1]
    results.columns = get_column_names(log_level=log_level, base_cols=base_cols)

    return results.drop(columns=["log_level"])

In [146]:
def analyse_results(x, y, experiment_id = -1, logger_name="logger_checking.csv", colnames=columns, log_level=2):
    """Return the results dataframe for the requested experiment(s).

    Parameters
    ----------
    x, y :
        Currently unused; kept so existing calls keep working.
    experiment_id :
        Forwarded to ``get_experiment_results`` (``-1`` selects the experiment
        of the last logged row).
    logger_name : str
        Logger file name, forwarded to ``get_experiment_results``.
    colnames : list of str
        Leading column names, forwarded as ``base_cols`` so that a
        caller-supplied list is actually honoured.
    log_level : int
        Currently unused: ``get_experiment_results`` infers the log level
        from the logged data itself.

    Returns
    -------
    pandas.DataFrame
        The dataframe produced by ``get_experiment_results``.
    """
    return get_experiment_results(
        experiment_id=experiment_id,
        logger_name=logger_name,
        base_cols=colnames,
    )

In [147]:
# Load the rows of the experiment found in the last logger row (experiment_id=-1).
df = analyse_results(
    1,
    2,
    experiment_id=-1,
    logger_name="logger_checking.csv",
    log_level=2,
)

In [182]:
# Median (the "50%" entry of describe()) of training_fitness for every
# (algo, generation, dataset) group.
df.groupby(["algo", "generation", "dataset"]).describe()["training_fitness"]["50%"]

algo          generation  dataset           
StandardGSGP  0           efficiency_heating    13.265035
                          parkinson_updrs       11.835640
              1           efficiency_heating    12.231092
                          parkinson_updrs       11.628748
              2           efficiency_heating    11.709281
                                                  ...    
              98          parkinson_updrs        9.991224
              99          efficiency_heating     3.483392
                          parkinson_updrs        9.987185
              100         efficiency_heating     3.483392
                          parkinson_updrs        9.987054
Name: 50%, Length: 202, dtype: float64

In [164]:
# Rows logged at generation 0 for the "efficiency_heating" dataset.
df.loc[(df["dataset"] == "efficiency_heating") & (df["generation"] == 0)]

Unnamed: 0,algo,experiment_id,dataset,seed,generation,training_fitness,timing,pop_node_count,test_fitness,genotypic_diversity,phenotipic_diversity
404,StandardGSGP,fbbe1ea4-e15b-11ee-be43-badda4640570,efficiency_heating,0,0,15.941948,0.042898,1375.0,16.606918,10587030.0,2082459.4
505,StandardGSGP,fbbe1ea4-e15b-11ee-be43-badda4640570,efficiency_heating,1,0,10.588122,0.044219,1523.0,11.578728,184131.2,15843.618


In [59]:
# Distinct values found in the "algo" column.
# NOTE(review): the recorded output below lists dataset-like names
# ('efficiency_heating', 'parkinson_updrs', ...) under "algo" - it looks stale
# compared with the other cells; re-run on a fresh kernel to confirm.
df['algo'].unique()

array(['ld50', 'efficiency_heating', 'parkinson_updrs'], dtype=object)

In [54]:
# Summary statistics grouped by generation and algorithm.
# NOTE(review): the recorded output below shows fractional "generation" keys,
# which presumably comes from an older column mapping - re-run to confirm.
df.groupby(['generation','algo']).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,experiment_id,experiment_id,experiment_id,experiment_id,experiment_id,experiment_id,experiment_id,experiment_id,dataset,dataset,...,timing,timing,test_fitness,test_fitness,test_fitness,test_fitness,test_fitness,test_fitness,test_fitness,test_fitness
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
generation,algo,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0.051787,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,6.0,...,2279.192627,2279.192627,1.0,68.719780,,68.719780,68.719780,68.719780,68.719780,68.719780
0.054161,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,10.0,...,2278.805176,2278.805176,1.0,0.390756,,0.390756,0.390756,0.390756,0.390756,0.390756
0.056062,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,18.0,...,2276.316650,2276.316650,1.0,0.724282,,0.724282,0.724282,0.724282,0.724282,0.724282
0.060889,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,40.0,...,2273.658203,2273.658203,1.0,0.761295,,0.761295,0.761295,0.761295,0.761295,0.761295
0.062625,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,7.0,...,2278.691406,2278.691406,1.0,22.468168,,22.468168,22.468168,22.468168,22.468168,22.468168
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.103877,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,98.0,...,2260.236816,2260.236816,1.0,0.891920,,0.891920,0.891920,0.891920,0.891920,0.891920
0.111985,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,87.0,...,2262.281006,2262.281006,1.0,0.760432,,0.760432,0.760432,0.760432,0.760432,0.760432
0.116673,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,89.0,...,2262.101562,2262.101562,1.0,0.893122,,0.893122,0.893122,0.893122,0.893122,0.893122
0.135405,ld50,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,0.0,...,2280.465820,2280.465820,1.0,41107.720000,,41107.720000,41107.720000,41107.720000,41107.720000,41107.720000


In [None]:
def plot_results(x, y, experiment_id = -1):
    """Placeholder for a plotting helper.

    Not implemented yet: the arguments are ignored and ``None`` is returned.
    """
    return None

In [16]:
# Show the whole logger: experiment_id defaults to None, which keeps every row.
# get_experiment_results has no `colnames` parameter (passing it raises
# TypeError); the column names are inferred from the logged log level instead.
get_experiment_results()

Unnamed: 0,algo,experiment_id,dataset,seed,generation,training_fitness,timing,pop_node_count,test_fitness,genotypic_diversity,phenotipic_diversity,pop_nodes,pop_fitnesses
0,StandardGSGP,bf59181c-e14a-11ee-83f6-badda4640570,bioav,0,0,59.923904,0.080649,1653.0,62.043407,tensor(1376.8030),236.035250,3 3 2 3 2 3 3 2 2 3 2 3 3 2 7 6 5 5 6 5 7 4 4 ...,tensor(71.8188) tensor(72.4896) tensor(71.7204...
1,StandardGSGP,bf59181c-e14a-11ee-83f6-badda4640570,bioav,0,1,58.533848,0.131338,3281.0,60.608387,tensor(228.6850),3.224503,10 34 42 27 25 36 28 23 13 22 21 37 19 52 32 1...,tensor(59.9239) tensor(72.2352) tensor(71.9489...
2,StandardGSGP,bf59181c-e14a-11ee-83f6-badda4640570,bioav,0,2,58.533848,0.065430,2670.0,60.608387,tensor(383.3936),3.513942,28 3 4 39 19 3 22 4 106 22 4 38 21 39 60 4 4 2...,tensor(58.5338) tensor(71.6509) tensor(71.6334...
3,StandardGSGP,bf59181c-e14a-11ee-83f6-badda4640570,bioav,0,3,58.533848,0.055447,3254.0,60.608387,tensor(366.6199),4.212390,28 38 26 10 29 10 28 13 54 30 68 14 15 41 14 2...,tensor(58.5338) tensor(60.8217) tensor(59.9157...
4,StandardGSGP,bf59181c-e14a-11ee-83f6-badda4640570,bioav,0,4,58.475479,0.059915,2736.0,60.522114,tensor(563.4876),4.455651,28 3 4 4 3 3 74 22 43 27 22 3 14 64 47 22 31 4...,tensor(58.5338) tensor(71.7714) tensor(72.5095...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
227,StandardGSGP,e07d2ada-e14d-11ee-a587-badda4640570,bioav,0,46,43.705608,0.074875,26753.0,44.637959,44.195823669433594,1.481535,,
228,StandardGSGP,e07d2ada-e14d-11ee-a587-badda4640570,bioav,0,47,43.257000,0.076206,27629.0,44.090466,42.3504753112793,1.366302,,
229,StandardGSGP,e07d2ada-e14d-11ee-a587-badda4640570,bioav,0,48,42.243835,0.075893,27906.0,42.946690,45.24501037597656,1.417718,,
230,StandardGSGP,e07d2ada-e14d-11ee-a587-badda4640570,bioav,0,49,42.243835,0.081328,27923.0,42.946690,49.182987213134766,1.552405,,


## Analysing the results