In [1]:
! ls

CV_function.ipynb         Data_Preprocessing.ipynb  pram_grid.ipynb
Data_EDA_First_Week.ipynb main.ipynb


In [2]:
# Standard library imports
import itertools
import json
import datetime
import pathlib

# Third-party imports
import pandas as pd
import torch
from sklearn.model_selection import ShuffleSplit

# Slim-GSGP imports
from slim_gsgp.datasets.data_loader import load_pandas_df  
from slim_gsgp.utils.utils import train_test_split  
from slim_gsgp.main_gp import gp
#from slim_gsgp.main_gsgp import gsgp
#from slim_gsgp.main_slim import slim
from slim_gsgp.evaluators.fitness_functions import rmse

## Load Data 

In [3]:
# Load the semicolon-delimited feed dataset and discard the two columns
# 'WING TAG' and 'EMPTY MUSCULAR STOMACH' (no columns are renamed here).
df = pd.read_csv("../data/sustavianfeed.csv", sep=';').drop(
    columns=['WING TAG', 'EMPTY MUSCULAR STOMACH']
)

# Put 'CRUDE PROTEIN' in the last position while keeping every other column
# in its original relative order.
df = df[[name for name in df.columns if name != 'CRUDE PROTEIN'] + ['CRUDE PROTEIN']]

In [4]:
# Turning df into X and y torch.Tensors
# (presumably the last column, 'CRUDE PROTEIN', becomes y -- TODO confirm against load_pandas_df)
X, y = load_pandas_df(df, X_y=True)

# Split into train and test sets: p_test=0.4 is passed, so (assuming p_test is the
# held-out fraction) 40% of the rows go to the hold-out and the rest to training
X_train, X_test, y_train, y_test = train_test_split(X, y, p_test=0.4)

# Split the test set into validation and test sets (p_test=0.5, i.e. half each).
# Note: X_test / y_test are rebound here to the final test subset, so the
# 40% hold-out from the previous statement is no longer available afterwards.
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, p_test=0.5)

# Grid Search

In [5]:
import os

# Move to the parent of the notebook folder so that the local `utils` package
# can be imported in the next cell. The guard makes this cell idempotent:
# once `utils` is visible from the working directory, re-running the cell no
# longer climbs one directory further each time.
if not os.path.isdir("utils"):
    os.chdir(os.path.join(os.getcwd(), os.pardir))

In [6]:
from utils.grid_search_v2_leo import grid_search



# Candidate values to explore, keyed by parameter name.
param_grid = dict(fit_intercept=[True, False])

# Run the grid search on the training split. The call is the last expression
# of the cell, so its return value is shown as the cell output.
# NOTE(review): in the recorded output the reported best setting is
# fit_intercept=False (mean_score -46.34) although fit_intercept=True scored
# 0.027 -- confirm whether grid_search is meant to minimise this score.
grid_search(X_train, y_train, param_grid=param_grid)


Grid Search Progress: 100%|██████████| 2/2 [00:00<00:00, 186.20combination/s]


({'fit_intercept': False},
 -46.341706085205075,
                model  fit_intercept  mean_score  \
 1  linear regression          False  -46.341706   
 0  linear regression           True    0.026605   
 
                                          fold_scores  \
 1  [-70.49513244628906, -53.06315612792969, -9.38...   
 0  [-0.20772814750671387, -0.31918060779571533, 0...   
 
                     timestamp  
 1  2025-05-12T12:37:05.293858  
 0  2025-05-12T12:37:05.291381  )

# Visualizations

In [388]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.subplots as sp
from plotly.subplots import make_subplots
import ast

In [None]:
# GP run log and the matching settings log. Neither file has a header row, so
# columns are addressed by position throughout the rest of the notebook.
# Note: `df` is rebound here; it no longer refers to the feed dataset loaded earlier.
# NOTE(review): an earlier cell moves the working directory to its parent, so
# '../log' resolves relative to that parent -- confirm this is the intended folder.
df = pd.read_csv("../log/gp.csv", header=None)
settings_df = pd.read_csv("../log/gp_settings.csv", header=None)

# Keep one row per combination id (column 0). drop_duplicates preserves the
# original row labels, so the index is reset to make each label equal to the
# row position that the plotting helpers use with `.iloc[comb_idx, 0]`.
unique_settings_df = settings_df.drop_duplicates(subset=0).reset_index(drop=True)

In [None]:
# 0  - Algorithm
# 1  - Instance ID
# 2  - Dataset
# 3  - Seed
# 4  - Generation
# 5  - Fitness
# 6  - Running time
# 7  - Population nodes
# 8  - Test fitness
# 9  - Elite nodes
# 10 - niche entropy
"""From here on, the columns do not appear in df."""
# 11 - sd(pop.fit)
# 12 - Log level 

In [None]:
def get_combination_str(setting_str, unique_settings_df=None):
    """
    Return the settings string stored for one combination id.

    Parameters
    ----------
    setting_str : str
        Combination id to look up in column 0 of the settings table.
    unique_settings_df : pandas.DataFrame, optional
        Settings table with the combination id in column 0 and the settings
        string in column 1. When omitted, the notebook-level
        ``unique_settings_df`` is looked up at call time, so a re-loaded table
        is picked up without re-running this cell.

    Returns
    -------
    The value in column 1 of the first row whose column 0 equals ``setting_str``.

    Raises
    ------
    KeyError
        If no row matches ``setting_str``.
    """
    if unique_settings_df is None:
        # The parameter shadows the global of the same name, hence the explicit lookup.
        unique_settings_df = globals()["unique_settings_df"]

    matches = unique_settings_df.loc[unique_settings_df[0] == setting_str, 1]
    if matches.empty:
        raise KeyError(f"Combination id {setting_str!r} not found in the settings table")

    # Positional access: the matching row keeps its original index label, which
    # equals 0 only for the very first row of the table.
    return matches.iloc[0]

def param_in_combination(param:str, comb_str: str):
    """
    Extract the value of one parameter from a combination (settings) string.

    The string is expected to look like a dict repr, e.g.
    ``{'log': 1, 'p_xo': 0.8, 'seed': 7}``. It is scanned textually: the value
    is the text between the quoted key and the next comma, so values that
    themselves contain a comma (lists, dicts, strings with commas) cannot be
    recovered.

    Parameters that the function can receive:
    'log' / 'verbose'/ 'test_elite' / 'n_jobs' / 'max_depth' / 'n_elites' / 'elitism' / 'n_iter'
    'settings_dict' / 'p_xo' / 'pop_size' / 'seed' / 'p_m' / 'p_c' / 'init_depth' / 'init_pop_size'

    Parameters
    ----------
    param : str
        Name of the parameter (the dict key, without quotes).
    comb_str : str
        Combination string to search.

    Returns
    -------
    The parameter value converted with ``ast.literal_eval``, or ``None`` (after
    printing a message) when the parameter is missing or its value cannot be
    converted.
    """
    # Match the quoted key followed by a colon, so that e.g. 'pop_size' is not
    # found inside 'init_pop_size'.
    value_start = -1
    for quote in ("'", '"'):
        key_token = f"{quote}{param}{quote}:"
        key_pos = comb_str.find(key_token)
        if key_pos != -1:
            value_start = key_pos + len(key_token)
            break
    if value_start == -1:
        print(f'Parameter does not exist in the combination string or it cannot be accessed. ({param!r} not found)')
        return None

    value_end = comb_str.find(',', value_start)
    if value_end == -1:
        # Last entry: there is no trailing comma, so drop the closing brace of the dict instead.
        param_value = comb_str[value_start:].strip()
        if param_value.endswith('}'):
            param_value = param_value[:-1].rstrip()
    else:
        param_value = comb_str[value_start:value_end].strip()

    # Object reprs ('<...>') and lists cannot be rebuilt from the truncated text.
    if param_value.startswith(('<', '[')):
        print(f'The value of the parameter given cannot be converted to its original class: {param_value}')
        return None

    try:
        return ast.literal_eval(param_value)
    except (SyntaxError, ValueError) as s:
        # ValueError covers text that parses but is not a literal (e.g. a bare name).
        print(f'Parameter does not exist in the combination string or it cannot be accessed. ({s})')
        return None

#example
#print(get_combination_str('865732c8-3014-11f0-b37b-baa0ecd080fe'))
#print(param_in_combination('log', get_combination_str('865732c8-3014-11f0-b37b-baa0ecd080fe')))
    
    

In [None]:
'''def pop_fitness_diversity(df, train_color='blue'):
     """
     Out of Bounds
     """
     dif_combs = np.unique(df[[1]])
     for comb in dif_combs:
          y = df[df[1]==comb]
          #comb_dict = get_combination(comb)
          fig = go.Figure()
          fig.add_trace(go.Scatter(y=y.iloc[:,11].values, 
                                   mode='lines', name='Train', line=dict(color=train_color)))
          fig.update_layout(
          height=400, width=800, 
          margin=dict(t=50),
          yaxis_range=[0,None],
          title_text=f'GP - Population Fitness Diversity\nCombination:',
          xaxis_title='Generation', yaxis_title='Fitness Standard Deviation'
          )
          fig.show()'''



In [None]:
def train_test_fit(df, train_color='blue', test_color='orange', rows=5, cols=4):
    """
    Plot train and test fitness on a grid with one panel per combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table addressed by position: column 0 = algorithm, column 1 =
        combination id, column 5 = train fitness, column 8 = test fitness.
        Rows of a combination are plotted in the order they appear.
    train_color, test_color : str
        Line colours of the train and test curves.
    rows, cols : int
        Grid shape; ``rows * cols`` must equal the number of distinct
        combination ids in ``df``.

    Notes
    -----
    Panel titles show the row position of the combination in the notebook-level
    ``unique_settings_df``, i.e. the same index that
    ``unique_settings_df.iloc[comb_idx, 0]`` resolves in the other plotting helpers.
    """
    dif_combs = np.unique(df[1])  # sorted unique combination ids
    num_plots = len(dif_combs)
    assert rows * cols == num_plots, \
        "The number of combinations does not correspond to the grid size defined (rows/cols)."

    # Row position (not index label) of each combination id in the settings table.
    comb_position = {}
    for position, comb_id in enumerate(unique_settings_df[0]):
        comb_position.setdefault(comb_id, position)

    fig = sp.make_subplots(
        rows=rows, cols=cols,
        subplot_titles=[f"Combination index: {comb_position[comb]}" for comb in dif_combs],
    )

    for i, comb in enumerate(dif_combs):
        comb_rows = df[df[1] == comb]
        algo = comb_rows.iloc[0, 0]
        # Fill the grid row by row (plotly uses 1-based positions).
        row, col = i // cols + 1, i % cols + 1

        for column, label, color in ((5, 'Train', train_color), (8, 'Test', test_color)):
            fig.add_trace(
                go.Scatter(y=comb_rows.iloc[:, column].values, mode='lines', name=label,
                           line=dict(color=color),
                           showlegend=(i == 0)),  # one legend entry per curve type
                row=row, col=col
            )

        fig.update_yaxes(range=[0, None], row=row, col=col)

    fig.update_layout(
        height=150 * rows,
        width=250 * cols,
        margin=dict(t=50),
        # `algo` is the algorithm name of the last combination drawn.
        title_text=f'{algo} - Train vs Test Fitness (x=Generation, y=RMSE)',
        showlegend=True
    )

    fig.update_annotations(font_size=10)
    fig.show()

In [None]:
def train_test_fit_and_size(df, comb_idxs: list | int | None = None,
                            train_color='blue', test_color='orange'):
    """
    Show, for each selected combination, a figure with fitness and size curves.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table addressed by position: column 0 = algorithm, column 1 =
        combination id, column 5 = train fitness, column 8 = test fitness,
        column 9 = elite nodes (plotted as "Size").
    comb_idxs : list of int, int or None
        Row position(s) in the notebook-level ``unique_settings_df``. A single
        int is accepted; ``None`` (default) selects every combination, resolved
        when the function is called rather than when it is defined.
    train_color, test_color : str
        Line colours of the train and test curves.
    """
    if comb_idxs is None:
        comb_idxs = list(range(unique_settings_df.shape[0]))
    elif isinstance(comb_idxs, (int, np.integer)):
        comb_idxs = [comb_idxs]

    for comb_idx in comb_idxs:
        comb = unique_settings_df.iloc[comb_idx, 0]
        comb_rows = df[df[1] == comb]
        algo = comb_rows.iloc[0, 0]

        fig = make_subplots(
            rows=1, cols=2,
            subplot_titles=(f'{algo} - Fitness evolution (combination {comb_idx})',
                            f'{algo} - Size evolution')
        )

        # Left panel: train and test fitness; right panel: size.
        fig.add_trace(go.Scatter(y=comb_rows.iloc[:, 5].values,
                                 mode='lines', name='Train', line=dict(color=train_color)), row=1, col=1)
        fig.add_trace(go.Scatter(y=comb_rows.iloc[:, 8].values,
                                 mode='lines', name='Test', line=dict(color=test_color)), row=1, col=1)
        fig.add_trace(go.Scatter(y=comb_rows.iloc[:, 9].values,
                                 mode='lines', name='Size'), row=1, col=2)

        fig.update_xaxes(title_text="Generation")

        fig.update_layout(
            width=1000,
            height=400,
            showlegend=True,
            yaxis_range=[0, None],  # sets layout.yaxis, i.e. the left (fitness) panel only
        )
        fig.show()

In [438]:
def niche_entropy(df, train_color='blue', rows=5, cols=4):
    """
    Plot the niche-entropy curve on a grid with one panel per combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table addressed by position: column 0 = algorithm, column 1 =
        combination id, column 10 = niche entropy. Rows of a combination are
        plotted in the order they appear.
    train_color : str
        Line colour of the entropy curves.
    rows, cols : int
        Grid shape; ``rows * cols`` must equal the number of distinct
        combination ids in ``df``.

    Notes
    -----
    Panel titles show the row position of the combination in the notebook-level
    ``unique_settings_df``, i.e. the same index that
    ``unique_settings_df.iloc[comb_idx, 0]`` resolves in the other plotting helpers.
    """
    dif_combs = np.unique(df[1])  # sorted unique combination ids
    num_plots = len(dif_combs)
    assert rows*cols==num_plots, "The number of combinations does not correspond to the grid size defined (rows/cols)."

    # Row position (not index label) of each combination id in the settings table.
    comb_position = {}
    for position, comb_id in enumerate(unique_settings_df[0]):
        comb_position.setdefault(comb_id, position)

    fig = sp.make_subplots(
        rows=rows, cols=cols,
        subplot_titles=[f"Combination index: {comb_position[comb]}" for comb in dif_combs],
    )

    for i, comb in enumerate(dif_combs):
        comb_rows = df[df[1] == comb]
        algo = comb_rows.iloc[0, 0]
        # Fill the grid row by row (plotly uses 1-based positions).
        row, col = i // cols + 1, i % cols + 1

        fig.add_trace(
            go.Scatter(
                y=comb_rows.iloc[:, 10].values,
                mode='lines',
                name='Niche Entropy',
                line=dict(color=train_color),
                showlegend=(i == 0)),  # a single legend entry for the whole grid
            row=row, col=col
        )

    fig.update_layout(
        height=150 * rows,
        width=250 * cols,
        margin=dict(t=50),
        # `algo` is the algorithm name of the last combination drawn.
        title_text=f'{algo} - Niche Entropy (x=Generation, y=Entropy)',
    )

    fig.show()

In [None]:
def plot_combs_together_test(df, comb_idxs: list | int | None = None,
                             colors = ['#FF0000', '#0000FF', '#00FF00', '#FFA500', '#800080',
                                       '#FF00FF', '#00FFFF', '#FFFF00', '#1F77B4', '#FF7F0E',
                                       '#2CA02C', '#D62728', '#9467BD', '#8C564B', '#E377C2',
                                       '#7F7F7F', '#AEC7E8', '#FFBB78', '#98DF8A', '#FF9896'],
                             ):
    """
    Overlay the test-fitness curves of several combinations in one figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Log table addressed by position: column 0 = algorithm, column 1 =
        combination id, column 8 = test fitness.
    comb_idxs : list of int, int or None
        Row position(s) in the notebook-level ``unique_settings_df``. A single
        int is accepted; ``None`` (default) selects every combination, resolved
        when the function is called rather than when it is defined.
    colors : list of str
        Line colours, used in order; at least one per selected combination.
        The list is only read, never modified.

    Raises
    ------
    ValueError
        If no combination is selected.
    AssertionError
        If there are fewer colours than selected combinations.
    """
    if comb_idxs is None:
        comb_idxs = list(range(unique_settings_df.shape[0]))
    elif isinstance(comb_idxs, (int, np.integer)):
        comb_idxs = [comb_idxs]

    if len(comb_idxs) == 0:
        # Without any curve there is no algorithm name for the figure title.
        raise ValueError("comb_idxs must select at least one combination")

    assert len(colors)>=len(comb_idxs), "Not enough colors for all combinations"

    fig = go.Figure()
    for i, comb_idx in enumerate(comb_idxs):
        comb = unique_settings_df.iloc[comb_idx, 0]
        comb_rows = df[df[1] == comb]
        algo = comb_rows.iloc[0, 0]

        fig.add_trace(go.Scatter(y=comb_rows.iloc[:, 8].values,
                                 mode='lines', name=f'Test Comb {comb_idx}',
                                 line=dict(color=colors[i])))

    # The axis title does not depend on the combination, so it is set once.
    fig.update_xaxes(title_text="Generation")

    fig.update_layout(
        width=1000,
        height=400,
        # `algo` is the algorithm name of the last combination drawn.
        title_text = f"{algo} - Test Fitness (Combinations indexes: {comb_idxs})",
        showlegend=True,
        yaxis_range=[0, None],
    )

    fig.show()



In [None]:
# Train vs. test fitness for every combination in the log, on the default 5x4 grid
train_test_fit(df)

In [None]:
# Fitness and size curves for the combinations at positions 3 and 19 of unique_settings_df
train_test_fit_and_size(df, comb_idxs=[3,19])

In [None]:
# Niche entropy for every combination in the log, on the default 5x4 grid
niche_entropy(df)

In [454]:
# Overlay the test-fitness curves of the combinations at positions 1, 2 and 3 of unique_settings_df
plot_combs_together_test(df,comb_idxs=[1,2,3])