In [1]:
# List the files in the notebook's current working directory (IPython shell escape)
! ls

CV_function.ipynb         Data_Preprocessing.ipynb  pram_grid.ipynb
Data_EDA_First_Week.ipynb main.ipynb


In [2]:
# Standard library imports
import itertools
import json
import datetime
import pathlib

# Third-party imports
import pandas as pd
import torch
from sklearn.model_selection import ShuffleSplit

# Slim-GSGP imports
from slim_gsgp.datasets.data_loader import load_pandas_df  
from slim_gsgp.utils.utils import train_test_split  
from slim_gsgp.main_gp import gp
#from slim_gsgp.main_gsgp import gsgp
#from slim_gsgp.main_slim import slim
from slim_gsgp.evaluators.fitness_functions import rmse

## Load Data 

In [3]:
# Load the semicolon-separated dataset, discard the 'WING TAG' and
# 'EMPTY MUSCULAR STOMACH' columns, and reorder the remaining columns so
# that 'CRUDE PROTEIN' is the last one.
df = (
    pd.read_csv("../data/sustavianfeed.csv", sep=';')
    .drop(columns=['WING TAG', 'EMPTY MUSCULAR STOMACH'])
    .pipe(
        lambda frame: frame[
            [name for name in frame.columns if name != 'CRUDE PROTEIN'] + ['CRUDE PROTEIN']
        ]
    )
)

In [4]:
# Convert the dataframe into X and y torch.Tensors
X, y = load_pandas_df(df, X_y=True)

# First split: hold out part of the data (p_test=0.4) from the training set
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, p_test=0.4)

# Second split: divide the held-out part (p_test=0.5) into validation and test sets
X_val, X_test, y_val, y_test = train_test_split(X_holdout, y_holdout, p_test=0.5)

# Grid Search

In [5]:
import os

# Remember the directory the notebook was started in the first time this cell
# runs. Without this guard every re-run of the cell would climb one more
# directory level, because the new location was computed from the current cwd.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()

# Work from the parent of the notebook directory (presumably so that the
# project-level `utils` package imported in the next cell can be found).
# NOTE(review): relative paths used in earlier cells (e.g. "../data/...") are
# resolved against the cwd, so they will point elsewhere after this cell runs.
os.chdir(os.path.abspath(os.path.join(NOTEBOOK_DIR, os.pardir)))

In [6]:
from utils.grid_search_v2_leo import grid_search

# Candidate values explored by the grid search
param_grid = dict(fit_intercept=[True, False])

# NOTE(review): the recorded output of this cell reports {'fit_intercept': False}
# (mean_score -46.34) as the best combination even though True scored 0.027.
# Confirm whether grid_search minimises or maximises the score and that this
# matches the metric it computes.

# Run the grid search on the training split; the returned value is the cell output
grid_search(X_train, y_train, param_grid=param_grid)


Grid Search Progress: 100%|██████████| 2/2 [00:00<00:00, 186.20combination/s]


({'fit_intercept': False},
 -46.341706085205075,
                model  fit_intercept  mean_score  \
 1  linear regression          False  -46.341706   
 0  linear regression           True    0.026605   
 
                                          fold_scores  \
 1  [-70.49513244628906, -53.06315612792969, -9.38...   
 0  [-0.20772814750671387, -0.31918060779571533, 0...   
 
                     timestamp  
 1  2025-05-12T12:37:05.293858  
 0  2025-05-12T12:37:05.291381  )

# Visualizations

Note: the visualization code below has not yet been adapted to our CSV files!

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
def get_combination(comb):
    """Return the entry of ``settings_df.csv`` selected by ``comb``.

    The file is read from the current working directory without a header
    row, so its columns are labelled 0, 1, 2, ...; ``comb`` is used as a
    column label on that table.

    NOTE(review): this selects a *column*. If combinations are stored one per
    row, an index has to be set first (a ``set_index`` call was sketched here
    but never completed) — confirm against the real settings file.
    """
    settings = pd.read_csv("settings_df.csv", header=None)
    return settings[comb]

In [None]:
# Split the metrics log into one CSV file per combination.
# Column 1 of the header-less metrics file holds the combination identifier;
# every distinct identifier gets its own "<identifier>.csv" in the working directory.
df = pd.read_csv("p......metrics.csv", header=None)
dif_combs = np.unique(df[1])
for comb in dif_combs:
    # TODO: optionally name the file after get_combination(comb) instead of the raw id
    df.loc[df[1] == comb].to_csv(f"{comb}.csv", index=False)

In [None]:
# For visualizations
def train_test_fit(df, train_color='blue', test_color='orange'):
    """Show one train-vs-test fitness figure per combination found in ``df``.

    For every distinct value in column ``1`` of ``df`` the file
    ``<value>.csv`` is read from the working directory; its column at
    position 5 is drawn as the "Train" line and its column at position 8 as
    the "Test" line. Each figure is displayed immediately with ``fig.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labelled ``1`` holds the combination identifiers.
    train_color, test_color : str
        Line colours of the "Train" and "Test" traces.

    Returns
    -------
    None
    """
    train_pos, test_pos = 5, 8  # positional columns plotted as Train / Test
    for comb in np.unique(df[[1]]):
        y = pd.read_csv(f"{comb}.csv")
        comb_dict = get_combination(comb)
        # NOTE(review): the key is spelled 'fitness_funtion' — confirm it matches
        # the labels actually present in the settings file.
        fitness = comb_dict['fitness_funtion']
        #TODO
        fig = go.Figure()
        fig.add_trace(go.Scatter(y=y.iloc[:, train_pos].values,
                                 mode='lines', name='Train', line=dict(color=train_color)))
        fig.add_trace(go.Scatter(y=y.iloc[:, test_pos].values,
                                 mode='lines', name='Test', line=dict(color=test_color)))
        fig.update_layout(
            height=400, width=800,
            margin=dict(t=50),
            # Lower bound fixed at 0, upper bound left to Plotly. This single
            # setting replaces the duplicate update_yaxes(range=...) call.
            yaxis_range=[0, None],
            # Plotly text needs "<br>" for a line break; "\n" is not rendered as one.
            title_text=f'GP - Train vs Test Fitness<br>Comb:{comb_dict}',
            xaxis_title='Generation', yaxis_title=fitness.upper(),
        )
        fig.show()

def train_test_fit_and_size(df, train_color='blue', test_color='orange'):
    """Show fitness and size evolution side by side for each combination in ``df``.

    For every distinct value in column ``1`` of ``df`` the file
    ``<value>.csv`` is read from the working directory. The left subplot
    draws the columns at positions 5 ("Train") and 8 ("Test"); the right
    subplot draws the column at position 9 ("Size"). Each figure is displayed
    immediately with ``fig.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labelled ``1`` holds the combination identifiers.
    train_color, test_color : str
        Line colours of the "Train" and "Test" traces.

    Returns
    -------
    None
    """
    train_pos, test_pos, size_pos = 5, 8, 9  # positional columns that get plotted
    for comb in np.unique(df[[1]]):
        y = pd.read_csv(f"{comb}.csv")
        comb_dict = get_combination(comb)
        #TODO
        #fitness = comb_dict['fitness_funtion']
        fig = make_subplots(
            rows=1, cols=2,
            # Plotly text needs "<br>" for a line break; "\n" is not rendered as one.
            subplot_titles=(f'GP - Fitness evolution<br>Combination:{comb_dict}', 'GP - Size evolution'),
        )

        fig.add_trace(go.Scatter(y=y.iloc[:, train_pos].values,
                                 mode='lines', name='Train', line=dict(color=train_color)), row=1, col=1)
        fig.add_trace(go.Scatter(y=y.iloc[:, test_pos].values,
                                 mode='lines', name='Test', line=dict(color=test_color)), row=1, col=1)
        fig.add_trace(go.Scatter(y=y.iloc[:, size_pos].values,
                                 mode='lines', name='Size'), row=1, col=2)
        fig.update_layout(
            width=1000,
            height=400,
            showlegend=True,
            yaxis_range=[0, None],
            # Horizontal legend centred underneath the two subplots
            legend=dict(
                orientation='h',
                yanchor='bottom',
                y=-0.3,
                xanchor='center',
                x=0.5,
            ),
        )
        fig.show()

def niche_entropy(df, train_color='blue'):
    """Show one niche-entropy figure per combination found in ``df``.

    For every distinct value in column ``1`` of ``df`` the file
    ``<value>.csv`` is read from the working directory and its column at
    position 10 is drawn as a line (y-axis labelled "Entropy"). Each figure
    is displayed immediately with ``fig.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labelled ``1`` holds the combination identifiers.
    train_color : str
        Colour of the plotted line.

    Returns
    -------
    None
    """
    entropy_pos = 10  # positional column that gets plotted
    for comb in np.unique(df[[1]]):
        y = pd.read_csv(f"{comb}.csv")
        comb_dict = get_combination(comb)
        fig = go.Figure()
        fig.add_trace(go.Scatter(y=y.iloc[:, entropy_pos].values,
                                 mode='lines', name='Train', line=dict(color=train_color)))
        fig.update_layout(
            height=400, width=800,
            margin=dict(t=50),
            # Plotly text needs "<br>" for a line break; "\n" is not rendered as one.
            title_text=f'GP - Niche entropy<br>Combination:{comb_dict}',
            yaxis_range=[0, None],
            xaxis_title='Generation', yaxis_title='Entropy',
        )
        fig.show()

def pop_fitness_diversity(df, train_color='blue'):
    """Show one population-fitness-diversity figure per combination in ``df``.

    For every distinct value in column ``1`` of ``df`` the file
    ``<value>.csv`` is read from the working directory and its column at
    position 11 is drawn as a line (y-axis labelled "Fitness Standard
    Deviation"). Each figure is displayed immediately with ``fig.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labelled ``1`` holds the combination identifiers.
    train_color : str
        Colour of the plotted line.

    Returns
    -------
    None
    """
    diversity_pos = 11  # positional column that gets plotted
    for comb in np.unique(df[[1]]):
        y = pd.read_csv(f"{comb}.csv")
        comb_dict = get_combination(comb)
        fig = go.Figure()
        fig.add_trace(go.Scatter(y=y.iloc[:, diversity_pos].values,
                                 mode='lines', name='Train', line=dict(color=train_color)))
        fig.update_layout(
            height=400, width=800,
            margin=dict(t=50),
            yaxis_range=[0, None],
            # Plotly text needs "<br>" for a line break; "\n" is not rendered as one.
            title_text=f'GP - Population Fitness Diversity<br>Combination:{comb_dict}',
            xaxis_title='Generation', yaxis_title='Fitness Standard Deviation',
        )
        fig.show()

def all_combs_train_test():
    """Unfinished placeholder for a plot covering all combinations.

    For every distinct value in column ``1`` of the notebook-level ``df`` it
    reads ``<value>.csv``, looks up the combination settings and creates an
    empty Plotly figure; nothing is drawn or shown yet.

    NOTE(review): unlike the other plotting helpers, this one reads the
    notebook-level ``df`` instead of receiving it as a parameter.
    """
    for comb in np.unique(df[[1]]):
        y = pd.read_csv(f"{comb}.csv")
        comb_dict = get_combination(comb)
        fig = go.Figure()
        #TODO

