In [1]:
# Shell escape: list the files in the current working directory (the notebooks next to this one).
! ls

CV_function.ipynb         Data_Preprocessing.ipynb  main.ipynb
Data_EDA_First_Week.ipynb first_GP_model.ipynb      pram_grid.ipynb


In [2]:
# Standard library imports
import itertools
import json
import datetime
import pathlib

# Third-party imports
import pandas as pd
import torch
from sklearn.model_selection import ShuffleSplit

# Slim-GSGP imports
from slim_gsgp.datasets.data_loader import load_pandas_df  
from slim_gsgp.utils.utils import train_test_split  
from slim_gsgp.main_gp import gp
from slim_gsgp.main_gsgp import gsgp
#from slim_gsgp.main_slim import slim
from slim_gsgp.evaluators.fitness_functions import rmse

import random
import os
# Move the working directory one level up so that the relative paths used
# further down ("data/...", "performance_metrics", the `utils` package)
# resolve from the parent folder.
# The guard makes this cell idempotent: without it, every re-run in the same
# kernel would climb one more directory and break all relative paths.
if "PROJECT_ROOT" not in globals():
    os.chdir(os.path.join(os.getcwd(), os.pardir))
    PROJECT_ROOT = os.getcwd()

In [3]:
# Folder with the input datasets, relative to the current working directory.
data_dir = pathlib.Path("data")

# Absolute path (as a plain string) of the folder that receives the
# performance logs; anchored at the current working directory.
performance_dir = str(pathlib.Path.cwd() / "performance_metrics")

In [4]:
# Reproducibility
# Defined first because the random tree constants below are derived from it.
SEED = 2

# Search Space
MAX_INIT_DEPTH = 4
MAX_DEPTH = 7
# Nine constants drawn uniformly from [0, 1] plus the fixed constant -1.
# A dedicated generator seeded with SEED keeps the draw identical across
# kernel restarts and leaves the global `random` state untouched.
_constants_rng = random.Random(SEED)
TREE_CONSTANTS = [_constants_rng.uniform(0, 1) for _ in range(9)] + [-1.]
TREE_FUNCTIONS = ['add', 'subtract']
PROB_CONSTANT = 0.9

# Problem Instance
#X_train=X_train_tensor, y_train=y_train_tensor, 
#X_test=X_val_tensor, y_test=y_val_tensor,
DATASET_NAME = 'Chicken'
FITNESS_FUNCTION = 'rmse'
MINIMIZATION = True

# GP Instance
POP_SIZE = 50
P_XO = 0.9
INITIALIZER = 'grow'
TOURNAMENT_SIZE = 2

# Solve settings
GENERATIONS = 30
ELITISM = True
# Misspelled name kept on purpose: the GP cell further down passes ELISTISM.
ELISTISM = ELITISM
N_ELITES = 1
TEST_ELITE = True
# Directory that holds the logs; the CSV inside it is what gets passed as log_path.
LOG_PATH = performance_dir
performance_file = os.path.join(performance_dir, 'performance_metrics.csv')
LOG_LEVEL = 2
VERBOSE = 1
N_JOBS = 1

#LOG_DIR = './log/PC2/'

## Load Data 

In [5]:
# Read the semicolon-separated dataset from the configured data directory
# (data_dir is defined in the paths cell, so the location lives in one place).
raw_df = pd.read_csv(data_dir / "sustavianfeed.csv", sep=';')

# Drop the two columns that are not passed on to the model.
trimmed_df = raw_df.drop(columns=['WING TAG', 'EMPTY MUSCULAR STOMACH'])

# Move 'CRUDE PROTEIN' to the last position, keeping the other columns in
# their original order. raw_df stays untouched so it can still be inspected.
other_columns = [col for col in trimmed_df.columns if col != 'CRUDE PROTEIN']
df = trimmed_df[other_columns + ['CRUDE PROTEIN']]

In [6]:
# Convert the DataFrame into the X / y torch.Tensors used by slim_gsgp.
# NOTE(review): presumably the last column ('CRUDE PROTEIN') becomes y — confirm.
xy_tensors = load_pandas_df(df, X_y=True)
X, y = xy_tensors

# Partition the tensors into train and test subsets (p_test=0.1).
split_parts = train_test_split(X, y, p_test=0.1)
X_train, X_test, y_train, y_test = split_parts

# Using GP

In [7]:
# Run standard GP with the settings from the configuration cell.
# Every argument refers to a named constant so that editing the configuration
# cell is enough to change the run (previously test_elite, n_jobs and seed
# were hard-coded here and silently ignored TEST_ELITE, N_JOBS and SEED).
instance_gp = gp(
    # ---
    # Search Space
    init_depth=MAX_INIT_DEPTH,
    max_depth=MAX_DEPTH,
    tree_constants=TREE_CONSTANTS,
    tree_functions=TREE_FUNCTIONS,
    prob_const=PROB_CONSTANT,
    # ---
    # Problem Instance
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    dataset_name=DATASET_NAME,
    fitness_function=FITNESS_FUNCTION,
    minimization=MINIMIZATION,
    # ---
    # GP instance
    pop_size=POP_SIZE,
    p_xo=P_XO,
    initializer=INITIALIZER,
    tournament_size=TOURNAMENT_SIZE,
    # ---
    # Solve settings
    n_iter=GENERATIONS,
    elitism=ELISTISM,
    n_elites=N_ELITES,
    test_elite=TEST_ELITE,
    # The log goes to the CSV file inside the performance directory.
    log_path=performance_file,
    log_level=LOG_LEVEL,
    verbose=VERBOSE,
    n_jobs=N_JOBS,
    seed=SEED,
)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     Chicken             |       0      |   75.66123962402344   |   75.65634155273438      |   0.0021779537200927734|      3           |
|     Chicken             |       1      |   75.58212280273438   |   75.57720947265625      |   0.0012166500091552734|      3           |
|     Chicken             |       2      |   70.98829650878906   |   71.70576477050781      |   0.001386880874633789 |      3           |
|     Chicken             |       3      |   63.34794998168945   |   63.042388916015625     |   0.0010457038879394531|      3           |
|     Chicken    

# Run Monte Carlo simulation (grid search)

In [8]:
from utils.grid_search_v2_leo import grid_search



# Candidate values per hyper-parameter. Only max_depth varies (two values);
# the remaining entries are single-element lists, giving two combinations.
param_grid = dict(
    max_depth=[MAX_DEPTH, MAX_DEPTH + 1],
    tree_constants=[TREE_CONSTANTS],
    tree_functions=[TREE_FUNCTIONS],
    prob_const=[PROB_CONSTANT],
    verbose=[0],
)

# Run the grid search over the training data only.
grid_search(X_train, y_train, param_grid=param_grid)


Grid Search Progress: 100%|██████████| 2/2 [03:00<00:00, 90.27s/combination]
