In [1]:
import pandas as pd
import numpy as np
import random
import csv
import torch

import sys
import os
# Put the directories that contain the project packages on the import path.
# For '../NEUROEVOLUTION' the expression resolves to two levels above the
# working directory; for '../NEUROEVOLUTION/gpolnel' it resolves to one level
# above. Each is pushed to the front of sys.path only if it is not already
# there, in this order, so the second one ends up first on sys.path.
for _anchor in ('../NEUROEVOLUTION', '../NEUROEVOLUTION/gpolnel'):
    parent_dir = os.path.abspath(os.path.join(os.path.dirname(_anchor), '..'))
    if parent_dir not in sys.path:
        sys.path.insert(0, parent_dir)

from NEUROEVOLUTION.utils import drop_features, device, kfold, seed, save_best_params
from ne_utils import ne_cross_validation

from gpolnel.utils.ffunctions import Ffunctions
from gpolnel.operators.initializers import nn_init
from NEUROEVOLUTION.gpolnel.operators.variators import nn_xo, prm_nn_mtn
from NEUROEVOLUTION.gpolnel.operators.selectors import prm_tournament, prm_roulette_wheel, prm_rank_selection, prm_double_tournament

# Name of the prediction target. It is interpolated into the label-file path
# ('../datamart/y_<target>.csv'), the 'logs/<target>/' output paths and the
# 'best_params/<target>-...' pickle names used by the cells below.
target = 'lactose'

In [2]:
from sklearn.model_selection import KFold

# Two-fold, shuffled cross-validation splitter seeded with the project seed.
# Note: this rebinds `kfold`, replacing the object of the same name imported
# from NEUROEVOLUTION.utils in the first cell.
kfold = KFold(
    n_splits=2,
    shuffle=True,
    random_state=seed,
)

In [3]:
# Feature table, read in full before any column filtering.
X = pd.read_csv('../datamart/data_project_nel.csv')

# Label column for the configured target.
# NOTE(review): the file name follows `target`, but the column name is
# hard-coded to 'lactose_percent' -- confirm before changing `target`.
label_file = '../datamart/y_' + f'{target}' + '.csv'
y = pd.read_csv(label_file)['lactose_percent']

# Apply the project's feature filter (see NEUROEVOLUTION.utils.drop_features).
X = drop_features(X)

In [4]:
# ---------------------------------------------------------------------------
# Random search over neuroevolution hyper-parameters.
#
# Each iteration samples one configuration from the lists below, scores it
# with ne_cross_validation, appends one row to
# 'logs/<target>/random_search_total.csv', and pickles the configuration
# whenever it beats the best score seen so far (lower is better).
# ---------------------------------------------------------------------------

# --- Data handling: a single batch that contains every row of X ------------
total_batches = 1
batch_size = X.shape[0]
shuffle = True
ffunction = Ffunctions('rmse')

# --- Search space -----------------------------------------------------------
# Each list holds the candidate values for one argument; one value per list is
# drawn on every iteration.
initializers = [nn_init]

pss = [100, 200, 500]            # candidates for `ps` (presumably population size -- confirm)
n_iters = [50, 100, 250, 500]    # candidates for `n_iter`

init_factors = [0.1, 0.05, 0.005]
n_hidden_neuronss = [[1], [2], [4], [8], [3, 3]]   # one entry per hidden layer
activationss = [torch.relu, torch.sigmoid]

# Candidate pressures for prm_tournament. The selector list itself is built
# inside the loop because prm_tournament needs the sampled pressure.
# prm_double_tournament is imported by the notebook but is not searched here.
selection_pressures = [0.1, 0.2, 0.3]

mutation_probs = [0.5, 0.8, 0.9]
mutation_steps = [0.025, 0.05]

xo_methods = [nn_xo]
xo_probs = [0.01, 0.1, 0.5]

has_elitisms = [True]
allow_reproductions = [True, False]

# --- Search bookkeeping -----------------------------------------------------
# Number of configurations to evaluate; ids run from 1 to n_searches.
n_searches = 49

# Dedicated generator for the hyper-parameter draws: seeding it makes the
# sampled configurations repeatable, and keeping it separate from the global
# `random` state protects the draws from any reseeding done elsewhere.
# Assumes `seed` is a plain int (or None) -- TODO confirm.
search_rng = random.Random(seed)

# Create the output directories up front so a fresh checkout does not fail
# on the first open() / save.
log_dir = f'logs/{target}'
best_params_dir = 'best_params'
os.makedirs(log_dir, exist_ok=True)
os.makedirs(best_params_dir, exist_ok=True)

best_score = np.inf
# Reset explicitly so a re-run of this cell can never save a stale
# configuration left in the kernel by an earlier run.
best_params = None

name = f'{log_dir}/random_search_total.csv'
result_columns = [
    'id', 'ps', 'n_iter', 'initializer', 'sspace',
    'selection_pressure', 'selection_method',
    'mutation_prob', 'mutation_method', 'mutation_step',
    'xo_prob', 'xo_method',
    'has_elitism', 'allow_reproduction', 'score',
]
with open(name, 'w', newline='\n') as csvfile:
    csv.writer(csvfile, delimiter=';').writerow(result_columns)


for i in range(1, n_searches + 1):

    # --- Sample one configuration ------------------------------------------
    initializer = search_rng.choice(initializers)
    ps = search_rng.choice(pss)
    n_iter = search_rng.choice(n_iters)

    init_factor = search_rng.choice(init_factors)
    n_hidden_neurons = search_rng.choice(n_hidden_neuronss)
    # One randomly chosen activation per hidden layer, followed by a trailing
    # None (presumably "no activation" for the output layer -- confirm).
    activations = [search_rng.choice(activationss) for _ in n_hidden_neurons]
    activations.append(None)

    sspace = {
        'input_shape': X.shape[1],
        'init_factor': init_factor,
        'n_hidden_neurons': n_hidden_neurons,
        'activation': activations,
        'n_output': 1,
        'n_batches': total_batches,
        'device': device,
    }

    selection_pressure = search_rng.choice(selection_pressures)
    # The sampled pressure is only passed to the tournament selector; it is
    # still logged when rank selection is drawn.
    selection_methods = [prm_tournament(selection_pressure), prm_rank_selection()]
    selection_method = search_rng.choice(selection_methods)

    mutation_prob = search_rng.choice(mutation_probs)
    mutation_step = search_rng.choice(mutation_steps)
    mutation_method = prm_nn_mtn(ms=mutation_step, sspace=sspace)

    xo_prob = search_rng.choice(xo_probs)
    xo_method = search_rng.choice(xo_methods)

    has_elitism = search_rng.choice(has_elitisms)
    allow_reproduction = search_rng.choice(allow_reproductions)

    # Per-search CV log and the shared training log.
    log_path_cv = f'{log_dir}/{i}_cv_results.csv'
    log_path_train = f'{log_dir}/train_logs.csv'

    # --- Evaluate -----------------------------------------------------------
    score = ne_cross_validation(X, y, batch_size, shuffle, kfold,
                                initializer=initializer, ps=ps, n_iter=n_iter,
                                sspace=sspace,
                                selection_method=selection_method,
                                mutation_prob=mutation_prob,
                                mutation_method=mutation_method,
                                xo_prob=xo_prob, xo_method=xo_method,
                                has_elitism=has_elitism,
                                allow_reproduction=allow_reproduction,
                                log_path_cv=log_path_cv, log_path_train=log_path_train,
                                ffunction=ffunction, seed=seed, device=device, id=i)

    # --- Record: append immediately so finished searches survive a crash ---
    with open(name, 'a', newline='\n') as csvfile:
        csv.writer(csvfile, delimiter=';').writerow([
            i, ps, n_iter, initializer, sspace,
            selection_pressure, selection_method,
            mutation_prob, mutation_method, mutation_step,
            xo_prob, xo_method,
            has_elitism, allow_reproduction, score,
        ])

    # --- Track the best configuration (lower score is better) --------------
    if score < best_score:
        best_score = score
        print(f'New best cv_score: {best_score} at search {i}')
        best_params = {
            'X': X,
            'y': y,
            'batch_size': batch_size,
            'shuffle': shuffle,
            'kfold': kfold,
            'initializer': initializer,
            'ps': ps,
            'n_iter': n_iter,
            'sspace': sspace,
            'selection_pressure': selection_pressure,
            # Operators are stored as their string form rather than as objects.
            'selection_method': f'{selection_method}',
            'mutation_prob': mutation_prob,
            'mutation_method': f'{mutation_method}',
            'mutation_step': mutation_step,
            'xo_prob': xo_prob,
            'xo_method': f'{xo_method}',
            'has_elitism': has_elitism,
            'allow_reproduction': allow_reproduction,
            'log_path_cv': log_path_cv,
            'log_path_train': log_path_train,
            'ffunction': ffunction,
            'seed': seed,
            'device': device,
            'id': i,
        }
        save_best_params(f'{best_params_dir}/{target}-best_params-{i}.pkl', best_params)

# Fail with a clear message instead of a NameError (or a stale save) when no
# search ever improved on the initial best_score, e.g. if every score was NaN.
if best_params is None:
    raise RuntimeError(
        f'Random search finished without any score below {np.inf}; '
        f'no best parameters to save. See {name} for the recorded scores.'
    )
save_best_params(f'{best_params_dir}/{target}-best_params_final.pkl', best_params)

cv_score: 1.9875687
New best cv_score: 1.987568736076355 at search 1
cv_score: 0.7084186
New best cv_score: 0.708418607711792 at search 2
cv_score: 1.434213
cv_score: 2.5769606
cv_score: 0.7212291
cv_score: 2.066866
cv_score: 2.2035408
cv_score: 2.050161
cv_score: 1.5372605
cv_score: 1.3117709
cv_score: 1.4293408
cv_score: 0.6646396
New best cv_score: 0.6646395921707153 at search 12
cv_score: 1.7729964
cv_score: 0.6047729
New best cv_score: 0.6047729253768921 at search 14
cv_score: 1.4326323
cv_score: 1.6358051
cv_score: 1.0199714
cv_score: 2.0473652
cv_score: 2.60666
cv_score: 0.6646396
cv_score: 1.9972169
cv_score: 2.178329
cv_score: 1.5423231
cv_score: 1.8252978
cv_score: 0.65401816
cv_score: 2.1979556
cv_score: 2.1071506
cv_score: 2.066089
cv_score: 1.8310722
cv_score: 0.71441567
cv_score: 0.20872164
New best cv_score: 0.20872163772583008 at search 31
cv_score: 0.8441924
cv_score: 1.9925915
cv_score: 0.6746839
cv_score: 1.8817282
cv_score: 1.4745736
cv_score: 1.8482481
cv_score: 2.