In [1]:
import pandas as pd
import numpy as np
import random
import csv

from utils import drop_features, device, kfold, seed, save_best_params
from genetic_programming_utils import gp_cross_validation
from gpolnel.utils.ffunctions import Ffunctions
from gpolnel.utils.inductive_programming import function_map
from gpolnel.operators.initializers import ERC, grow, full, prm_grow
from gpolnel.operators.variators import swap_xo, prm_subtree_mtn, prm_hoist_mtn, prm_point_mtn
from gpolnel.operators.selectors import prm_tournament, prm_roulette_wheel, prm_rank_selection



In [2]:
# Read the feature table and the target table from the datamart folder.
raw_features = pd.read_csv('datamart/data_project_nel.csv')
target_table = pd.read_csv('datamart/y_lactose.csv')

# The target is the single 'lactose_percent' column of the target table.
y = target_table['lactose_percent']

# `drop_features` comes from the project's `utils` module; presumably it
# removes columns that should not be used as predictors -- confirm there.
X = drop_features(raw_features)

In [3]:
# Random search over genetic-programming settings.
#
# Each trial samples one value per hyper-parameter, scores the configuration
# with `gp_cross_validation`, appends one row to a shared results CSV and, when
# the score improves on the best seen so far, saves that configuration with
# `save_best_params`.

# --- Fixed settings shared by every trial ---
total_batches = 1
batch_size = X.shape[0]  # one batch holding every row of X
shuffle = True
ffunction = Ffunctions('rmse')
n_trials = 200

# --- Candidate values for each sampled hyper-parameter ---
initializers = [grow, full]

pss = [50, 100, 250, 500, 1000]      # passed as `ps` (presumably population size -- confirm)
n_iters = [50, 100, 250, 500, 1000]  # passed as `n_iter`

fsets = [function_map['add'], function_map['mul'], function_map['div'], function_map['sub']]
constant_sets = [ERC(-1., 1.), ERC(-5., 5.), ERC(-10., 10.)]
p_constantss = [0.05, 0.1, 0.2, 0.3]

max_init_depths = [2, 3, 4, 5]
max_depths = [5, 6, 7, 8]

selection_pressures = [0.05, 0.1, 0.2, 0.3, 0.4]
mutation_probs = [0.05, 0.1, 0.15, 0.2, 0.5]

xo_methods = [swap_xo]
xo_probs = [0.05, 0.2, 0.5, 0.7, 0.8, 0.9]

has_elitisms = [True, False]
allow_reproductions = [True, False]

# --- Results log: create the file once and write the header row ---
best_score = np.inf
name = 'logs/GP/' + 'random_search_total' + '.csv'
with open(name, 'w', newline='\n') as csvfile:
    w = csv.writer(csvfile, delimiter=';')
    w.writerow(['id', 'ps', 'n_iter', 'initializer', 'sspace',
                'selection_pressure', 'selection_method',
                'mutation_prob', 'mutation_method',
                'xo_prob', 'xo_method',
                'has_elitism', 'allow_reproduction', 'score'])


for i in range(n_trials):

    # Sample one configuration.
    initializer = random.choice(initializers)
    ps = random.choice(pss)
    n_iter = random.choice(n_iters)

    # A random non-empty subset of the available functions.
    fset = random.sample(fsets, random.randint(1, len(fsets)))
    constant_set = random.choice(constant_sets)
    p_constants = random.choice(p_constantss)

    max_init_depth = random.choice(max_init_depths)
    max_depth = random.choice(max_depths)

    sspace = {
        'n_dims': X.shape[1],
        'function_set': fset, 'constant_set': constant_set,
        'p_constants': p_constants,
        'max_init_depth': max_init_depth,
        'max_depth': max_depth,
        'n_batches': total_batches,
        'device': device
        }

    # Selection operators are built per trial because the tournament operator
    # is parameterised by the sampled pressure. Roulette wheel is included so
    # that all three imported selectors can actually be drawn.
    selection_pressure = random.choice(selection_pressures)
    selection_methods = [prm_rank_selection(), prm_tournament(selection_pressure), prm_roulette_wheel()]
    selection_method = random.choice(selection_methods)

    # Mutation operators are built per trial because point mutation needs the
    # sampled search space and mutation probability.
    mutation_prob = random.choice(mutation_probs)
    mutation_methods = [prm_subtree_mtn(initializer=prm_grow), prm_hoist_mtn(), prm_point_mtn(sspace=sspace, prob=mutation_prob)]
    mutation_method = random.choice(mutation_methods)

    xo_prob = random.choice(xo_probs)
    xo_method = random.choice(xo_methods)

    has_elitism = random.choice(has_elitisms)
    allow_reproduction = random.choice(allow_reproductions)

    # Per-trial log files handed to the cross-validation helper.
    log_path_cv = 'logs/GP/' + f'{i}' + '_cv_results.csv'
    log_path_train = 'logs/GP/' + f'{i}' + '_cv_train.csv'

    score = gp_cross_validation(X, y, batch_size, shuffle, kfold,
                                initializer=initializer, ps=ps, n_iter=n_iter,
                                sspace=sspace,
                                selection_pressure=selection_pressure,
                                selection_method=selection_method,
                                mutation_prob=mutation_prob,
                                mutation_method=mutation_method,
                                xo_prob=xo_prob, xo_method=xo_method,
                                has_elitism=has_elitism,
                                allow_reproduction=allow_reproduction,
                                log_path_cv=log_path_cv, log_path_train=log_path_train,
                                ffunction=ffunction, seed=seed, device=device, id=i)

    # Append (mode 'a') this trial's row; mode 'w' would truncate the file and
    # discard the header and every earlier trial.
    with open(name, 'a', newline='\n') as csvfile:
        w = csv.writer(csvfile, delimiter=';')
        w.writerow([i, ps, n_iter, initializer, sspace,
                    selection_pressure, selection_method,
                    mutation_prob, mutation_method,
                    xo_prob, xo_method,
                    has_elitism, allow_reproduction, score])

    if score < best_score:
        # Remember the new best so later, worse trials cannot overwrite it.
        best_score = score

        # Everything needed to re-run the winning configuration. The operator
        # objects are stored as their string form and are rebuilt from those
        # strings in a later cell.
        best_params = {
            'X': X,
            'y': y,
            'batch_size': batch_size,
            'shuffle': shuffle,
            'kfold': kfold,
            'initializer': initializer,
            'ps': ps,
            'n_iter': n_iter,
            'sspace': sspace,
            'selection_pressure': selection_pressure,
            'selection_method': f'{selection_method}',
            'mutation_prob': mutation_prob,
            'mutation_method': f'{mutation_method}',
            'xo_prob': xo_prob,
            'xo_method': f'{xo_method}',
            'has_elitism': has_elitism,
            'allow_reproduction': allow_reproduction,
            'log_path_cv': log_path_cv,
            'log_path_train': log_path_train,
            'ffunction': ffunction,
            'seed': seed,
            'device': device,
            'id': i,
        }
        save_best_params('logs/GP/best_params.pkl', best_params)


mul( mul( 0.3621, 3.7133 ) 3.7133 )
add( 2.2925, 2.2925 )
3.3826
add( 1.9830, 2.2561 )
mul( 2.9675, 1.4429 )
4.6868
div( div( -3.4376, -0.5160 ) 1.3202 )
3.9968
div( 3.0455, 0.6334 )
4.9189
cv_score: 0.46769112


KeyboardInterrupt: 

In [12]:
# Rebuild the operator objects of the best configuration.
#
# `best_params` holds the selection, mutation and crossover operators only as
# strings (they were stored with f'{...}'), so each operator is recovered by
# looking for an identifying keyword in that string. An unrecognised string
# raises instead of falling through, because a silent fall-through would leave
# whatever operator object is still bound in the kernel from the search loop.

if 'rank' in best_params['selection_method']:
    selection_method = prm_rank_selection()
elif 'roulette' in best_params['selection_method']:
    selection_method = prm_roulette_wheel()
elif 'tournament' in best_params['selection_method']:
    # Tournament selection is parameterised by the stored selection pressure.
    selection_method = prm_tournament(best_params['selection_pressure'])
else:
    raise ValueError(f"Unrecognised selection method: {best_params['selection_method']!r}")

if 'subtree' in best_params['mutation_method']:
    mutation_method = prm_subtree_mtn(initializer=prm_grow)
elif 'hoist' in best_params['mutation_method']:
    mutation_method = prm_hoist_mtn()
elif 'point' in best_params['mutation_method']:
    # Point mutation needs the stored search space and mutation probability.
    mutation_method = prm_point_mtn(sspace=best_params['sspace'], prob=best_params['mutation_prob'])
else:
    raise ValueError(f"Unrecognised mutation method: {best_params['mutation_method']!r}")

if 'swap' in best_params['xo_method']:
    xo_method = swap_xo
else:
    raise ValueError(f"Unrecognised crossover method: {best_params['xo_method']!r}")


In [15]:
# Re-run cross-validation with the best configuration found by the search.
#
# `selection_method`, `mutation_method` and `xo_method` are the operator
# objects rebuilt in the previous cell; `best_params` only holds their string
# form, which must not be passed as an operator. The run is tagged with the
# trial id stored in `best_params`, matching the integer id used in the search.
score = gp_cross_validation(
    best_params['X'], best_params['y'], best_params['batch_size'],
    best_params['shuffle'], best_params['kfold'],
    initializer=best_params['initializer'],
    ps=best_params['ps'],
    n_iter=best_params['n_iter'],
    sspace=best_params['sspace'],
    selection_pressure=best_params['selection_pressure'],
    selection_method=selection_method,
    mutation_prob=best_params['mutation_prob'],
    mutation_method=mutation_method,
    xo_prob=best_params['xo_prob'],
    xo_method=xo_method,
    has_elitism=best_params['has_elitism'],
    allow_reproduction=best_params['allow_reproduction'],
    log_path_cv='logs/GP/best_cv.csv',
    log_path_train='logs/GP/best_train.csv',
    ffunction=best_params['ffunction'],
    seed=best_params['seed'],
    device=best_params['device'],
    id=best_params['id'],
)
                                