In [1]:
from raw_python.Bagging import create_models, create_bags, evaluate
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import pandas as pd
import random
from raw_python.BaggingSA import BaggingSA
from typing import Literal, Tuple
from raw_python.Bagging import predict
import sklearn
from scipy.stats import spearmanr, kendalltau, pearsonr



In [2]:
# Reproducibility seed (applied to both NumPy's and Python's global RNGs below).
seed = 42

# Number of cross-validation folds used by the experiment loop.
k_cross = 5

# BaggingSA settings that are identical across every parameter set.
_shared_params = {
    'T0': 2,
    'cooling_method': 'geometric',
    'alpha': 0.995,
    'max_iterations': 2000,
    'feature_mutation_chance': 0.25,
    'test_split_amount': 5,
    'n_trees': 10,
}

# The only values that vary between parameter sets: (theta, beta) pairs.
# Both are forwarded to BaggingSA unchanged; they look like complementary
# weights of the fitness terms (each pair sums to 1) -- TODO confirm against
# BaggingSA. The last pair is kept as ints on purpose, matching the original.
_theta_beta_grid = [(0.5, 0.5), (0.6, 0.4), (0.7, 0.3), (0.8, 0.2), (0.9, 0.1), (1, 0)]

# One dict per experiment configuration; 'gamma' is 0 in every set.
params = [
    {**_shared_params, 'theta': theta, 'beta': beta, 'gamma': 0}
    for theta, beta in _theta_beta_grid
]

# Names understood by get_dataset().
# NOTE: this rebinds `datasets`, which the import cell also binds to the
# sklearn.datasets module; the loaders stay reachable as `sklearn.datasets`.
datasets = ['digits','wine', 'breast_cancer', 'pima']

np.random.seed(seed)
random.seed(seed)

In [3]:


def get_dataset(dataset_name: str) -> Tuple[np.ndarray, np.ndarray]:
    """Load the feature matrix and label vector for a named dataset.

    Args:
        dataset_name: One of 'digits', 'wine', 'breast_cancer' (loaded through
            the matching ``sklearn.datasets.load_*`` function) or 'pima'
            (read from ``./../datasets/pima.csv``, relative to the working
            directory).

    Returns:
        ``(X, y)``: for the sklearn datasets, the loader's ``data`` and
        ``target`` attributes; for 'pima', every CSV column except the last
        as ``X`` and the last column as ``y``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
    """
    # Dataset name -> name of the loader function inside sklearn.datasets.
    sklearn_loaders = {
        'digits': 'load_digits',
        'wine': 'load_wine',
        'breast_cancer': 'load_breast_cancer',
    }

    if dataset_name in sklearn_loaders:
        bunch = getattr(sklearn.datasets, sklearn_loaders[dataset_name])()
        return bunch.data, bunch.target

    if dataset_name == 'pima':
        frame = pd.read_csv("./../datasets/pima.csv")
        # Label is the last column; everything before it is a feature.
        return frame.iloc[:, :-1].values, frame.iloc[:, -1].values

    raise ValueError("Unsupported dataset")

In [4]:

def evaluate_bagging_sa(X_train, y_train, X_test, y_test, params) -> Tuple["BaggingSA", float, float]:
    """Run BaggingSA with one parameter set and score the resulting models.

    Args:
        X_train, y_train: Training data handed to the ``BaggingSA`` constructor.
        X_test, y_test: Hold-out data. It is forwarded to ``BaggingSA.run`` as
            ``X_for_test`` / ``y_for_test`` and then used to score the returned
            models with ``evaluate``.
        params: Mapping that must contain the keys 'T0', 'alpha',
            'cooling_method', 'max_iterations', 'n_trees',
            'feature_mutation_chance', 'test_split_amount', 'theta', 'beta'
            and 'gamma'. A missing key raises ``KeyError``.

    Returns:
        ``(bagging_sa, accuracy, fitness)``: the ``BaggingSA`` instance that was
        run, the value returned by ``evaluate`` on the test data, and the
        fitness returned by ``BaggingSA.run(get_fitness=True)``.
        NOTE(review): accuracy and fitness are annotated as floats because the
        callers format them with ``:.4f`` -- confirm against
        ``evaluate`` / ``BaggingSA.run``.

    Side effects:
        ``fun_monitor`` is passed as the run's monitor callback, so the
        module-level ``fits`` and ``accs`` lists are appended to during the run.
    """
    bagging_sa = BaggingSA(
        X=X_train,
        y=y_train,
        T0=params['T0'],
        alpha=params['alpha'],
        cooling_method=params['cooling_method'],
        max_iterations=params['max_iterations'],
        n_trees=params['n_trees'],
        feature_mutation_chance=params['feature_mutation_chance'],
        test_split_amount=params['test_split_amount'],
        theta=params['theta'],
        beta=params['beta'],
        gamma=params['gamma'],
    )
    models, fitness = bagging_sa.run(monitor_fun=fun_monitor, get_fitness=True, X_for_test=X_test, y_for_test=y_test)
    accuracy = evaluate(X=X_test, y=y_test, models=models)
    return bagging_sa, accuracy, fitness
    
def fun_monitor(iteration, T, best_fitness, fitness, new_fitness, accuracy):
    """Monitor callback: record one iteration's trace and log progress.

    Appends ``new_fitness`` to the module-level list ``fits`` and ``accuracy``
    to the module-level list ``accs`` on every call. Both names are looked up
    at call time, so the caller can rebind them to fresh lists before each run.
    Every 100th iteration (including iteration 0) a progress line is printed.

    Args:
        iteration: Current iteration number.
        T: Current temperature (printed with two decimals).
        best_fitness: Best fitness so far (printed with four decimals).
        fitness: Unused here; accepted to match the callback signature.
        new_fitness: Value appended to ``fits``.
        accuracy: Value appended to ``accs``.
    """
    fits.append(new_fitness)
    accs.append(accuracy)

    is_report_step = iteration % 100 == 0
    if not is_report_step:
        return

    print(f"    Iteration: {iteration}, T: {T:.2f}, Best fitness: {best_fitness:.4f}")

# Per-run traces filled by fun_monitor; rebound to fresh lists before each run.
fits = []
accs = []

# One row per (dataset, parameter set, fold), accumulated across ALL datasets.
result = []

# Column layout of the results table: identifiers, the parameter values in the
# same order they are read from each parameter dict, then the measured outputs.
param_columns = ['T0', 'alpha', 'cooling_method', 'max_iterations', 'feature_mutation_chance',
                 'test_split_amount', 'n_trees', 'theta', 'beta', 'gamma']
result_columns = ['dataset', 'k_cross'] + param_columns + ['fitness', 'accuracy', 'spearman_corr', 'spearman_p']

print(f"Start at {pd.Timestamp.now()}")
for dataset in datasets:
    # NOTE: `result` is intentionally NOT reset per dataset. Every checkpoint
    # below overwrites the same CSV file, so resetting the list here would
    # discard the rows of all previously processed datasets.
    X, y = get_dataset(dataset)

    # Shuffle the samples once per dataset so the contiguous folds below are random.
    random_indices = np.arange(X.shape[0])
    np.random.shuffle(random_indices)
    X = X[random_indices]
    y = y[random_indices]

    # Contiguous folds; the same folds are reused for every parameter set.
    sub_groups_X = np.array_split(np.array(X), k_cross)
    sub_groups_y = np.array_split(np.array(y), k_cross)

    for i, par in enumerate(params):
        for k in range(k_cross):
            print(f"[Dataset: {dataset}, K: {k+1}/{k_cross}, Parameter Set: {i+1}/{len(params)}]")

            if k_cross == 1:
                # A single "fold" cannot be held out from itself; use an 80/20 split instead.
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
            else:
                # Fold k is the test set; all remaining folds form the training set.
                X_train = np.concatenate(sub_groups_X[:k] + sub_groups_X[k+1:])
                y_train = np.concatenate(sub_groups_y[:k] + sub_groups_y[k+1:])
                X_test = sub_groups_X[k]
                y_test = sub_groups_y[k]

            # Fresh traces for this run; fun_monitor appends to these module-level lists.
            fits = []
            accs = []

            bagging_sa, accuracy, fitness = evaluate_bagging_sa(X_train, y_train, X_test, y_test, par)

            # Rank correlation between the per-iteration fitness and accuracy traces.
            spearman_corr, spearman_p = spearmanr(fits, accs)

            par_arr = [par[name] for name in param_columns]
            result.append([dataset, k+1]+par_arr+[fitness, accuracy, spearman_corr, spearman_p])

            # Checkpoint after every run so an interrupted experiment keeps its finished rows.
            df = pd.DataFrame(result, columns=result_columns)

            df.to_csv("./../res/bagging_sa_params.csv", index=False)
            print(f"    Accuracy: {accuracy:.4f}")


Start at 2025-04-28 13:50:09.618990
[Dataset: digits, K: 1/5, Parameter Set: 1/6]
   Acc: 0.4324, Dis: 0.1296, Com: 0.0000 => Fit: 0.5620
   Acc: 0.4064, Dis: 0.1293, Com: 0.0000 => Fit: 0.5356
   Acc: 0.4131, Dis: 0.1303, Com: 0.0000 => Fit: 0.5434
   Acc: 0.4098, Dis: 0.1368, Com: 0.0000 => Fit: 0.5466
   Acc: 0.4150, Dis: 0.1295, Com: 0.0000 => Fit: 0.5445
   Acc: 0.4045, Dis: 0.1320, Com: 0.0000 => Fit: 0.5365
   Acc: 0.4098, Dis: 0.1356, Com: 0.0000 => Fit: 0.5454
   Acc: 0.4096, Dis: 0.1339, Com: 0.0000 => Fit: 0.5435
   Acc: 0.4026, Dis: 0.1341, Com: 0.0000 => Fit: 0.5367
   Acc: 0.4271, Dis: 0.1316, Com: 0.0000 => Fit: 0.5587
   Acc: 0.4184, Dis: 0.1337, Com: 0.0000 => Fit: 0.5521
   Acc: 0.4236, Dis: 0.1312, Com: 0.0000 => Fit: 0.5548
   Acc: 0.4201, Dis: 0.1335, Com: 0.0000 => Fit: 0.5536
   Acc: 0.4219, Dis: 0.1334, Com: 0.0000 => Fit: 0.5553
   Acc: 0.4203, Dis: 0.1314, Com: 0.0000 => Fit: 0.5517
   Acc: 0.4254, Dis: 0.1264, Com: 0.0000 => Fit: 0.5518
   Acc: 0.4148, Dis: 0

KeyboardInterrupt: 