In [5]:
from raw_python.Bagging import create_models, create_bags, evaluate
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import pandas as pd
import random
from raw_python.BaggingSA import BaggingSA
from typing import Literal, Tuple
from raw_python.Bagging import predict
import sklearn
from scipy.stats import spearmanr, kendalltau, pearsonr



In [None]:
# Reproducibility: a single seed feeds both NumPy's and the stdlib's global RNG.
seed = 42
np.random.seed(seed)
random.seed(seed)

# Number of cross-validation folds each dataset is split into.
k_cross = 5

# Hyper-parameter grid explored by the experiment loop.
alphas = [0.975, 0.985, 0.995]              # passed as `alpha` to BaggingSA
feature_mutation_chances = [0.1, 0.2, 0.3]  # passed as `feature_mutation_chance`

# Names understood by get_dataset().
# NOTE: this rebinds `datasets`, which the import cell bound to the
# sklearn.datasets module; get_dataset() reaches the loaders through
# `sklearn.datasets.*`, so it is unaffected by the rebinding.
datasets = ['digits', 'wine', 'breast_cancer', 'pima']

In [3]:


def get_dataset(dataset_name: str) -> Tuple[np.ndarray, np.ndarray]:
    """Load one of the benchmark datasets as a ``(features, labels)`` pair.

    Args:
        dataset_name: ``'digits'``, ``'wine'`` or ``'breast_cancer'`` (loaded
            through the matching ``sklearn.datasets.load_*`` function), or
            ``'pima'`` (read from ``./../datasets/pima.csv``).

    Returns:
        ``(X, y)``. For the scikit-learn datasets these are the loader's
        ``data`` and ``target`` attributes; for ``'pima'`` X is every CSV
        column except the last and y is the last column.

    Raises:
        ValueError: if ``dataset_name`` is none of the names above.
    """
    # Notebook name -> attribute name of the loader inside sklearn.datasets.
    bundled_loaders = (
        ('digits', 'load_digits'),
        ('wine', 'load_wine'),
        ('breast_cancer', 'load_breast_cancer'),
    )
    for known_name, loader_attr in bundled_loaders:
        if dataset_name == known_name:
            bunch = getattr(sklearn.datasets, loader_attr)()
            return bunch.data, bunch.target

    if dataset_name == 'pima':
        frame = pd.read_csv("./../datasets/pima.csv")
        features = frame.iloc[:, :-1].values
        labels = frame.iloc[:, -1].values
        return features, labels

    raise ValueError("Unsupported dataset")

In [4]:

def evaluate_bagging_sa(X_train, y_train, X_test, y_test, alpha, feature_mutation_chance) -> Tuple["BaggingSA", float, float]:
    """Run one BaggingSA search on a training split and score it on a test split.

    The optimiser is built with fixed settings (T0=2.0, geometric cooling,
    max_iterations=2000, n_trees=10, test_split_amount=5); only ``alpha`` and
    ``feature_mutation_chance`` vary between calls. ``fun_monitor`` is passed
    as the run's monitor callback, so calling this function has the side
    effect of updating the module-level ``fits``, ``accs`` and
    ``acc_fitness_difference`` that the callback writes to.

    Args:
        X_train, y_train: data handed to the ``BaggingSA`` constructor.
        X_test, y_test: data forwarded to ``run`` (as ``X_for_test`` /
            ``y_for_test``) and used for the final ``evaluate`` call.
        alpha: forwarded to ``BaggingSA`` as ``alpha``.
        feature_mutation_chance: forwarded to ``BaggingSA`` unchanged.

    Returns:
        ``(bagging_sa, accuracy, fitness)``: the optimiser instance, the value
        returned by ``evaluate`` on the test split, and the fitness returned by
        ``run``. The last two are presumably floats (the caller formats the
        accuracy with ``:.4f``) -- confirm against raw_python.
    """
    bagging_sa = BaggingSA(
        X=X_train, y=y_train,
        T0=2.0, cooling_method='geometric', alpha=alpha, max_iterations=2000, n_trees=10,
        feature_mutation_chance=feature_mutation_chance, test_split_amount=5,
    )
    models, fitness = bagging_sa.run(
        monitor_fun=fun_monitor, get_fitness=True, X_for_test=X_test, y_for_test=y_test
    )
    accuracy = evaluate(X=X_test, y=y_test, models=models)
    return bagging_sa, accuracy, fitness
    
def fun_monitor(iteration, T, best_fitness, fitness, new_fitness, accuracy):
    """Monitor callback handed to ``BaggingSA.run``; records per-call statistics.

    Side effects on module-level state:
      * adds ``abs(accuracy - fitness)`` to ``acc_fitness_difference``;
      * appends ``new_fitness`` to ``fits`` and ``accuracy`` to ``accs``.

    A progress line is printed whenever ``iteration`` is a multiple of 100.

    NOTE(review): the running difference uses ``fitness`` while the recorded
    series uses ``new_fitness`` -- confirm that this asymmetry is intended.

    Args:
        iteration: iteration counter supplied by the caller.
        T: value printed as the current temperature.
        best_fitness: value printed as the best fitness so far.
        fitness: fitness compared against ``accuracy``.
        new_fitness: fitness value stored in ``fits``.
        accuracy: accuracy value stored in ``accs``.
    """
    # Only the rebinding via `+=` needs a global declaration; `fits` and
    # `accs` are mutated in place.
    global acc_fitness_difference

    acc_fitness_difference += abs(accuracy - fitness)

    fits.append(new_fitness)
    accs.append(accuracy)

    if iteration % 100 == 0:
        print(f"    Iteration: {iteration}, T: {T:.2f}, Best fitness: {best_fitness:.4f}")

# Module-level accumulators written by fun_monitor(); reset before every run.
acc_fitness_difference = 0.0
fits = []
accs = []

# One row per (dataset, alpha, fmc, fold); column order must match the row
# appended inside the loop.
result = []
result_columns = ["dataset", "kCrossIndex", "alpha", "fmc", "accuracy", "correlation", "spearmanP", "fitness", "accFitnessDifference"]

print(f"Start at {pd.Timestamp.now()}")
for dataset in datasets:
    X, y = get_dataset(dataset)

    # Shuffle once per dataset so every (alpha, fmc) pair sees the same folds.
    random_indices = np.arange(X.shape[0])
    np.random.shuffle(random_indices)
    X = X[random_indices]
    y = y[random_indices]

    sub_groups_X = np.array_split(np.array(X), k_cross)
    sub_groups_y = np.array_split(np.array(y), k_cross)

    for alpha in alphas:
        for fmc in feature_mutation_chances:
            for k in range(k_cross):
                print(f"[Dataset: {dataset}, Alpha: {alpha}, FMC: {fmc}, k: {k}]")

                if k_cross == 1:
                    # A single "fold" would leave no training data; use a hold-out split.
                    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
                else:
                    # Fold k is the test set, the remaining folds form the training set.
                    X_train = np.concatenate(sub_groups_X[:k] + sub_groups_X[k+1:])
                    y_train = np.concatenate(sub_groups_y[:k] + sub_groups_y[k+1:])
                    X_test = sub_groups_X[k]
                    y_test = sub_groups_y[k]

                # Fresh monitor state for this run (fun_monitor reads these globals).
                acc_fitness_difference = 0.0
                fits = []
                accs = []

                bagging_sa, accuracy, fitness = evaluate_bagging_sa(X_train, y_train, X_test, y_test, alpha, fmc)

                # Number of times the monitor actually fired. A run may end
                # before max_iterations, so this is the correct denominator
                # for a per-call mean.
                n_monitor_calls = len(accs)

                if n_monitor_calls >= 2:
                    # Yields nan (with a warning) when either series is constant.
                    spearman_corr, spearman_p = spearmanr(fits, accs)
                else:
                    # A rank correlation is undefined for fewer than two samples.
                    spearman_corr, spearman_p = np.nan, np.nan

                if n_monitor_calls:
                    acc_fitness_difference /= n_monitor_calls
                else:
                    acc_fitness_difference = np.nan

                result.append([dataset, k, alpha, fmc, accuracy, spearman_corr, spearman_p, fitness, acc_fitness_difference])

                # Rewrite the CSV after every run so partial results survive an interruption.
                df = pd.DataFrame(result, columns=result_columns)
                df.to_csv("./../res/bagging_sa_params.csv", index=False)
                print(f"    Accuracy: {accuracy:.4f}")


Start at 2025-04-22 16:30:07.404866
[Dataset: digits, Alpha: 0.975, FMC: 0.1, k: 0]
    Iteration: 100, T: 0.16, Best fitness: 0.9415
    Iteration: 200, T: 0.01, Best fitness: 0.9415
    Iteration: 300, T: 0.00, Best fitness: 0.9415
    Iteration: 400, T: 0.00, Best fitness: 0.9445
    Iteration: 500, T: 0.00, Best fitness: 0.9476
    Iteration: 600, T: 0.00, Best fitness: 0.9476
    Iteration: 700, T: 0.00, Best fitness: 0.9476
    Iteration: 800, T: 0.00, Best fitness: 0.9476
    Iteration: 900, T: 0.00, Best fitness: 0.9476
    Accuracy: 0.9389
[Dataset: digits, Alpha: 0.975, FMC: 0.1, k: 1]
    Iteration: 100, T: 0.16, Best fitness: 0.9259
    Iteration: 200, T: 0.01, Best fitness: 0.9321
    Iteration: 300, T: 0.00, Best fitness: 0.9383
    Iteration: 400, T: 0.00, Best fitness: 0.9383
    Iteration: 500, T: 0.00, Best fitness: 0.9383
    Iteration: 600, T: 0.00, Best fitness: 0.9383
    Iteration: 700, T: 0.00, Best fitness: 0.9383
    Iteration: 800, T: 0.00, Best fitness: 0.93

  spearman_corr, spearman_p = spearmanr(fits, accs)


    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 1]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 2]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 3]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 4]
    Iteration: 100, T: 0.16, Best fitness: 0.9714
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 5]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 6]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 7]
    Iteration: 100, T: 0.16, Best fitness: 0.9714
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 8]
    Accuracy: 0.8824
[Dataset: wine, Alpha: 0.975, FMC: 0.2, k: 9]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 0]


  spearman_corr, spearman_p = spearmanr(fits, accs)
  spearman_corr, spearman_p = spearmanr(fits, accs)


    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 1]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 2]
    Iteration: 100, T: 0.16, Best fitness: 0.9714
    Accuracy: 0.8333
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 3]
    Accuracy: 0.8889
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 4]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 5]


  spearman_corr, spearman_p = spearmanr(fits, accs)
  spearman_corr, spearman_p = spearmanr(fits, accs)


    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 6]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 7]
    Iteration: 100, T: 0.16, Best fitness: 0.9714
    Iteration: 200, T: 0.01, Best fitness: 0.9714
    Iteration: 300, T: 0.00, Best fitness: 0.9714
    Iteration: 400, T: 0.00, Best fitness: 0.9714
    Iteration: 500, T: 0.00, Best fitness: 0.9714
    Iteration: 600, T: 0.00, Best fitness: 0.9714
    Iteration: 700, T: 0.00, Best fitness: 0.9714
    Iteration: 800, T: 0.00, Best fitness: 0.9714
    Iteration: 900, T: 0.00, Best fitness: 0.9714
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 8]
    Accuracy: 0.9412
[Dataset: wine, Alpha: 0.975, FMC: 0.3, k: 9]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.985, FMC: 0.1, k: 0]
    Iteration: 100, T: 0.45, Best fitness: 0.9714
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.985, FMC: 0.1, k: 1]
    Iteration: 100, T: 0.45, Best fitness: 0.9429
    Iteration: 200, T: 0.10, Best fitness: 0.942

  spearman_corr, spearman_p = spearmanr(fits, accs)


    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.985, FMC: 0.2, k: 1]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.985, FMC: 0.2, k: 2]
    Iteration: 100, T: 0.45, Best fitness: 0.9714
    Iteration: 200, T: 0.10, Best fitness: 0.9714
    Iteration: 300, T: 0.02, Best fitness: 0.9714
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.985, FMC: 0.2, k: 3]
    Accuracy: 0.8889
[Dataset: wine, Alpha: 0.985, FMC: 0.2, k: 4]
    Iteration: 100, T: 0.45, Best fitness: 0.9714
    Accuracy: 0.8889
[Dataset: wine, Alpha: 0.985, FMC: 0.2, k: 5]
    Iteration: 100, T: 0.45, Best fitness: 0.9429
    Iteration: 200, T: 0.10, Best fitness: 0.9714
    Iteration: 300, T: 0.02, Best fitness: 0.9714
    Iteration: 400, T: 0.00, Best fitness: 0.9714
    Iteration: 500, T: 0.00, Best fitness: 0.9714
    Iteration: 600, T: 0.00, Best fitness: 0.9714
    Iteration: 700, T: 0.00, Best fitness: 0.9714
    Iteration: 800, T: 0.00, Best fitness: 0.9714
    Iteration: 900, T: 0.00, Best fitness: 0.9714
    Iteration: 

  spearman_corr, spearman_p = spearmanr(fits, accs)
  spearman_corr, spearman_p = spearmanr(fits, accs)


    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.995, FMC: 0.1, k: 2]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.995, FMC: 0.1, k: 3]
    Accuracy: 0.8889
[Dataset: wine, Alpha: 0.995, FMC: 0.1, k: 4]
    Accuracy: 0.8889
[Dataset: wine, Alpha: 0.995, FMC: 0.1, k: 5]
    Iteration: 100, T: 1.22, Best fitness: 0.9667
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.1, k: 6]
    Iteration: 100, T: 1.22, Best fitness: 0.9714
    Iteration: 200, T: 0.74, Best fitness: 0.9714
    Iteration: 300, T: 0.45, Best fitness: 0.9714
    Iteration: 400, T: 0.27, Best fitness: 0.9714
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.1, k: 7]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.1, k: 8]
    Accuracy: 0.8235
[Dataset: wine, Alpha: 0.995, FMC: 0.1, k: 9]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.2, k: 0]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.2, k: 1]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.995, FMC: 0.2, k: 2]
    Accuracy:

  spearman_corr, spearman_p = spearmanr(fits, accs)


    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 1]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 2]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 3]
    Iteration: 100, T: 1.22, Best fitness: 0.9714
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 4]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 5]
    Accuracy: 0.9444
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 6]
    Iteration: 100, T: 1.22, Best fitness: 0.9714
    Iteration: 200, T: 0.74, Best fitness: 0.9714
    Iteration: 300, T: 0.45, Best fitness: 0.9714
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 7]
    Accuracy: 1.0000
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 8]
    Accuracy: 0.9412
[Dataset: wine, Alpha: 0.995, FMC: 0.3, k: 9]


  spearman_corr, spearman_p = spearmanr(fits, accs)


    Accuracy: 1.0000
[Dataset: breast_cancer, Alpha: 0.975, FMC: 0.1, k: 0]
    Iteration: 100, T: 0.16, Best fitness: 0.9714
    Iteration: 200, T: 0.01, Best fitness: 0.9714
    Iteration: 300, T: 0.00, Best fitness: 0.9714
    Iteration: 400, T: 0.00, Best fitness: 0.9714
    Iteration: 500, T: 0.00, Best fitness: 0.9714
    Iteration: 600, T: 0.00, Best fitness: 0.9714
    Iteration: 700, T: 0.00, Best fitness: 0.9714
    Iteration: 800, T: 0.00, Best fitness: 0.9714
    Iteration: 900, T: 0.00, Best fitness: 0.9714
    Accuracy: 0.9825
[Dataset: breast_cancer, Alpha: 0.975, FMC: 0.1, k: 1]
    Iteration: 100, T: 0.16, Best fitness: 0.9619
    Iteration: 200, T: 0.01, Best fitness: 0.9619
    Iteration: 300, T: 0.00, Best fitness: 0.9619
    Iteration: 400, T: 0.00, Best fitness: 0.9619
    Iteration: 500, T: 0.00, Best fitness: 0.9619
    Iteration: 600, T: 0.00, Best fitness: 0.9619
    Iteration: 700, T: 0.00, Best fitness: 0.9619
    Iteration: 800, T: 0.00, Best fitness: 0.961