In [1]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from tabpfn import TabPFNClassifier

def classification_perf2vec(clf, clf_name, modAlgo, log_mode):
    """Cross-validate ``clf`` at predicting each module value from perf2vec features.

    For every module of ``modAlgo``, every dimension in ``[5, 30]`` and six
    budgets per dimension (multiples of the dimension), the rows of the
    perf2vec table whose index matches ``*_{budget}_{dim}`` are selected and
    joined with that module's column of the configuration grid. The module
    column is the target, the remaining columns are the features. Each subset
    is scored with 10-fold shuffled cross-validation (``random_state=42``);
    accuracy and macro F1 are averaged over the folds and printed.

    Reads:
        ./data/classification_data/{modAlgo}_conf_perf2vec_{log_mode}.csv
        ./data/raw_data/{modAlgo}_conf_grid.csv

    Writes:
        ./results/{modAlgo}/classification_predictions/
            p2v_{clf_name}_dim_{dim}_budget_{budget}_module_{module}.csv
            (true / predicted labels, indexed by the row position in the subset)
        ./results/{modAlgo}/classification_p2v_{clf_name}.csv
            (one row per module / dim / budget with mean accuracy and F1)

    Args:
        clf: Estimator exposing ``fit(X, y)`` and ``predict(X)``; it is
            refitted on every fold.
        clf_name: Label used in the output file names.
        modAlgo: ``'modCMA'`` selects the modCMA module list, any other value
            selects the modDE module list. Also used to build file paths.
        log_mode: Suffix of the perf2vec input file name.

    Returns:
        None. Results are printed and written to CSV files.
    """
    from pathlib import Path  # local import: only needed for directory creation

    dims = [5, 30]
    if modAlgo == 'modCMA':
        modules = ['elitist', 'mirrored', 'base_sampler', 'weights_option', 'local_restart', 'step_size_adaptation']
    else:
        modules = ['mutation_base','mutation_reference','mutation_n_comps','use_archive','crossover','adaptation_method','lpsr']
    df = pd.read_csv(f'./data/classification_data/{modAlgo}_conf_perf2vec_{log_mode}.csv', index_col = 0)
    df_grid = pd.read_csv(f'./data/raw_data/{modAlgo}_conf_grid.csv', index_col=0)

    # Create the output folders before the (potentially long) evaluation so a
    # missing directory cannot discard finished cross-validation results.
    Path(f'./results/{modAlgo}/classification_predictions').mkdir(parents=True, exist_ok=True)

    columns = ['module', 'dim', 'budget', 'acc', 'f1']
    data = []
    for module in modules:
        print('\n\n')
        print(module)
        for dim in dims:
            budgets = [50*dim, 100*dim, 300*dim, 500*dim, 1000*dim, 1500*dim]
            for budget in budgets:
                # Keep only the rows recorded for this budget / dimension pair.
                variable = f'.*_{budget}_{dim}'
                df_sub = df[df.index.str.contains(fr'\b{variable}\b', regex=True)]
                # Re-index by the integer prefix of the row label (presumably
                # the configuration id) so the grid column can be joined on it.
                df_sub.index = [int(i.split("_")[0]) for i in df_sub.index]
                df_sub = df_sub.join(df_grid[module])
                # Missing values become the explicit class label 'None'.
                df_sub = df_sub.replace({np.nan: 'None'})

                y = df_sub[module]
                X = df_sub.drop([module], axis=1)
                kf = KFold(n_splits=10, shuffle=True, random_state=42)
                accuracy_scores = []
                f1_scores = []
                true_values = []
                index_arr = []
                predictions = []
                for train_index, test_index in kf.split(X):
                    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
                    # KFold yields positions, whereas y carries an integer
                    # label index; plain y[...] would look the positions up as
                    # labels and misalign (or fail) against X. Use .iloc so
                    # features and targets are selected the same way.
                    y_train = y.iloc[train_index].to_numpy().ravel()
                    y_test = y.iloc[test_index].to_numpy().ravel().tolist()
                    clf.fit(X_train, y_train)
                    y_pred = clf.predict(X_test).tolist()
                    true_values.extend(y_test)
                    predictions.extend(y_pred)
                    index_arr.extend(test_index)
                    accuracy_scores.append(accuracy_score(y_test, y_pred))
                    f1_scores.append(f1_score(y_test, y_pred, average = 'macro'))
                acc = np.mean(accuracy_scores)
                f1 = np.mean(f1_scores)
                data.append([module, dim, budget, acc, f1])
                print("dim_"+str(dim)+"    budget_"+str(budget))
                print("acc = "+str(acc))
                print("f1 = "+str(f1))
                # Out-of-fold predictions for this subset, one row per sample.
                df_pred = pd.DataFrame(np.transpose([true_values, predictions]), index = index_arr, columns = ['true', 'pred'])
                df_pred.to_csv(f'./results/{modAlgo}/classification_predictions/p2v_{clf_name}_dim_{dim}_budget_{budget}_module_{module}.csv')
    df_class = pd.DataFrame(data=data, columns=columns)
    df_class.to_csv(f'./results/{modAlgo}/classification_p2v_{clf_name}.csv')



# Evaluate a majority-class baseline, a random forest and TabPFN in turn.
# Only the modDE data is processed here; the same call with 'modCMA' in place
# of 'modDE' runs the modCMA variant.
# Factories are used so each classifier is only constructed right before its
# own run.
for clf_name, make_clf in (
    ('dummy', lambda: DummyClassifier(strategy='most_frequent')),
    ('RF', lambda: RandomForestClassifier(random_state=42)),
    ('TabPFN', lambda: TabPFNClassifier(seed=42)),
):
    clf = make_clf()
    classification_perf2vec(clf, clf_name, 'modDE', 'log')

  from .autonotebook import tqdm as notebook_tqdm





mutation_base
dim_5    budget_250
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_5    budget_500
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_5    budget_1500
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_5    budget_2500
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_5    budget_5000
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_5    budget_7500
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_30    budget_1500
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_30    budget_3000
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_30    budget_9000
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_30    budget_15000
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_30    budget_30000
acc = 0.28466424682395647
f1 = 0.14757509100407443
dim_30    budget_45000
acc = 0.28466424682395647
f1 = 0.14757509100407443



mutation_reference
dim_5    budget_250
acc = 0.1927707199032063
f1 = 0.0806344255811107
dim_5    budget_500
a