In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
# from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix
from tabpfn import TabPFNClassifier
from lightgbm import LGBMClassifier

def classification_s2v(clf, clf_name, modAlgo):
    """Cross-validate ``clf`` on every module/dimension/budget dataset of ``modAlgo``.

    For each module, each dimension in ``[5, 30]`` and each budget (a fixed
    multiple of the dimension) the function reads
    ``./data/classification_data/{modAlgo}/{module}_dim_{dim}_budget_{budget}.csv``,
    uses the column named after the module as the target and every other
    column as a feature, and runs a shuffled 10-fold cross-validation
    (``random_state=42``). A most-frequent-class ``DummyClassifier`` is fitted
    and scored on the same folds as a baseline.

    Files written under ``./results/{modAlgo}/`` (directories are created if
    they do not exist):

    * ``classification_predictions/s2v_{clf_name}_dim_{dim}_budget_{budget}_module_{module}.csv``
      -- out-of-fold true and predicted labels, indexed by the row position
      of each sample in the input file.
    * ``classification_s2v_{clf_name}.csv`` -- one row per
      (module, dim, budget) with the fold-averaged accuracy and macro F1 of
      ``clf`` (``acc``, ``f1``) and of the dummy baseline (``acc_d``, ``f1_d``).

    Progress and the averaged scores are also printed.

    Args:
        clf: Estimator exposing ``fit(X, y)`` and ``predict(X)``; the same
            object is refitted on every fold.
        clf_name: Label of the classifier, used only in output file names.
        modAlgo: Algorithm family. ``'modCMA'`` selects the modCMA module
            list; any other value selects the second module list (the one
            this file runs with ``'modDE'``). Also used as the sub-directory
            name for input and output files.

    Returns:
        None. Results are written to disk.
    """
    import os  # function-scope import: only needed to create output directories

    dims = [5, 30]
    # Each budget is one of these multipliers times the problem dimension.
    budget_factors = [50, 100, 300, 500, 1000, 1500]

    if modAlgo == 'modCMA':
        modules = ['elitist', 'mirrored', 'base_sampler', 'weights_option', 'local_restart', 'step_size_adaptation']
    else:
        modules = ['mutation_base', 'mutation_reference', 'mutation_n_comps', 'use_archive', 'crossover', 'adaptation_method', 'lpsr']
    columns = ['module', 'dim', 'budget', 'acc', 'f1', 'acc_d', 'f1_d']
    data = []

    results_dir = f'./results/{modAlgo}'
    predictions_dir = f'{results_dir}/classification_predictions'
    # to_csv does not create missing directories; make sure they exist up front.
    os.makedirs(predictions_dir, exist_ok=True)

    for module in modules:
        print('\n\n')
        print(module)
        for dim in dims:
            for budget in [factor * dim for factor in budget_factors]:
                df = pd.read_csv(f'./data/classification_data/{modAlgo}/{module}_dim_{dim}_budget_{budget}.csv', index_col=0)
                y = df[module]
                X = df.drop([module], axis=1)
                kf = KFold(n_splits=10, shuffle=True, random_state=42)
                dummy = DummyClassifier(strategy='most_frequent')
                accuracy_scores = []
                f1_scores = []
                accuracy_scores_dummy = []
                f1_scores_dummy = []
                true_values = []
                index_arr = []
                predictions = []
                for train_index, test_index in kf.split(X):
                    # KFold yields *positional* indices, so both X and y must be
                    # sliced with .iloc; plain y[...] would be label-based on an
                    # integer index and could pick the wrong rows or raise.
                    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
                    y_train = y.iloc[train_index].values.ravel()
                    y_test = y.iloc[test_index].values.ravel().tolist()

                    clf.fit(X_train, y_train)
                    y_pred = clf.predict(X_test).tolist()
                    true_values.extend(y_test)
                    predictions.extend(y_pred)
                    index_arr.extend(test_index)

                    accuracy_scores.append(accuracy_score(y_test, y_pred))
                    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

                    # Baseline: always predict the most frequent training class.
                    dummy.fit(X_train, y_train)
                    y_pred_dummy = dummy.predict(X_test).tolist()
                    accuracy_scores_dummy.append(accuracy_score(y_test, y_pred_dummy))
                    f1_scores_dummy.append(f1_score(y_test, y_pred_dummy, average='macro'))

                # Average the per-fold scores for this (module, dim, budget).
                acc = np.mean(accuracy_scores)
                f1 = np.mean(f1_scores)
                acc_d = np.mean(accuracy_scores_dummy)
                f1_d = np.mean(f1_scores_dummy)
                data.append([module, dim, budget, acc, f1, acc_d, f1_d])
                print("dim_"+str(dim)+"    budget_"+str(budget))
                print("acc = "+str(acc))
                print("acc_dummy = "+str(acc_d))
                print("f1 = "+str(f1))

                # Persist the out-of-fold predictions, keyed by row position.
                df_pred = pd.DataFrame(np.transpose([true_values, predictions]), index=index_arr, columns=['true', 'pred'])
                df_pred.to_csv(f'{predictions_dir}/s2v_{clf_name}_dim_{dim}_budget_{budget}_module_{module}.csv')

    df_class = pd.DataFrame(data=data, columns=columns)
    df_class.to_csv(f'{results_dir}/classification_s2v_{clf_name}.csv')


# Evaluate each classifier on the modDE data. Every classifier is built
# lazily, right before its own run, and `clf` is left bound to the last one.
#
# The modCMA experiments use the same calls with 'modCMA' as the last argument:
#   classification_s2v(RandomForestClassifier(random_state=42), 'RF', 'modCMA')
#   classification_s2v(TabPFNClassifier(seed=42), 'TabPFN', 'modCMA')
for clf_name, build_clf in (
    ('RF', lambda: RandomForestClassifier(random_state=42)),
    ('TabPFN', lambda: TabPFNClassifier(seed=42)),
):
    clf = build_clf()
    classification_s2v(clf, clf_name, 'modDE')





mutation_base
dim_5    budget_250
acc = 0.5795825771324864
acc_dummy = 0.28466424682395647
f1 = 0.5704495544800292
dim_5    budget_500
acc = 0.6142468239564429
acc_dummy = 0.28466424682395647
f1 = 0.6100431186059369
dim_5    budget_1500
acc = 0.6231699939503932
acc_dummy = 0.28466424682395647
f1 = 0.6204651630568833
dim_5    budget_2500
acc = 0.5796733212341199
acc_dummy = 0.28466424682395647
f1 = 0.5788037000500008
dim_5    budget_5000
acc = 0.6059891107078039
acc_dummy = 0.28466424682395647
f1 = 0.6021410527200187
dim_5    budget_7500
acc = 0.6545674531155475
acc_dummy = 0.28466424682395647
f1 = 0.6524866342229221
dim_30    budget_1500
acc = 0.5277676950998185
acc_dummy = 0.28466424682395647
f1 = 0.5256914975210977
dim_30    budget_3000
acc = 0.6320629159104658
acc_dummy = 0.28466424682395647
f1 = 0.6310275758989764
dim_30    budget_9000
acc = 0.6993647912885661
acc_dummy = 0.28466424682395647
f1 = 0.6961829900879787
dim_30    budget_15000
acc = 0.6820024198427103
acc_dummy = 0.28