In [13]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
import numpy as np

ELA_Data = pd.read_csv('n_median_features.csv')
ERT_Data = pd.read_csv('rel_ERT.csv')


class ClassifierTrainer_Normalized:
    def __init__(self, ELA_Data, ERT_Data, target_columns):
        
        
        # print(type(ELA_Data), type(ERT_Data))
        
        
        self.data = pd.merge(ELA_Data, ERT_Data, on=['dim','fid'], how='left')
        
        self.target_columns = target_columns
        self.data['min_value_column'] = self.data[target_columns].idxmin(axis=1)
        

        self.label_encoder = LabelEncoder()
        self.data['min_value_column'] = self.label_encoder.fit_transform(self.data['min_value_column'])
        
        self.X = self.data.drop('min_value_column', axis=1)
        # self.Y = self.data('min_value_column')
        
        self.loo = LeaveOneOut()
        self.rf_classifier = None
        self.svm_classifier = None
        self.xgb_classifier = None

    def _train_classifier(self, classifier, name, Y_encoded):
          
      
        accuracies = []
        recall_scores = []
        f1_scores = []
        confusion_matrices = []
        
        # X_train, X_test = self.X[train_index], self.X[test_index]
        #     X_train, X_test = self.X.iloc[train_index], self.X.iloc[test_index]
            
        #     # Y_train, Y_test = Y_encoded[train_index], Y_encoded[test_index]
        #     Y_train, Y_test = self.data["min_value_column"].iloc[train_index], self.data["min_value_column"].iloc[test_index]
            
        #     # print("#######################")
        #     # print(type(self.X))
        #     # print("#######################")

        #     classifier.fit(X_train, Y_train)

        #     Y_pred = classifier.predict(X_test)

        for train_index, test_index in self.loo.split(self.X):
            # X_train, X_test = self.X[train_index], self.X[test_index]
            X_train, X_test = self.X.iloc[train_index], self.X.iloc[test_index]
            
            # Y_train, Y_test = Y_encoded[train_index], Y_encoded[test_index]
            Y_train, Y_test = self.data["min_value_column"].iloc[train_index], self.data["min_value_column"].iloc[test_index]
            
            # print("#######################")
            # print(type(self.X))
            # print("#######################")

            classifier.fit(X_train, Y_train)

            Y_pred = classifier.predict(X_test)

            accuracies.append(accuracy_score(Y_test, Y_pred))
            recall_scores.append(recall_score(Y_test, Y_pred, average='weighted', zero_division=0))
            f1_scores.append(f1_score(Y_test, Y_pred, average='weighted'))
            confusion_matrices.append(confusion_matrix(Y_test, Y_pred, labels=np.unique(Y_encoded)))

        mean_accuracy = np.mean(accuracies)
        mean_recall = np.mean(recall_scores)
        mean_f1_score = np.mean(f1_scores)
        mean_confusion_matrix = np.mean(confusion_matrices, axis=0)


        
        print("For normalized features :")
        print()
        print(f"Mean Accuracy ({name}): {mean_accuracy}")
        

        # original_labels_Y_pred = self.label_encoder.inverse_transform(Y_pred)
        # original_labels_Y_test = self.label_encoder.inverse_transform(Y_test)
        
        # print(classification_report(original_labels_Y_test, original_labels_Y_pred))
        
        
        print(f"Mean Recall ({name}): {mean_recall}")
        print(f"Mean F1 Score ({name}): {mean_f1_score}")
        
        
        print()
        print()
        print(f"Mean Confusion Matrix ({name}):")
        print(mean_confusion_matrix)

    def train_random_forest(self):
        self.rf_classifier = RandomForestClassifier(n_estimators=500, random_state=42)
        self._train_classifier(self.rf_classifier, "Random Forest", self.data['min_value_column'])

    def train_svm(self):
        self.svm_classifier = SVC(kernel='linear')
        self._train_classifier(self.svm_classifier, "SVM", self.data['min_value_column'])

    def train_xgboost(self):
        self.xgb_classifier = XGBClassifier()
        
        
        # Encode the target variable Y to ensure sequential class labels
        label_encoder = LabelEncoder()
        Y_encoded = label_encoder.fit_transform( self.data['min_value_column'])
        
        

        self._train_classifier(self.xgb_classifier, "XGBoost",  self.data['min_value_column'])

    def predict(self, classifier, X_new):
        if classifier is not None:
            return classifier.predict(X_new)
        else:
            raise ValueError("Classifier not trained yet.")

# Create an instance of ClassifierTrainer
target_column = ["BSqi", "BSrr", "CMA-CSA", "fmincon", "fminunc", "HCMA",
                  "HMLSL", "IPOP400D", "MCS", "MLSL", "OQNLP", "SMAC-BBOB"]


trainer = ClassifierTrainer_Normalized(ELA_Data, ERT_Data, target_column)

# # Train the Random Forest classifier
# trainer.train_random_forest()

# # Train the SVM classifier
# trainer.train_svm()

# Train the XGBoost classifier
trainer.train_xgboost()

# Optionally, you can make predictions using the trained classifiers
# For example, assuming you have a new data point X_new and you want to use the Random Forest classifier:
# predicted_labels_rf = trainer.predict(trainer.rf_classifier, X_new)


For normalized features :

Mean Accuracy (XGBoost): 0.9270833333333334
Mean Recall (XGBoost): 0.9270833333333334
Mean F1 Score (XGBoost): 0.9270833333333334


Mean Confusion Matrix (XGBoost):
[[0.05208333 0.01041667 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.01041667 0.05208333 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.07291667 0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.14583333 0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.01041667 0.09375    0.
  0.         0.         0.         0.         0.01041667 0.        ]
 [0.         0.         0.         0.         0.         0.07291667
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.