# Importing required packages :

In [17]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.model_selection import StratifiedKFold
import numpy as np


# Importing the required modules created :

1. Features_Classification_NormalValidation.py : This module uses normal (hold-out) validation with an 80-20 train-test split.
2. Features_Classification_LeaveOneValidation.py : This module uses leave-one-out validation.
3. Features_Selection.py : This module uses SequentialFeatureSelector to select the best features and then tests the accuracy.


# Data Set :

1. ELA_Data : Non-normalized features. It also contains features other than the ELA features.
2. nELA_Data : Normalized features. It also contains features other than the ELA features.
3. ERT_Data : Relative expected running times (ERT) of all the algorithms.

In [18]:
# Read the three input tables from CSV files in the working directory,
# in the same order as they are listed in the "Data Set" section.
ELA_Data = pd.read_csv('median_features.csv')      # not-normalized features
nELA_Data = pd.read_csv('n_median_features.csv')   # normalized features
ERT_Data = pd.read_csv('rel_ERT.csv')              # per-algorithm ERT table

In [19]:
from Features_Classification_NormalValidation import ClassifierTrainer_NV
from Features_Classification_LeaveOneValidation import ClassifierTrainer_LeaveOneValidation
from Features_Selection import ClassifierTrainer_with_Feature_Selection

# Target Values :

In [20]:
# Target values: these names appear as the classes in the classification
# reports below. Presumably they are also column names in ERT_Data — confirm
# against the trainer modules. Order is unchanged from the original list.
target_column = [
    "BSqi",
    "BSrr",
    "CMA-CSA",
    "fmincon",
    "fminunc",
    "HCMA",
    "HMLSL",
    "IPOP400D",
    "MCS",
    "MLSL",
    "OQNLP",
    "SMAC-BBOB",
]

# Results :

## Normal Validation : 80-20, train-test split

In [21]:

# Build the hold-out trainer (80-20 train-test split, per the section heading)
# from the not-normalized feature table, the ERT table and the target names.
trainer = ClassifierTrainer_NV(ELA_Data, ERT_Data, target_column)

# Train the Random Forest classifier; the metrics appear in the cell output.
trainer.train_random_forest()

# Train the XGBoost classifier; the metrics appear in the cell output.
trainer.train_xgboost()


For not-normalized features :

Mean Accuracy (Random Forest): 0.7
              precision    recall  f1-score   support

        BSqi       1.00      1.00      1.00         2
        BSrr       1.00      1.00      1.00         1
     CMA-CSA       1.00      0.50      0.67         2
        HCMA       1.00      1.00      1.00         2
       HMLSL       1.00      0.50      0.67         4
    IPOP400D       0.67      1.00      0.80         2
         MCS       0.00      0.00      0.00         0
       OQNLP       0.00      0.00      0.00         1
   SMAC-BBOB       1.00      0.50      0.67         2
     fmincon       0.40      1.00      0.57         2
     fminunc       0.50      0.50      0.50         2

    accuracy                           0.70        20
   macro avg       0.69      0.64      0.62        20
weighted avg       0.81      0.70      0.70        20

Mean Recall (Random Forest): 0.7
Mean F1 Score (Random Forest): 0.7038095238095238
Mean Confusion Matrix (Random Forest):

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


For not-normalized features :

Mean Accuracy (XGBoost): 0.85
              precision    recall  f1-score   support

        BSqi       1.00      0.50      0.67         2
        BSrr       0.50      1.00      0.67         1
     CMA-CSA       1.00      1.00      1.00         2
        HCMA       1.00      1.00      1.00         2
       HMLSL       1.00      1.00      1.00         4
    IPOP400D       1.00      1.00      1.00         2
         MCS       0.00      0.00      0.00         0
       OQNLP       0.50      1.00      0.67         1
   SMAC-BBOB       1.00      0.50      0.67         2
     fmincon       1.00      1.00      1.00         2
     fminunc       1.00      0.50      0.67         2

    accuracy                           0.85        20
   macro avg       0.82      0.77      0.76        20
weighted avg       0.95      0.85      0.87        20

Mean Recall (XGBoost): 0.85
Mean F1 Score (XGBoost): 0.8666666666666666
Mean Confusion Matrix (XGBoost):
[[1. 1. 0. 0. 0. 0. 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
# Here we can make predictions using the trained classifiers.
# (Written as a comment on purpose: a bare string literal as the last
# expression of a cell is echoed back as the cell's output.)
# For example, assuming you have a new data point X_new and you want to use the Random Forest classifier:
# predicted_labels_rf = trainer.predict(trainer.rf_classifier, X_new)

' Here we can make predictions using the trained classifiers\n'

## Leave one out validation :

In [23]:
# Build the leave-one-out trainer from ELA_Data, ERT_Data and the target names.
# NOTE(review): the saved output of this cell is headed "For normalized features",
# but ELA_Data (read from median_features.csv, described above as not-normalized)
# is what is passed here — confirm which table actually produced that output.
trainer = ClassifierTrainer_LeaveOneValidation(ELA_Data, ERT_Data, target_column)

# Random Forest training is disabled for this run:
# trainer.train_random_forest()

# Train the XGBoost classifier; the metrics appear in the cell output.
trainer.train_xgboost()



For normalized features :

Mean Accuracy (XGBoost): 0.9270833333333334
Mean Recall (XGBoost): 0.9270833333333334
Mean F1 Score (XGBoost): 0.9270833333333334


Mean Confusion Matrix (XGBoost):
[[0.05208333 0.01041667 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.01041667 0.05208333 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.07291667 0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.14583333 0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.01041667 0.09375    0.
  0.         0.         0.         0.         0.01041667 0.        ]
 [0.         0.         0.         0.         0.         0.07291667
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.

In [None]:
# Here we can make predictions using the trained classifiers.
# For example, assuming you have a new data point X_new and you want to use the Random Forest classifier:
# predicted_labels_rf = trainer.predict(trainer.rf_classifier, X_new)
# NOTE(review): the Random Forest training call in the leave-one-out cell is
# commented out, so confirm trainer.rf_classifier is fitted before using this.

## Feature Selection :


# List of target values (same entries and order as under "Target Values").
target_column = [
    "BSqi", "BSrr", "CMA-CSA", "fmincon",
    "fminunc", "HCMA", "HMLSL", "IPOP400D",
    "MCS", "MLSL", "OQNLP", "SMAC-BBOB",
]

# Selection direction, passed to the trainer as its fourth positional argument.
direction = 'forward'
trainer = ClassifierTrainer_with_Feature_Selection(
    ELA_Data, ERT_Data, target_column, direction
)

In [None]:
# Two direction strings; presumably forwarded to SequentialFeatureSelector's
# `direction` option — confirm in Features_Selection. Only directionF is used
# in this cell.
directionF, directionB = 'forward', 'backward'

# Build the feature-selection trainer with the forward direction.
trainer = ClassifierTrainer_with_Feature_Selection(
    ELA_Data,
    ERT_Data,
    target_column,
    directionF,
)

# Train the Random Forest classifier
trainer.train_random_forest()

# XGBoost training is disabled for this run:
# trainer.train_xgboost()
