# Workflow

In [227]:
import pandas as pd
import optuna
import ast
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix, matthews_corrcoef, recall_score, precision_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier

data_dir = '/home/darshana/Projects/druggable_proteins/processed_dataset'
feature_engineered_data_dir = '/home/darshana/Projects/druggable_proteins/feature_engineered_dataset'

cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)


def evaluate_model(name, model, X_train, y_train, X_test, y_test, results_dataframe, feature_type):
    # evaluate model
    scores = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=cv, n_jobs=-1)
    accuracy = scores.mean()

    # fit the model on the training set
    model.fit(X_train, y_train)

    # predict the test set results
    y_pred = model.predict(X_test)

    # compute the confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    # calculate precision, recall (sensitivity), f1-score
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # calculate specificity
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn+fp)

    # calculate MCC
    mcc = matthews_corrcoef(y_test, y_pred)

    temp_df = pd.DataFrame({
        'feature_type': feature_type, 
        'model': name, 
        'with_hypertuning': False,
        'best_params': 'None',
        'accuracy': accuracy, 
        'sensitivity': recall, 
        'specificity': specificity, 
        'precision': precision, 
        'f1': f1, 
        'mcc': mcc,
        'index': f'{feature_type}_{name}_no_hypertuning'
        }, index=['index'])
    # results_dataframe is an empty dataframe to store results with the columns feature_type, model, with_hypertuning, accuracy, sensitivity, specificity, precision, f1, mcc
    return pd.concat([results_dataframe, temp_df])


def optimize_hyperparameters(name, model, objective, trials, results_dataframe, feature_type, X_train, y_train, X_test, y_test):
    def optuna_objective(trial):
        params = objective(trial)
        model_instance = model(**params)
        model_instance.fit(X_train, y_train)
        y_pred = model_instance.predict(X_test)

        # compute the confusion matrix
        cm = confusion_matrix(y_test, y_pred)

        # calculate precision, recall (sensitivity), f1-score
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        
        # calculate specificity
        tn, fp, fn, tp = cm.ravel()
        specificity = tn / (tn+fp)

        # calculate MCC
        mcc = matthews_corrcoef(y_test, y_pred)

        accuracy = (tp + tn) / (tp + tn + fp + fn)

        # Set user attributes
        trial.set_user_attr("precision", precision)
        trial.set_user_attr("recall", recall)
        trial.set_user_attr("f1", f1)
        trial.set_user_attr("specificity", specificity)
        trial.set_user_attr("mcc", mcc)

        return accuracy

    study = optuna.create_study(direction='maximize')
    study.optimize(optuna_objective, n_trials=trials)

    temp_df = pd.DataFrame({
        'feature_type': feature_type, 
        'model': name, 
        'with_hypertuning': True,
        'best_params': [str(study.best_trial.params)],
        'accuracy': study.best_trial.value, 
        'sensitivity': study.best_trial.user_attrs['recall'], 
        'specificity': study.best_trial.user_attrs['specificity'], 
        'precision': study.best_trial.user_attrs['precision'], 
        'f1': study.best_trial.user_attrs['f1'], 
        'mcc': study.best_trial.user_attrs['mcc'],
        'index': f'{feature_type}_{name}_with_hypertuning'
        }, index=['index'])
    results_dataframe = pd.concat([results_dataframe, temp_df])
    return results_dataframe


# Define models
models = {
    'LogisticRegression': LogisticRegression(),
    'SVC': SVC(),
    'XGBClassifier': XGBClassifier(),
    'LGBMClassifier': LGBMClassifier()
}

models_ = {
    'LogisticRegression': LogisticRegression,
    'SVC': SVC,
    'XGBClassifier': XGBClassifier,
    'LGBMClassifier': LGBMClassifier
}

# Define objectives for hyperparameters tuning
objectives = {
    'LogisticRegression': lambda trial: {
        'C': trial.suggest_float('C', 1e-2, 1e-1),
        'penalty': trial.suggest_categorical('penalty', ['l1', 'l2']),
        'solver': trial.suggest_categorical('solver', ['liblinear', 'saga']),
        'max_iter': trial.suggest_int('max_iter', 100, 1000)
    },
    'SVC': lambda trial: {
        'C': trial.suggest_float('svc_c', 1e-2, 1e2),
        'gamma': trial.suggest_float('svc_gamma', 1e-2, 1e2),
    },
    'XGBClassifier': lambda trial: {
        'learning_rate': trial.suggest_float("learning_rate", 1e-2, 0.3),
        'max_depth': trial.suggest_int("max_depth", 2, 6),
        'n_estimators': trial.suggest_int("n_estimators", 100, 1000)
    },
    'LGBMClassifier': lambda trial: {
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'max_depth': trial.suggest_int('max_depth', 2, 50),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 2000)
    }
}

# Without Feature Selection

In [228]:
# empty dataframe to store results with the columns feature_type, model, with_hypertuning, accuracy, sensitivity, specificity, precision, f1, mcc
results = pd.DataFrame(columns=['feature_type', 'model', 'with_hypertuning', 'best_params', 'accuracy', 'sensitivity', 'specificity', 'precision', 'f1', 'mcc', 'index'])
feature_types = ['AAC', 'APAAC', 'CTD', 'DPC', 'PAAC']
for feature_type in feature_types:

    # Load the training dataset
    data = pd.read_csv(f'{data_dir}/TR_{feature_type}.csv')

    # Separate features and target
    X = data.drop(columns=['label', 'id'], axis=1)
    y = data['label']

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Evaluate models without hyperparameters tuning
    for name, model in models.items():
        print(f"Evaluating {feature_type} {name}")
        results = evaluate_model(name, model, X_train, y_train, X_test, y_test, results, feature_type)
        print(results)

    # Optimize hyperparameters
    for name, model in models_.items():
        objective = objectives.get(name)
        if objective is not None:
            print(f"Optimizing {feature_type} {name}")
            results = optimize_hyperparameters(name, model, objective, trials=100, results_dataframe=results, feature_type=feature_type, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
            print(results)

results.to_csv('results_v2.csv', index=False)

Evaluating AAC LogisticRegression
      feature_type               model with_hypertuning best_params  accuracy   
index          AAC  LogisticRegression            False        None  0.875077  \

       sensitivity  specificity  precision        f1       mcc   
index     0.791837     0.901515   0.881818  0.834409  0.699324  \

                                       index  
index  AAC_LogisticRegression_no_hypertuning  
Evaluating AAC SVC
      feature_type               model with_hypertuning best_params  accuracy   
index          AAC  LogisticRegression            False        None  0.875077  \
index          AAC                 SVC            False        None  0.897209   

       sensitivity  specificity  precision        f1       mcc   
index     0.791837     0.901515   0.881818  0.834409  0.699324  \
index     0.836735     0.931818   0.919283  0.876068  0.773969   

                                       index  
index  AAC_LogisticRegression_no_hypertuning  
index               

[32m[I 2023-05-14 10:09:55,554][0m A new study created in memory with name: no-name-daae56c1-c117-41a1-9c4a-b5bbc95a14d6[0m
[32m[I 2023-05-14 10:09:55,580][0m Trial 0 finished with value: 0.8428290766208252 and parameters: {'C': 0.024676775179329563, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 782}. Best is trial 0 with value: 0.8428290766208252.[0m
[32m[I 2023-05-14 10:09:55,600][0m Trial 1 finished with value: 0.8565815324165029 and parameters: {'C': 0.09094766750588791, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 431}. Best is trial 1 with value: 0.8565815324165029.[0m
[32m[I 2023-05-14 10:09:55,620][0m Trial 2 finished with value: 0.8546168958742633 and parameters: {'C': 0.06823992043723134, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 310}. Best is trial 1 with value: 0.8565815324165029.[0m
[32m[I 2023-05-14 10:09:55,639][0m Trial 3 finished with value: 0.8526522593320236 and parameters: {'C': 0.041434628004611083, 'penalty': 'l2', 'solver': 'l

      feature_type               model with_hypertuning best_params  accuracy   
index          AAC  LogisticRegression            False        None  0.875077  \
index          AAC                 SVC            False        None  0.897209   
index          AAC       XGBClassifier            False        None  0.890297   
index          AAC      LGBMClassifier            False        None  0.883399   

       sensitivity  specificity  precision        f1       mcc   
index     0.791837     0.901515   0.881818  0.834409  0.699324  \
index     0.836735     0.931818   0.919283  0.876068  0.773969   
index     0.828571     0.928030   0.914414  0.869379  0.762316   
index     0.840816     0.909091   0.895652  0.867368  0.752881   

                                       index  
index  AAC_LogisticRegression_no_hypertuning  
index                 AAC_SVC_no_hypertuning  
index       AAC_XGBClassifier_no_hypertuning  
index      AAC_LGBMClassifier_no_hypertuning  
Optimizing AAC LogisticRegre

[32m[I 2023-05-14 10:09:55,778][0m Trial 8 finished with value: 0.8546168958742633 and parameters: {'C': 0.09673575707935296, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 178}. Best is trial 1 with value: 0.8565815324165029.[0m
[32m[I 2023-05-14 10:09:55,820][0m Trial 9 finished with value: 0.8506876227897839 and parameters: {'C': 0.03917073268527012, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 153}. Best is trial 1 with value: 0.8565815324165029.[0m
[32m[I 2023-05-14 10:09:55,863][0m Trial 10 finished with value: 0.8526522593320236 and parameters: {'C': 0.09505930844922625, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 560}. Best is trial 1 with value: 0.8565815324165029.[0m
[32m[I 2023-05-14 10:09:55,908][0m Trial 11 finished with value: 0.8546168958742633 and parameters: {'C': 0.07594439674443194, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 354}. Best is trial 1 with value: 0.8565815324165029.[0m
[32m[I 2023-05-14 10:09:55,950][0m Trial 12 finished w

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   

                                             best_params  accuracy   
index                                               None  0.875077  \
index                                               None  0.897209   
index                                               None  0.890297   
index                                               None  0.883399   
index  {'C': 0.03277840391147581, 'penalty': 'l1', 's...  0.858546   

       sensitivity  specificity  precision        f1       mcc   
index     0.791837     0.901515   0.881818  0.834409  0.699324  \
index     0.836735     0.931818   0.919283  0.876068  0.773969   
index     0.828571     0.9

[32m[I 2023-05-14 10:09:59,265][0m Trial 0 finished with value: 0.5245579567779961 and parameters: {'svc_c': 97.98665538074123, 'svc_gamma': 50.5335805860252}. Best is trial 0 with value: 0.5245579567779961.[0m
[32m[I 2023-05-14 10:09:59,625][0m Trial 1 finished with value: 0.5245579567779961 and parameters: {'svc_c': 5.984359298284984, 'svc_gamma': 58.66433751509819}. Best is trial 0 with value: 0.5245579567779961.[0m
[32m[I 2023-05-14 10:10:00,004][0m Trial 2 finished with value: 0.5265225933202358 and parameters: {'svc_c': 42.17120071873789, 'svc_gamma': 16.75794800201258}. Best is trial 2 with value: 0.5265225933202358.[0m
[32m[I 2023-05-14 10:10:00,370][0m Trial 3 finished with value: 0.5225933202357563 and parameters: {'svc_c': 4.42946854773202, 'svc_gamma': 66.13533538870537}. Best is trial 2 with value: 0.5265225933202358.[0m
[32m[I 2023-05-14 10:10:00,729][0m Trial 4 finished with value: 0.5265225933202358 and parameters: {'svc_c': 11.81896991339862, 'svc_gamma':

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   

                                             best_params  accuracy   
index                                               None  0.875077  \
index                                               None  0.897209   
index                                               None  0.890297   
index                                               None  0.883399   
index  {'C': 0.03277840391147581, 'penalty': 'l1', 's...  0.858546   
index  {'svc_c': 7.554678737818476, 'svc_gamma': 0.02...  0.882122   

       sensitivity  specificity  precision        f1       mcc   
index     0.791837     0.9015

[32m[I 2023-05-14 10:10:32,059][0m Trial 0 finished with value: 0.8683693516699411 and parameters: {'learning_rate': 0.07765118371411048, 'max_depth': 2, 'n_estimators': 936}. Best is trial 0 with value: 0.8683693516699411.[0m
[32m[I 2023-05-14 10:10:32,506][0m Trial 1 finished with value: 0.8722986247544204 and parameters: {'learning_rate': 0.2659848731751826, 'max_depth': 4, 'n_estimators': 232}. Best is trial 1 with value: 0.8722986247544204.[0m
[32m[I 2023-05-14 10:10:32,993][0m Trial 2 finished with value: 0.8703339882121808 and parameters: {'learning_rate': 0.11903843826405786, 'max_depth': 6, 'n_estimators': 194}. Best is trial 1 with value: 0.8722986247544204.[0m
[32m[I 2023-05-14 10:10:34,794][0m Trial 3 finished with value: 0.8762278978388998 and parameters: {'learning_rate': 0.14060499049435843, 'max_depth': 5, 'n_estimators': 996}. Best is trial 3 with value: 0.8762278978388998.[0m
[32m[I 2023-05-14 10:10:35,484][0m Trial 4 finished with value: 0.8781925343811

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   

                                             best_params  accuracy   
index                                               None  0.875077  \
index                                               None  0.897209   
index                                               None  0.890297   
index                                               None  0.883399   
index  {'C': 0.03277840391147581, 'penalty': 'l1', 's...  0.858546   
index  {'svc_c': 7.554678737818476, 'svc_gamma': 0.02...  0.882122   
index  {'learning_rate': 0.0110480093

[32m[I 2023-05-14 10:13:33,623][0m Trial 0 finished with value: 0.8781925343811395 and parameters: {'num_leaves': 127, 'max_depth': 32, 'learning_rate': 0.15295363700847528, 'n_estimators': 857}. Best is trial 0 with value: 0.8781925343811395.[0m
[32m[I 2023-05-14 10:13:34,564][0m Trial 1 finished with value: 0.8683693516699411 and parameters: {'num_leaves': 237, 'max_depth': 36, 'learning_rate': 0.20829406223755478, 'n_estimators': 1163}. Best is trial 0 with value: 0.8781925343811395.[0m
[32m[I 2023-05-14 10:13:34,923][0m Trial 2 finished with value: 0.8801571709233792 and parameters: {'num_leaves': 43, 'max_depth': 9, 'learning_rate': 0.15535758672785846, 'n_estimators': 247}. Best is trial 2 with value: 0.8801571709233792.[0m
[32m[I 2023-05-14 10:13:38,150][0m Trial 3 finished with value: 0.8840864440078585 and parameters: {'num_leaves': 256, 'max_depth': 15, 'learning_rate': 0.020958080646914356, 'n_estimators': 975}. Best is trial 3 with value: 0.8840864440078585.[0m


      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   

                                             best_params  accuracy   
index                                               None  0.875077  \
index                                               None  0.897209   
index                                               None  0.890297   
index                                               None  0.883399   
index  {'C': 0.03277840391147581, 'penalty': 'l1', 's...  0.858546   
index  {'svc_c': 7.554678737818476, 'svc_gamma':

[32m[I 2023-05-14 10:17:08,150][0m A new study created in memory with name: no-name-14e58cb2-fc65-497c-a9bb-862681cf6fe8[0m


      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   

                                             best_params  accuracy   
index                                               None  0.875077  \
index                                               None  0.897209   
index                 

[32m[I 2023-05-14 10:17:09,023][0m Trial 0 finished with value: 0.8695652173913043 and parameters: {'C': 0.04501902305401195, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 600}. Best is trial 0 with value: 0.8695652173913043.[0m
[32m[I 2023-05-14 10:17:09,042][0m Trial 1 finished with value: 0.8853754940711462 and parameters: {'C': 0.06929258545739253, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 844}. Best is trial 1 with value: 0.8853754940711462.[0m
[32m[I 2023-05-14 10:17:09,060][0m Trial 2 finished with value: 0.8774703557312253 and parameters: {'C': 0.032839713305619356, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 335}. Best is trial 1 with value: 0.8853754940711462.[0m
[32m[I 2023-05-14 10:17:10,020][0m Trial 3 finished with value: 0.8636363636363636 and parameters: {'C': 0.022842884444580772, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 903}. Best is trial 1 with value: 0.8853754940711462.[0m
[32m[I 2023-05-14 10:17:10,043][0m Trial 4 finished wi

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   

                                             best_params  accuracy   
index                                               None  0.875077  \
index                            

[32m[I 2023-05-14 10:17:27,791][0m Trial 0 finished with value: 0.5177865612648221 and parameters: {'svc_c': 10.558646932590793, 'svc_gamma': 26.536802544944056}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 10:17:28,158][0m Trial 1 finished with value: 0.5790513833992095 and parameters: {'svc_c': 42.135889710368524, 'svc_gamma': 2.189225733502864}. Best is trial 1 with value: 0.5790513833992095.[0m
[32m[I 2023-05-14 10:17:28,623][0m Trial 2 finished with value: 0.5197628458498024 and parameters: {'svc_c': 5.014263037747232, 'svc_gamma': 18.42537080388125}. Best is trial 1 with value: 0.5790513833992095.[0m
[32m[I 2023-05-14 10:17:29,086][0m Trial 3 finished with value: 0.5197628458498024 and parameters: {'svc_c': 60.001335134357745, 'svc_gamma': 15.201376588476053}. Best is trial 1 with value: 0.5790513833992095.[0m
[32m[I 2023-05-14 10:17:29,458][0m Trial 4 finished with value: 0.5158102766798419 and parameters: {'svc_c': 54.11795105196635, 'svc_

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   

                                             best_params  accuracy   
index                                       

[32m[I 2023-05-14 10:18:06,925][0m Trial 0 finished with value: 0.8853754940711462 and parameters: {'learning_rate': 0.20770919351354106, 'max_depth': 2, 'n_estimators': 183}. Best is trial 0 with value: 0.8853754940711462.[0m
[32m[I 2023-05-14 10:18:09,180][0m Trial 1 finished with value: 0.8794466403162056 and parameters: {'learning_rate': 0.1397319714351558, 'max_depth': 3, 'n_estimators': 811}. Best is trial 0 with value: 0.8853754940711462.[0m
[32m[I 2023-05-14 10:18:09,817][0m Trial 2 finished with value: 0.8913043478260869 and parameters: {'learning_rate': 0.21557448587004335, 'max_depth': 3, 'n_estimators': 181}. Best is trial 2 with value: 0.8913043478260869.[0m
[32m[I 2023-05-14 10:18:13,241][0m Trial 3 finished with value: 0.8814229249011858 and parameters: {'learning_rate': 0.08633580978558991, 'max_depth': 4, 'n_estimators': 964}. Best is trial 2 with value: 0.8913043478260869.[0m
[32m[I 2023-05-14 10:18:14,215][0m Trial 4 finished with value: 0.8656126482213

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   

                                             best_param

[32m[I 2023-05-14 10:19:58,491][0m Trial 0 finished with value: 0.8814229249011858 and parameters: {'num_leaves': 48, 'max_depth': 37, 'learning_rate': 0.04630805574289482, 'n_estimators': 107}. Best is trial 0 with value: 0.8814229249011858.[0m
[32m[I 2023-05-14 10:20:00,045][0m Trial 1 finished with value: 0.8853754940711462 and parameters: {'num_leaves': 225, 'max_depth': 12, 'learning_rate': 0.21554888410581882, 'n_estimators': 1380}. Best is trial 1 with value: 0.8853754940711462.[0m
[32m[I 2023-05-14 10:20:00,984][0m Trial 2 finished with value: 0.8774703557312253 and parameters: {'num_leaves': 155, 'max_depth': 19, 'learning_rate': 0.1878955468734353, 'n_estimators': 279}. Best is trial 1 with value: 0.8853754940711462.[0m
[32m[I 2023-05-14 10:20:03,074][0m Trial 3 finished with value: 0.883399209486166 and parameters: {'num_leaves': 247, 'max_depth': 24, 'learning_rate': 0.059362876278883606, 'n_estimators': 382}. Best is trial 1 with value: 0.8853754940711462.[0m


      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 10:22:27,641][0m A new study created in memory with name: no-name-ee4b4757-71c6-4bc9-b70e-b630ef4c874b[0m
[32m[I 2023-05-14 10:22:27,826][0m Trial 0 finished with value: 0.862475442043222 and parameters: {'C': 0.06773426040847502, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 833}. Best is trial 0 with value: 0.862475442043222.[0m


      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 10:22:27,876][0m Trial 1 finished with value: 0.8546168958742633 and parameters: {'C': 0.050499999299842384, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 444}. Best is trial 0 with value: 0.862475442043222.[0m
[32m[I 2023-05-14 10:22:30,274][0m Trial 2 finished with value: 0.8664047151277013 and parameters: {'C': 0.04626336348645811, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 502}. Best is trial 2 with value: 0.8664047151277013.[0m
[32m[I 2023-05-14 10:22:30,405][0m Trial 3 finished with value: 0.862475442043222 and parameters: {'C': 0.060406106647597185, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 877}. Best is trial 2 with value: 0.8664047151277013.[0m
[32m[I 2023-05-14 10:22:30,544][0m Trial 4 finished with value: 0.862475442043222 and parameters: {'C': 0.07857386031180366, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 615}. Best is trial 2 with value: 0.8664047151277013.[0m
[32m[I 2023-05-14 10:22:32,284][0m Trial 5 finished 

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2023-05-14 10:26:18,162][0m Trial 0 finished with value: 0.518664047151277 and parameters: {'svc_c': 67.14569939034313, 'svc_gamma': 90.66559685846383}. Best is trial 0 with value: 0.518664047151277.[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2023-05-14 10:26:18,811][0m Trial 1 finished with value: 0.518664047151277 and parameters: {'svc_c': 85.91909749848747, 'svc_gamma': 87.46971584634281}. Best is trial 0 with value: 0.518664047151277.[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2023-05-14 10:26:19,534][0m Trial 2 finished with value: 0.518664047151277 and parameters: {'svc_c': 41.747023602485655, 'svc_gamma': 91.91138809484421}. Best is trial 0 with value: 0.518664047151277.[0m
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2023-05-14 10:26:20,249][0m Trial 3 finished with value: 0.518664047151277 and parameters: {'svc_c': 46.83653197443338, 'svc_gamma'

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 10:27:26,887][0m Trial 0 finished with value: 0.8840864440078585 and parameters: {'learning_rate': 0.07452517271666802, 'max_depth': 5, 'n_estimators': 128}. Best is trial 0 with value: 0.8840864440078585.[0m
[32m[I 2023-05-14 10:27:30,577][0m Trial 1 finished with value: 0.8821218074656189 and parameters: {'learning_rate': 0.053746908740568505, 'max_depth': 5, 'n_estimators': 249}. Best is trial 0 with value: 0.8840864440078585.[0m
[32m[I 2023-05-14 10:27:37,318][0m Trial 2 finished with value: 0.8644400785854617 and parameters: {'learning_rate': 0.16636846154066057, 'max_depth': 6, 'n_estimators': 752}. Best is trial 0 with value: 0.8840864440078585.[0m
[32m[I 2023-05-14 10:27:44,751][0m Trial 3 finished with value: 0.8801571709233792 and parameters: {'learning_rate': 0.043711362983126986, 'max_depth': 5, 'n_estimators': 650}. Best is trial 0 with value: 0.8840864440078585.[0m
[32m[I 2023-05-14 10:27:49,841][0m Trial 4 finished with value: 0.8703339882

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 10:34:34,288][0m Trial 0 finished with value: 0.8585461689587426 and parameters: {'num_leaves': 87, 'max_depth': 2, 'learning_rate': 0.12299639310330228, 'n_estimators': 738}. Best is trial 0 with value: 0.8585461689587426.[0m
[32m[I 2023-05-14 10:34:39,622][0m Trial 1 finished with value: 0.8644400785854617 and parameters: {'num_leaves': 130, 'max_depth': 36, 'learning_rate': 0.20977921642169522, 'n_estimators': 1509}. Best is trial 1 with value: 0.8644400785854617.[0m
[32m[I 2023-05-14 10:34:46,221][0m Trial 2 finished with value: 0.8683693516699411 and parameters: {'num_leaves': 64, 'max_depth': 38, 'learning_rate': 0.08892516525735251, 'n_estimators': 1810}. Best is trial 2 with value: 0.8683693516699411.[0m
[32m[I 2023-05-14 10:34:50,145][0m Trial 3 finished with value: 0.8664047151277013 and parameters: {'num_leaves': 212, 'max_depth': 18, 'learning_rate': 0.21138729776864718, 'n_estimators': 895}. Best is trial 2 with value: 0.8683693516699411.[0m


      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 10:45:46,819][0m A new study created in memory with name: no-name-f6a3ddf3-c85a-4d2a-82ad-9a284b80b8f5[0m
[32m[I 2023-05-14 10:45:46,916][0m Trial 0 finished with value: 0.8840864440078585 and parameters: {'C': 0.09461855210914735, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 345}. Best is trial 0 with value: 0.8840864440078585.[0m


      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 10:45:49,484][0m Trial 1 finished with value: 0.8664047151277013 and parameters: {'C': 0.024675658691057936, 'penalty': 'l1', 'solver': 'saga', 'max_iter': 175}. Best is trial 0 with value: 0.8840864440078585.[0m
[32m[I 2023-05-14 10:45:49,697][0m Trial 2 finished with value: 0.8722986247544204 and parameters: {'C': 0.02364714439598118, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 939}. Best is trial 0 with value: 0.8840864440078585.[0m
[32m[I 2023-05-14 10:45:49,780][0m Trial 3 finished with value: 0.8801571709233792 and parameters: {'C': 0.07403465571273415, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 105}. Best is trial 0 with value: 0.8840864440078585.[0m
[32m[I 2023-05-14 10:45:53,017][0m Trial 4 finished with value: 0.8664047151277013 and parameters: {'C': 0.029283459631848432, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 240}. Best is trial 0 with value: 0.8840864440078585.[0m
[32m[I 2023-05-14 10:45:53,283][0m Trial 5 finished wi

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 10:47:58,585][0m Trial 0 finished with value: 0.518664047151277 and parameters: {'svc_c': 55.42136569609044, 'svc_gamma': 70.56632598783725}. Best is trial 0 with value: 0.518664047151277.[0m
[32m[I 2023-05-14 10:48:00,571][0m Trial 1 finished with value: 0.518664047151277 and parameters: {'svc_c': 93.06292440273856, 'svc_gamma': 27.26896977946329}. Best is trial 0 with value: 0.518664047151277.[0m
[32m[I 2023-05-14 10:48:02,509][0m Trial 2 finished with value: 0.518664047151277 and parameters: {'svc_c': 96.43130283525338, 'svc_gamma': 48.15357264572661}. Best is trial 0 with value: 0.518664047151277.[0m
[32m[I 2023-05-14 10:48:04,418][0m Trial 3 finished with value: 0.518664047151277 and parameters: {'svc_c': 33.609121812055314, 'svc_gamma': 53.21941787628768}. Best is trial 0 with value: 0.518664047151277.[0m
[32m[I 2023-05-14 10:48:06,358][0m Trial 4 finished with value: 0.518664047151277 and parameters: {'svc_c': 98.38118033269531, 'svc_gamma': 79.17

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 10:51:37,915][0m Trial 0 finished with value: 0.888015717092338 and parameters: {'learning_rate': 0.01612243747149858, 'max_depth': 5, 'n_estimators': 994}. Best is trial 0 with value: 0.888015717092338.[0m
[32m[I 2023-05-14 10:51:47,862][0m Trial 1 finished with value: 0.9017681728880157 and parameters: {'learning_rate': 0.027802567766687936, 'max_depth': 4, 'n_estimators': 979}. Best is trial 1 with value: 0.9017681728880157.[0m
[32m[I 2023-05-14 10:51:52,491][0m Trial 2 finished with value: 0.8939096267190569 and parameters: {'learning_rate': 0.04225595772130987, 'max_depth': 5, 'n_estimators': 298}. Best is trial 1 with value: 0.9017681728880157.[0m
[32m[I 2023-05-14 10:51:58,620][0m Trial 3 finished with value: 0.8978388998035364 and parameters: {'learning_rate': 0.06465847871731155, 'max_depth': 5, 'n_estimators': 495}. Best is trial 1 with value: 0.9017681728880157.[0m
[32m[I 2023-05-14 10:52:05,803][0m Trial 4 finished with value: 0.8939096267190

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 11:03:47,538][0m Trial 0 finished with value: 0.8899803536345776 and parameters: {'num_leaves': 94, 'max_depth': 21, 'learning_rate': 0.21372388317648344, 'n_estimators': 228}. Best is trial 0 with value: 0.8899803536345776.[0m
[32m[I 2023-05-14 11:03:53,770][0m Trial 1 finished with value: 0.899803536345776 and parameters: {'num_leaves': 158, 'max_depth': 35, 'learning_rate': 0.2944440221886211, 'n_estimators': 521}. Best is trial 1 with value: 0.899803536345776.[0m
[32m[I 2023-05-14 11:04:07,348][0m Trial 2 finished with value: 0.8958742632612967 and parameters: {'num_leaves': 75, 'max_depth': 23, 'learning_rate': 0.04975425488910087, 'n_estimators': 289}. Best is trial 1 with value: 0.899803536345776.[0m
[32m[I 2023-05-14 11:04:17,675][0m Trial 3 finished with value: 0.8978388998035364 and parameters: {'num_leaves': 243, 'max_depth': 23, 'learning_rate': 0.19991268094580125, 'n_estimators': 1433}. Best is trial 1 with value: 0.899803536345776.[0m
[32m[

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 11:17:20,396][0m A new study created in memory with name: no-name-61bc6b46-aa9b-4218-b8fa-89fb5d019aeb[0m
[32m[I 2023-05-14 11:17:20,497][0m Trial 0 finished with value: 0.8735177865612648 and parameters: {'C': 0.04368021013276537, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 433}. Best is trial 0 with value: 0.8735177865612648.[0m
[32m[I 2023-05-14 11:17:20,525][0m Trial 1 finished with value: 0.8656126482213439 and parameters: {'C': 0.013775483430216388, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 856}. Best is trial 0 with value: 0.8735177865612648.[0m
[32m[I 2023-05-14 11:17:20,554][0m Trial 2 finished with value: 0.8537549407114624 and parameters: {'C': 0.010927987962956322, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 618}. Best is trial 0 with value: 0.8735177865612648.[0m
[32m[I 2023-05-14 11:17:20,581][0m Trial 3 finished with value: 0.883399209486166 and parameters: {'C': 0.08325612286479676, 'penalty': 'l1', 'solver': 'libline

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 11:17:20,686][0m Trial 4 finished with value: 0.8695652173913043 and parameters: {'C': 0.07669323065355182, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 486}. Best is trial 3 with value: 0.883399209486166.[0m
[32m[I 2023-05-14 11:17:20,715][0m Trial 5 finished with value: 0.8774703557312253 and parameters: {'C': 0.06240933621775592, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 765}. Best is trial 3 with value: 0.883399209486166.[0m
[32m[I 2023-05-14 11:17:20,753][0m Trial 6 finished with value: 0.8754940711462451 and parameters: {'C': 0.04145235224852046, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 747}. Best is trial 3 with value: 0.883399209486166.[0m
[32m[I 2023-05-14 11:17:20,856][0m Trial 7 finished with value: 0.8656126482213439 and parameters: {'C': 0.01694069830811886, 'penalty': 'l1', 'solver': 'saga', 'max_iter': 670}. Best is trial 3 with value: 0.883399209486166.[0m
[32m[I 2023-05-14 11:17:20,975][0m Trial 8 finished with val

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 11:17:26,860][0m Trial 0 finished with value: 0.5197628458498024 and parameters: {'svc_c': 58.44307061377046, 'svc_gamma': 23.55141792853813}. Best is trial 0 with value: 0.5197628458498024.[0m
[32m[I 2023-05-14 11:17:27,317][0m Trial 1 finished with value: 0.5138339920948617 and parameters: {'svc_c': 14.359849470028351, 'svc_gamma': 74.40481223481555}. Best is trial 0 with value: 0.5197628458498024.[0m
[32m[I 2023-05-14 11:17:27,794][0m Trial 2 finished with value: 0.5138339920948617 and parameters: {'svc_c': 95.78584081703389, 'svc_gamma': 71.2932693493338}. Best is trial 0 with value: 0.5197628458498024.[0m
[32m[I 2023-05-14 11:17:28,257][0m Trial 3 finished with value: 0.5138339920948617 and parameters: {'svc_c': 44.58732977269384, 'svc_gamma': 93.96163021949464}. Best is trial 0 with value: 0.5197628458498024.[0m
[32m[I 2023-05-14 11:17:28,822][0m Trial 4 finished with value: 0.5197628458498024 and parameters: {'svc_c': 33.192096078162166, 'svc_gamm

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 11:18:15,177][0m Trial 0 finished with value: 0.8972332015810277 and parameters: {'learning_rate': 0.2619850011443755, 'max_depth': 2, 'n_estimators': 587}. Best is trial 0 with value: 0.8972332015810277.[0m
[32m[I 2023-05-14 11:18:16,030][0m Trial 1 finished with value: 0.8913043478260869 and parameters: {'learning_rate': 0.21638737361637084, 'max_depth': 2, 'n_estimators': 448}. Best is trial 0 with value: 0.8972332015810277.[0m
[32m[I 2023-05-14 11:18:17,753][0m Trial 2 finished with value: 0.8992094861660079 and parameters: {'learning_rate': 0.09799562429942851, 'max_depth': 3, 'n_estimators': 535}. Best is trial 2 with value: 0.8992094861660079.[0m
[32m[I 2023-05-14 11:18:19,494][0m Trial 3 finished with value: 0.8913043478260869 and parameters: {'learning_rate': 0.10373571257505594, 'max_depth': 5, 'n_estimators': 422}. Best is trial 2 with value: 0.8992094861660079.[0m
[32m[I 2023-05-14 11:18:20,306][0m Trial 4 finished with value: 0.8873517786561

      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

[32m[I 2023-05-14 11:23:23,453][0m Trial 0 finished with value: 0.9071146245059288 and parameters: {'num_leaves': 151, 'max_depth': 16, 'learning_rate': 0.23393208125638126, 'n_estimators': 1876}. Best is trial 0 with value: 0.9071146245059288.[0m
[32m[I 2023-05-14 11:23:24,660][0m Trial 1 finished with value: 0.8952569169960475 and parameters: {'num_leaves': 19, 'max_depth': 13, 'learning_rate': 0.2325634678118051, 'n_estimators': 264}. Best is trial 0 with value: 0.9071146245059288.[0m
[32m[I 2023-05-14 11:23:25,500][0m Trial 2 finished with value: 0.8873517786561265 and parameters: {'num_leaves': 183, 'max_depth': 34, 'learning_rate': 0.29455523636689307, 'n_estimators': 242}. Best is trial 0 with value: 0.9071146245059288.[0m
[32m[I 2023-05-14 11:23:27,917][0m Trial 3 finished with value: 0.8932806324110671 and parameters: {'num_leaves': 186, 'max_depth': 48, 'learning_rate': 0.12323717380960289, 'n_estimators': 957}. Best is trial 0 with value: 0.9071146245059288.[0m


      feature_type               model with_hypertuning   
index          AAC  LogisticRegression            False  \
index          AAC                 SVC            False   
index          AAC       XGBClassifier            False   
index          AAC      LGBMClassifier            False   
index          AAC  LogisticRegression             True   
index          AAC                 SVC             True   
index          AAC       XGBClassifier             True   
index          AAC      LGBMClassifier             True   
index        APAAC  LogisticRegression            False   
index        APAAC                 SVC            False   
index        APAAC       XGBClassifier            False   
index        APAAC      LGBMClassifier            False   
index        APAAC  LogisticRegression             True   
index        APAAC                 SVC             True   
index        APAAC       XGBClassifier             True   
index        APAAC      LGBMClassifier             True 

# With Feature Selection

In [230]:
# empty dataframe to store results with the columns feature_type, model, with_hypertuning, accuracy, sensitivity, specificity, precision, f1, mcc
results = pd.DataFrame(columns=['feature_type', 'model', 'with_hypertuning', 'best_params', 'accuracy', 'sensitivity', 'specificity', 'precision', 'f1', 'mcc', 'index'])
feature_types = ['selected_features_all_best20', 'selected_features_all_best30', 'selected_features_all_best50', 'selected_features_all_best100']
for feature_type in feature_types:

    # Load the training dataset
    data = pd.read_csv(f'{feature_engineered_data_dir}/TR_{feature_type}.csv')

    # Separate features and target
    X = data.drop(columns=['label', 'id'], axis=1)
    y = data['label']

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Evaluate models without hyperparameters tuning
    for name, model in models.items():
        print(f"Evaluating {feature_type} {name}")
        results = evaluate_model(name, model, X_train, y_train, X_test, y_test, results, feature_type)
        print(results)

    # Optimize hyperparameters
    for name, model in models_.items():
        objective = objectives.get(name)
        if objective is not None:
            print(f"Optimizing {feature_type} {name}")
            results = optimize_hyperparameters(name, model, objective, trials=100, results_dataframe=results, feature_type=feature_type, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
            print(results)

results.to_csv(f'{feature_engineered_data_dir}/results_20&30&50&100.csv', index=False)

Evaluating selected_features_all_best20 LogisticRegression
                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \

      best_params  accuracy  sensitivity  specificity  precision        f1   
index        None   0.86163     0.832653     0.850575   0.839506  0.836066  \

           mcc                                              index  
index  0.68342  selected_features_all_best20_LogisticRegressio...  
Evaluating selected_features_all_best20 SVC
                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   

      best_params  accuracy  sensitivity  specificity  precision        f1   
index        None  0.861630     0.832653     0.850575   0.839506  0.836066  \
index        None  0.884356     0.816327     0.915709   0.900901  

[32m[I 2023-05-14 11:32:32,517][0m A new study created in memory with name: no-name-3df58edf-44c6-4cf2-9751-20d6cafa617a[0m
[32m[I 2023-05-14 11:32:32,628][0m Trial 0 finished with value: 0.857707509881423 and parameters: {'C': 0.07403839557995674, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 496}. Best is trial 0 with value: 0.857707509881423.[0m
[32m[I 2023-05-14 11:32:32,657][0m Trial 1 finished with value: 0.8537549407114624 and parameters: {'C': 0.0810939833636358, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 522}. Best is trial 0 with value: 0.857707509881423.[0m
[32m[I 2023-05-14 11:32:32,684][0m Trial 2 finished with value: 0.857707509881423 and parameters: {'C': 0.08162923941197098, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 802}. Best is trial 0 with value: 0.857707509881423.[0m


                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   

      best_params  accuracy  sensitivity  specificity  precision        f1   
index        None  0.861630     0.832653     0.850575   0.839506  0.836066  \
index        None  0.884356     0.816327     0.915709   0.900901  0.856531   
index        None  0.865564     0.840816     0.908046   0.895652  0.867368   
index        None  0.870999     0.832653     0.919540   0.906667  0.868085   

            mcc                                              index  
index  0.683420  selected_features_all_best20_LogisticRegressio...  
index  0.737224    selected_features_all_best20_SVC_no_hypertuning  
index  0.751600  sele

[32m[I 2023-05-14 11:32:32,872][0m Trial 3 finished with value: 0.8596837944664032 and parameters: {'C': 0.06441953223187927, 'penalty': 'l1', 'solver': 'saga', 'max_iter': 574}. Best is trial 3 with value: 0.8596837944664032.[0m
[32m[I 2023-05-14 11:32:32,897][0m Trial 4 finished with value: 0.857707509881423 and parameters: {'C': 0.040412568834002696, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 939}. Best is trial 3 with value: 0.8596837944664032.[0m
[32m[I 2023-05-14 11:32:32,923][0m Trial 5 finished with value: 0.857707509881423 and parameters: {'C': 0.026477898873890016, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 184}. Best is trial 3 with value: 0.8596837944664032.[0m
[32m[I 2023-05-14 11:32:33,059][0m Trial 6 finished with value: 0.8616600790513834 and parameters: {'C': 0.03342895231698814, 'penalty': 'l1', 'solver': 'saga', 'max_iter': 773}. Best is trial 6 with value: 0.8616600790513834.[0m
[32m[I 2023-05-14 11:32:33,079][0m Trial 7 finished with

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   

                                             best_params  accuracy   
index                                               None  0.861630  \
index                                               None  0.884356   
index                                               None  0.865564   
index                                               None  0.870999   
index  {'C': 0.05772645471274076, 'penalty': 'l1', 's...  0.863636   

       sensitivity  specificity  precision        f1       mcc   
index     0.832653     0.850575   0.839506  0.836066  0.

[32m[I 2023-05-14 11:32:39,517][0m Trial 0 finished with value: 0.5177865612648221 and parameters: {'svc_c': 10.7533945146195, 'svc_gamma': 73.75896312119299}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:32:39,983][0m Trial 1 finished with value: 0.5177865612648221 and parameters: {'svc_c': 13.280062430077226, 'svc_gamma': 79.73462726837084}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:32:40,413][0m Trial 2 finished with value: 0.5177865612648221 and parameters: {'svc_c': 88.65711768594213, 'svc_gamma': 70.64852112597094}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:32:40,929][0m Trial 3 finished with value: 0.5177865612648221 and parameters: {'svc_c': 18.089833877835254, 'svc_gamma': 67.08274284473659}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:32:41,457][0m Trial 4 finished with value: 0.5177865612648221 and parameters: {'svc_c': 86.76762802466011, 'svc_gamm

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   

                                             best_params  accuracy   
index                                               None  0.861630  \
index                                               None  0.884356   
index                                               None  0.865564   
index                                               None  0.870999   
index  {'C': 0.05772645471274076, 'penalty': 'l1', 's...  0.863636   
index  {'svc_c': 95.94113392322917, 'svc_gamma'

[32m[I 2023-05-14 11:33:19,624][0m Trial 0 finished with value: 0.8656126482213439 and parameters: {'learning_rate': 0.07874189857592591, 'max_depth': 6, 'n_estimators': 154}. Best is trial 0 with value: 0.8656126482213439.[0m
[32m[I 2023-05-14 11:33:20,400][0m Trial 1 finished with value: 0.8596837944664032 and parameters: {'learning_rate': 0.1705788115114957, 'max_depth': 3, 'n_estimators': 484}. Best is trial 0 with value: 0.8656126482213439.[0m
[32m[I 2023-05-14 11:33:20,881][0m Trial 2 finished with value: 0.8557312252964426 and parameters: {'learning_rate': 0.07877682785006325, 'max_depth': 2, 'n_estimators': 451}. Best is trial 0 with value: 0.8656126482213439.[0m
[32m[I 2023-05-14 11:33:21,344][0m Trial 3 finished with value: 0.8537549407114624 and parameters: {'learning_rate': 0.14347569237428598, 'max_depth': 4, 'n_estimators': 268}. Best is trial 0 with value: 0.8656126482213439.[0m
[32m[I 2023-05-14 11:33:22,881][0m Trial 4 finished with value: 0.8715415019762

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   

                                             best_params  accuracy   
index                                               None  0.861630  \
index                                               None  0.884356   
index                                               None  0.865564   
index                                               None  0.870999   
index  {'C': 0.05772645471274076, 'penalt

[32m[I 2023-05-14 11:35:53,434][0m Trial 0 finished with value: 0.8754940711462451 and parameters: {'num_leaves': 103, 'max_depth': 11, 'learning_rate': 0.03276897858896604, 'n_estimators': 488}. Best is trial 0 with value: 0.8754940711462451.[0m
[32m[I 2023-05-14 11:35:54,516][0m Trial 1 finished with value: 0.8794466403162056 and parameters: {'num_leaves': 139, 'max_depth': 18, 'learning_rate': 0.20546953893784983, 'n_estimators': 633}. Best is trial 1 with value: 0.8794466403162056.[0m
[32m[I 2023-05-14 11:35:55,226][0m Trial 2 finished with value: 0.8715415019762845 and parameters: {'num_leaves': 45, 'max_depth': 10, 'learning_rate': 0.060968532596519846, 'n_estimators': 334}. Best is trial 1 with value: 0.8794466403162056.[0m
[32m[I 2023-05-14 11:35:56,912][0m Trial 3 finished with value: 0.8735177865612648 and parameters: {'num_leaves': 211, 'max_depth': 48, 'learning_rate': 0.046618425613169104, 'n_estimators': 312}. Best is trial 1 with value: 0.8794466403162056.[0m

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   

                                             best_params  accuracy   
index                                               None  0.861630  \
index                                               None  0.884356   
index                                               None  0.865564   
index                              

[32m[I 2023-05-14 11:38:23,091][0m A new study created in memory with name: no-name-c0c0039d-afa3-4213-8124-122d20179b0c[0m
[32m[I 2023-05-14 11:38:23,130][0m Trial 0 finished with value: 0.8893280632411067 and parameters: {'C': 0.09688479766360383, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 289}. Best is trial 0 with value: 0.8893280632411067.[0m


                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   

           

[32m[I 2023-05-14 11:38:23,387][0m Trial 1 finished with value: 0.8893280632411067 and parameters: {'C': 0.08105103415050194, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 562}. Best is trial 0 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:38:23,415][0m Trial 2 finished with value: 0.8873517786561265 and parameters: {'C': 0.046218227368261915, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 907}. Best is trial 0 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:38:23,444][0m Trial 3 finished with value: 0.8873517786561265 and parameters: {'C': 0.05199988061090262, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 492}. Best is trial 0 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:38:23,967][0m Trial 4 finished with value: 0.8853754940711462 and parameters: {'C': 0.06651836663093466, 'penalty': 'l1', 'solver': 'saga', 'max_iter': 333}. Best is trial 0 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:38:24,171][0m Trial 5 finished wit

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

[32m[I 2023-05-14 11:38:34,655][0m Trial 0 finished with value: 0.5177865612648221 and parameters: {'svc_c': 45.48258663520493, 'svc_gamma': 29.982864410821676}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:38:35,190][0m Trial 1 finished with value: 0.5177865612648221 and parameters: {'svc_c': 69.03665719583451, 'svc_gamma': 63.58316045628349}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:38:35,674][0m Trial 2 finished with value: 0.5138339920948617 and parameters: {'svc_c': 10.981700362263517, 'svc_gamma': 92.44976306738184}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:38:36,196][0m Trial 3 finished with value: 0.5177865612648221 and parameters: {'svc_c': 45.78783115598922, 'svc_gamma': 6.90138642611082}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:38:36,757][0m Trial 4 finished with value: 0.5177865612648221 and parameters: {'svc_c': 11.490446606635224, 'svc_gam

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

[32m[I 2023-05-14 11:39:22,560][0m Trial 0 finished with value: 0.883399209486166 and parameters: {'learning_rate': 0.15309337268637435, 'max_depth': 3, 'n_estimators': 993}. Best is trial 0 with value: 0.883399209486166.[0m
[32m[I 2023-05-14 11:39:23,772][0m Trial 1 finished with value: 0.8893280632411067 and parameters: {'learning_rate': 0.22028169260786543, 'max_depth': 3, 'n_estimators': 496}. Best is trial 1 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:39:24,947][0m Trial 2 finished with value: 0.8853754940711462 and parameters: {'learning_rate': 0.14942829827562668, 'max_depth': 4, 'n_estimators': 392}. Best is trial 1 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:39:26,892][0m Trial 3 finished with value: 0.8932806324110671 and parameters: {'learning_rate': 0.04132576060911622, 'max_depth': 4, 'n_estimators': 502}. Best is trial 3 with value: 0.8932806324110671.[0m
[32m[I 2023-05-14 11:39:29,048][0m Trial 4 finished with value: 0.88932806324110

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

[32m[I 2023-05-14 11:42:48,639][0m Trial 0 finished with value: 0.8893280632411067 and parameters: {'num_leaves': 151, 'max_depth': 46, 'learning_rate': 0.124478597224429, 'n_estimators': 1893}. Best is trial 0 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:42:50,784][0m Trial 1 finished with value: 0.8913043478260869 and parameters: {'num_leaves': 137, 'max_depth': 18, 'learning_rate': 0.13594225171487398, 'n_estimators': 1698}. Best is trial 1 with value: 0.8913043478260869.[0m
[32m[I 2023-05-14 11:42:51,333][0m Trial 2 finished with value: 0.8972332015810277 and parameters: {'num_leaves': 141, 'max_depth': 11, 'learning_rate': 0.25414532320145833, 'n_estimators': 187}. Best is trial 2 with value: 0.8972332015810277.[0m
[32m[I 2023-05-14 11:42:54,517][0m Trial 3 finished with value: 0.8853754940711462 and parameters: {'num_leaves': 219, 'max_depth': 45, 'learning_rate': 0.060002349042560445, 'n_estimators': 1210}. Best is trial 2 with value: 0.8972332015810277.[0

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

[32m[I 2023-05-14 11:46:12,598][0m A new study created in memory with name: no-name-c3ef2b8a-55f4-4ddc-b565-7e3c9cc3467b[0m
[32m[I 2023-05-14 11:46:12,670][0m Trial 0 finished with value: 0.8774703557312253 and parameters: {'C': 0.08582067856630485, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 790}. Best is trial 0 with value: 0.8774703557312253.[0m


                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

[32m[I 2023-05-14 11:46:13,055][0m Trial 1 finished with value: 0.8814229249011858 and parameters: {'C': 0.04575595001292701, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 614}. Best is trial 1 with value: 0.8814229249011858.[0m
[32m[I 2023-05-14 11:46:13,096][0m Trial 2 finished with value: 0.8794466403162056 and parameters: {'C': 0.05986810018960859, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 825}. Best is trial 1 with value: 0.8814229249011858.[0m
[32m[I 2023-05-14 11:46:13,126][0m Trial 3 finished with value: 0.8636363636363636 and parameters: {'C': 0.014235066072406884, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 353}. Best is trial 1 with value: 0.8814229249011858.[0m
[32m[I 2023-05-14 11:46:13,163][0m Trial 4 finished with value: 0.8814229249011858 and parameters: {'C': 0.01703248903658563, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 984}. Best is trial 1 with value: 0.8814229249011858.[0m
[32m[I 2023-05-14 11:46:13,202][0m Trial 5 finishe

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

[32m[I 2023-05-14 11:46:37,959][0m Trial 0 finished with value: 0.5177865612648221 and parameters: {'svc_c': 40.793552289548295, 'svc_gamma': 12.001104843654636}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:46:38,414][0m Trial 1 finished with value: 0.5138339920948617 and parameters: {'svc_c': 58.707983744808175, 'svc_gamma': 99.07600808314264}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:46:39,087][0m Trial 2 finished with value: 0.5177865612648221 and parameters: {'svc_c': 17.864641376687583, 'svc_gamma': 25.242611630690515}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:46:39,641][0m Trial 3 finished with value: 0.5177865612648221 and parameters: {'svc_c': 98.56400962744742, 'svc_gamma': 44.6188513506334}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:46:40,244][0m Trial 4 finished with value: 0.5177865612648221 and parameters: {'svc_c': 45.454149872922336, 'svc_

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

[32m[I 2023-05-14 11:47:35,034][0m Trial 0 finished with value: 0.8873517786561265 and parameters: {'learning_rate': 0.2747358549724469, 'max_depth': 4, 'n_estimators': 206}. Best is trial 0 with value: 0.8873517786561265.[0m
[32m[I 2023-05-14 11:47:38,764][0m Trial 1 finished with value: 0.8932806324110671 and parameters: {'learning_rate': 0.049433721318135074, 'max_depth': 3, 'n_estimators': 927}. Best is trial 1 with value: 0.8932806324110671.[0m
[32m[I 2023-05-14 11:47:40,534][0m Trial 2 finished with value: 0.8893280632411067 and parameters: {'learning_rate': 0.15622721022045377, 'max_depth': 5, 'n_estimators': 190}. Best is trial 1 with value: 0.8932806324110671.[0m
[32m[I 2023-05-14 11:47:42,826][0m Trial 3 finished with value: 0.8774703557312253 and parameters: {'learning_rate': 0.1511430892067905, 'max_depth': 2, 'n_estimators': 996}. Best is trial 1 with value: 0.8932806324110671.[0m
[32m[I 2023-05-14 11:47:45,294][0m Trial 4 finished with value: 0.8833992094861

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

[32m[I 2023-05-14 11:53:27,429][0m Trial 0 finished with value: 0.8774703557312253 and parameters: {'num_leaves': 227, 'max_depth': 43, 'learning_rate': 0.2778019303343961, 'n_estimators': 1461}. Best is trial 0 with value: 0.8774703557312253.[0m
[32m[I 2023-05-14 11:53:31,940][0m Trial 1 finished with value: 0.8893280632411067 and parameters: {'num_leaves': 223, 'max_depth': 16, 'learning_rate': 0.10290531634605304, 'n_estimators': 1215}. Best is trial 1 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:53:38,623][0m Trial 2 finished with value: 0.8853754940711462 and parameters: {'num_leaves': 106, 'max_depth': 27, 'learning_rate': 0.031657949034507527, 'n_estimators': 1008}. Best is trial 1 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:53:40,454][0m Trial 3 finished with value: 0.8814229249011858 and parameters: {'num_leaves': 7, 'max_depth': 40, 'learning_rate': 0.2804540099026719, 'n_estimators': 1662}. Best is trial 1 with value: 0.8893280632411067.[0m

                       feature_type               model with_hypertuning   
index  selected_features_all_best20  LogisticRegression            False  \
index  selected_features_all_best20                 SVC            False   
index  selected_features_all_best20       XGBClassifier            False   
index  selected_features_all_best20      LGBMClassifier            False   
index  selected_features_all_best20  LogisticRegression             True   
index  selected_features_all_best20                 SVC             True   
index  selected_features_all_best20       XGBClassifier             True   
index  selected_features_all_best20      LGBMClassifier             True   
index  selected_features_all_best30  LogisticRegression            False   
index  selected_features_all_best30                 SVC            False   
index  selected_features_all_best30       XGBClassifier            False   
index  selected_features_all_best30      LGBMClassifier            False   
index  selec

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                        feature_type               model with_hypertuning   
index   selected_features_all_best20  LogisticRegression            False  \
index   selected_features_all_best20                 SVC            False   
index   selected_features_all_best20       XGBClassifier            False   
index   selected_features_all_best20      LGBMClassifier            False   
index   selected_features_all_best20  LogisticRegression             True   
index   selected_features_all_best20                 SVC             True   
index   selected_features_all_best20       XGBClassifier             True   
index   selected_features_all_best20      LGBMClassifier             True   
index   selected_features_all_best30  LogisticRegression            False   
index   selected_features_all_best30                 SVC            False   
index   selected_features_all_best30       XGBClassifier            False   
index   selected_features_all_best30      LGBMClassifier            False   

[32m[I 2023-05-14 11:56:35,852][0m A new study created in memory with name: no-name-43b144b1-9ee0-4e09-9aa9-0b9af1db68db[0m
[32m[I 2023-05-14 11:56:35,963][0m Trial 0 finished with value: 0.8814229249011858 and parameters: {'C': 0.08534929086077442, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 755}. Best is trial 0 with value: 0.8814229249011858.[0m


                        feature_type               model with_hypertuning   
index   selected_features_all_best20  LogisticRegression            False  \
index   selected_features_all_best20                 SVC            False   
index   selected_features_all_best20       XGBClassifier            False   
index   selected_features_all_best20      LGBMClassifier            False   
index   selected_features_all_best20  LogisticRegression             True   
index   selected_features_all_best20                 SVC             True   
index   selected_features_all_best20       XGBClassifier             True   
index   selected_features_all_best20      LGBMClassifier             True   
index   selected_features_all_best30  LogisticRegression            False   
index   selected_features_all_best30                 SVC            False   
index   selected_features_all_best30       XGBClassifier            False   
index   selected_features_all_best30      LGBMClassifier            False   

[32m[I 2023-05-14 11:56:39,417][0m Trial 1 finished with value: 0.8853754940711462 and parameters: {'C': 0.06225657591442953, 'penalty': 'l1', 'solver': 'saga', 'max_iter': 788}. Best is trial 1 with value: 0.8853754940711462.[0m
[32m[I 2023-05-14 11:56:39,514][0m Trial 2 finished with value: 0.8932806324110671 and parameters: {'C': 0.08263726939093811, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 623}. Best is trial 2 with value: 0.8932806324110671.[0m
[32m[I 2023-05-14 11:56:40,858][0m Trial 3 finished with value: 0.8952569169960475 and parameters: {'C': 0.08082468014153397, 'penalty': 'l2', 'solver': 'saga', 'max_iter': 497}. Best is trial 3 with value: 0.8952569169960475.[0m
[32m[I 2023-05-14 11:56:40,941][0m Trial 4 finished with value: 0.8932806324110671 and parameters: {'C': 0.07663931196568836, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 320}. Best is trial 3 with value: 0.8952569169960475.[0m
[32m[I 2023-05-14 11:56:41,018][0m Trial 5 finished with

                        feature_type               model with_hypertuning   
index   selected_features_all_best20  LogisticRegression            False  \
index   selected_features_all_best20                 SVC            False   
index   selected_features_all_best20       XGBClassifier            False   
index   selected_features_all_best20      LGBMClassifier            False   
index   selected_features_all_best20  LogisticRegression             True   
index   selected_features_all_best20                 SVC             True   
index   selected_features_all_best20       XGBClassifier             True   
index   selected_features_all_best20      LGBMClassifier             True   
index   selected_features_all_best30  LogisticRegression            False   
index   selected_features_all_best30                 SVC            False   
index   selected_features_all_best30       XGBClassifier            False   
index   selected_features_all_best30      LGBMClassifier            False   

[32m[I 2023-05-14 11:58:18,108][0m Trial 0 finished with value: 0.5177865612648221 and parameters: {'svc_c': 78.12209306408987, 'svc_gamma': 26.10190874864247}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:58:18,822][0m Trial 1 finished with value: 0.5138339920948617 and parameters: {'svc_c': 56.51771659426793, 'svc_gamma': 93.67982138851708}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:58:19,444][0m Trial 2 finished with value: 0.5177865612648221 and parameters: {'svc_c': 70.16516705408472, 'svc_gamma': 19.471233381399788}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:58:20,054][0m Trial 3 finished with value: 0.5177865612648221 and parameters: {'svc_c': 6.87552501441381, 'svc_gamma': 42.74508324946356}. Best is trial 0 with value: 0.5177865612648221.[0m
[32m[I 2023-05-14 11:58:20,652][0m Trial 4 finished with value: 0.5138339920948617 and parameters: {'svc_c': 92.10256990605347, 'svc_gamma

                        feature_type               model with_hypertuning   
index   selected_features_all_best20  LogisticRegression            False  \
index   selected_features_all_best20                 SVC            False   
index   selected_features_all_best20       XGBClassifier            False   
index   selected_features_all_best20      LGBMClassifier            False   
index   selected_features_all_best20  LogisticRegression             True   
index   selected_features_all_best20                 SVC             True   
index   selected_features_all_best20       XGBClassifier             True   
index   selected_features_all_best20      LGBMClassifier             True   
index   selected_features_all_best30  LogisticRegression            False   
index   selected_features_all_best30                 SVC            False   
index   selected_features_all_best30       XGBClassifier            False   
index   selected_features_all_best30      LGBMClassifier            False   

[32m[I 2023-05-14 11:59:21,611][0m Trial 0 finished with value: 0.8893280632411067 and parameters: {'learning_rate': 0.21246312417174512, 'max_depth': 2, 'n_estimators': 616}. Best is trial 0 with value: 0.8893280632411067.[0m
[32m[I 2023-05-14 11:59:25,717][0m Trial 1 finished with value: 0.8972332015810277 and parameters: {'learning_rate': 0.05172888818693755, 'max_depth': 5, 'n_estimators': 403}. Best is trial 1 with value: 0.8972332015810277.[0m
[32m[I 2023-05-14 11:59:30,699][0m Trial 2 finished with value: 0.8932806324110671 and parameters: {'learning_rate': 0.05498622277393403, 'max_depth': 4, 'n_estimators': 496}. Best is trial 1 with value: 0.8972332015810277.[0m
[32m[I 2023-05-14 11:59:33,145][0m Trial 3 finished with value: 0.8873517786561265 and parameters: {'learning_rate': 0.2611868850156039, 'max_depth': 4, 'n_estimators': 354}. Best is trial 1 with value: 0.8972332015810277.[0m
[32m[I 2023-05-14 11:59:38,608][0m Trial 4 finished with value: 0.8932806324110

                        feature_type               model with_hypertuning   
index   selected_features_all_best20  LogisticRegression            False  \
index   selected_features_all_best20                 SVC            False   
index   selected_features_all_best20       XGBClassifier            False   
index   selected_features_all_best20      LGBMClassifier            False   
index   selected_features_all_best20  LogisticRegression             True   
index   selected_features_all_best20                 SVC             True   
index   selected_features_all_best20       XGBClassifier             True   
index   selected_features_all_best20      LGBMClassifier             True   
index   selected_features_all_best30  LogisticRegression            False   
index   selected_features_all_best30                 SVC            False   
index   selected_features_all_best30       XGBClassifier            False   
index   selected_features_all_best30      LGBMClassifier            False   

[32m[I 2023-05-14 12:06:48,046][0m Trial 0 finished with value: 0.8873517786561265 and parameters: {'num_leaves': 98, 'max_depth': 22, 'learning_rate': 0.26204601918854353, 'n_estimators': 444}. Best is trial 0 with value: 0.8873517786561265.[0m
[32m[I 2023-05-14 12:06:49,925][0m Trial 1 finished with value: 0.8913043478260869 and parameters: {'num_leaves': 37, 'max_depth': 32, 'learning_rate': 0.0747843128262703, 'n_estimators': 364}. Best is trial 1 with value: 0.8913043478260869.[0m
[32m[I 2023-05-14 12:06:53,805][0m Trial 2 finished with value: 0.8893280632411067 and parameters: {'num_leaves': 177, 'max_depth': 32, 'learning_rate': 0.14651735429553095, 'n_estimators': 1942}. Best is trial 1 with value: 0.8913043478260869.[0m
[32m[I 2023-05-14 12:06:56,951][0m Trial 3 finished with value: 0.8913043478260869 and parameters: {'num_leaves': 120, 'max_depth': 8, 'learning_rate': 0.09491243639916087, 'n_estimators': 1849}. Best is trial 1 with value: 0.8913043478260869.[0m
[

                        feature_type               model with_hypertuning   
index   selected_features_all_best20  LogisticRegression            False  \
index   selected_features_all_best20                 SVC            False   
index   selected_features_all_best20       XGBClassifier            False   
index   selected_features_all_best20      LGBMClassifier            False   
index   selected_features_all_best20  LogisticRegression             True   
index   selected_features_all_best20                 SVC             True   
index   selected_features_all_best20       XGBClassifier             True   
index   selected_features_all_best20      LGBMClassifier             True   
index   selected_features_all_best30  LogisticRegression            False   
index   selected_features_all_best30                 SVC            False   
index   selected_features_all_best30       XGBClassifier            False   
index   selected_features_all_best30      LGBMClassifier            False   

# Best Model with Full Training Dataset

In [248]:
# Load the results
results_without_selected_features = pd.read_csv('results_v2.csv')
results_with_selected_features = pd.read_csv(f'{feature_engineered_data_dir}/results_20&30&50&100.csv')

feature_types = ['AAC', 'APAAC', 'CTD', 'DPC', 'PAAC']
selected_feature_types = ['selected_features_all_best20', 'selected_features_all_best30', 'selected_features_all_best50', 'selected_features_all_best100']

# Combine the feature types
feature_types.extend(selected_feature_types)

test_results = []

# iterate through each row of results
for feature_type in feature_types:

    # Check if the feature type is selected features
    if 'selected_features' in feature_type:
        # Load the training dataset
        train_data = pd.read_csv(f'{feature_engineered_data_dir}/TR_{feature_type}.csv')
        test_data = pd.read_csv(f'{feature_engineered_data_dir}/TS_{feature_type}.csv')
        results = results_with_selected_features
    else:
        # Load the training dataset
        train_data = pd.read_csv(f'{data_dir}/TR_{feature_type}.csv')
        test_data = pd.read_csv(f'{data_dir}/TS_{feature_type}.csv')
        results = results_without_selected_features

    # Separate features and target
    X_train = train_data.drop(columns=['label', 'id'], axis=1)
    y_train = train_data['label']

    X_test = test_data.drop(columns=['label', 'id'], axis=1)
    y_test = test_data['label']

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # iterate through each model
    for name, model in models.items():
        # get the row of the model

        
        rows = results[(results['feature_type'] == feature_type) & (results['model'] == name)]

        # iterate through each row
        for index, row in rows.iterrows():

            # check whether the model has hyperparameters
            if row['with_hypertuning'] == True:
                hyperparameters = ast.literal_eval(row['best_params'])
                # check the model is SVC
                if row['model'] == 'SVC':
                    hyperparameters = {k[4:]: v for k, v in hyperparameters.items()}
                    # make key 'c' to 'C'
                    hyperparameters['C'] = hyperparameters.pop('c')
                # set best hyperparameters
                model.set_params(**hyperparameters)

            # fit model
            model.fit(X_train, y_train)

            # predict
            y_pred = model.predict(X_test)

            # evaluate using accuracy, sensitivity, specificity, precision, f1, mcc
            accuracy = accuracy_score(y_test, y_pred)
            sensitivity = recall_score(y_test, y_pred)
            specificity = recall_score(y_test, y_pred, pos_label=0)
            precision = precision_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            mcc = matthews_corrcoef(y_test, y_pred)

            # append to test_results
            test_results.append({'feature_type': feature_type, 'model': name, 'with_hypertuning': row['with_hypertuning'], 'best_params': row['best_params'], 'accuracy': accuracy, 'sensitivity': sensitivity, 'specificity': specificity, 'precision': precision, 'f1': f1, 'mcc': mcc, 'index': row['index']})
    print(f'Feature Type: {feature_type} done!')

test_results = pd.DataFrame(test_results)
test_results.to_csv('test_results.csv', index=False)

Feature Type: AAC done!
Feature Type: APAAC done!




Feature Type: CTD done!




Feature Type: DPC done!
Feature Type: PAAC done!
Feature Type: selected_features_all_best20 done!
Feature Type: selected_features_all_best30 done!




Feature Type: selected_features_all_best50 done!




Feature Type: selected_features_all_best100 done!


# Model Ensembling

# Model Ensembling with Selected Features