In [96]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.linear_model import ElasticNet, RidgeClassifier, Lasso, PassiveAggressiveClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

from xgboost import XGBClassifier


Load the data from the uploaded files

In [97]:

data = pd.read_csv("Data\\BP_features.csv")
labels = pd.read_csv("Data\\final_labels.csv")
labels = labels.iloc[:, 1]

Splitting the datasets into training and testing sets

In [98]:

# 80:20 -> 441: 111
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42, shuffle=True)




Scaling the data

In [99]:
ss_train = StandardScaler()
X_train = ss_train.fit_transform(X_train)
ss_test = StandardScaler()
X_test = ss_test.fit_transform(X_test)

In [100]:

# Function to train and evaluate a model
def train_evaluate_model(X_train, X_test, y_train, y_test, model, param_grid, model_name):
    
    # Hyperparameter tuning
    
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy',)
    grid_search.fit(X_train, y_train)

    best_model = grid_search.best_estimator_

    best_params =  grid_search.best_params_
    best_score =  grid_search.best_score_
    # Predicting
    y_train_pred = best_model.predict(X_train)
    y_test_pred = best_model.predict(X_test)

    # Evaluation
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)


    return best_model, train_accuracy, test_accuracy, best_params, best_score


In [101]:

# Define the models and their hyperparameters

models = {
    'Gradient Boosting': (GradientBoostingClassifier(),{}),
    'K-Nearest Neighbors': (KNeighborsClassifier(),{}),
    'XGBClassifier': (XGBClassifier(),{
    'alpha': [0.001, 0.01, 0.1, 0.20, 0.25, 0.30],
    }),
    'Logistic Regression': (LogisticRegression(), {
        # 'penalty': ['l1','l2'], 'C': [0.001,0.01,0.1,1,10,100,1000]
        }),
    # 'Elastic Net': (ElasticNet(),{}),
    # 'Ridge': (RidgeClassifier(),{}),
    # 'Lasso': (Lasso(),{}),
    'Extra Trees': (ExtraTreesClassifier(),{}),
    'AdaBoost': (AdaBoostClassifier(),{}),
    'Passive Aggressive': (PassiveAggressiveClassifier(max_iter=1000, random_state=42, tol=1e-3), {})
}


# Support Vector Machines
from sklearn.svm import LinearSVC
models['Support Vector Machines'] = (LinearSVC(), {})

# Decision Trees
from sklearn.tree import DecisionTreeClassifier
models['Decision Trees'] = (DecisionTreeClassifier(), {})

# Random Forest
from sklearn.ensemble import RandomForestClassifier
models['Random Forest'] = (RandomForestClassifier(), {})

# Naive Bayes
from sklearn.naive_bayes import GaussianNB
models['Naive Bayes'] = (GaussianNB(), {})




In [102]:

# Train and evaluate models for AD8232
best_models = {}
# results = []
result = {}

for model_name, (model, param_grid) in models.items():
    best_model, train_accuracy, test_accuracy, best_params, best_score = train_evaluate_model(X_train, X_test, y_train, y_test, model, param_grid, model_name)
    best_models[model_name] = best_model
    result[model_name] = [train_accuracy, test_accuracy, best_score, best_params]



In [103]:
result

{'Gradient Boosting': [0.981859410430839,
  0.8108108108108109,
  np.float64(0.8233656792645556),
  {}],
 'K-Nearest Neighbors': [0.8299319727891157,
  0.7387387387387387,
  np.float64(0.7530132788559755),
  {}],
 'XGBClassifier': [1.0,
  0.8468468468468469,
  np.float64(0.8573289070480081),
  {'alpha': 0.3}],
 'Logistic Regression': [0.8321995464852607,
  0.8018018018018018,
  np.float64(0.7440245148110317),
  {}],
 'Extra Trees': [1.0, 0.8108108108108109, np.float64(0.8164708886619), {}],
 'AdaBoost': [0.9047619047619048,
  0.8018018018018018,
  np.float64(0.7643769152196118),
  {}],
 'Passive Aggressive': [0.7619047619047619,
  0.7387387387387387,
  np.float64(0.6849336057201226),
  {}],
 'Support Vector Machines': [0.854875283446712,
  0.7477477477477478,
  np.float64(0.7394790602655771),
  {}],
 'Decision Trees': [1.0,
  0.7477477477477478,
  np.float64(0.7825331971399387),
  {}],
 'Random Forest': [1.0,
  0.7837837837837838,
  np.float64(0.8277834525025536),
  {}],
 'Naive Bayes'

In [104]:
results = pd.DataFrame(result).T
results.columns = ['Train Accuracy', 'Test Accuracy', 'Best Score', 'Best Params']
results

Unnamed: 0,Train Accuracy,Test Accuracy,Best Score,Best Params
Gradient Boosting,0.981859,0.810811,0.823366,{}
K-Nearest Neighbors,0.829932,0.738739,0.753013,{}
XGBClassifier,1.0,0.846847,0.857329,{'alpha': 0.3}
Logistic Regression,0.8322,0.801802,0.744025,{}
Extra Trees,1.0,0.810811,0.816471,{}
AdaBoost,0.904762,0.801802,0.764377,{}
Passive Aggressive,0.761905,0.738739,0.684934,{}
Support Vector Machines,0.854875,0.747748,0.739479,{}
Decision Trees,1.0,0.747748,0.782533,{}
Random Forest,1.0,0.783784,0.827783,{}


In [105]:
best_models

{'Gradient Boosting': GradientBoostingClassifier(),
 'K-Nearest Neighbors': KNeighborsClassifier(),
 'XGBClassifier': XGBClassifier(alpha=0.3, base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bynode=None,
               colsample_bytree=None, device=None, early_stopping_rounds=None,
               enable_categorical=False, eval_metric=None, feature_types=None,
               gamma=None, grow_policy=None, importance_type=None,
               interaction_constraints=None, learning_rate=None, max_bin=None,
               max_cat_threshold=None, max_cat_to_onehot=None,
               max_delta_step=None, max_depth=None, max_leaves=None,
               min_child_weight=None, missing=nan, monotone_constraints=None,
               multi_strategy=None, n_estimators=None, n_jobs=None,
               num_parallel_tree=None, ...),
 'Logistic Regression': LogisticRegression(),
 'Extra Trees': ExtraTreesClassifier(),
 'AdaBoost': AdaBoostClassifier(),
