# Test Multiple Classifiers for BB features

In [None]:
import pandas as pd
import os
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from tqdm import tqdm
from includes.helpers import get_bb_train_test_set
from includes.constants import GLOBAL_RANDOM_STATE

In [None]:
# Load the BB feature splits from the project helper; the 4-tuple is unpacked
# in the order (X_train, X_test, y_train, y_test).
bb_splits = get_bb_train_test_set()
X_train, X_test, y_train, y_test = bb_splits

In [None]:
# Step 4: Feature scaling (if needed)
# The scaler is fitted on the training split only, and the same fitted
# statistics are then applied to both splits, so the test data never
# influences the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# To run the experiment on unscaled features instead, assign X_train and
# X_test directly to X_train_scaled and X_test_scaled.

In [None]:
# Step 5: Classifiers to compare, keyed by the display name used in the results.
#
# n_jobs=-1 tells scikit-learn to use all available processors. It replaces
# int(os.cpu_count()), which raises TypeError when os.cpu_count() cannot
# determine the core count and returns None.
#
# Every estimator that accepts a seed is given GLOBAL_RANDOM_STATE so repeated
# runs are comparable. n_estimators is spelled out for each forest so the name
# always matches the actual number of trees, independent of library defaults.
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest 100": RandomForestClassifier(
        random_state=GLOBAL_RANDOM_STATE, n_jobs=-1, n_estimators=100
    ),
    "Random Forest 1000": RandomForestClassifier(
        random_state=GLOBAL_RANDOM_STATE, n_jobs=-1, n_estimators=1000
    ),
    "Random Forest 10000": RandomForestClassifier(
        random_state=GLOBAL_RANDOM_STATE, n_jobs=-1, n_estimators=10000
    ),
    "Gradient Boosting": GradientBoostingClassifier(random_state=GLOBAL_RANDOM_STATE),
    "K-Nearest Neighbors": KNeighborsClassifier(n_jobs=-1, n_neighbors=10),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=GLOBAL_RANDOM_STATE),
    "AdaBoost": AdaBoostClassifier(random_state=GLOBAL_RANDOM_STATE),
}

In [None]:
# Step 6: Fit each classifier on the scaled training data and score it on the
# scaled test data. `results` maps the classifier's display name to a dict of
# its metrics.
results = {}

# Column title used in `results` -> key used inside the classification report.
metric_fields = {"Precision": "precision", "Recall": "recall", "F1-Score": "f1-score"}

for name, clf in tqdm(classifiers.items(), desc="Training models", total=len(classifiers)):
    # Train on the training split, then predict the held-out test split.
    y_pred = clf.fit(X_train_scaled, y_train).predict(X_test_scaled)

    # Overall accuracy, per-class report (as a nested dict) and confusion matrix.
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    cm = confusion_matrix(y_test, y_pred)

    # NOTE(review): the report is indexed with the string keys '0' and '1',
    # which presumably means the target labels are the integers 0 and 1 -
    # confirm against get_bb_train_test_set().
    per_class = {
        f"{title} (Class {label})": report[label][field]
        for label in ("0", "1")
        for title, field in metric_fields.items()
    }

    # Same key order as the printed summary: accuracy, class 0, class 1, matrix.
    results[name] = {"Accuracy": accuracy, **per_class, "Confusion Matrix": cm}

In [None]:
# Step 7: Print results
# Scalar metrics are printed in this fixed order, each rounded to two decimals;
# the confusion matrix follows on its own lines.
scalar_metrics = (
    "Accuracy",
    "Precision (Class 0)",
    "Recall (Class 0)",
    "F1-Score (Class 0)",
    "Precision (Class 1)",
    "Recall (Class 1)",
    "F1-Score (Class 1)",
)

for name, result in results.items():
    print(f"Model: {name}")
    for metric in scalar_metrics:
        print(f"{metric}: {result[metric]:.2f}")
    print("Confusion Matrix:")
    print(result['Confusion Matrix'])
    # Visual separator between models.
    print("=" * 50)