In [1]:
import os
import pandas as pd

from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report
from joblib import load

In [2]:
# Directory holding the serialized base models (relative to this notebook)
models_dir = os.path.join('..', 'trained_models')

# One .joblib file per previously trained estimator
knn_model = os.path.join(models_dir, 'knn.joblib')
lr_model = os.path.join(models_dir, 'logistic_regression.joblib')
rf_model = os.path.join(models_dir, 'random_forest.joblib')
nb_model = os.path.join(models_dir, 'naive_bayes.joblib')
xgb_model = os.path.join(models_dir, 'xgboosting.joblib')

# Deserialize each estimator.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
knn = load(knn_model)  # K Nearest Neighbors
lr = load(lr_model)    # Logistic Regression
rf = load(rf_model)    # Random Forest
nb = load(nb_model)    # Naive Bayes
xgb = load(xgb_model)  # XGBoosting

In [3]:
# Locations of the prepared train/test splits (relative to this notebook)
prepared_dir = os.path.join('..', 'datasets', 'prepared_data')
x_train_path = os.path.join(prepared_dir, 'X_train_resampled.csv')
x_test_path = os.path.join(prepared_dir, 'X_test_transformed.csv')
y_train_path = os.path.join(prepared_dir, 'y_train_resampled.csv')
y_test_path = os.path.join(prepared_dir, 'y_test.csv')

# Feature matrices (described by the author as PCA-reduced) stay as DataFrames
X_train, X_test = (pd.read_csv(csv_path) for csv_path in (x_train_path, x_test_path))

# Targets are read as single-column frames and flattened to 1D arrays,
# which is the label shape scikit-learn estimators expect
y_train, y_test = (
    pd.read_csv(csv_path).values.ravel()
    for csv_path in (y_train_path, y_test_path)
)

In [4]:
# Quick sanity check: shapes in the order y_train, X_train, y_test, X_test
tuple(split.shape for split in (y_train, X_train, y_test, X_test))

((7244,), (7244, 23), (2113,), (2113, 23))

In [5]:
# Soft-voting ensemble: the predicted class is taken from the averaged
# class probabilities of the five base models.
# NOTE: VotingClassifier.fit() fits *clones* of the estimators it is given, so the
# fitted state restored from the .joblib files is not reused here -- only each
# model's hyperparameters carry over, and every member is refit on the data below.
base_estimators = [
    ('K Nearest Neighbors', knn),
    ('Random Forest', rf),
    ('Logistic regression', lr),
    ('Naive Bayes', nb),
    ('XGBoosting', xgb),
]
voting_clf = VotingClassifier(estimators=base_estimators, voting='soft')

# Fit the ensemble; left as the cell's last expression so the estimator repr is displayed
voting_clf.fit(X_train, y_train)



In [6]:
# Predict class labels for the test features with the fitted ensemble
y_pred = voting_clf.predict(X=X_test)

In [7]:
# Per-class precision / recall / F1 of the ensemble's predictions against y_test
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.90      0.74      0.81      1552
           1       0.52      0.78      0.62       561

    accuracy                           0.75      2113
   macro avg       0.71      0.76      0.72      2113
weighted avg       0.80      0.75      0.76      2113

