## IMPORTING LIBRARIES

In [2]:
import os
import pickle
import warnings

import numpy as np

# Silence library warnings before the scikit-learn imports below run.
warnings.filterwarnings('ignore')

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import *

In [3]:
# Load preprocessed data
# The pickle location defaults to the path used on the original author's
# machine; set the PREPROCESSED_DATA_PATH environment variable to point the
# notebook at the file on any other machine without editing this cell.
file_path = os.environ.get(
    "PREPROCESSED_DATA_PATH",
    r"D:\Peraisoodan Viswanath S - IABAC\3.Src\Data Processing\preprocessed_data.pkl",
)
# SECURITY: pickle.load can execute arbitrary code -- only open files you trust.
with open(file_path, 'rb') as f:
    # The pickled object is unpacked into exactly four values, in this order.
    x_train_smote, x_test, y_train_smote, y_test = pickle.load(f)

In [4]:
# Coerce both label collections to int32 NumPy arrays so the training and test
# targets share one consistent dtype.
y_train_smote, y_test = (
    np.array(labels, dtype=np.int32) for labels in (y_train_smote, y_test)
)

## Models Without Hyperparameter Tuning

In [6]:
# Baseline estimators keyed by display name. Every estimator uses the library's
# default hyperparameters; random_state=42 is pinned on each one that is given
# a seed here so repeated runs produce the same fits.
#
# LogisticRegression is deliberately created without `multi_class`: that
# argument is deprecated since scikit-learn 1.5 and scheduled for removal, and
# with the default lbfgs solver a multinomial model is already what gets fitted
# when the target has three or more classes.
models = {
    "LogisticRegression": LogisticRegression(random_state=42),
    "RandomForest": RandomForestClassifier(random_state=42),
    "GradientBoosting": GradientBoostingClassifier(random_state=42),
    "SVM": SVC(random_state=42),
    "KNN": KNeighborsClassifier(),
    "DecisionTree": DecisionTreeClassifier(random_state=42),
    "MLP": MLPClassifier(random_state=42),
}

In [7]:
# Fit each baseline estimator on x_train_smote / y_train_smote, predict on
# x_test, and print accuracy, confusion matrix and classification report.
for name, model in models.items():
    print(f"Training {name}...")
    model.fit(x_train_smote, y_train_smote)
    # Predict and cast in one step so predictions have the same type as y_test.
    y_pred = np.array(model.predict(x_test), dtype=np.int32)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred)
    # One print call with a newline separator emits the same lines, in the same
    # order, as printing each item individually.
    print(
        f"\n{name} Model Results:",
        f"Accuracy: {acc:.4f}",
        "Confusion Matrix:",
        cm,
        "Classification Report:",
        cr,
        "-"*50,
        sep="\n",
    )

Training LogisticRegression...

LogisticRegression Model Results:
Accuracy: 0.7750
Confusion Matrix:
[[ 23   5   1]
 [ 26 142  16]
 [  0   6  21]]
Classification Report:
              precision    recall  f1-score   support

           2       0.47      0.79      0.59        29
           3       0.93      0.77      0.84       184
           4       0.55      0.78      0.65        27

    accuracy                           0.78       240
   macro avg       0.65      0.78      0.69       240
weighted avg       0.83      0.78      0.79       240

--------------------------------------------------
Training RandomForest...

RandomForest Model Results:
Accuracy: 0.9500
Confusion Matrix:
[[ 26   3   0]
 [  2 182   0]
 [  0   7  20]]
Classification Report:
              precision    recall  f1-score   support

           2       0.93      0.90      0.91        29
           3       0.95      0.99      0.97       184
           4       1.00      0.74      0.85        27

    accuracy          

## Models With Hyperparameter Tuning

In [9]:
# Search space for hyperparameter tuning, keyed by display name. Each entry
# pairs an unfitted estimator ("model") with the parameter grid ("params") that
# is searched for it.
#
# LogisticRegression is deliberately created without `multi_class`: that
# argument is deprecated since scikit-learn 1.5 and scheduled for removal, and
# with the default lbfgs solver a multinomial model is already what gets fitted
# when the target has three or more classes.
models = {
    "LogisticRegression": {
        "model": LogisticRegression(random_state=42),
        "params": {"C": [0.01, 0.1, 1, 10], "max_iter": [100, 200]},
    },
    "RandomForest": {
        "model": RandomForestClassifier(random_state=42),
        "params": {"n_estimators": [50, 100, 200], "max_depth": [5, 10, None]},
    },
    "GradientBoosting": {
        "model": GradientBoostingClassifier(random_state=42),
        "params": {"n_estimators": [50, 100, 200], "max_depth": [5, 10, None]},
    },
    "SVM": {
        "model": SVC(random_state=42),
        "params": {"C": [0.1, 1, 10], "kernel": ["linear", "rbf"]},
    },
    "KNN": {
        "model": KNeighborsClassifier(),
        "params": {"n_neighbors": [3, 5, 7], "weights": ["uniform", "distance"]},
    },
    "DecisionTree": {
        "model": DecisionTreeClassifier(random_state=42),
        "params": {"max_depth": [5, 10, None], "criterion": ["gini", "entropy"]},
    },
    "MLP": {
        "model": MLPClassifier(random_state=42),
        # Each integer is the width of a single hidden layer.
        "params": {
            "hidden_layer_sizes": [100, 200],
            "activation": ["relu", "tanh"],
            "solver": ["adam", "sgd"],
        },
    },
}

In [10]:
# For every entry in `models`, run a 5-fold, accuracy-scored grid search on
# x_train_smote / y_train_smote, then evaluate the best estimator on x_test.
for name, param in models.items():
    print(f"Training {name} with GridSearchCV...")
    # Exhaustive search over this model's grid, using all available cores.
    grid_search = GridSearchCV(
        estimator=param["model"],
        param_grid=param["params"],
        cv=5,
        scoring='accuracy',
        verbose=2,
        n_jobs=-1,
    )
    grid_search.fit(x_train_smote, y_train_smote)
    # Estimator from the best-scoring parameter combination.
    best_model = grid_search.best_estimator_
    # Predict and cast in one step so predictions have the same type as y_test.
    y_pred = np.array(best_model.predict(x_test), dtype=np.int32)
    hyper_acc = accuracy_score(y_test, y_pred)
    hyper_cm = confusion_matrix(y_test, y_pred)
    hyper_cr = classification_report(y_test, y_pred)
    # One print call with a newline separator emits the same lines, in the same
    # order, as printing each item individually.
    print(
        f"\n{name} Model Results:",
        f"Accuracy: {hyper_acc:.4f}",
        "Confusion Matrix:",
        hyper_cm,
        "Classification Report:",
        hyper_cr,
        "-"*50,
        sep="\n",
    )

Training LogisticRegression with GridSearchCV...
Fitting 5 folds for each of 8 candidates, totalling 40 fits

LogisticRegression Model Results:
Accuracy: 0.7750
Confusion Matrix:
[[ 22   5   2]
 [ 25 143  16]
 [  0   6  21]]
Classification Report:
              precision    recall  f1-score   support

           2       0.47      0.76      0.58        29
           3       0.93      0.78      0.85       184
           4       0.54      0.78      0.64        27

    accuracy                           0.78       240
   macro avg       0.65      0.77      0.69       240
weighted avg       0.83      0.78      0.79       240

--------------------------------------------------
Training RandomForest with GridSearchCV...
Fitting 5 folds for each of 9 candidates, totalling 45 fits

RandomForest Model Results:
Accuracy: 0.9500
Confusion Matrix:
[[ 26   3   0]
 [  2 182   0]
 [  0   7  20]]
Classification Report:
              precision    recall  f1-score   support

           2       0.93      