In [4]:
#Setup and imports

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, roc_auc_score, precision_score,
    recall_score, f1_score, matthews_corrcoef
)

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


# Load the training data and separate features from the target column.

train_path = r"C:\Users\HP\Documents\ML\mobile_train.csV"
train_df = pd.read_csv(train_path)

y = train_df["price_range"]
X = train_df.drop(columns=["price_range"])


# Hold out 20% of the rows for evaluation; stratifying on y keeps the
# class proportions the same in both partitions.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardized copies of the features, used by Logistic Regression and KNN.
# The scaler is fitted on the training partition only and then applied to
# both partitions, so no test-set statistics leak into training.

scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

#Model Training & Evaluation - Helper function for Metrics

def evaluate_model(model, X_tr, X_te, y_tr=None, y_te=None):
    """Fit ``model`` on the training data and score it on the evaluation data.

    Parameters
    ----------
    model
        Classifier exposing ``fit``, ``predict`` and ``predict_proba``.
        It is fitted in place.
    X_tr, X_te
        Training and evaluation feature matrices.
    y_tr, y_te
        Training and evaluation labels. When left as ``None`` the
        module-level ``y_train`` / ``y_test`` are used, so existing
        three-argument calls behave exactly as before.

    Returns
    -------
    dict
        Maps metric name to score, with keys ``"Accuracy"``, ``"AUC"``,
        ``"Precision"``, ``"Recall"``, ``"F1 Score"`` and ``"MCC"``.
        AUC is computed one-vs-rest from the ``predict_proba`` output;
        precision, recall and F1 use weighted averaging across classes.
    """
    # Fall back to the notebook's global split only when the caller did not
    # pass labels explicitly; this keeps the helper usable on other splits.
    if y_tr is None:
        y_tr = y_train
    if y_te is None:
        y_te = y_test

    model.fit(X_tr, y_tr)
    y_pred = model.predict(X_te)
    y_prob = model.predict_proba(X_te)

    return {
        "Accuracy": accuracy_score(y_te, y_pred),
        "AUC": roc_auc_score(y_te, y_prob, multi_class="ovr"),
        "Precision": precision_score(y_te, y_pred, average="weighted"),
        "Recall": recall_score(y_te, y_pred, average="weighted"),
        "F1 Score": f1_score(y_te, y_pred, average="weighted"),
        "MCC": matthews_corrcoef(y_te, y_pred),
    }

# Estimator definitions. Seeds are fixed wherever the estimator is given a
# random_state so that reruns reproduce the same numbers.

lr = LogisticRegression(max_iter=1000)
dt = DecisionTreeClassifier(random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)
nb = GaussianNB()
rf = RandomForestClassifier(n_estimators=200, random_state=42)
xgb = XGBClassifier(
    objective="multi:softprob",
    eval_metric="mlogloss",
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    random_state=42,
)

# Fit and score each estimator. Logistic Regression and KNN receive the
# standardized features; the remaining models use the raw features.

lr_results = evaluate_model(lr, X_train_scaled, X_test_scaled)
dt_results = evaluate_model(dt, X_train, X_test)
knn_results = evaluate_model(knn, X_train_scaled, X_test_scaled)
nb_results = evaluate_model(nb, X_train, X_test)
rf_results = evaluate_model(rf, X_train, X_test)
xgb_results = evaluate_model(xgb, X_train, X_test)

# Comparison table: one row per model, one column per metric.

results_df = pd.DataFrame(
    {
        "Logistic Regression": lr_results,
        "Decision Tree": dt_results,
        "KNN": knn_results,
        "Naive Bayes": nb_results,
        "Random Forest": rf_results,
        "XGBoost": xgb_results,
    }
).transpose()

print(results_df)

                     Accuracy       AUC  Precision  Recall  F1 Score       MCC
Logistic Regression    0.9650  0.998667   0.965045  0.9650  0.964986  0.953357
Decision Tree          0.8300  0.886667   0.831883  0.8300  0.830168  0.773811
KNN                    0.5000  0.769750   0.521130  0.5000  0.505355  0.334993
Naive Bayes            0.8100  0.950567   0.811326  0.8100  0.810458  0.746804
Random Forest          0.8775  0.979608   0.877649  0.8775  0.877400  0.836785
XGBoost                0.9225  0.993842   0.922631  0.9225  0.922482  0.896719
