In [8]:
# import Libs
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [9]:
df=pd.read_csv(r"/Users/nishantbhardwaj/Documents/MachineLearning/Dataset/PreProcessedData.csv")

In [10]:
df.head(2)

Unnamed: 0,Device Model,Operating System,App Usage Time (min/day),Screen On Time (hours/day),Battery Drain (mAh/day),Number of Apps Installed,Data Usage (MB/day),Age,Gender,User Behavior Class
0,Google Pixel 5,Android,393,6.4,1872,67,1122,40,Male,4
1,OnePlus 9,Android,268,4.7,1331,42,944,47,Female,3


In [11]:
X = df.drop(columns=['User Behavior Class'],axis=1)

In [12]:
y=df["User Behavior Class"]

In [13]:
# Create Column Transformer with 3 types of transformers
num_features = X.select_dtypes(exclude="object").columns
cat_features = X.select_dtypes(include="object").columns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    [
        ("OneHotEncoder", oh_transformer, cat_features),
         ("StandardScaler", numeric_transformer, num_features),        
    ]
)

In [14]:
X = preprocessor.fit_transform(X)

In [15]:
# separate dataset into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train.shape, X_test.shape

((560, 15), (140, 15))

In [16]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

def evaluate_classification_model(true, predicted, average='binary'):
    accuracy = accuracy_score(true, predicted)
    precision = precision_score(true, predicted, average=average)
    recall = recall_score(true, predicted, average=average)
    f1 = f1_score(true, predicted, average=average)
    cm = confusion_matrix(true, predicted)
    
    try:
        roc_auc = roc_auc_score(true, predicted)
    except ValueError:
        roc_auc = "ROC AUC is not available for multi-class without proper probabilistic predictions"
    
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "confusion_matrix": cm,
        "roc_auc_score": roc_auc
    }


In [17]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier

# Example classification models
models = {
    "Logistic Regression": LogisticRegression(multi_class='ovr'),
    "K-Neighbors Classifier": KNeighborsClassifier(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    "Random Forest Classifier": RandomForestClassifier(),
    "AdaBoost Classifier": AdaBoostClassifier()
}

# Storing results
model_list = []
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train)  # Train model

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Evaluate Train and Test dataset
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)

    train_precision = precision_score(y_train, y_train_pred, average='macro')
    test_precision = precision_score(y_test, y_test_pred, average='macro')

    train_recall = recall_score(y_train, y_train_pred, average='macro')
    test_recall = recall_score(y_test, y_test_pred, average='macro')

    train_f1 = f1_score(y_train, y_train_pred, average='macro')
    test_f1 = f1_score(y_test, y_test_pred, average='macro')

    # Store model name and evaluation metrics
    model_list.append(list(models.keys())[i])
    accuracy_list.append(test_accuracy)
    precision_list.append(test_precision)
    recall_list.append(test_recall)
    f1_list.append(test_f1)

    # Print metrics
    print(list(models.keys())[i])

    print('Model performance for Training set:')
    print(f"- Accuracy: {train_accuracy:.4f}")
    print(f"- Precision: {train_precision:.4f}")
    print(f"- Recall: {train_recall:.4f}")
    print(f"- F1 Score: {train_f1:.4f}")
    print('----------------------------------')

    print('Model performance for Test set:')
    print(f"- Accuracy: {test_accuracy:.4f}")
    print(f"- Precision: {test_precision:.4f}")
    print(f"- Recall: {test_recall:.4f}")
    print(f"- F1 Score: {test_f1:.4f}")

    print('=' * 35)
    print('\n')


Logistic Regression
Model performance for Training set:
- Accuracy: 0.8696
- Precision: 0.8739
- Recall: 0.8697
- F1 Score: 0.8709
----------------------------------
Model performance for Test set:
- Accuracy: 0.8286
- Precision: 0.8412
- Recall: 0.8456
- F1 Score: 0.8422


K-Neighbors Classifier
Model performance for Training set:
- Accuracy: 0.9821
- Precision: 0.9824
- Recall: 0.9825
- F1 Score: 0.9823
----------------------------------
Model performance for Test set:
- Accuracy: 0.9643
- Precision: 0.9706
- Recall: 0.9675
- F1 Score: 0.9672


Decision Tree Classifier
Model performance for Training set:
- Accuracy: 1.0000
- Precision: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000
----------------------------------
Model performance for Test set:
- Accuracy: 1.0000
- Precision: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000


Random Forest Classifier
Model performance for Training set:
- Accuracy: 1.0000
- Precision: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000
-------------------------------

In [19]:
pd.DataFrame(list(zip(model_list, accuracy_list)), columns=['Model Name', 'Accuracy']).sort_values(by=["Accuracy"], ascending=False)


Unnamed: 0,Model Name,Accuracy
2,Decision Tree Classifier,1.0
3,Random Forest Classifier,1.0
1,K-Neighbors Classifier,0.964286
0,Logistic Regression,0.828571
4,AdaBoost Classifier,0.564286
