**Import Libraries**

In [None]:
# Import required Python libraries

import os
import zipfile
import urllib.request

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

from xgboost import XGBClassifier


# Evaluation metrics

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, matthews_corrcoef
)


# Label encoding for target variable

from sklearn.preprocessing import LabelEncoder


**Load Dataset**

In [None]:
# Download and extract the UCI HAR Dataset automatically.
# The archive is unpacked into the current working directory and is expected
# to produce the DATA_DIR folder that the data-loading code reads from.

DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI%20HAR%20Dataset.zip"
ZIP_FILE = "uci_har.zip"
DATA_DIR = "UCI HAR Dataset"

if os.path.isdir(DATA_DIR):
    print("Dataset already exists.")
else:
    # Reuse a complete archive left by a previous run; is_zipfile() is False
    # both when the file is missing and when it is not a readable zip
    # (e.g. a truncated download), in which case it is fetched again.
    if not zipfile.is_zipfile(ZIP_FILE):
        print("Downloading dataset...")
        # Download under a temporary name so an interrupted transfer never
        # leaves a partial file at ZIP_FILE.
        partial_file = ZIP_FILE + ".part"
        urllib.request.urlretrieve(DATA_URL, partial_file)
        os.replace(partial_file, ZIP_FILE)

    print("Extracting dataset...")
    with zipfile.ZipFile(ZIP_FILE, 'r') as zip_ref:
        zip_ref.extractall()

    # Fail here with a clear message instead of later with a confusing
    # "file not found" from the data-loading code.
    if not os.path.isdir(DATA_DIR):
        raise FileNotFoundError(
            f"Extracting {ZIP_FILE!r} did not create the expected folder {DATA_DIR!r}"
        )

    print("Dataset ready!")


Downloading dataset...
Extracting dataset...
Dataset ready!


**Load Train & Test Data**

In [None]:
# Read the pre-split training and testing files into DataFrames

train_dir = f"{DATA_DIR}/train"
test_dir = f"{DATA_DIR}/test"

# Feature files: parsed as whitespace-separated columns with no header row
feature_options = {"sep": r"\s+", "header": None}

X_train = pd.read_csv(f"{train_dir}/X_train.txt", **feature_options)
X_test = pd.read_csv(f"{test_dir}/X_test.txt", **feature_options)

# Target label files: parsed with the default separator and no header row
label_options = {"header": None}

y_train = pd.read_csv(f"{train_dir}/y_train.txt", **label_options)
y_test = pd.read_csv(f"{test_dir}/y_test.txt", **label_options)


**Encode Labels**

In [None]:

# Encode class labels into integers
encoder = LabelEncoder()

# Convert target values into 0,1,2,... classes.
# np.asarray() accepts both the DataFrames read from disk and the 1-D arrays
# this cell assigns back to y_train / y_test, so re-running the cell no longer
# fails on a missing `.values` attribute.
# The encoder is fitted on the training labels only and then reused for the
# test labels so both splits share the same mapping.
y_train = encoder.fit_transform(np.asarray(y_train).ravel())
y_test = encoder.transform(np.asarray(y_test).ravel())


**Define Models**

In [None]:
# Define all six classification models

# Fixed seed for the estimators that use randomness during fitting, so the
# comparison table is the same on every Restart & Run All.
RANDOM_STATE = 42

models = {
    "Logistic Regression": LogisticRegression(max_iter=2000),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "KNN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    "XGBoost": XGBClassifier(eval_metric="mlogloss", random_state=RANDOM_STATE)
}


**Train & Evaluate Models**

In [None]:
# Fit every model on the training split and score it on the test split

results = []   # One dict of metrics per model, in the order the models are defined

# Precision, recall and F1 are all averaged across classes the same way
weighted = {"average": "weighted"}

for name, model in models.items():
    print(f"Training {name}...")

    # Fit on the training data
    model.fit(X_train, y_train)

    # Predicted class labels, used by every metric except AUC
    y_pred = model.predict(X_test)

    # Predicted class probabilities, used by the one-vs-rest AUC
    y_prob = model.predict_proba(X_test)

    # Collect the metrics for this model; the key order is the column order
    # of the table built from `results`
    scores = {"Model": name}
    scores["Accuracy"] = accuracy_score(y_test, y_pred)
    scores["AUC"] = roc_auc_score(y_test, y_prob, multi_class="ovr")
    scores["Precision"] = precision_score(y_test, y_pred, **weighted)
    scores["Recall"] = recall_score(y_test, y_pred, **weighted)
    scores["F1"] = f1_score(y_test, y_pred, **weighted)
    scores["MCC"] = matthews_corrcoef(y_test, y_pred)
    results.append(scores)


Training Logistic Regression...
Training Decision Tree...
Training KNN...
Training Naive Bayes...
Training Random Forest...
Training XGBoost...


In [None]:
# Mount Google Drive when running on Google Colab.
# The import is guarded so the notebook still runs top to bottom in
# environments where the google.colab package is not installed.
try:
    from google.colab import drive
except ImportError:
    print("Not running on Google Colab; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

**Display Results**

In [None]:
# Display model comparison results

results_df = pd.DataFrame(results)

print("\n================ MODEL COMPARISON TABLE ================\n")
# to_string() renders every column on one row per model; a plain
# print(results_df) wraps wide frames and splits the last column(s) into a
# separate block, which makes the table hard to compare.
print(results_df.to_string())
print("\n========================================================\n")



                 Model  Accuracy       AUC  Precision    Recall        F1  \
0  Logistic Regression  0.960299  0.997587   0.961645  0.960299  0.960161   
1        Decision Tree  0.857822  0.913206   0.858710  0.857822  0.857019   
2                  KNN  0.901595  0.979910   0.905930  0.901595  0.900698   
3          Naive Bayes  0.770275  0.957772   0.794683  0.770275  0.768770   
4        Random Forest  0.925348  0.995321   0.926674  0.925348  0.925057   
5              XGBoost  0.938242  0.996961   0.939294  0.938242  0.938045   

        MCC  
0  0.952611  
1  0.829608  
2  0.882790  
3  0.728609  
4  0.910598  
5  0.926054  


