# Experiment 8
---
Compare SVM, decision tree, random forest, and XGBoost models by tuning their hyperparameters with GridSearchCV.

### Steps:
1. Load Dataset
2. Preprocess the data
3. Split Data
4. Define Hyperparameter grid
    - n_estimators: [50, 100, 200]
    - max_depth : [None, 10, 20, 30]
    - min_samples_split: [2,5,10]
    - min_samples_leaf: [1,2,4]
5. Perform grid search: Use GridSearchCV with cv=5
6. Train and evaluate: Assess performance on the test set
7. Report results: Print the best parameters and test accuracy


In [7]:
# Import Libraries
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score

import kagglehub
from kagglehub import KaggleDatasetAdapter


In [None]:
# -----------------------------
# 1. Load Dataset
# -----------------------------
# Fetch heart.csv from the Kaggle dataset through the pandas adapter.
# `kagglehub.load_dataset` is deprecated (calling it emits a warning that
# points at the call site); `kagglehub.dataset_load` is its replacement and
# takes the same positional arguments.
file_path = "heart.csv"
data = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "johnsmith88/heart-disease-dataset",
    file_path,
)

# NOTE(review): the test accuracies recorded for this notebook are very close
# to 1.0. Worth checking `data.duplicated().sum()` -- duplicated rows that end
# up on both sides of the train/test split would inflate test accuracy.

# Features are every column except the label; "target" is the label column.
X = data.drop(columns="target")
y = data["target"]


  data = kagglehub.load_dataset(


Download already complete (38114 bytes).


In [9]:
# -----------------------------
# 2. Preprocessing (Scaling)
# -----------------------------

# Only instantiated here (not fitted); it is handed to the SVM pipeline later.
scaler = StandardScaler()


# -----------------------------
# 3. Train-Test Split (80-20)
# -----------------------------

# Hold out 20% of the rows, stratified on the label, with a fixed seed so the
# split is reproducible.
_split = train_test_split(X, y, stratify=y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = _split

In [10]:
# -----------------------------
# 4. Models & Parameter Grids
# -----------------------------

# SVM
# Scaling and the classifier are chained in one pipeline, so the grid keys
# below address the classifier through its step name ("svm__<param>").
svm_steps = [
    ('scaler', scaler),
    ('svm', SVC()),
]
svm_pipeline = Pipeline(steps=svm_steps)

# NOTE(review): gamma presumably has no effect for the linear kernel, so the
# linear/gamma combinations may be redundant fits -- confirm before pruning.
svm_params = dict(
    svm__C=[0.1, 1, 10],
    svm__kernel=['linear', 'rbf'],
    svm__gamma=['scale', 'auto'],
)


# Decision Tree
# Seeded so that repeated runs build the same tree for a given configuration.
dt = DecisionTreeClassifier(random_state=42)

# Search space: depth cap plus the two minimum-sample thresholds.
dt_params = dict(
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)


# Random Forest
# Seeded forest; the grid varies the number of trees together with the same
# depth / minimum-sample settings listed for the single decision tree.
rf = RandomForestClassifier(random_state=42)

rf_params = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)


# XGBoost
# `use_label_encoder` is intentionally not passed: XGBoost reports it as an
# unused parameter ('Parameters: { "use_label_encoder" } are not used.'), so
# supplying it only produces warning noise during every fit.
xgb = XGBClassifier(
    eval_metric='logloss',
    random_state=42
)

# Search space: number of boosting rounds, tree depth, learning rate and the
# row-subsampling fraction.
xgb_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0]
}


In [12]:
# -----------------------------
# 5. GridSearchCV (cv=5)
# -----------------------------

# Display name -> (estimator, parameter grid) for each candidate model.
models = {
    "SVM": (svm_pipeline, svm_params),
    "Decision Tree": (dt, dt_params),
    "Random Forest": (rf, rf_params),
    "XGBoost": (xgb, xgb_params)
}

# Display name -> {"Best Params": ..., "Test Accuracy": ...}; filled in below.
results = {}


for name, (estimator, param_grid) in models.items():

    print(f"\nTraining {name}...")

    # 5-fold cross-validated search over the grid, scored by accuracy and
    # run on all available cores.
    grid = GridSearchCV(
        estimator,
        param_grid,
        cv=5,
        scoring='accuracy',
        n_jobs=-1
    )

    grid.fit(X_train, y_train)

    # The winning configuration, used to score the held-out test split.
    best_model = grid.best_estimator_

    y_pred = best_model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)

    results[name] = {
        "Best Params": grid.best_params_,
        "Test Accuracy": acc
    }

# -----------------------------
# 6. Print Results
# -----------------------------

print("\n========== FINAL RESULTS ==========\n")

# Iterate name/summary pairs directly instead of re-indexing `results`, and
# avoid reusing the estimator loop variable for a string key.
for name, summary in results.items():
    print(f"Model: {name}")
    print("Best Parameters:", summary["Best Params"])
    print("Test Accuracy:", round(summary["Test Accuracy"], 4))
    print("----------------------------------")



Training SVM...

Training Decision Tree...

Training Random Forest...

Training XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)




Model: SVM
Best Parameters: {'svm__C': 10, 'svm__gamma': 'scale', 'svm__kernel': 'rbf'}
Test Accuracy: 0.9854
----------------------------------
Model: Decision Tree
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Test Accuracy: 0.9854
----------------------------------
Model: Random Forest
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Test Accuracy: 1.0
----------------------------------
Model: XGBoost
Best Parameters: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 100, 'subsample': 1.0}
Test Accuracy: 1.0
----------------------------------
