# Hyperparameter Tuning
This notebook optimizes the performance of the models using hyperparameter tuning techniques (GridSearchCV and RandomizedSearchCV) and compares the tuned models against their baselines to find the best-performing one.

## 1. Importing necessary libraries

In [None]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

## 2. Loading the feature-selected dataset

In [None]:
# Read the dataset from the project's data directory.
df = pd.read_csv("../data/heart_disease.csv")

# The label lives in the "target" column; every other column is a feature.
y = df["target"]
X = df.drop(columns=["target"])

## 3. Splitting the data into training and testing

In [None]:
# Hold out 20% of the rows for testing. The split is stratified on y and uses a
# fixed seed so that it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

## 4. Calculating the baseline performance for every model

In [None]:
# Untuned reference models, each created with the same fixed seed.
lr = LogisticRegression(random_state=42)
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42)
svm = SVC(probability=True, random_state=42)

# Display label -> estimator, in the order the results should be reported.
_baseline_models = {
    "Logistic Regression": lr,
    "Decision Tree": dt,
    "Random Forest": rf,
    "SVM": svm,
}

# Fit every model on the training split and record its accuracy on the test
# split. fit() returns the estimator itself, which is what allows the chaining.
baseline = {
    label: accuracy_score(y_test, estimator.fit(X_train, y_train).predict(X_test))
    for label, estimator in _baseline_models.items()
}

## 5. Optimizing model hyperparameters

### 5.1 Logistic Regression

In [None]:
# Candidate settings for logistic regression: every C / solver combination
# below is evaluated with 5-fold cross-validation on the training split.
lr_param_grid = {
    "C": [0.01, 0.1, 1, 10, 100],
    "solver": ["liblinear", "saga"],
}

grid_lr = GridSearchCV(
    estimator=LogisticRegression(random_state=42),
    param_grid=lr_param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)
grid_lr.fit(X_train, y_train)

print("Best Logistic Regression:", grid_lr.best_params_)


### 5.2 Decision Tree

In [None]:
# Search space for the decision tree: depth limit and minimum sample counts
# for splitting a node / forming a leaf.
dt_param_distributions = {
    "max_depth": [None, 5, 10, 20],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
}

# Randomised search over the space above, scored by 5-fold CV accuracy;
# the seed fixes which candidates are drawn.
rand_dt = RandomizedSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_distributions=dt_param_distributions,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    random_state=42,
)
rand_dt.fit(X_train, y_train)

print("Best Decision Tree:", rand_dt.best_params_)


### 5.3 Random Forest

In [None]:
# Search space for the random forest: ensemble size, tree shape constraints,
# and whether each tree is trained on a bootstrap sample.
rf_param_distributions = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 10, 20, 30],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "bootstrap": [True, False],
}

# Randomised search over the space above, scored by 5-fold CV accuracy;
# the seed fixes which candidates are drawn.
rand_rf = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=rf_param_distributions,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    random_state=42,
)
rand_rf.fit(X_train, y_train)

print("Best Random Forest:", rand_rf.best_params_)


### 5.4 SVM

In [None]:
# Candidate settings for the SVM: every C / kernel / gamma combination below
# is evaluated with 5-fold cross-validation on the training split.
svm_param_grid = {
    "C": [0.1, 1, 10],
    "kernel": ["linear", "rbf", "poly"],
    "gamma": ["scale", "auto"],
}

grid_svm = GridSearchCV(
    estimator=SVC(probability=True, random_state=42),
    param_grid=svm_param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)
grid_svm.fit(X_train, y_train)

print("Best SVM:", grid_svm.best_params_)


## 6. Comparing optimized models with baseline performance

In [None]:
# Display label -> fitted search object, in the same order as the baseline table.
_tuned_searches = {
    "Logistic Regression": grid_lr,
    "Decision Tree": rand_dt,
    "Random Forest": rand_rf,
    "SVM": grid_svm,
}

# Test-split accuracy of the best estimator found by each search.
optimized = {
    label: accuracy_score(y_test, search.best_estimator_.predict(X_test))
    for label, search in _tuned_searches.items()
}

# One row per model, baseline and tuned accuracy side by side.
comparison = pd.DataFrame(
    {
        "Baseline Accuracy": baseline,
        "Tuned Accuracy": optimized,
    }
)

print("\t\t=== Model Comparison ===")
print(comparison)


According to the comparison above, we can conclude that Logistic Regression is the best model.

## 7. Model Exporting

In [None]:
# Train the deployable model on the full dataset and persist it to disk.
MODEL_PATH = Path("../models/final_model.pkl")

# Build pipeline.
# The tuned estimator is cloned before being placed in the pipeline: Pipeline
# does not copy its steps, so passing grid_lr.best_estimator_ directly would
# refit that very object in place on scaled, full-dataset features and silently
# invalidate any later use of grid_lr (e.g. re-running the comparison cell).
# NOTE(review): the hyperparameters were selected on unscaled features, while
# this pipeline standardises them first -- consider tuning the scaler + model
# pipeline as a whole so the exported model matches what was evaluated.
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", clone(grid_lr.best_estimator_)),
])

pipeline.fit(X, y)  # Fit on full dataset (train + test together)

# Create the output directory if needed so the dump does not fail on a fresh checkout.
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipeline, str(MODEL_PATH))