# Hyperparameter Tuning

In [2]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the cleaned feature table (X) and target table (y) from 01_data_preprocessing.
# NOTE(review): the two file names use different prefixes ("cleaned_" vs "clean_") —
# confirm both match what the preprocessing notebook actually writes.
X = pd.read_csv(filepath_or_buffer='../data/cleaned_X.csv')
y = pd.read_csv(filepath_or_buffer='../data/clean_y.csv')

In [4]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler learns its statistics from the training
# rows only and the same fitted scaler is then applied to both partitions, so no
# test-set information is used when fitting it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:


# Flatten the target frame's values into the 1-D label array the estimators are given.
y_train_flat = y_train.values.ravel()

# Logistic regression with default settings, fitted on the scaled training data.
# NOTE(review): GridSearchCV fits clones of the estimator it is given, so this
# standalone fit presumably has no effect on the search below — confirm whether a
# later cell relies on lr_model itself being fitted before removing it.
lr_model = LogisticRegression()
lr_model.fit(X_train_scaled, y_train_flat)

# Candidate settings: four values of C crossed with two solvers (8 combinations).
param_grid_lr = dict(
    C=[0.01, 0.1, 1, 10],
    solver=['liblinear', 'lbfgs'],
)

# 5-fold cross-validated search over the grid, ranked by accuracy.
grid_lr = GridSearchCV(
    estimator=lr_model,
    param_grid=param_grid_lr,
    scoring='accuracy',
    cv=5,
)
grid_lr.fit(X_train_scaled, y_train_flat)
best_lr = grid_lr.best_estimator_

# Score the best estimator found by the search on the held-out test rows.
y_pred_lr = best_lr.predict(X_test_scaled)

# Report the winning parameters together with test-set accuracy and per-class metrics.
print("Logistic Regression Best Params:", grid_lr.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print(classification_report(y_test, y_pred_lr))


Logistic Regression Best Params: {'C': 1, 'solver': 'liblinear'}
Accuracy: 0.8833333333333333
              precision    recall  f1-score   support

           0       0.91      0.89      0.90        36
           1       0.84      0.88      0.86        24

    accuracy                           0.88        60
   macro avg       0.88      0.88      0.88        60
weighted avg       0.88      0.88      0.88        60

