In [11]:
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [12]:
data = pd.DataFrame(pd.read_csv("/home/kalema/Downloads/heart.csv", header=0))

In [13]:
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


In [14]:
selected_features = ["exang", "chol", "cp", "thalach", "trestbps"]
X = data[selected_features]  
y = data["target"] 

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

default_lr_model = LogisticRegression()
default_lr_model.fit(X_train, y_train)

y_pred_default = default_lr_model.predict(X_test)

# Evaluation metrics
accuracy_default = accuracy_score(y_test, y_pred_default)
conf_matrix_default = confusion_matrix(y_test, y_pred_default)
class_report_default = classification_report(y_test, y_pred_default)

print("Default Logistic Regression Model Metrics:")
print("Accuracy:", accuracy_default)
print("Confusion Matrix:\n", conf_matrix_default)
print("Classification Report:\n", class_report_default)

Default Logistic Regression Model Metrics:
Accuracy: 0.7365853658536585
Confusion Matrix:
 [[69 33]
 [21 82]]
Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.68      0.72       102
           1       0.71      0.80      0.75       103

    accuracy                           0.74       205
   macro avg       0.74      0.74      0.74       205
weighted avg       0.74      0.74      0.74       205



In [26]:
param_grid = param_grid = [{'solver': ['liblinear'],
    'penalty': ['l1', 'l2'],
    'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
    'class_weight': [None, 'balanced']
   }, {
    'solver': ['saga'],
    'penalty': ['elasticnet'],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9],  
    'class_weight': [None, 'balanced']
}]

grid_search = GridSearchCV(LogisticRegression(), param_grid, scoring='recall_macro', cv=200)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_

print("Best hyperparameters:", best_params)




Best hyperparameters: {'C': 0.1, 'class_weight': None, 'penalty': 'l1', 'solver': 'liblinear'}




In [27]:
# Predictions with the best model
best_lr_model = grid_search.best_estimator_
y_pred_tuned = best_lr_model.predict(X_test)

# Evaluation metrics for tuned model
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)
conf_matrix_tuned = confusion_matrix(y_test, y_pred_tuned)
class_report_tuned = classification_report(y_test, y_pred_tuned)

print("Tuned Logistic Regression Model Metrics:")
print("Accuracy:", accuracy_tuned)
print("Confusion Matrix:\n", conf_matrix_tuned)
print("Classification Report:\n", class_report_tuned)


Tuned Logistic Regression Model Metrics:
Accuracy: 0.7902439024390244
Confusion Matrix:
 [[73 29]
 [14 89]]
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.72      0.77       102
           1       0.75      0.86      0.81       103

    accuracy                           0.79       205
   macro avg       0.80      0.79      0.79       205
weighted avg       0.80      0.79      0.79       205



In [28]:
print("Best hyperparameters:", best_params)


Best hyperparameters: {'C': 0.1, 'class_weight': None, 'penalty': 'l1', 'solver': 'liblinear'}
