In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Import the dataset
data = pd.read_csv("heart.csv")

# Separate the features (X) and target (y)
X = data.drop("target", axis=1)  # Features
y = data["target"]  # Target variable

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Feature scaling using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create a logistic regression model
model = LogisticRegression()

# Define the hyperparameters to tune
hyperparameters = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],  # Inverse of regularization strength
    'penalty': ['l1', 'l2'],  # Regularization type
    'solver': ['liblinear']  # Algorithm to use for optimization
}

# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(model, hyperparameters, cv=5)
grid_search.fit(X_train_scaled, y_train)

# Get the best model with tuned hyperparameters
best_model = grid_search.best_estimator_

# Train the best model on the entire training set
best_model.fit(X_train_scaled, y_train)

# Evaluate the best model using cross-validation
cv_scores = cross_val_score(best_model, X_train_scaled, y_train, cv=5)
mean_cv_accuracy = np.mean(cv_scores)

# Predict on the test set using the best model
predictions = best_model.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, predictions)

# Calculate confusion matrix and classification report
confusion_mat = confusion_matrix(y_test, predictions)
classification_rep = classification_report(y_test, predictions)

# Print the results
print("Best Model:", best_model)
print("Mean Cross-Validation Accuracy:", mean_cv_accuracy)
print("Test Set Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion_mat)
print("Classification Report:\n", classification_rep)


Best Model: LogisticRegression(C=0.1, penalty='l1', solver='liblinear')
Mean Cross-Validation Accuracy: 0.8580256344962228
Test Set Accuracy: 0.8015564202334631
Confusion Matrix:
 [[ 94  38]
 [ 13 112]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.71      0.79       132
           1       0.75      0.90      0.81       125

    accuracy                           0.80       257
   macro avg       0.81      0.80      0.80       257
weighted avg       0.81      0.80      0.80       257



In [None]:
 pwd

'/content'