In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## Load Heart Disease Dataset
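- The dataset is the processed Cleveland heart-disease data, loaded as a CSV file from the UCI Machine Learning Repository.
- Target variable: the raw diagnosis label is binarized to 0 (no heart disease) or 1 (heart disease); any raw value greater than 0 counts as heart disease.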

In [None]:
# Load the processed Cleveland heart-disease data (adjust path if needed)
# Alternatively, use: from sklearn.datasets import fetch_openml; data = fetch_openml(name='heart-disease')
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data'
columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']

# The file has no header row, so the column names are supplied explicitly.
# Missing values are marked as '?'. Declaring that marker at parse time lets every
# column be read as a number; replacing '?' after parsing would leave any column
# that contained it stored as strings, even once the incomplete rows are dropped.
df = pd.read_csv(url, names=columns, na_values='?')

# Discard the rows that have at least one missing value
df = df.dropna()

# Convert target to binary: >0 indicates heart disease
df['target'] = (df['target'] > 0).astype(int)
df.head()

In [None]:
# Print a summary of the cleaned frame: column names, non-null counts and dtypes
df.info()

In [None]:
# Prepare features and target
X = df.drop('target', axis=1)
y = df['target']

# Split first, so that the scaler below never sees the held-out rows
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Standardize features with statistics learned from the training rows only.
# Fitting the scaler on the full data set would leak the test rows' means and
# variances into training and make the test scores optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix on the same train-fitted scale, in the original row order
X_scaled = scaler.transform(X)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

## Logistic Regression without Regularization

In [None]:
# Fit a logistic regression whose penalty is effectively switched off
# (C is the inverse regularization strength, so C=1e9 approximates no regularization)
lr = LogisticRegression(C=1e9, max_iter=1000).fit(X_train, y_train)

# Class predictions for the training rows and for the held-out rows
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)

# Share of correctly classified rows in each split
test_accuracy = accuracy_score(y_test, y_test_pred)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Report everything in one call; the pieces are joined by newlines
print(
    "Logistic Regression without Regularization:",
    f"Train Accuracy: {train_accuracy:.4f}",
    f"Test Accuracy: {test_accuracy:.4f}",
    "\nClassification Report (Test):",
    classification_report(y_test, y_test_pred),
    sep="\n",
)

## Logistic Regression with Polynomial Features (Inducing Overfitting)

In [None]:
# Create polynomial features (all terms up to degree 3) from the already-split,
# standardized features. Transforming X_train / X_test directly, instead of
# expanding the full matrix and splitting it a second time, keeps every row
# aligned with the existing y_train / y_test without depending on a repeated
# random_state, and avoids silently re-assigning the targets.
poly = PolynomialFeatures(degree=3)
X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

# Train Logistic Regression on polynomial features (C=1e9 approximates no regularization)
lr_poly = LogisticRegression(C=1e9, max_iter=1000)
lr_poly.fit(X_poly_train, y_train)

# Predict
y_poly_train_pred = lr_poly.predict(X_poly_train)
y_poly_test_pred = lr_poly.predict(X_poly_test)

# Evaluate
poly_train_accuracy = accuracy_score(y_train, y_poly_train_pred)
poly_test_accuracy = accuracy_score(y_test, y_poly_test_pred)

print("\nLogistic Regression with Polynomial Features (Degree=3):")
print(f"Train Accuracy: {poly_train_accuracy:.4f}")
print(f"Test Accuracy: {poly_test_accuracy:.4f}")
print("\nClassification Report (Test):")
print(classification_report(y_test, y_poly_test_pred))

## Logistic Regression with L2 Regularization (on the Polynomial Features)

In [None]:
# Fit an L2-regularized logistic regression on the polynomial features
lr_reg = LogisticRegression(C=1.0, max_iter=1000).fit(X_poly_train, y_train)

# Class predictions for the training rows and for the held-out rows
y_reg_train_pred, y_reg_test_pred = lr_reg.predict(X_poly_train), lr_reg.predict(X_poly_test)

# Share of correctly classified rows in each split
reg_test_accuracy = accuracy_score(y_test, y_reg_test_pred)
reg_train_accuracy = accuracy_score(y_train, y_reg_train_pred)

# Report everything in one call; the pieces are joined by newlines
print(
    "\nLogistic Regression with Regularization (C=1.0):",
    f"Train Accuracy: {reg_train_accuracy:.4f}",
    f"Test Accuracy: {reg_test_accuracy:.4f}",
    "\nClassification Report (Test):",
    classification_report(y_test, y_reg_test_pred),
    sep="\n",
)

## Visualization of Predictions

In [None]:
# Plot confusion matrices for comparison: one panel per model, all on the same test labels
class_labels = [0, 1]
class_names = ['No disease', 'Disease']

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
models = [('Without Regularization', y_test_pred), ('Polynomial (Degree=3)', y_poly_test_pred), ('With Regularization', y_reg_test_pred)]

for ax, (title, y_pred) in zip(axes, models):
    # Pin the label order so every matrix is 2x2 with the same row/column meaning,
    # even if one of the classes does not occur in the labels or predictions
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)
    # Name the classes on the ticks so each panel can be read without the code
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=class_names, yticklabels=class_names)
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

fig.tight_layout()
# Save before showing, so the file is written from the finished figure
fig.savefig('heart_disease_confusion_matrices.png')
plt.show()