# Lab 05: Logistic Regression Analysis - Results Summary

**Department of Electrical and Computer Engineering**  
**Pak-Austria Fachhochschule: Institute of Applied Sciences & Technology**  
**Subject: Machine Learning**  
**Subject Teacher: Dr. Abid Ali**  
**Lab Supervisor: Miss Sana Saleem**

## Executive Summary

This notebook presents a comprehensive summary of the logistic regression analysis results for heart disease prediction. It includes all key metrics, visualizations, and performance analysis in a format suitable for presentation and reporting.


In [None]:
# Import required libraries
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                           confusion_matrix, roc_curve, auc, precision_recall_curve,
                           classification_report, roc_auc_score)
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# NOTE(review): this silences every warning category for the rest of the
# session, including any optimisation/convergence diagnostics raised while
# fitting the model. Consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Set plotting style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*".
# Fall back to the legacy "seaborn" name so the notebook still runs on older
# installations instead of raising on an unknown style.
plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(plot_style)
sns.set_palette("Set2")

print("Libraries imported successfully!")


## 1. Load Data and Train Model


In [None]:
# Read the heart-disease table from disk
df = pd.read_csv('../Data/heart_disease_dataset.csv')

# Separate the label column ('target') from the feature columns
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows as a test set; the split is stratified on y and
# seeded so it is reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the logistic regression classifier on the scaled training data
log_reg = LogisticRegression(random_state=42, max_iter=1000)
log_reg.fit(X_train_scaled, y_train)

# Hard class predictions, plus column 1 of predict_proba as the score that the
# later cells use for ROC / precision-recall analysis
y_pred = log_reg.predict(X_test_scaled)
y_pred_proba = log_reg.predict_proba(X_test_scaled)[:, 1]

# Short run report
print(
    "Model trained successfully!",
    f"Dataset shape: {df.shape}",
    f"Training samples: {X_train.shape[0]}",
    f"Test samples: {X_test.shape[0]}",
    f"Target distribution: {y.value_counts().to_dict()}",
    sep="\n",
)


## 2. Performance Metrics Summary


In [None]:
# Calculate all performance metrics on the held-out test split
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)  # scored on probabilities, not hard labels

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()  # unpacking into four counts requires a 2x2 (binary) matrix
specificity = tn / (tn + fp)
sensitivity = tp / (tp + fn)  # same ratio as `recall`; kept as a separate table row

# Cross-validation
# The scaler is wrapped in a pipeline together with the model so that it is
# refitted inside every fold. Cross-validating on X_train_scaled instead would
# leak each validation fold into the scaling statistics, because that array
# was standardised using all training rows. cross_validate clones the
# pipeline for every fold, so the already-fitted `log_reg` is not modified.
# A single call with several scorers also avoids refitting the same folds
# once per metric.
cv_pipeline = make_pipeline(StandardScaler(), log_reg)
cv_results = cross_validate(
    cv_pipeline,
    X_train,
    y_train,
    cv=5,
    scoring=['accuracy', 'precision', 'recall', 'f1', 'roc_auc'],
)
cv_scores = cv_results['test_accuracy']
cv_precision = cv_results['test_precision']
cv_recall = cv_results['test_recall']
cv_f1 = cv_results['test_f1']
cv_roc_auc = cv_results['test_roc_auc']

# Create performance summary table
# Rows without a cross-validated counterpart (specificity, sensitivity) show "N/A".
test_scores = [accuracy, precision, recall, f1, roc_auc, specificity, sensitivity]
cv_fold_scores = [cv_scores, cv_precision, cv_recall, cv_f1, cv_roc_auc]
rows_without_cv = len(test_scores) - len(cv_fold_scores)

performance_data = {
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC', 'Specificity', 'Sensitivity'],
    'Test Score': [f"{score:.4f}" for score in test_scores],
    'CV Mean': [f"{folds.mean():.4f}" for folds in cv_fold_scores] + ["N/A"] * rows_without_cv,
    'CV Std': [f"±{folds.std():.4f}" for folds in cv_fold_scores] + ["N/A"] * rows_without_cv,
}

performance_df = pd.DataFrame(performance_data)
print("=== MODEL PERFORMANCE SUMMARY ===")
print(performance_df.to_string(index=False))

print("\n=== CONFUSION MATRIX ===")
print(f"True Negatives: {tn}")
print(f"False Positives: {fp}")
print(f"False Negatives: {fn}")
print(f"True Positives: {tp}")

print("\n=== DETAILED CLASSIFICATION REPORT ===")
print(classification_report(y_test, y_pred, target_names=['No Heart Disease', 'Heart Disease']))


## 3. Key Visualizations


In [None]:
# Create comprehensive visualization dashboard: a 3x3 grid of diagnostic panels.
# Every panel is drawn through its own Axes object rather than pyplot's
# implicit "current axes", so the panels cannot interfere with one another.
fig, axes = plt.subplots(3, 3, figsize=(20, 16))
(ax_cm, ax_roc, ax_pr,
 ax_metrics, ax_cv, ax_coef,
 ax_target, ax_proba, ax_summary) = axes.ravel()

# Display names in class-label order: position 0 -> label 0, position 1 -> label 1
class_names = ['No Heart Disease', 'Heart Disease']

# 1. Confusion Matrix
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax_cm)
ax_cm.set_title('Confusion Matrix', fontsize=14, fontweight='bold')
ax_cm.set_xlabel('Predicted')
ax_cm.set_ylabel('Actual')

# 2. ROC Curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
# Stored under its own name so the `roc_auc` value computed in the metrics
# cell is not overwritten as a side effect of plotting.
roc_curve_auc = auc(fpr, tpr)
ax_roc.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_curve_auc:.4f})')
ax_roc.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Classifier')
ax_roc.set_xlim(0.0, 1.0)
ax_roc.set_ylim(0.0, 1.05)
ax_roc.set_xlabel('False Positive Rate')
ax_roc.set_ylabel('True Positive Rate')
ax_roc.set_title('ROC Curve', fontsize=14, fontweight='bold')
ax_roc.legend(loc="lower right")
ax_roc.grid(True, alpha=0.3)

# 3. Precision-Recall Curve
precision_curve, recall_curve, pr_thresholds = precision_recall_curve(y_test, y_pred_proba)
pr_auc = auc(recall_curve, precision_curve)
ax_pr.plot(recall_curve, precision_curve, color='darkorange', lw=2, label=f'PR curve (AUC = {pr_auc:.4f})')
ax_pr.set_xlim(0.0, 1.0)
ax_pr.set_ylim(0.0, 1.05)
ax_pr.set_xlabel('Recall')
ax_pr.set_ylabel('Precision')
ax_pr.set_title('Precision-Recall Curve', fontsize=14, fontweight='bold')
ax_pr.legend(loc="lower left")
ax_pr.grid(True, alpha=0.3)

# 4. Performance Metrics Bar Chart
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC']
scores = [accuracy, precision, recall, f1, roc_auc]
bars = ax_metrics.bar(metrics, scores, color=['skyblue', 'lightgreen', 'lightcoral', 'gold', 'plum'])
ax_metrics.set_title('Performance Metrics', fontsize=14, fontweight='bold')
ax_metrics.set_ylabel('Score')
# Upper limit above 1 leaves room for the value labels of bars close to 1.0
ax_metrics.set_ylim(0, 1.1)
for bar, score in zip(bars, scores):
    ax_metrics.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.01,
                    f'{score:.3f}', ha='center', va='bottom', fontweight='bold')
ax_metrics.tick_params(axis='x', labelrotation=45)

# 5. Cross-Validation Scores (bar = fold mean, error bar = fold standard deviation)
cv_metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
cv_means = [cv_scores.mean(), cv_precision.mean(), cv_recall.mean(), cv_f1.mean()]
cv_stds = [cv_scores.std(), cv_precision.std(), cv_recall.std(), cv_f1.std()]
x_pos = np.arange(len(cv_metrics))
bars = ax_cv.bar(x_pos, cv_means, yerr=cv_stds, capsize=5,
                 color=['skyblue', 'lightgreen', 'lightcoral', 'gold'])
ax_cv.set_title('Cross-Validation Scores', fontsize=14, fontweight='bold')
ax_cv.set_ylabel('Score')
ax_cv.set_xticks(x_pos)
ax_cv.set_xticklabels(cv_metrics, rotation=45)
# Same headroom as the metrics panel so labels above the error bars stay clear of the title
ax_cv.set_ylim(0, 1.1)
for i, (mean, std) in enumerate(zip(cv_means, cv_stds)):
    ax_cv.text(i, mean + std + 0.01, f'{mean:.3f}', ha='center', va='bottom', fontweight='bold')

# 6. Feature Importance
# Coefficients of the fitted model, ordered by magnitude so that the largest
# effect ends up at the top of the horizontal bar chart.
feature_importance = pd.DataFrame({
    'feature': X.columns,
    'coefficient': log_reg.coef_[0],
    'abs_coefficient': np.abs(log_reg.coef_[0])
}).sort_values('abs_coefficient', ascending=True)

# Red marks negative coefficients, green marks non-negative ones
colors = ['red' if coef < 0 else 'green' for coef in feature_importance['coefficient']]
y_pos = np.arange(len(feature_importance))
bars = ax_coef.barh(y_pos, feature_importance['coefficient'], color=colors)
ax_coef.set_title('Feature Importance (Coefficients)', fontsize=14, fontweight='bold')
ax_coef.set_xlabel('Coefficient Value')
ax_coef.set_yticks(y_pos)
ax_coef.set_yticklabels(feature_importance['feature'])
ax_coef.axvline(x=0, color='black', linestyle='--', alpha=0.5)

# 7. Target Distribution
# value_counts() orders classes by frequency, not by label. Sorting by index
# puts the counts in label order (0, 1) so they line up with `class_names`;
# without it the wedge labels are swapped whenever class 1 is the majority.
target_counts = y.value_counts().sort_index()
ax_target.pie(target_counts.values, labels=class_names,
              autopct='%1.1f%%', colors=['lightcoral', 'lightblue'], startangle=90)
ax_target.set_title('Target Distribution', fontsize=14, fontweight='bold')

# 8. Prediction Probability Distribution
# Convert the labels to a plain array so the boolean masks index the
# probability array purely by position.
y_test_values = np.asarray(y_test)
ax_proba.hist(y_pred_proba[y_test_values == 0], bins=20, alpha=0.7, label='No Heart Disease', color='lightcoral')
ax_proba.hist(y_pred_proba[y_test_values == 1], bins=20, alpha=0.7, label='Heart Disease', color='lightblue')
ax_proba.axvline(x=0.5, color='black', linestyle='--', alpha=0.7, label='Decision Threshold')
ax_proba.set_xlabel('Predicted Probability')
ax_proba.set_ylabel('Frequency')
ax_proba.set_title('Prediction Probability Distribution', fontsize=14, fontweight='bold')
ax_proba.legend()

# 9. Model Performance Summary (text-only panel)
ax_summary.axis('off')
summary_text = f"""
MODEL PERFORMANCE SUMMARY

Accuracy: {accuracy:.4f}
Precision: {precision:.4f}
Recall: {recall:.4f}
F1-Score: {f1:.4f}
ROC-AUC: {roc_auc:.4f}

Confusion Matrix:
TN: {tn}    FP: {fp}
FN: {fn}    TP: {tp}

Cross-Validation:
Accuracy: {cv_scores.mean():.4f} ± {cv_scores.std():.4f}
Precision: {cv_precision.mean():.4f} ± {cv_precision.std():.4f}
Recall: {cv_recall.mean():.4f} ± {cv_recall.std():.4f}
F1-Score: {cv_f1.mean():.4f} ± {cv_f1.std():.4f}
"""
ax_summary.text(0.1, 0.9, summary_text, transform=ax_summary.transAxes, fontsize=10,
                verticalalignment='top', fontfamily='monospace',
                bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray", alpha=0.8))

fig.tight_layout()
plt.show()
