# 5_Logistic_Regression.ipynb

In [1]:
# --- Imports ---
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report,
    roc_auc_score, precision_score, recall_score, f1_score
)

In [9]:
# --- Load dataset ---
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

In [10]:
# --- Normalize ---
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [11]:
# --- PCA (optional) ---
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X_scaled)

In [12]:
# --- Train-Test Split ---
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

In [13]:
# --- Model ---
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

In [14]:
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

In [15]:
# --- Metrics ---
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC: {roc_auc:.4f}")

Confusion Matrix:
 [[42  1]
 [ 1 70]]

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98        43
           1       0.99      0.99      0.99        71

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

Accuracy: 0.9825
Precision: 0.9859
Recall: 0.9859
F1 Score: 0.9859
AUC: 0.9980


In [16]:
# --- k-Fold Cross-validation ---
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(model, X_pca, y, cv=kfold, scoring='accuracy')

print("\nCross-validation Accuracy Scores:", cv_scores)
print("Average CV Accuracy:", np.mean(cv_scores))


Cross-validation Accuracy Scores: [0.95614035 0.94736842 0.96491228 0.95614035 0.99115044]
Average CV Accuracy: 0.9631423691973297
