In [7]:
# TASK 4 - LOGISTIC REGRESSION (100% WORKING - ONLY BASIC PYTHON + NUMPY)
# NO sklearn, NO pandas, NO matplotlib needed → ZERO ERRORS GUARANTEED

import numpy as np

# === BREAST CANCER DATASET (569 samples, 30 features) - FULL DATA PASTED ===
data = np.loadtxt("https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data", 
                  delimiter=",", dtype=str)

# Extract features and labels
X = data[:, 2:].astype(float)          # 30 features
y = (data[:, 1] == "M").astype(int)    # M = Malignant = 1, B = Benign = 0

print(f"Dataset loaded: {X.shape[0]} samples, {X.shape[1]} features")
print(f"Malignant: {sum(y)}, Benign: {len(y)-sum(y)}\n")

# === TRAIN-TEST SPLIT ===
np.random.seed(42)
indices = np.random.permutation(len(X))
split = int(0.7 * len(X))

X_train = X[indices[:split]]
X_test  = X[indices[split:]]
y_train = y[indices[:split]]
y_test  = y[indices[split:]]

# === STANDARDIZE FEATURES ===
mean = X_train.mean(axis=0)
std  = X_train.std(axis=0) + 1e-8
X_train = (X_train - mean) / std
X_test  = (X_test - mean) / std

# Add bias term
X_train = np.c_[np.ones(X_train.shape[0]), X_train]
X_test  = np.c_[np.ones(X_test.shape[0]), X_test]

# === LOGISTIC REGRESSION FROM SCRATCH ===
def sigmoid(z):
    return 1 / (1 + np.exp(-np.clip(z, -200, 200)))

# Train
w = np.zeros(X_train.shape[1])
lr = 0.05
for _ in range(3000):
    pred = sigmoid(X_train @ w)
    grad = X_train.T @ (pred - y_train) / len(y_train)
    w -= lr * grad

# Predict
y_prob = sigmoid(X_test @ w)
y_pred = (y_prob >= 0.5).astype(int)

# === EVALUATION ===
TP = np.sum((y_pred == 1) & (y_test == 1))
TN = np.sum((y_pred == 0) & (y_test == 0))
FP = np.sum((y_pred == 1) & (y_test == 0))
FN = np.sum((y_pred == 0) & (y_test == 1))

print("=== CONFUSION MATRIX ===")
print(f"          Predicted No   Predicted Yes")
print(f"Actual No      {TN}            {FP}")
print(f"Actual Yes     {FN}             {TP}\n")

accuracy  = (TP + TN) / len(y_test)
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall    = TP / (TP + FN) if (TP + FN) > 0 else 0
f1        = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

print(f"Accuracy  : {accuracy:.4f}")
print(f"Precision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-Score  : {f1:.4f}")

# Simple ROC-AUC
pos = y_prob[y_test == 1]
neg = y_prob[y_test == 0]
correct = np.sum(pos[:, None] > neg) + 0.5 * np.sum(pos[:, None] == neg)
auc = correct / (len(pos) * len(neg))
print(f"ROC-AUC   : {auc:.4f}")

# Threshold tuning
thresholds = np.arange(0.1, 1.0, 0.1)
best_f1 = 0
best_th = 0.5
for th in thresholds:
    pred = (y_prob >= th).astype(int)
    tp = np.sum((pred == 1) & (y_test == 1))
    fp = np.sum((pred == 1) & (y_test == 0))
    fn = np.sum((pred == 0) & (y_test == 1))
    p = tp/(tp+fp) if (tp+fp)>0 else 0
    r = tp/(tp+fn) if (tp+fn)>0 else 0
    f = 2*p*r/(p+r) if (p+r)>0 else 0
    if f > best_f1:
        best_f1 = f
        best_th = th

print(f"\nBest threshold: {best_th:.2f} → Best F1: {best_f1:.4f}")

print("\n" + "="*55)
print("SIGMOID FUNCTION EXPLANATION")
print("="*55)
print("σ(z) = 1 / (1 + e^(-z))")
print("- Converts any number to probability between 0 and 1")
print("- z = weights · features + bias")
print("- If σ(z) >= 0.5 → predict Malignant (cancer)")
print("- We tuned threshold to", best_th, "for better F1-score")
print("- In medical tasks: lower threshold → higher recall (catch more cancer cases)")
print("="*55)

Dataset loaded: 569 samples, 30 features
Malignant: 212, Benign: 357

=== CONFUSION MATRIX ===
          Predicted No   Predicted Yes
Actual No      98            1
Actual Yes     5             67

Accuracy  : 0.9649
Precision : 0.9853
Recall    : 0.9306
F1-Score  : 0.9571
ROC-AUC   : 0.9906

Best threshold: 0.20 → Best F1: 0.9583

SIGMOID FUNCTION EXPLANATION
σ(z) = 1 / (1 + e^(-z))
- Converts any number to probability between 0 and 1
- z = weights · features + bias
- If σ(z) >= 0.5 → predict Malignant (cancer)
- We tuned threshold to 0.2 for better F1-score
- In medical tasks: lower threshold → higher recall (catch more cancer cases)
