# Logistic Regression

In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [None]:
# Fetch the breast-cancer dataset bundled with scikit-learn and unpack it
data = load_breast_cancer()
X, y = data.data, data.target  # y holds the binary labels (0 or 1)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: learn the statistics from the training rows only,
# then apply that same transform to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
def sigmoid(z):
    """Numerically stable logistic function, ``1 / (1 + exp(-z))``.

    The naive formula evaluates ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. This version only ever
    exponentiates a non-positive number, so it cannot overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``z``, with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + e^-z)
    # z <  0:  e^z / (1 + e^z)   (same value, rewritten to avoid a huge exponent)
    numerator = np.where(z >= 0, 1.0, decay)
    return numerator / (1.0 + decay)


In [None]:
def compute_loss(y, y_hat):
    """Mean binary cross-entropy between labels ``y`` and probabilities ``y_hat``.

    A small constant is added inside each logarithm so that a predicted
    probability of exactly 0 or 1 does not produce ``log(0)``.
    """
    eps = 1e-9  # keeps the log arguments strictly positive
    positive_term = y * np.log(y_hat + eps)
    negative_term = (1 - y) * np.log(1 - y_hat + eps)
    return -np.mean(positive_term + negative_term)


In [None]:
# Sanity check: dimensions of the full feature matrix as (rows, columns)
X.shape

(569, 30)

In [None]:
def train_logistic_regression(X, y, lr=0.01, epochs=1000):
    """Fit logistic-regression parameters with full-batch gradient descent.

    Parameters
    ----------
    X : 2-D array
        Feature matrix; ``X.shape`` is unpacked as (n_samples, n_features).
    y : array
        Target labels, one per row of ``X``.
    lr : float
        Step size applied to each gradient update.
    epochs : int
        Number of passes over the whole dataset.

    Returns
    -------
    (weights, bias)
        The learned weight vector (one entry per feature) and the intercept.

    Notes
    -----
    Every 100th epoch the loss is printed. It is computed from the
    predictions made *before* that epoch's parameter update.
    """
    n_samples, n_features = X.shape
    inv_n = 1 / n_samples
    weights, bias = np.zeros(n_features), 0

    for epoch in range(epochs):
        # Forward pass: linear score squashed into a probability
        y_hat = sigmoid(np.dot(X, weights) + bias)

        # The prediction error drives both gradients
        residual = y_hat - y
        dw = inv_n * np.dot(X.T, residual)
        db = inv_n * np.sum(residual)

        # Gradient-descent step
        weights -= lr * dw
        bias -= lr * db

        if epoch % 100 == 0:
            loss = compute_loss(y, y_hat)
            print(f"Epoch {epoch}, Loss: {loss:.4f}")

    return weights, bias



In [None]:
# Fit the from-scratch model on the standardized training split
weights, bias = train_logistic_regression(X_train, y_train, lr=0.01, epochs=1000)


Epoch 0, Loss: 0.6931
Epoch 100, Loss: 0.2543
Epoch 200, Loss: 0.1917
Epoch 300, Loss: 0.1633
Epoch 400, Loss: 0.1465
Epoch 500, Loss: 0.1350
Epoch 600, Loss: 0.1267
Epoch 700, Loss: 0.1202
Epoch 800, Loss: 0.1150
Epoch 900, Loss: 0.1107


In [None]:
def predict(X, weights, bias, threshold=0.5):
    """Predict hard class labels (0 or 1) from fitted logistic-regression parameters.

    Parameters
    ----------
    X : 2-D array
        Feature matrix, one row per sample.
    weights : 1-D array
        Learned coefficient for each feature.
    bias : float
        Learned intercept.
    threshold : float, default 0.5
        Probability cutoff: a sample is labelled 1 when its predicted
        probability is greater than or equal to this value. The default
        reproduces the original fixed 0.5 cutoff.

    Returns
    -------
    ndarray of int
        One 0/1 label per row of ``X``.
    """
    probabilities = sigmoid(np.dot(X, weights) + bias)
    return (probabilities >= threshold).astype(int)

# Label the held-out samples with the trained parameters
y_pred = predict(X_test, weights=weights, bias=bias)


In [None]:
# Accuracy = fraction of test samples whose predicted label matches the true one
is_correct = (y_pred == y_test)
accuracy = np.mean(is_correct)
print("Accuracy:", accuracy)


Accuracy: 0.9824561403508771


# Logistic Regression with scikit-learn

In [None]:
from sklearn.datasets import load_breast_cancer
import pandas as pd

# Reload the dataset, this time wrapped in labelled pandas objects
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data=data.target)



In [None]:
from sklearn.model_selection import train_test_split

# 80/20 hold-out split, stratified on the labels and seeded for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so no test-set information leaks into training
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
from sklearn.linear_model import LogisticRegression

# Configure the estimator: a generous iteration cap and a fixed seed
model = LogisticRegression(
    max_iter=10000,
    random_state=42,
)

# Fit on the standardized training features
model.fit(X_train_scaled, y_train)


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Predict labels for the standardized held-out split
y_pred = model.predict(X_test_scaled)

# Report overall accuracy, then the confusion matrix and per-class metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9824561403508771
Confusion Matrix:
 [[41  1]
 [ 1 71]]

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98        42
           1       0.99      0.99      0.99        72

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [None]:
import joblib

# The model and the scaler are saved as a pair: new data must go through the
# same scaler before it is passed to the model. Each path is named once so the
# dump and load calls cannot drift apart.
MODEL_PATH = "logreg_breast_cancer.pkl"
SCALER_PATH = "scaler.pkl"

# Save model
joblib.dump(model, MODEL_PATH)
joblib.dump(scaler, SCALER_PATH)

# Load model later
# NOTE: joblib files are pickle-based, and loading one can execute arbitrary
# code. Only load artifacts that come from a trusted source.
loaded_model = joblib.load(MODEL_PATH)
loaded_scaler = joblib.load(SCALER_PATH)

# Round-trip check: the reloaded pair must reproduce the in-memory predictions
assert np.array_equal(
    loaded_model.predict(loaded_scaler.transform(X_test)),
    model.predict(X_test_scaled),
), "reloaded model/scaler disagree with the in-memory ones"
