In [None]:
Create models: Logistic Regression

In [2]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Logistic Regression baseline: load the data, impute and scale the features,
# fit the classifier, and report accuracy plus a labelled confusion matrix.

# Load and preprocess data
breast_cancer = fetch_ucirepo(id=15)
X = breast_cancer.data.features
# Recode the 'Class' target from {2, 4} to {0, 1}; the confusion-matrix labels
# below treat 0 as benign and 1 as malignant.
y = breast_cancer.data.targets['Class'].replace({2: 0, 4: 1})

# Handle missing values (entries with '?'): turn them into NaN, make every
# column numeric, then fill the gaps with the column mean.
X = X.replace('?', np.nan).apply(pd.to_numeric)
# NOTE(review): these means are computed over the full dataset, i.e. before the
# train/test split, so test rows influence the fill values. Consider imputing
# with statistics from X_train only if X is not needed elsewhere in imputed form.
X = X.fillna(X.mean())

# Split dataset: 75% train / 25% test, with a fixed seed for reproducibility.
# NOTE(review): the split is not stratified on y — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Feature scaling: fit the scaler on the training split only and reuse its
# statistics for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train Logistic Regression (library default hyperparameters)
logreg = LogisticRegression()
logreg.fit(X_train_scaled, y_train)

# Make predictions on the held-out split
y_pred = logreg.predict(X_test_scaled)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Create labeled confusion matrix with error types.
# Rows are the true class and columns the predicted class, in label order 0, 1.
conf_matrix_df = pd.DataFrame(
    cm,
    index=['Actual Benign (TN/FP)', 'Actual Malignant (FN/TP)'],
    columns=['Predicted Benign', 'Predicted Malignant']
)

# Display results.
# The label previously read "Linear Regression"; the fitted model is
# LogisticRegression, so the metric is now reported under the correct name.
print(f"Logistic Regression Accuracy: {accuracy:.4f}\n")
print("Confusion Matrix with Error Types:")
print(conf_matrix_df)

Linear Regression Accuracy: 0.9657

Confusion Matrix with Error Types:
                          Predicted Benign  Predicted Malignant
Actual Benign (TN/FP)                  117                    1
Actual Malignant (FN/TP)                 5                   52
