In [1]:
from sklearn.datasets import load_breast_cancer
import pandas as pd

# Pull scikit-learn's bundled breast cancer dataset and keep only its first
# five feature columns; the loader's target vector becomes the last column.
data = load_breast_cancer()
df = pd.DataFrame(
    data=data.data[:, :5],
    columns=data.feature_names[:5],
).assign(diagnosis=data.target)

In [2]:
# Import libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

In [3]:
# Separate features from labels: every column except the last is a feature,
# and the last column holds the class label.
features = df.iloc[:, :-1]
labels = df.iloc[:, -1]
X = features.to_numpy()
y = labels.to_numpy()

In [4]:
# Preprocessing
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [5]:
# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Initialize models
# Each classifier uses scikit-learn's default hyperparameters. The random
# forest is given a fixed seed because its tree construction is randomized;
# without it the reported scores change on every run of the notebook.
models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(random_state=42)
}

In [7]:
# Fit every candidate on the training split, predict the held-out split, and
# record accuracy plus the confusion matrix under the model's display name.
results = {}
for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    results[name] = dict(
        accuracy=accuracy_score(y_test, y_pred),
        confusion_matrix=confusion_matrix(y_test, y_pred),
    )

In [8]:
# Report each classifier's held-out accuracy and confusion matrix.
# The loop variable is `name` rather than `model` so this cell does not
# rebind `model` (the fitted estimator left by the training loop) to a string.
for name, metrics in results.items():
    print(f"=== {name} ===")
    print(f"Accuracy: {metrics['accuracy']:.3f}")
    print("Confusion Matrix:")
    print(metrics['confusion_matrix'])
    print("\n")

=== Logistic Regression ===
Accuracy: 0.939
Confusion Matrix:
[[39  4]
 [ 3 68]]


=== SVM ===
Accuracy: 0.947
Confusion Matrix:
[[40  3]
 [ 3 68]]


=== Random Forest ===
Accuracy: 0.956
Confusion Matrix:
[[42  1]
 [ 4 67]]


