In [18]:
import numpy as numpy
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [19]:
data = load_breast_cancer()
x = data.data
y = data.target

In [20]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [21]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.fit_transform(x_test)

In [22]:
# Dictionary of models
# Each model is initialized with its default parameters and can be trained and evaluated using cross-validation and accuracy score

models = {
    "Logistic Regression": LogisticRegression(penalty=None, solver='lbfgs', max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42)
}

In [23]:
results = {}

In [24]:
for name, model in models.items():
    model.fit(x_train_scaled, y_train)
    train_acc = accuracy_score(y_train, model.predict(x_train_scaled))
    test_acc = accuracy_score(y_test, model.predict(x_test_scaled))
    cv_score = cross_val_score(model, x_train_scaled, y_train, cv=5).mean()

    results[name] = {
        "Train Accuracy": train_acc,
        "Test Accuracy": test_acc,
        "Cross-Validation Score": cv_score
    }

    print(f"{name}")
    print(f"Train Accuracy: {train_acc:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Cross-Validation Score: {cv_score:.4f}")
    print()



Logistic Regression
Train Accuracy: 1.0000
Test Accuracy: 0.9035
Cross-Validation Score: 0.9582

Decision Tree
Train Accuracy: 1.0000
Test Accuracy: 0.9211
Cross-Validation Score: 0.9165

Random Forest
Train Accuracy: 1.0000
Test Accuracy: 0.9649
Cross-Validation Score: 0.9582



In [26]:
models = {
    "Logistic Regression": LogisticRegression(penalty='l2', solver='lbfgs', max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42)
}

regularized_result = {}

for name, model in models.items():
    model.fit(x_train_scaled, y_train)
    train_acc = accuracy_score(y_train, model.predict(x_train_scaled))
    test_acc = accuracy_score(y_test, model.predict(x_test_scaled))
    cv_score = cross_val_score(model, x_train_scaled, y_train, cv=5).mean()

    regularized_result[name] = {
        "Train Accuracy": train_acc,
        "Test Accuracy": test_acc,
        "Cross-Validation Score": cv_score
    }

    print(f"{name}")
    print(f"Train Accuracy: {train_acc:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Cross-Validation Score: {cv_score:.4f}")
    print()

Logistic Regression
Train Accuracy: 0.9868
Test Accuracy: 0.9825
Cross-Validation Score: 0.9736

Decision Tree
Train Accuracy: 1.0000
Test Accuracy: 0.9211
Cross-Validation Score: 0.9165

Random Forest
Train Accuracy: 1.0000
Test Accuracy: 0.9649
Cross-Validation Score: 0.9582

