In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

In [None]:
# 1. Loading and Preprocessing:

Loads the breast cancer dataset.

Splits it into training and test sets.

Applies feature scaling using StandardScaler(), fitted on the training set only.

In [2]:
# Fetch the scikit-learn breast cancer dataset, then pull out the feature
# matrix (X) and the target vector (y) from the returned container.
data = load_breast_cancer()
X = data.data
y = data.target

In [3]:
# Hold out 20% of the samples for testing. The fixed random_state keeps the
# partition identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Standardize the features. The scaler is fitted on the training split only,
# and that same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# 2. Classification Algorithm Implementation:

Implements five classification algorithms: Logistic Regression, Decision Tree, Random Forest, SVM, and k-NN.

Trains each model and evaluates accuracy on the test set.

In [5]:
# 2. Classification Algorithm Implementation
# Map each display name to an unfitted estimator. The tree-based models draw
# random numbers while fitting, so they are seeded (42, the same value used
# for the train/test split) to make their accuracies repeatable across re-runs.
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(),
    "k-NN": KNeighborsClassifier(),
}


In [6]:
# Fit every candidate on the training split and record its accuracy on the
# held-out test split, keyed by the model's display name.
results = {}
for name in models:
    model = models[name]
    y_pred = model.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results.update({name: accuracy})


In [None]:
# 3. Model Comparison:

Compares models based on accuracy.

Displays the results in a sorted DataFrame.

In [7]:
# 3. Model Comparison
# Tabulate the per-model accuracies and rank them from best to worst.
results_df = pd.DataFrame(
    {"Model": list(results.keys()), "Accuracy": list(results.values())}
).sort_values(by="Accuracy", ascending=False)
print(results_df)

                 Model  Accuracy
3                  SVM  0.982456
0  Logistic Regression  0.973684
2        Random Forest  0.964912
4                 k-NN  0.947368
1        Decision Tree  0.929825
