In [None]:
# import libraries
import pandas as pd
import numpy as np

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
    roc_auc_score,
    roc_curve
)

In [None]:
# Load the breast-cancer dataset bundled with scikit-learn and separate the
# feature matrix from the class labels.
# NOTE(review): scikit-learn documents target_names as ['malignant', 'benign'],
# which would make label 1 "benign" — confirm with data.target_names before
# interpreting precision/recall for the positive class.
data = load_breast_cancer()
X, y = data.data, data.target

In [None]:
# Hold out 20% of the samples as a test set; the fixed seed makes the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Scale the features. The scaler is fitted on the training split only and then
# applied to both splits, so no test-set statistics are used during fitting.
# Caution: X_train / X_test are overwritten here, so re-running this cell
# without first re-running the split cell scales already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Decision tree classifier, seeded so repeated runs build the same tree.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt  # display the fitted estimator as the cell output

In [None]:
# Random forest classifier: 200 trees, seeded for reproducibility.
rf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)
rf  # display the fitted estimator as the cell output

In [None]:
# Logistic regression, allowing the solver up to 1000 iterations.
lg = LogisticRegression(max_iter=1000).fit(X_train, y_train)
lg  # display the fitted estimator as the cell output

In [None]:
# k-nearest-neighbours classifier using the 5 closest training samples.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn  # display the fitted estimator as the cell output

In [None]:
# Predictions: hard class labels from each fitted model on the held-out set.
dt_pred, rf_pred, lg_pred, knn_pred = (
    fitted.predict(X_test) for fitted in (dt, rf, lg, knn)
)

In [None]:
# Probabilities: keep only column 1 of each model's predict_proba output
# (presumably the score for label 1 — verify against each model's classes_).
dt_prob, rf_prob, lg_prob, knn_prob = (
    fitted.predict_proba(X_test)[:, 1] for fitted in (dt, rf, lg, knn)
)

In [None]:

def evaluate_model(name, y_test, y_pred, y_prob):
    """Print a classification metrics report for one model.

    Args:
        name: Label printed in the report header.
        y_test: Ground-truth labels.
        y_pred: Predicted class labels, compared against ``y_test``.
        y_prob: Scores handed to ``roc_auc_score`` together with ``y_test``.

    Returns:
        None. Everything is written to stdout.
    """
    print(f"\n{name} Evaluation:")

    # Metrics that share the (y_test, y_pred) call shape, in print order.
    label_based_metrics = (
        ("Accuracy:", accuracy_score),
        ('Precision:', precision_score),
        ('Recall   :', recall_score),
        ('F1 Score :', f1_score),
    )
    for label, metric in label_based_metrics:
        print(label, metric(y_test, y_pred))

    # The ROC score is the only metric here computed from y_prob instead of y_pred.
    print('ROC Score:', roc_auc_score(y_test, y_prob))

    print('\nConfusion Matrix:\n', confusion_matrix(y_test, y_pred))
    print('\nClassification Report:\n', classification_report(y_test, y_pred))

In [None]:
# Evaluate every model against the same held-out labels, in a fixed order.
for model_label, model_pred, model_prob in (
    ('Decision Tree', dt_pred, dt_prob),
    ('Random Forest', rf_pred, rf_prob),
    ('Logistic Regression', lg_pred, lg_prob),
    ('KNN', knn_pred, knn_prob),
):
    evaluate_model(model_label, y_test, model_pred, model_prob)



Decision Tree Evaluation:
Accuracy: 0.9473684210526315
Precision: 0.9577464788732394
Recall   : 0.9577464788732394
F1 Score : 0.9577464788732394
ROC Score: 0.9439895185063871

Confusion Matrix:
 [[40  3]
 [ 3 68]]

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114


Random Forest Evaluation:
Accuracy: 0.9649122807017544
Precision: 0.958904109589041
Recall   : 0.9859154929577465
F1 Score : 0.9722222222222222
ROC Score: 0.9959056665574845

Confusion Matrix:
 [[40  3]
 [ 1 70]]

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                  