In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,precision_score,f1_score,recall_score,roc_auc_score

In [None]:
data = load_breast_cancer()
x = data.data
y = data.target

In [None]:
print(x.shape)

(569, 30)


In [None]:
print(np.unique(y))

[0 1]


In [None]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state = 42)

In [None]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
log_model = LogisticRegression()
log_model.fit(x_train,y_train)

In [None]:
pred_log_model = log_model.predict(x_test)

In [None]:
def logistic_output(y_test,pred_log_model):
  print("Accuracy Score",accuracy_score(y_test,pred_log_model))
  print("Confusion Matrix:",confusion_matrix(y_test,pred_log_model))
  print("Classfication Report:",classification_report(y_test,pred_log_model))
  print("Precision Score",precision_score(y_test,pred_log_model))
  print("F1 Score",f1_score(y_test,pred_log_model))
  print("Recall_score",recall_score(y_test,pred_log_model))


In [None]:
logistic_output(y_test,pred_log_model)

Accuracy Score 0.9736842105263158
Confusion Matrix: [[41  2]
 [ 1 70]]
Classfication Report:               precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Precision Score 0.9722222222222222
F1 Score 0.9790209790209791
Recall_score 0.9859154929577465


--------DECISION TREE------

In [None]:
decision_model = DecisionTreeClassifier(criterion='gini',max_depth = 9,random_state = 42)
decision_model.fit(x_train,y_train)

In [None]:
pred_decision_model = decision_model.predict(x_test)

In [None]:
def decision_output(y_test,pred_decision_model):
    print("Accuracy;",accuracy_score(y_test,pred_decision_model))
    print("Precision:",precision_score(y_test,pred_decision_model))
    print("Recall:",recall_score(y_test,pred_decision_model))
    print("F1 Score:",f1_score(y_test,pred_decision_model))
    print("Roc_auc", roc_auc_score(y_test,pred_decision_model))
    print("Confusion Matrix:\n",confusion_matrix(y_test,pred_decision_model))
    print("classification Report:",classification_report(y_test,pred_decision_model))

In [None]:
decision_output(y_test,pred_decision_model)

Accuracy; 0.9473684210526315
Precision: 0.9577464788732394
Recall: 0.9577464788732394
F1 Score: 0.9577464788732394
Roc_auc 0.9439895185063871
Confusion Matrix:
 [[40  3]
 [ 3 68]]
classification Report:               precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114



-----random forest------

In [None]:
random_model = RandomForestClassifier(n_estimators=100, random_state=42)
random_model.fit(x_train, y_train)

In [None]:
pred_random_model = random_model.predict(x_test)

In [None]:
def random_model(x_test,pred_random_model):
  print(accuracy_score(y_test,pred_random_model))
  print(confusion_matrix(y_test,pred_random_model))
  print(classification_report(y_test,pred_random_model))
  print(precision_score(y_test,pred_random_model))
  print(recall_score(y_test,pred_random_model))
  print(f1_score(y_test,pred_random_model))
  print(roc_auc_score(y_test,pred_random_model))

In [None]:
random_model(x_test,pred_random_model)

0.9649122807017544
[[40  3]
 [ 1 70]]
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

0.958904109589041
0.9859154929577465
0.9722222222222222
0.9580740255486406


Compare F1-score (MAIN metric)

Logistic Regression → 0.9790  (highest)

Random Forest → 0.9581

Decision Tree → 0.9577

 Winner: Logistic Regression

Check Recall (important for false negatives)

Logistic Regression → 0.9859

Random Forest → 0.9859

Decision Tree → 0.9577

Logistic Regression is selected as the best model because it achieves the highest F1-score, high recall, and excellent accuracy, while remaining simpler and more interpretable than tree-based models.