In [52]:
import numpy as np
import pandas as pd

In [53]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [54]:
# Fetch the breast-cancer dataset bundled with scikit-learn and split the
# returned bunch into the feature matrix (X) and the label vector (y).
data = load_breast_cancer()
X, y = data.data, data.target


In [55]:
# Hold out 20% of the rows for testing; the fixed seed keeps the partition
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [56]:
# Standardize features. The scaler's statistics are learned from the training
# split only and then applied unchanged to both splits, so no information from
# the test rows influences the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [57]:
from sklearn.linear_model import LogisticRegression

In [58]:
# Baseline logistic regression using the library's default hyperparameters.
model = LogisticRegression()

In [59]:
# Fit the baseline on the standardized training split.
model.fit(X_train, y_train)

In [60]:
# Predict labels for the held-out test split with the estimator currently
# bound to `model`.
y_pred = model.predict(X_test)

In [61]:
# Report the four test-set metrics for the baseline logistic regression.
print("\n🔹 Logistic Regression")
for caption, scorer in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(caption, scorer(y_test, y_pred))


🔹 Logistic Regression
Accuracy : 0.9736842105263158
Precision: 0.9722222222222222
Recall   : 0.9859154929577465
F1 Score : 0.9790209790209791


In [62]:
from sklearn.ensemble import RandomForestClassifier

In [63]:
# Baseline random forest with default hyperparameters.
# The forest is randomized, so it is seeded with the same value used for the
# train/test split; without a seed the metrics reported below would change
# between kernel restarts.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [64]:
# Predict on the test split with the estimator currently bound to `model`
# (the random forest when cells run in order); replaces the previous y_pred.
y_pred = model.predict(X_test)

In [65]:
# Report the four test-set metrics for the baseline random forest.
print("\n🔹 Random Forest")
for caption, scorer in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(caption, scorer(y_test, y_pred))


🔹 Random Forest
Accuracy : 0.9649122807017544
Precision: 0.958904109589041
Recall   : 0.9859154929577465
F1 Score : 0.9722222222222222


In [66]:
from sklearn.svm import SVC

In [67]:
# Baseline support-vector classifier with default hyperparameters, fitted on
# the standardized training split. Rebinds `model`.
model = SVC()
model.fit(X_train, y_train)

In [68]:
# Predict on the test split with the estimator currently bound to `model`
# (the SVC when cells run in order); replaces the previous y_pred.
y_pred = model.predict(X_test)

In [69]:
# Report the four test-set metrics for the baseline SVM.
print("\n🔹 SVM")
for caption, scorer in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(caption, scorer(y_test, y_pred))


🔹 SVM
Accuracy : 0.9824561403508771
Precision: 0.9726027397260274
Recall   : 1.0
F1 Score : 0.9861111111111112


In [70]:
from sklearn.model_selection import GridSearchCV

In [71]:
# Search space for the logistic-regression grid search (4 x 1 x 2 = 8 combos).
param_grid = dict(
    C=[0.01, 0.1, 1, 10],              # candidate values for C
    penalty=['l2'],                    # only the L2 penalty is searched
    solver=['lbfgs', 'liblinear'],     # two solvers tried with the L2 penalty
)

In [72]:
# Unfitted logistic regression that serves as the template estimator for the
# hyperparameter search.
log_reg = LogisticRegression()

In [73]:
# Exhaustive search over `param_grid`, scored by F1 with 5-fold
# cross-validation on the training split, using every available core.
grid_search = GridSearchCV(
    estimator=log_reg,
    param_grid=param_grid,
    scoring='f1',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [74]:
# Show the winning hyperparameter combination: heading first, dict on the next line.
print(" Best Parameters for Logistic Regression:", grid_search.best_params_, sep="\n")

 Best Parameters for Logistic Regression:
{'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}


In [75]:
# Take the estimator the grid search selected as best and predict on the
# test split. Note that this rebinds `y_pred`.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

In [76]:
# Report the four test-set metrics for the tuned logistic regression.
print("\n Tuned Logistic Regression Performance")
for caption, scorer in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(caption, scorer(y_test, y_pred))


 Tuned Logistic Regression Performance
Accuracy : 0.9912280701754386
Precision: 0.9861111111111112
Recall   : 1.0
F1 Score : 0.993006993006993


In [78]:
# Search space for the random-forest grid search (3 x 3 x 3 = 27 combos).
param_grid_rf = dict(
    n_estimators=[50, 100, 150],       # how many trees to grow
    max_depth=[None, 5, 10],           # depth cap per tree (None = no cap given)
    min_samples_split=[2, 4, 6],       # samples a node needs before it may split
)


In [79]:
# Seeded random forest used as the template estimator for the grid search;
# the fixed seed keeps each candidate's fit repeatable.
rf = RandomForestClassifier(random_state=42)

In [80]:
# Exhaustive search over `param_grid_rf`, scored by F1 with 5-fold
# cross-validation on the training split, using every available core.
grid_search_rf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid_rf,
    scoring='f1',
    cv=5,
    n_jobs=-1,
)
grid_search_rf.fit(X_train, y_train)

In [81]:
# Show the winning hyperparameter combination: heading first, dict on the next line.
print(" Best Parameters for Random Forest:", grid_search_rf.best_params_, sep="\n")

 Best Parameters for Random Forest:
{'max_depth': None, 'min_samples_split': 2, 'n_estimators': 150}


In [82]:
# Take the random forest the grid search selected as best and predict on the
# test split.
best_rf = grid_search_rf.best_estimator_
y_pred_rf = best_rf.predict(X_test)


In [83]:
# Report the four test-set metrics for the tuned random forest.
print("\n Tuned Random Forest Performance")
for caption, scorer in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(caption, scorer(y_test, y_pred_rf))


 Tuned Random Forest Performance
Accuracy : 0.9649122807017544
Precision: 0.958904109589041
Recall   : 0.9859154929577465
F1 Score : 0.9722222222222222


In [84]:
# Candidate values for the randomized SVM search (5 x 2 x 2 = 20 combos).
param_dist_svm = dict(
    C=np.logspace(-2, 2, num=5),       # five log-spaced values from 1e-2 to 1e2
    gamma=['scale', 'auto'],           # kernel-coefficient setting
    kernel=['linear', 'rbf'],          # linear vs. Gaussian RBF kernel
)

In [85]:
# RandomizedSearchCV is not imported anywhere else in the notebook, so it is
# imported here; otherwise this cell raises NameError on a fresh kernel.
from sklearn.model_selection import RandomizedSearchCV

# Unfitted SVC used as the template estimator for the search.
svm = SVC()

# Randomized hyperparameter search: tries 10 parameter combinations drawn from
# `param_dist_svm`, each scored by F1 with 5-fold cross-validation on the
# training split. The fixed seed makes the sampled combinations repeatable.
random_search_svm = RandomizedSearchCV(
    svm,
    param_distributions=param_dist_svm,
    n_iter=10,            # number of random combinations to try
    cv=5,
    scoring='f1',
    n_jobs=-1,            # use every available core
    random_state=42
)

random_search_svm.fit(X_train, y_train)

print(" Best Parameters for SVM:")
print(random_search_svm.best_params_)


 Best Parameters for SVM:
{'kernel': 'rbf', 'gamma': 'auto', 'C': 10.0}


In [86]:
# Take the SVM the randomized search selected as best and predict on the
# test split.
best_svm = random_search_svm.best_estimator_
y_pred_svm = best_svm.predict(X_test)

# Report the four test-set metrics for the tuned SVM.
print("\n Tuned SVM Performance")
for caption, scorer in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1 Score :", f1_score),
):
    print(caption, scorer(y_test, y_pred_svm))


 Tuned SVM Performance
Accuracy : 0.9736842105263158
Precision: 0.9722222222222222
Recall   : 0.9859154929577465
F1 Score : 0.9790209790209791


In [87]:
from sklearn.metrics import f1_score

# Re-score every tuned estimator on the held-out test split.
y_pred_log = best_model.predict(X_test)   # tuned logistic regression
y_pred_rf = best_rf.predict(X_test)       # tuned random forest
y_pred_svm = best_svm.predict(X_test)     # tuned SVM

# Map display name -> test-set F1, keeping the LR / RF / SVM order.
scores = {
    name: f1_score(y_test, preds)
    for name, preds in (
        ("Logistic Regression", y_pred_log),
        ("Random Forest", y_pred_rf),
        ("SVM", y_pred_svm),
    )
}

# One line per model, rounded to four decimals.
for model_name, score in scores.items():
    print(f"{model_name} F1 Score: {score:.4f}")

Logistic Regression F1 Score: 0.9930
Random Forest F1 Score: 0.9722
SVM F1 Score: 0.9790


In [88]:
# Pick the entry of `scores` with the largest F1 and bind the matching
# estimator to `final_model`. Any name other than the two listed here falls
# back to the SVM.
# NOTE(review): `scores` holds test-set F1 values, so this selection uses the
# test split; the winner's test score is then an optimistic estimate. Consider
# comparing the searches' cross-validated `best_score_` instead.
best_model_name = max(scores, key=scores.get)
final_model = {
    "Logistic Regression": best_model,
    "Random Forest": best_rf,
}.get(best_model_name, best_svm)
print(f"\n Best Model: {best_model_name}")


 Best Model: Logistic Regression
