In [2]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from scipy.stats import randint, uniform
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the breast-cancer dataset and pull out the feature matrix and labels.
data = load_breast_cancer()
X = data.data
y = data.target

# Combine features and label into a single frame for a quick look at the data.
df = pd.DataFrame(X, columns=data.feature_names).assign(target=y)
print("First 5 rows of the dataset:")
print(df.head())

First 5 rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter 

In [4]:
# Hold out 30% of the samples for final evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features with statistics learned from the training split only,
# then apply that same transform to the test split.
# NOTE(review): the scaler is fit on the whole training split before any
# cross-validation runs on it, so CV folds share scaling statistics. Wrapping
# scaler + estimator in a Pipeline would avoid that -- confirm whether it matters here.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Untuned base estimators, keyed by the name used for reporting.
models = {
    "LogisticRegression": LogisticRegression(max_iter=5000),
    # Seeded like the split above so the fitted forest (and the metrics
    # reported from it) are the same on every Restart & Run All.
    "RandomForest": RandomForestClassifier(random_state=42),
    "SVM": SVC(),
    "KNN": KNeighborsClassifier()
}

In [5]:
# Candidate hyperparameter values per classifier; the outer keys match the
# names used in the `models` dict.
param_grids = {
    "LogisticRegression": dict(
        C=[0.1, 1, 10],
        solver=["liblinear", "lbfgs"],
    ),
    "RandomForest": dict(
        n_estimators=[50, 100, 200],
        max_depth=[None, 5, 10],
    ),
    "SVM": dict(
        C=[0.1, 1, 10],
        kernel=["linear", "rbf"],
    ),
    "KNN": dict(
        n_neighbors=[3, 5, 7, 9],
        weights=["uniform", "distance"],
    ),
}


In [6]:
# Fitted search objects, keyed by model name.
searches = {}


def _fit_randomized_search(estimator, grid, n_iter=10):
    """Fit a 5-fold RandomizedSearchCV (accuracy scoring, seed 42) on the training split.

    Parameters
    ----------
    estimator : the unfitted classifier to tune.
    grid : dict mapping parameter name -> list of candidate values.
    n_iter : requested number of sampled candidates (default 10).

    Returns
    -------
    The fitted RandomizedSearchCV instance.

    `n_iter` is capped at the number of distinct combinations in `grid`.
    The grids used here are plain lists with fewer than 10 combinations, and
    asking for more draws than combinations only makes scikit-learn emit a
    warning and fall back to that count.
    """
    n_combinations = 1
    for values in grid.values():
        n_combinations *= len(values)
    search = RandomizedSearchCV(
        estimator, grid, cv=5, n_iter=min(n_iter, n_combinations),
        scoring='accuracy', random_state=42
    )
    search.fit(X_train, y_train)
    return search


# GridSearchCV for LogisticRegression and RandomForest
for name in ["LogisticRegression", "RandomForest"]:
    clf = models[name]
    grid = GridSearchCV(clf, param_grids[name], cv=5, scoring='accuracy')
    grid.fit(X_train, y_train)
    searches[name] = grid

# RandomizedSearchCV for SVM and KNN
svc = models["SVM"]
svc_search = _fit_randomized_search(svc, param_grids["SVM"])
searches["SVM"] = svc_search

knn = models["KNN"]
knn_search = _fit_randomized_search(knn, param_grids["KNN"])
searches["KNN"] = knn_search

# Evaluate and compare models on the held-out test split.
# NOTE(review): the scalar precision/recall/F1 below use scikit-learn's default
# positive label (1); confirm that label 1 is the class of interest for this dataset.
for name, search in searches.items():
    best_model = search.best_estimator_  # refit on the full training split with the best params
    y_pred = best_model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print(f"\n--- {name} ---")
    print("Best Params:", search.best_params_)
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("-"*5)




--- LogisticRegression ---
Best Params: {'C': 0.1, 'solver': 'liblinear'}
Accuracy: 0.9942
Precision: 0.9908
Recall: 1.0000
F1-score: 0.9954

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.98      0.99        63
           1       0.99      1.00      1.00       108

    accuracy                           0.99       171
   macro avg       1.00      0.99      0.99       171
weighted avg       0.99      0.99      0.99       171

-----

--- RandomForest ---
Best Params: {'max_depth': 10, 'n_estimators': 100}
Accuracy: 0.9708
Precision: 0.9640
Recall: 0.9907
F1-score: 0.9772

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.94      0.96        63
           1       0.96      0.99      0.98       108

    accuracy                           0.97       171
   macro avg       0.97      0.96      0.97       171
weighted avg       0.97      0.97      0.97       171

-----



In [7]:
# Build one summary row per tuned model from its test-set predictions.
metric_fns = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1 Score': f1_score,
}

results = []
for name, search in searches.items():
    y_pred = search.best_estimator_.predict(X_test)
    row = {'Model': name}
    for label, fn in metric_fns.items():
        row[label] = fn(y_test, y_pred)
    results.append(row)

final_results_df = pd.DataFrame(results)

# Rank models from highest to lowest F1 score.
final_sorted = final_results_df.sort_values('F1 Score', ascending=False)

print("\n Final Model Comparison (After Tuning):\n")
print(final_sorted)

# The top row of the ranking is the best performer.
best_model = final_sorted.iloc[0]
print(f"\n Best Performing Model: {best_model['Model']} with F1 Score: {best_model['F1 Score']:.4f}")



 Final Model Comparison (After Tuning):

                Model  Accuracy  Precision    Recall  F1 Score
0  LogisticRegression  0.994152   0.990826  1.000000  0.995392
2                 SVM  0.982456   0.981651  0.990741  0.986175
1        RandomForest  0.970760   0.963964  0.990741  0.977169
3                 KNN  0.959064   0.963303  0.972222  0.967742

 Best Performing Model: LogisticRegression with F1 Score: 0.9954
