In [None]:
# Importing necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif

from sklearn.neural_network import MLPClassifier

from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report,
    roc_auc_score, roc_curve
)

import pickle

# To handle warnings and ignore them
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the Breast Cancer dataset
cancer = load_breast_cancer()

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
# Adding the target column
df['target'] = cancer.target



In [9]:
# Check for missing values
if df.isnull().sum().sum() == 0:
    print("No missing values in the dataset.")



No missing values in the dataset.


In [None]:
# Split the dataset into features (X) and target (y)
X = df.drop(['target'], axis=1)
y = df['target']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)



In [None]:
# Normalize the data using StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [13]:
# Save the scaler to a pickle file
with open(r"C:\Users\kaurp\Breast_cancer_App\scaler.pkl", 'wb') as f:
    pickle.dump(scaler, f)
    
print("Scaler saved successfully!")

Scaler saved successfully!


In [None]:
# Select top 10 features using SelectKBest with ANOVA F-test
selector = SelectKBest(f_classif, k=10)
X_new = selector.fit_transform(X, y)
selected_features = X.columns[selector.get_support()]
print("Selected Features:", selected_features)

Selected Features: Index(['mean radius', 'mean perimeter', 'mean area', 'mean concavity',
       'mean concave points', 'worst radius', 'worst perimeter', 'worst area',
       'worst concavity', 'worst concave points'],
      dtype='object')


In [None]:
# Define the parameter grid for hyperparameter tuning of MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'activation': ['tanh', 'relu'],
    'solver': ['adam', 'sgd'],
    'max_iter': [1000, 2000]
}


# Initialize the Multi-layer Perceptron classifier
mlp = MLPClassifier()

# Perform grid search with cross-validation to find the best parameters
grid_search = GridSearchCV(mlp, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_new, y)
print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'activation': 'relu', 'hidden_layer_sizes': (50,), 'max_iter': 1000, 'solver': 'adam'}


In [None]:
# Retrieve the best parameters from grid search
best_params = grid_search.best_params_

# Train the MLPClassifier using the best parameters
model = MLPClassifier(**best_params)
model.fit(X_new, y)


In [17]:
# Evaluate best ANN
y_pred = model.predict(X_new)
print("Accuracy:", accuracy_score(y, y_pred))
print(classification_report(y, y_pred))

Accuracy: 0.929701230228471
              precision    recall  f1-score   support

           0       0.94      0.87      0.90       212
           1       0.92      0.97      0.95       357

    accuracy                           0.93       569
   macro avg       0.93      0.92      0.92       569
weighted avg       0.93      0.93      0.93       569



Accuracy: A high, gross overall classification performance of 93.32% was achieved.

Class 0 (Benign): High accuracy at 94% and mainly capturing correct benign predictions for improved recall of 88%.

Class 1 (Malignant): Excellent accuracy (0.93) and recall (0.97), which enables to predict the presence of malignant cases, which is particularly important in cancer diagnosis.

Balanced Metrics: The macroaverage and weighted average of (~93%) both indicate the exact and coherent performance in both classes irrespective of the slight class bias.

In [18]:
# Save the best ANN model
with open(r"C:\Users\kaurp\Breast_cancer_App\best_ann_model.pkl", 'wb') as f:
    pickle.dump(model, f) 
    
print("Best ANN model saved successfully!")

Best ANN model saved successfully!
