In [1]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import pickle


In [2]:
# Load scikit-learn's bundled breast cancer dataset. The returned object's
# .data, .feature_names and .target attributes are read when the DataFrame is built.
data = load_breast_cancer()

In [3]:
# Build a DataFrame from the feature matrix (one named column per feature)
# and append the class labels as a trailing 'target' column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

In [4]:
# Preview the leading rows to sanity-check the loaded table
print("First 5 rows of the dataset:", df.head(), sep="\n")

First 5 rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter 

In [5]:
# Count null entries per column (all zeros means no imputation is needed)
missing_values = df.isna().sum()
print("\nMissing values in each column:", missing_values, sep="\n")


Missing values in each column:
mean radius                0
mean texture               0
mean perimeter             0
mean area                  0
mean smoothness            0
mean compactness           0
mean concavity             0
mean concave points        0
mean symmetry              0
mean fractal dimension     0
radius error               0
texture error              0
perimeter error            0
area error                 0
smoothness error           0
compactness error          0
concavity error            0
concave points error       0
symmetry error             0
fractal dimension error    0
worst radius               0
worst texture              0
worst perimeter            0
worst area                 0
worst smoothness           0
worst compactness          0
worst concavity            0
worst concave points       0
worst symmetry             0
worst fractal dimension    0
target                     0
dtype: int64


In [6]:
# Data preprocessing steps
# Separate the label column (y) from the predictor columns (X)
y = df['target']
X = df.drop(columns='target')

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Standardise features: learn the scaling statistics from the training split
# only, then apply that same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Univariate feature selection: keep the k features ranked highest by f_classif
k = 10  # how many top-ranked features to retain
selector = SelectKBest(f_classif, k=k)

In [10]:
# Score the features on the scaled training data, then reduce both splits
# to the columns the selector kept.
selector.fit(X_train_scaled, y_train)
X_train_selected = selector.transform(X_train_scaled)
X_test_selected = selector.transform(X_test_scaled)

In [11]:
# Names of the columns the selector kept, looked up via its boolean support mask
selected_features = X.columns[selector.get_support()]

# Scores computed by the selector during fitting (one entry per input feature)
scores = selector.scores_

In [12]:
# Report each retained feature together with its score
kept_scores = scores[selector.get_support()]
print(f"Top {k} selected features and their scores:")
for name, value in zip(selected_features, kept_scores):
    print(f"{name}: {value:.4f}")

Top 10 selected features and their scores:
mean radius: 482.2339
mean perimeter: 522.4893
mean area: 423.6541
mean concavity: 396.6624
mean concave points: 695.1798
worst radius: 645.3507
worst perimeter: 681.2638
worst area: 495.7877
worst concavity: 331.3309
worst concave points: 746.4921


In [13]:
# Confirm that both splits were reduced to the same number of columns
print("\nShapes of the datasets after feature selection:")
for label, matrix in (("X_train_selected:", X_train_selected),
                      ("X_test_selected:", X_test_selected)):
    print(label, matrix.shape)


Shapes of the datasets after feature selection:
X_train_selected: (455, 10)
X_test_selected: (114, 10)


In [14]:
# Base estimator handed to the hyper-parameter search (seeded for reproducibility)
mlp = MLPClassifier(max_iter=1000, random_state=42)

In [15]:
# Hyper-parameter search space: 3 * 2 * 2 * 2 * 2 = 48 candidate configurations
parameter_grid = dict(
    hidden_layer_sizes=[(50, 50, 50), (50, 100, 50), (100,)],
    activation=['tanh', 'relu'],
    solver=['sgd', 'adam'],
    alpha=[0.0001, 0.05],
    learning_rate=['constant', 'adaptive'],
)

In [16]:
# Exhaustive search over parameter_grid with 5-fold cross-validation;
# n_jobs=-1 parallelises the fits and verbose=2 prints progress.
grid_search = GridSearchCV(
    mlp,
    parameter_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
)

In [17]:
# Run the grid search on the training data.
# NOTE(review): X_train_selected comes from a scaler and a selector that were
# fitted on the whole training split, so the validation part of every CV fold
# has already influenced preprocessing; the reported CV accuracy is probably
# slightly optimistic. Wrapping scaler, selector and MLP in a Pipeline and
# searching over that would avoid this — TODO confirm whether it matters here.
grid_search.fit(X_train_selected, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [18]:

# Report the winning configuration and its mean cross-validated accuracy
print(f"Best parameters found:  {grid_search.best_params_}")
print(f"Best cross-validation accuracy: {grid_search.best_score_:.4f}")

Best parameters found:  {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'solver': 'adam'}
Best cross-validation accuracy: 0.9560


In [19]:
# Define the ANN model from the hyper-parameters the grid search selected.
# Reading grid_search.best_params_ (instead of re-typing the printed values)
# keeps this model in sync with the search if the data, the grid, the seed or
# the library version changes. random_state and max_iter mirror the base
# estimator that was used for the search; neither is a key of parameter_grid,
# so they cannot collide with the unpacked best parameters.
ann_model = MLPClassifier(
    random_state=42,
    max_iter=1000,
    **grid_search.best_params_
)


In [20]:
# Train the ANN model on the full training split (scaled, selected features)
ann_model.fit(X_train_selected, y_train)

In [21]:
# Evaluate the ANN model: predict labels for the held-out test split, which was
# transformed with the scaler and selector fitted on the training data only
y_pred = ann_model.predict(X_test_selected)

In [22]:
# Overall test-set accuracy
accuracy = accuracy_score(y_test, y_pred)

# Per-class precision / recall / F1, followed by the accuracy summary
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print(f"Accuracy of the ANN model: {accuracy:.4f}")

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.98      0.97        43
           1       0.99      0.97      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Accuracy of the ANN model: 0.9737


In [26]:
def load_saved_objects(model_path='ann_model.pkl',
                       kbest_path='kbest_selector.pkl',
                       scaler_path='scaler.pkl'):
    """Load the pickled model, feature selector and scaler from disk.

    The default paths are the file names this notebook uses when it saves the
    objects ('ann_model.pkl', 'kbest_selector.pkl', 'scaler.pkl'); calling the
    function without arguments therefore reloads exactly what was written.

    Parameters
    ----------
    model_path : str
        Path of the pickled classifier.
    kbest_path : str
        Path of the pickled SelectKBest selector.
    scaler_path : str
        Path of the pickled StandardScaler.

    Returns
    -------
    tuple
        ``(model, kbest_selector, scaler)`` on success, or
        ``(None, None, None)`` if any file is missing or cannot be unpickled
        (the error is printed).

    Notes
    -----
    ``pickle.load`` can execute arbitrary code embedded in the file; only
    point this function at files you created yourself or otherwise trust.
    """
    loaded = []
    try:
        # Order matters: it defines the order of the returned tuple.
        for path in (model_path, kbest_path, scaler_path):
            with open(path, 'rb') as handle:
                loaded.append(pickle.load(handle))
    except (FileNotFoundError, pickle.UnpicklingError, EOFError,
            AttributeError, ValueError) as e:
        # A missing file is reported the same way as a corrupt one so callers
        # only need to check for the all-None result.
        print(f"Error loading pickle file: {e}")
        return None, None, None

    return tuple(loaded)

In [27]:
# Output paths for the pickled model, feature selector and scaler
model_filename, kbest_filename, scaler_filename = (
    'ann_model.pkl',
    'kbest_selector.pkl',
    'scaler.pkl',
)

In [28]:
# Persist the ANN model, the SelectKBest selector and the StandardScaler,
# each to its own pickle file (written in that order).
for target_path, fitted_object in (
    (model_filename, ann_model),
    (kbest_filename, selector),
    (scaler_filename, scaler),
):
    with open(target_path, 'wb') as out_file:
        pickle.dump(fitted_object, out_file)

print(f"Model, KBest, and Scaler saved as {model_filename}, {kbest_filename}, and {scaler_filename}")

Model, KBest, and Scaler saved as ann_model.pkl, kbest_selector.pkl, and scaler.pkl
