In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset
data = load_breast_cancer()

# Create a Pandas DataFrame
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

# Save the DataFrame to a CSV file
df.to_csv('breast_cancer_data.csv', index=False)

## Feature Selection

Implement feature selection using methods like SelectKBest from sklearn.feature_selection.

In [2]:
# Import necessary libraries
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Load the breast cancer dataset
df = pd.read_csv('breast_cancer_data.csv')

# Define the feature selection method
selector = SelectKBest(chi2, k=10)

# Fit the selector to the data
selector.fit(df.drop('target', axis=1), df['target'])

# Get the selected features
selected_features = selector.get_support(indices=True)

# Create a new DataFrame with the selected features
selected_df = df.iloc[:, selected_features]
selected_df['target'] = df['target']

# Save the selected DataFrame to a CSV file
selected_df.to_csv('selected_breast_cancer_data.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_df['target'] = df['target']


## Grid Search CV for Model Tuning

In [3]:
# Import necessary libraries
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Define the hyperparameter space
param_grid = {
    'hidden_layer_sizes': [(10,), (20,), (30,)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'learning_rate_init': [0.01, 0.001]
}

# Define the Grid Search CV object
grid_search = GridSearchCV(MLPClassifier(), param_grid, cv=5, scoring='accuracy')

# Fit the Grid Search CV object to the data
grid_search.fit(selected_df.drop('target', axis=1), selected_df['target'])

# Get the best hyperparameters and the corresponding score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Print the best hyperparameters and the corresponding score
print('Best Hyperparameters:', best_params)
print('Best Score:', best_score)



Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (30,), 'learning_rate_init': 0.001, 'solver': 'adam'}
Best Score: 0.9244527247321844




## Implementing an Artificial Neural Network (ANN) Model



In [4]:
# Import necessary libraries
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Define the ANN model
ann_model = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', solver='adam', learning_rate_init=0.01)

# Train the ANN model
ann_model.fit(selected_df.drop('target', axis=1), selected_df['target'])

# Make predictions on the test set
y_pred = ann_model.predict(selected_df.drop('target', axis=1))

# Evaluate the ANN model
accuracy = accuracy_score(selected_df['target'], y_pred)
print('Accuracy:', accuracy)
print('Classification Report:')
print(classification_report(selected_df['target'], y_pred))
print('Confusion Matrix:')
print(confusion_matrix(selected_df['target'], y_pred))

Accuracy: 0.9191564147627417
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.89      0.89       212
           1       0.93      0.94      0.94       357

    accuracy                           0.92       569
   macro avg       0.91      0.91      0.91       569
weighted avg       0.92      0.92      0.92       569

Confusion Matrix:
[[188  24]
 [ 22 335]]


## Save Model

In [6]:
!pip install joblib

import joblib

# Save the model to a specific file location
joblib.dump(ann_model, '/content/Model_File/breast_cancer_model.joblib')



['/content/Model_File/breast_cancer_model.joblib']