<a href="https://www.kaggle.com/code/easyice/breast-cancer-ann-model-and-hyperparamstuning-98?scriptVersionId=262119091" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of every file found.
kaggle_input_files = (
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk('/kaggle/input')
    for entry in entries
)
for file_path in kaggle_input_files:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the breast-cancer CSV from the attached Kaggle dataset into a DataFrame
# and preview its first rows.
df = pd.read_csv('/kaggle/input/breast-cancer-dataset/breast-cancer.csv')
df.head()

In [None]:
# (rows, columns) of the loaded frame
df.shape

In [None]:
# Drop the `id` column.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell re-runnable: a second execution no longer raises KeyError because
# `id` has already been removed.
df = df.drop(columns='id', errors='ignore')

In [None]:
# Column names, dtypes and non-null counts
df.info()

In [None]:
# Count rows that are exact duplicates of an earlier row
df.duplicated().sum()

In [None]:
# Total number of missing values across the whole frame (0 means none)
df.isna().sum().sum()

In [None]:
# Key summary statistics of the data
df.describe()

In [None]:
# Summary statistics computed separately for each diagnosis class
df.groupby('diagnosis').describe()

In [None]:
# Number of samples per diagnosis class
df.diagnosis.value_counts()

In [None]:
# splitting data into train test
from sklearn.model_selection import train_test_split
# Separate the target column from the feature matrix.
y = df.diagnosis
X = df.drop(columns='diagnosis')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the shape of each partition.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

In [None]:
# Scaling the data

from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# encoding target column
from sklearn.preprocessing import LabelEncoder
# Fit the encoder on the training labels, then map both label sets to integer
# codes with that same fitted encoder.
le = LabelEncoder().fit(y_train)
y_train, y_test = (le.transform(labels) for labels in (y_train, y_test))

## Building the Artificial Neural Network (ANN)

In [None]:
import tensorflow as tf
# Set TensorFlow's global random seed for more repeatable runs
tf.random.set_seed(3)
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense
import warnings
# Suppress all Python warnings emitted from here on
warnings.filterwarnings('ignore')

In [None]:
# setting up layers in neural network

# One hidden layer of 20 ReLU units. The input width is taken from the scaled
# training matrix rather than hard-coded, so the cell keeps working if the
# number of feature columns changes.
model = Sequential()
model.add(Dense(20, activation='relu', input_dim=X_train_scaled.shape[1]))

# Two output units, one per encoded class. Softmax turns them into a
# probability distribution over the classes, which matches the
# sparse_categorical_crossentropy loss used at compile time; two independent
# sigmoids would not sum to 1.
model.add(Dense(2, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the network
model.summary()

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# (targets are integer class codes), accuracy tracked as the metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train for 15 epochs, holding back 10% of the training data for validation.
# The returned History object keeps the per-epoch metrics used for plotting.
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    validation_split=0.1,
    epochs=15,
)

## Evaluating the model

In [None]:
# Loss and accuracy of the trained model on the scaled test split
loss,accuracy = model.evaluate(X_test_scaled,y_test)
print(accuracy)

In [None]:
# Per-class scores for every test sample (one row per sample).
y_prob = model.predict(X_test_scaled)

# Predicted class = index of the highest-scoring output unit in each row.
y_pred = np.argmax(y_prob, axis=1)

In [None]:
from sklearn.metrics import accuracy_score
# Cross-check the test accuracy with scikit-learn using the argmax predictions
accuracy_score(y_test,y_pred)

## Visualizing accuracy and loss

In [None]:
import matplotlib.pyplot as plt 

# Accuracy per epoch.
# The second curve comes from `validation_split` (a slice of the training
# data held back during fit), not from the test set, so it is labelled as
# validation accuracy.
plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.title('ANN model accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()  # render the figure without echoing the Legend object's repr

In [None]:
# Loss per epoch.
# The second curve comes from `validation_split` (a slice of the training
# data held back during fit), not from the test set, so it is labelled as
# validation loss.
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('ANN model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()  # render the figure without echoing the Legend object's repr

## Building a predictive system

In [None]:
input_data = (13.34,15.86,86.49,520,0.1078,0.1535,0.1169,0.06987,0.1942,0.06902,0.286,1.016,1.535,12.96,0.006794,0.03575,0.0398,0.01383,0.02134,0.004603,15.53,23.19,96.66,614.9,0.1536,0.4791,0.4858,0.1708,0.3527,0.1016)

# Wrap the single sample in a one-row DataFrame that carries the training
# column names, so the scaler receives the same feature layout it was fitted on.
input_frame = pd.DataFrame([input_data], columns=X_train.columns)

# Standardize the sample with the scaler fitted on the training split
input_scaled = scaler.transform(input_frame)

# Class scores for the sample
prediction = model.predict(input_scaled)
print(prediction)

# Index of the highest-scoring class
predict_diagnosis = [np.argmax(prediction)]
print(predict_diagnosis)

# Decode the integer class back to the original diagnosis label instead of
# hard-coding which integer means what. LabelEncoder numbers classes in sorted
# label order, so code 0 is the alphabetically first label (the benign one),
# not the malignant one.
# NOTE(review): assumes the raw malignant label starts with 'M' (e.g. 'M' or
# 'Malignant') -- confirm against df.diagnosis.value_counts().
predicted_label = le.inverse_transform(predict_diagnosis)[0]

if str(predicted_label).upper().startswith('M'):
    print('The tumor is Malignant')
else:
    print('The tumor is Benign')

In [None]:
from keras.layers import Dropout
from keras_tuner import RandomSearch
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

# Define the model-building function
def build_model(hp):
    """Build and compile a binary classifier from a tuner search space.

    Tunable choices: the number of hidden layers (1-4); for each hidden layer
    its unit count, activation and an optional dropout layer with its own
    rate; and the optimizer.

    Args:
        hp: hyperparameter container supplied by the tuner; values are drawn
            through its Int / Choice / Boolean / Float methods.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    net = keras.Sequential()

    n_hidden = hp.Int('num_layers', 1, 4)
    for layer_idx in range(n_hidden):
        # Width and activation are tuned independently for every hidden layer.
        width = hp.Int(f'units_{layer_idx}', min_value=8, max_value=96, step=32)
        activation = hp.Choice(f'act_{layer_idx}', values=['relu', 'tanh', 'sigmoid'])
        net.add(Dense(units=width, activation=activation))

        # Dropout follows the layer only when its per-layer switch is on.
        use_dropout = hp.Boolean(f'dropout_{layer_idx}')
        if use_dropout:
            drop_rate = hp.Float(f'drop_rate_{layer_idx}', min_value=0.1, max_value=0.5, step=0.1)
            net.add(Dropout(rate=drop_rate))

    # Single sigmoid unit: binary classification output.
    net.add(Dense(1, activation='sigmoid'))

    net.compile(
        optimizer=hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd']),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    return net

# Instantiate tuner.
# overwrite=True discards trials left in ann_tuning/hyperparam_search by an
# earlier run, so re-executing this cell searches from scratch instead of
# silently resuming stale results.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=1,
    directory='ann_tuning',
    project_name='hyperparam_search',
    overwrite=True,
)

# Early stopping: end a trial once val_loss has not improved for 5 epochs and
# restore the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Search.
# Validation data is carved out of the training split (validation_split), so
# the test set plays no part in choosing hyperparameters and remains an
# unbiased hold-out. batch_size is left at the Keras default; the tuner does
# not pick it unless it is declared as a hyperparameter in build_model.
tuner.search(
    X_train_scaled, y_train,
    validation_split=0.1,
    epochs=50,
    callbacks=[early_stop],
)

# Get best model and the hyperparameters that produced it
best_model = tuner.get_best_models(num_models=1)[0]
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Summary
print("Best hyperparameters:")
for param, value in best_hps.values.items():
    print(f"{param}: {value}")

# Evaluate.
# The data passed here is X_test_scaled / y_test, so the figure is reported
# as test accuracy rather than validation accuracy.
loss, acc = best_model.evaluate(X_test_scaled, y_test)
print(f"Test Accuracy: {acc:.4f}")


## Thank you!