# Lecture: Neural Network with Keras and Tensorflow
---

NOTE: **The notebook needs to be run in a TensorFlow environment!**

Import dependencies

In [None]:
from numpy import loadtxt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow import keras
import pandas as pd
import numpy as np

Load dataset

In [None]:
# This dataset can be found on Kaggle as well: https://www.kaggle.com/datasets/yasserh/breast-cancer-dataset
bucket_data_name = 'bah-data'
dataset_name = 'breast-cancer.csv'
data_location = f's3://{bucket_data_name}/{dataset_name}'

# NOTE(review): reading an s3:// URL with pandas presumably needs the optional
# s3fs package and read access to the bucket — confirm in the target environment.
data = pd.read_csv(data_location)

In [None]:
# Preview the first rows to confirm the CSV parsed into the expected columns.
data.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


#### Check for missing data in all columns

In [None]:
# Per-column flag: True when the column holds at least one missing value.
data.isna().any()

id                         False
diagnosis                  False
radius_mean                False
texture_mean               False
perimeter_mean             False
area_mean                  False
smoothness_mean            False
compactness_mean           False
concavity_mean             False
concave points_mean        False
symmetry_mean              False
fractal_dimension_mean     False
radius_se                  False
texture_se                 False
perimeter_se               False
area_se                    False
smoothness_se              False
compactness_se             False
concavity_se               False
concave points_se          False
symmetry_se                False
fractal_dimension_se       False
radius_worst               False
texture_worst              False
perimeter_worst            False
area_worst                 False
smoothness_worst           False
compactness_worst          False
concavity_worst            False
concave points_worst       False
symmetry_w

No column has missing data. Great.

In [None]:
# List each column's dtype to spot non-numeric columns that need encoding.
data.dtypes

id                           int64
diagnosis                   object
radius_mean                float64
texture_mean               float64
perimeter_mean             float64
area_mean                  float64
smoothness_mean            float64
compactness_mean           float64
concavity_mean             float64
concave points_mean        float64
symmetry_mean              float64
fractal_dimension_mean     float64
radius_se                  float64
texture_se                 float64
perimeter_se               float64
area_se                    float64
smoothness_se              float64
compactness_se             float64
concavity_se               float64
concave points_se          float64
symmetry_se                float64
fractal_dimension_se       float64
radius_worst               float64
texture_worst              float64
perimeter_worst            float64
area_worst                 float64
smoothness_worst           float64
compactness_worst          float64
concavity_worst     

Check for label column distribution

In [None]:
# Class balance of the target column: how many rows carry each diagnosis code.
data.loc[:, 'diagnosis'].value_counts()

B    357
M    212
Name: diagnosis, dtype: int64

Drop the `id` column; we won't be using it in training.

In [None]:
# The `id` column is not used for training, so remove it from the frame.
# Rebinding `data` (rather than inplace=True) with errors='ignore' keeps this
# cell re-runnable: executing it a second time no longer raises KeyError.
data = data.drop(columns=['id'], errors='ignore')

Convert the target column into a numerical representation

In [None]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

# pop() removes the text `diagnosis` column from `data` in place and hands it
# straight to the encoder, which turns the class codes into integers.
label = le.fit_transform(data.pop('diagnosis'))

# Append the integer target as the last column.
data["label"] = label

In [None]:
# Confirm `id` and `diagnosis` are gone and the numeric `label` column was appended.
data.head()

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,label
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,1
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,1
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,1
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,1
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,1


In [None]:
# (rows, columns) of the prepared frame, i.e. the feature columns plus `label`.
data.shape

(569, 31)

In [None]:
# Features are every column except the last; the target is the last column (`label`).
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [None]:
# Sanity check: X and y must have the same number of rows.
print(f"{X.shape} {y.shape}")

(569, 30) (569,)


Split data into train/test dataset.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

Define the keras model.

The Sequential class indicates that our network will be feedforward and layers will be added to the class sequentially.

In [None]:
# One network input per feature: every column of `data` except the trailing label.
input_shape_value = (data.shape[1] - 1,)
input_shape_value

(30,)

Use `relu` activation function  (`rectified linear unit`)

The activation functions available in Keras include:
    
- relu function
- sigmoid function
- softmax function
- softplus function
- softsign function
- tanh function
- selu function
- elu function
- exponential function

In [None]:
# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation and batch shuffling start from the same state on every
# Restart & Run All; without it the reported metrics change from run to run.
keras.utils.set_random_seed(42)

# Feed-forward binary classifier: two ReLU hidden layers (32 and 16 units)
# followed by a single sigmoid unit that outputs the probability of label 1.
model = Sequential()
# Reuse the input shape computed in the previous cell instead of recomputing it.
model.add(Dense(32, input_shape=input_shape_value, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

Compile the keras model

Set hyperparameters:

- `binary_crossentropy` as the loss function for binary classification
- `adam` as the optimizer
- `accuracy` as the metric for evaluating model performance
- `epoch` as the number of full passes over the training dataset

Set your `learning_rate` in the optimizer as well.

Keras provides other types of loss functions.

**Probabilistic loss functions**

- BinaryCrossentropy 
- CategoricalCrossentropy
- SparseCategoricalCrossentropy
- Poisson 
- binary_crossentropy
- categorical_crossentropy
- sparse_categorical_crossentropy
- poisson
- KLDivergence
- kl_divergence

In [None]:
LEARNING_RATE = 0.01  # step size passed to the Adam optimizer
BATCH = 10            # samples per gradient update (batch_size in model.fit)
EPOCH = 50            # full passes over the training data (epochs in model.fit)

# Set loss function to be used.
# The model's last layer already applies a sigmoid, so the loss receives
# probabilities rather than raw logits. from_logits must therefore be False;
# from_logits=True tells Keras the inputs are un-activated logits and triggers
# a mismatch warning during training.
loss_function = keras.losses.BinaryCrossentropy(from_logits=False)

# Set optimizer to be used.
optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)

Now that the model is defined, you can compile it.

When compiling, you must specify some additional properties required when training the network.

You must specify:

- the `loss function` to use to evaluate a set of weights
- the `optimizer` used to search through different weights for the network
- `metrics` (optional) you want to collect and report during training

In [None]:
# Attach the loss, the optimizer and the reported metric to the model before training.
model.compile(
    optimizer=optimizer,
    loss=loss_function,
    metrics=['accuracy'],
)

Fit the keras model on the dataset

In [None]:
# Train silently (verbose=0): EPOCH passes over the training split, BATCH rows per step.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH,
    epochs=EPOCH,
    verbose=0,
)

  output, from_logits = _get_logits(


<keras.callbacks.History at 0x7f57ddde5c30>

Once a model is "built", you can call its summary() method to display its contents:

In [None]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_13 (Dense)            (None, 32)                992       
                                                                 
 dense_14 (Dense)            (None, 16)                528       
                                                                 
 dense_15 (Dense)            (None, 1)                 17        
                                                                 
Total params: 1,537
Trainable params: 1,537
Non-trainable params: 0
_________________________________________________________________


Predict on test dataset

In [None]:
# Sigmoid outputs for the held-out rows: one value between 0 and 1 per sample.
yhat = model.predict(X_test)



Print classification report based on test dataset using sklearn

In [None]:
from sklearn.metrics import classification_report

# Round the sigmoid outputs to hard 0/1 predictions before scoring.
yhat_rounded = np.round(yhat)

# target_names are listed in label order: 0 -> benign, 1 -> malignant.
print(
    classification_report(
        y_test,
        yhat_rounded,
        target_names=['benign', 'malignant'],
    )
)

              precision    recall  f1-score   support

      benign       0.99      0.96      0.97        89
   malignant       0.93      0.98      0.95        54

    accuracy                           0.97       143
   macro avg       0.96      0.97      0.96       143
weighted avg       0.97      0.97      0.97       143

