---

Imports 

---


In [1]:
import importlib
import pipeline as pl
importlib.reload(pl)
import model as ml
importlib.reload(ml)

import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import keras_tuner
from kerastuner.tuners import Hyperband

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)

2.19.0


  from kerastuner.tuners import Hyperband


---

Load Augmented data from pipeline

---

In [2]:
csv_path = "cancer_dataset.csv"
train, test = pl.loadprocesseddata(csv_path)

Found 104001 validated image filenames belonging to 26 classes.
Found 26001 validated image filenames belonging to 26 classes.


---

Build Model

---

In [None]:
def build_cnn_model(hp, n_classes=26):
    activation = hp.Choice("activation", values=["relu", "tanh", "elu"])
    n_conv_layers = hp.Int("n_conv_layers", min_value=2, max_value=5, step=1)
    n_filters = hp.Int("n_filters", min_value=8, max_value=64, step=8)
    
    kernel_size_val = hp.Choice("kernel_size", values=[3, 5])
    pool_size_val = hp.Choice("pool_size", values=[2, 3])
    
    learning_rate = hp.Float("learning_rate", min_value=1e-5, max_value=1e-2, sampling="log")
    decay_steps = hp.Int("decay_steps", min_value=1000, max_value=10000, step=1000)
    decay_rate = hp.Float("decay_rate", min_value=0.9, max_value=0.999, step=0.001)
    dropout_rate = hp.Float("dropout_rate", min_value=0.2, max_value=0.5, step=0.1)
    batch_norm = hp.Choice("batch_norm", values=[True, False])
    optimizer_choice = hp.Choice("optimizer", values=["sgd", "RMSprop", "adam", "AdamW"])
    
    expon_dec = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=learning_rate,
        decay_steps=decay_steps,
        decay_rate=decay_rate
    )
    if optimizer_choice == "sgd":
        optimizer = keras.optimizers.SGD(learning_rate=expon_dec)
    elif optimizer_choice == "RMSprop":
        optimizer = keras.optimizers.RMSprop(learning_rate=expon_dec)
    else:
        optimizer = keras.optimizers.Adam(learning_rate=expon_dec)
    
    model = keras.Sequential()
    
    model.add(layers.Conv2D(
        filters=n_filters,
        kernel_size=(kernel_size_val, kernel_size_val),
        activation=activation,
        input_shape=(64, 64, 3),
        padding='same'
    ))
    if batch_norm:
        model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(pool_size_val, pool_size_val)))
    
    for i in range(n_conv_layers - 1):
        model.add(layers.Conv2D(
            ### I adeed the 2**i part because I read that it is a common practice.
            filters=n_filters * (2**i),
            kernel_size=(kernel_size_val, kernel_size_val),
            activation=activation,
            padding='same'
        ))
        if batch_norm:
            model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(pool_size=(pool_size_val, pool_size_val)))
        model.add(layers.Dropout(dropout_rate))

    model.add(layers.GlobalAveragePooling2D())
    
    n_dense_units = hp.Int("n_dense_units", min_value=32, max_value=256, step=32)
    model.add(layers.Dense(units=n_dense_units, activation=activation))
    if batch_norm:
        model.add(layers.BatchNormalization())
    model.add(layers.Dropout(dropout_rate))
    
    model.add(layers.Dense(n_classes, activation="softmax"))
    
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    
    return model


---

Hyperband Search

---

In [14]:
tuner = Hyperband(
    lambda hp: build_cnn_model(hp),
    objective='val_accuracy',
    max_epochs=15,
    factor=3,
    directory='hyperbanding',
    project_name='phase3')

Reloading Tuner from hyperbanding\phase3\tuner0.json


In [15]:
tuner.search(train, epochs=10, validation_data=test)

Trial 25 Complete [05h 19m 48s]
val_accuracy: 0.8371601104736328

Best val_accuracy So Far: 0.9167724251747131
Total elapsed time: 1d 20h 09m 29s


In [16]:
best_model = tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


---

Train Best Model

---

In [None]:
history = best_model.fit(train, epochs=100, validation_data=test)
### I am not sure why this has a keyboard interrupt. I would run it again but it's not my final model and it takes days to run.

Epoch 1/100


KeyboardInterrupt: 

---

Plot Learning Curve

---

In [None]:
plt.figure(figsize=(12, 4))
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='val accuracy')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

---

Obtain model Metrics

---

In [None]:
y_pred = best_model.predict(test, verbose=1)
y_pred_classes = np.argmax(y_pred, axis=1)

y_true = test.classes
results = classification_report(y_true, y_pred_classes)
print(results)
