In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# CNN


##  Flower Data

In [2]:
###-----------------
### Import Libraries
###-----------------

import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import tensorflow as tf

from utils.helper import fn_plot_tf_hist,fn_plot_confusion_matrix




In [3]:
###----------------------
### Some basic parameters
###----------------------
inpDir = '../..\Classwork/input'
outDir = './output'
subDir = 'flower_photos'
modelDir = './models'
logDir = './logs'
altName = 'cnn_base'

RANDOM_STATE = 24 # for initialization ----- REMEMBER: to remove at the time of promotion to production
tf.random.set_seed(RANDOM_STATE) # setting for Tensorflow as well

TEST_SIZE = 0.2

ALPHA = 0.001
EPOCHS = 100 # number of cycles to run
PATIENCE = 20
LR_PATIENCE = 10
FACTOR_LR = 0.1
BATCH_SIZE = 32 # inline of Training Rows being 60000
IMG_HEIGHT = 187
IMG_WIDTH = 187


# Set parameters for decoration of plots
params = {'legend.fontsize' : 'large',
          'figure.figsize'  : (15,10),
          'axes.labelsize'  : 'x-large',
          'axes.titlesize'  :'x-large',
          'xtick.labelsize' :'large',
          'ytick.labelsize' :'large',
         }

CMAP = plt.cm.coolwarm

plt.rcParams.update(params) # update rcParams

plt.style.use('seaborn-v0_8-darkgrid') # plt.style.use('ggplot')

## Import data

In [6]:
'''
import pathlib
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"

data_dir = tf.keras.utils.get_file(origin=dataset_url,
                                   fname='flower_photos',
                                   untar=True)
data_dir = pathlib.Path(data_dir)

'''
data_dir = os.path.join(inpDir, subDir)
data_dir


Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz


'\ndata_dir = os.path.join(inpDir, subDir)\ndata_dir\n'

## Creating datasets

In [8]:
# create training data
train_ds =tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, # path the the data directory
    validation_split=TEST_SIZE, # what ratio of validation data
    subset='training', # purpose
    seed=RANDOM_STATE,
    image_size=[IMG_HEIGHT, IMG_WIDTH], ## @@@ WHAT!
    batch_size=BATCH_SIZE
)
# test data
test_ds =tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, # path the the data directory
    validation_split=TEST_SIZE, # what ratio of validation data
    subset='validation', # purpose
    seed=RANDOM_STATE,
    image_size=[IMG_HEIGHT, IMG_WIDTH], ## @@@ WHAT!
    batch_size=BATCH_SIZE
)

Found 3670 files belonging to 5 classes.
Using 2936 files for training.
Found 3670 files belonging to 5 classes.
Using 734 files for validation.


In [9]:
# is it picking class names
class_names = train_ds.class_names
class_names

['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']

In [10]:
class_dict = {k:v for k,v in enumerate(class_names)}
class_dict

{0: 'daisy', 1: 'dandelion', 2: 'roses', 3: 'sunflowers', 4: 'tulips'}

## Visualize data in train_ds and test_ds

In [11]:
'''
plt.figure(figsize=(15,8))

for images, labels in train_ds.take(1):
    for i in range (BATCH_SIZE):
        plt.subplot(int(BATCH_SIZE/8), 8, i +1)
        plt.grid(False)
        plt.imshow(images[i].numpy().astype('uint8'))
        plt.title(class_names[labels[i]])
        plt.axis('off')
    plt.tight_layout()
plt.show()

'''

"\nplt.figure(figsize=(15,8))\n\nfor images, labels in train_ds.take(1):\n    for i in range (BATCH_SIZE):\n        plt.subplot(int(BATCH_SIZE/8), 8, i +1)\n        plt.grid(False)\n        plt.imshow(images[i].numpy().astype('uint8'))\n        plt.title(class_names[labels[i]])\n        plt.axis('off')\n    plt.tight_layout()\nplt.show()\n\n"

In [12]:
'''
plt.figure(figsize=(15,8))

for images, labels in test_ds.take(1): # get me one batch

    for i in range (BATCH_SIZE): # loop over batch

        plt.subplot(int(BATCH_SIZE/8), 8, i +1) # access the axis

        plt.grid(False) # no to grid

        plt.imshow(images[i].numpy().astype('uint8')) # show image convert to numpy and int

        plt.title(class_names[labels[i]])

        plt.axis('off')

    plt.tight_layout()

plt.show()
'''

"\nplt.figure(figsize=(15,8))\n\nfor images, labels in test_ds.take(1): # get me one batch\n\n    for i in range (BATCH_SIZE): # loop over batch\n\n        plt.subplot(int(BATCH_SIZE/8), 8, i +1) # access the axis\n\n        plt.grid(False) # no to grid\n\n        plt.imshow(images[i].numpy().astype('uint8')) # show image convert to numpy and int\n\n        plt.title(class_names[labels[i]])\n\n        plt.axis('off')\n\n    plt.tight_layout()\n\nplt.show()\n"

## To check whether data is balanced or not

In [13]:
'''
def fn_plot_label(tr_ds, ts_ds):

    plt.figure(figsize = (15,5)) # instantiate the figure

    plt.subplot(1,2,1) # first out of 2

    train_labels = tf.concat([lbl for img, lbl in tr_ds], axis = 0).numpy() # get the labels

    unique, _, counts = tf.unique_with_counts(train_labels) # get counts

    plt.bar(range(len(unique)), counts, align='center', color = 'DarkBlue') # barplot the counts

    plt.xticks(range(len(unique)), class_names)

    plt.title('Training Set')

    plt.subplot(1,2,2)

    test_labels = tf.concat([lbl for img, lbl in ts_ds], axis = 0).numpy()

    unique, _, counts = tf.unique_with_counts(test_labels)

    plt.bar(range(len(unique)), counts, align='center', color = 'Orange')

    plt.xticks(range(len(unique)), class_names)

    plt.title('Test Set')
'''

"\ndef fn_plot_label(tr_ds, ts_ds):\n\n    plt.figure(figsize = (15,5)) # instantiate the figure\n\n    plt.subplot(1,2,1) # first out of 2\n\n    train_labels = tf.concat([lbl for img, lbl in tr_ds], axis = 0).numpy() # get the labels\n\n    unique, _, counts = tf.unique_with_counts(train_labels) # get counts\n\n    plt.bar(range(len(unique)), counts, align='center', color = 'DarkBlue') # barplot the counts\n\n    plt.xticks(range(len(unique)), class_names)\n\n    plt.title('Training Set')\n\n    plt.subplot(1,2,2)\n\n    test_labels = tf.concat([lbl for img, lbl in ts_ds], axis = 0).numpy()\n\n    unique, _, counts = tf.unique_with_counts(test_labels)\n\n    plt.bar(range(len(unique)), counts, align='center', color = 'Orange')\n\n    plt.xticks(range(len(unique)), class_names)\n\n    plt.title('Test Set')\n"

In [14]:
# fn_plot_label(train_ds, test_ds)

In [None]:
train_ds = train_ds.cache().prefetch(buffer_size = tf.data.AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size = tf.data.AUTOTUNE)

## Model Building

In [15]:
input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)
num_classes = len(class_names)
input_shape, num_classes

((190, 190, 3), 5)

In [16]:
def build_model (input_shape, num_classes):
    
    krnl_initializer = tf.keras.initializers.GlorotUniform()

    model = tf.keras.Sequential()

    ## increasing dropout rate
    drop1 = 0.1
    drop2 = 0.1
    drop3 = 0.2
    drop4 = 0.2
    drop5 = 0.3
    drop6 = 0.3
    drop7 = 0.4
    drop8 = 0.4

    ## preprocessing (scaling)
    model.add(tf.keras.layers.Rescaling(1./255.))

    ## Augmentation

    model.add(tf.keras.layers.RandomZoom((0.2,0.2), fill_mode = 'nearest', seed=RANDOM_STATE))

    ## 1 layer
    model.add(tf.keras.layers.Conv2D(32,(5,5),
                                     strides=(2, 2),
                                     kernel_initializer = krnl_initializer,
                                     input_shape =input_shape)) ## output shape expected - 92*92*32

    model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.ReLU())

    model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2))) ## 46*46*32

    model.add(tf.keras.layers.Dropout(drop1))

    ## 2 layer
    model.add(tf.keras.layers.Conv2D(64,3,
                                     kernel_initializer = krnl_initializer)) ## output shape expected - 44*44*64

    model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.ReLU())

    model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2))) ## 22*22*256

    model.add(tf.keras.layers.Dropout(drop2))

    ## 3 layer
    model.add(tf.keras.layers.Conv2D(128,(3,3),
                                     kernel_initializer = krnl_initializer)) ## output shape expected - 20*20*512

    model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.ReLU())

    model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2))) ## 10*10*128

    model.add(tf.keras.layers.Dropout(drop3))

    ## 4 layer
    model.add(tf.keras.layers.Conv2D(256,(3,3),
                                     kernel_initializer = krnl_initializer)) ## output shape expected - 8*8*256

    model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.ReLU())

    model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2))) ## 4*4*256

    model.add(tf.keras.layers.Dropout(drop4))


    ## 5 layer
    model.add(tf.keras.layers.Conv2D(512,(3,3),
                                     kernel_initializer = krnl_initializer)) ## output shape expected - 2*2*512

    model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.ReLU())

    model.add(tf.keras.layers.Dropout(drop5))


    ## Head
    model.add(tf.keras.layers.Flatten())

    model.add(tf.keras.layers.Dense(512, kernel_initializer = krnl_initializer))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.ReLU())
    model.add(tf.keras.layers.Dropout(drop6))

    model.add(tf.keras.layers.Dense(128, kernel_initializer = krnl_initializer))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.ReLU())
    model.add(tf.keras.layers.Dropout(drop7))

    model.add(tf.keras.layers.Dense(32, kernel_initializer = krnl_initializer))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.ReLU())
    model.add(tf.keras.layers.Dropout(drop8))

    model.add(tf.keras.layers.Dense(num_classes))

    return model

In [17]:
model = build_model(input_shape, num_classes)
model





<keras.src.engine.sequential.Sequential at 0x2a6c2f72cd0>

In [18]:
checkpoint_path = './weights_tf_flower'

model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    monitor='val_loss',
    verbose=2,
    save_best_only=True,
    save_weights_only=True,
    mode='auto',
    save_freq='epoch',
    initial_value_threshold=None
)

es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=PATIENCE,
    verbose=2,
    mode='auto',
    baseline=None,
    restore_best_weights=True,
    start_from_epoch=0
)

lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=FACTOR_LR,
    patience=LR_PATIENCE,
    verbose=2,
    mode='auto',
    min_delta=0.00001,
    cooldown=0,
    min_lr=0.0,
)

## Compile and train

In [19]:
optimizer = tf.keras.optimizers.Adam(learning_rate=ALPHA)

model.compile(optimizer = optimizer,
              loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics = ['accuracy'])

In [None]:
history = model.fit(train_ds,
                    validation_data = test_ds ,
                    batch_size = BATCH_SIZE,
                    epochs = EPOCHS, verbose=2,
                    callbacks=[model_checkpoint,es_callback,lr_callback])

Epoch 1/100







Epoch 1: val_loss improved from inf to 1.86715, saving model to .\weights_tf_flower
92/92 - 84s - loss: 1.7894 - accuracy: 0.2473 - val_loss: 1.8671 - val_accuracy: 0.2384 - lr: 0.0010 - 84s/epoch - 910ms/step
Epoch 2/100

Epoch 2: val_loss did not improve from 1.86715
92/92 - 72s - loss: 1.4712 - accuracy: 0.3573 - val_loss: 2.0527 - val_accuracy: 0.2561 - lr: 0.0010 - 72s/epoch - 786ms/step
Epoch 3/100

Epoch 3: val_loss did not improve from 1.86715
92/92 - 73s - loss: 1.3561 - accuracy: 0.4384 - val_loss: 2.3980 - val_accuracy: 0.2452 - lr: 0.0010 - 73s/epoch - 790ms/step
Epoch 4/100

Epoch 4: val_loss did not improve from 1.86715
92/92 - 72s - loss: 1.2875 - accuracy: 0.4653 - val_loss: 2.1124 - val_accuracy: 0.3297 - lr: 0.0010 - 72s/epoch - 787ms/step
Epoch 5/100

Epoch 5: val_loss improved from 1.86715 to 1.51805, saving model to .\weights_tf_flower
92/92 - 73s - loss: 1.2193 - accuracy: 0.5109 - val_loss: 1.5180 - val_accuracy: 0.4169 - lr: 0.0010 - 73s/epoch - 796ms/step
Ep

In [None]:
model.summary()

In [None]:
# tf.keras.utils.plot_model(model,'model.png', show_shapes=True, show_dtype=True, dpi=96, show_layer_activations=True)

In [None]:
history_df = pd.DataFrame(history.history)
fn_plot_tf_hist(history_df)