In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd


# Fix TensorFlow's global random seed; the same constant can be reused by
# any later cell that needs a seed.
SEED = 1234
tf.random.set_seed(SEED)

# Current working directory at the time this cell runs.
cwd = os.getcwd()

## Mount Google Drive and unzip the dataset

---



In [None]:
# Mount the user's Google Drive inside the Colab VM at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')
# Extract the competition archive stored on Drive. No target directory is
# given, so unzip extracts relative to the current working directory.
!unzip '/content/gdrive/My Drive/Kaggle/artificial-neural-networks-and-deep-learning-2020.zip'

In [None]:
ls /content/artificial-neural-networks-and-deep-learning-2020/MaskDataset/training/

## Load the json file with all the class labels for training data

In [None]:
import json

# train_gt.json is loaded as a dict; its (key, value) pairs become the
# 'filename' and 'category' columns below.
with open('/content/artificial-neural-networks-and-deep-learning-2020/MaskDataset/train_gt.json', 'r') as f:
    data = json.load(f)

# One row per entry. Every value is cast to str so the labels can be used as
# class names by the Keras dataframe generators further down.
# astype(str) replaces the deprecated DataFrame.applymap(str).
df = pd.DataFrame(list(data.items()), columns=['filename', 'category']).astype(str)

# Show the class balance. It is printed explicitly because only the last
# expression of a cell is displayed automatically.
print(df['category'].value_counts())

# Keys of the label dict, in file order.
filenames = [*data]

df.head()

   

## **Preprocessing - ImageDataGenerator**

Apply data augmentation

In [None]:
# ImageDataGenerator
# ------------------

from tensorflow.keras.preprocessing.image import ImageDataGenerator

apply_data_augmentation = True

# Fraction of the training dataframe reserved for validation. It is passed to
# every generator that may be asked for subset="training"/"validation", so the
# split exists whether or not augmentation is enabled (previously it was only
# set in the augmented branch, which left the validation subset empty when
# apply_data_augmentation was False).
VALIDATION_SPLIT = 0.15

# Create training ImageDataGenerator object
if apply_data_augmentation:
    train_data_gen = ImageDataGenerator(rotation_range=10,
                                        width_shift_range=0.2,
                                        height_shift_range=0.2,
                                        zoom_range=0.3,
                                        validation_split=VALIDATION_SPLIT,
                                        horizontal_flip=True,
                                        vertical_flip=True,
                                        fill_mode='constant',  # pad shifted/rotated borders...
                                        cval=0,                # ...with black pixels
                                        rescale=1./255)
else:
    train_data_gen = ImageDataGenerator(rescale=1./255,
                                        validation_split=VALIDATION_SPLIT)

# Create validation and test ImageDataGenerator objects.
# Neither applies augmentation; both only rescale pixel values to [0, 1].
valid_data_gen = ImageDataGenerator(rescale=1./255,
                                    validation_split=VALIDATION_SPLIT)
test_data_gen = ImageDataGenerator(rescale=1./255)


In [None]:

from tensorflow.keras.preprocessing import image

# Root folder of the extracted competition data. This is the same location
# the other cells read from (the previous value, cwd + 'MaskDatase', was
# misspelled and did not point at the dataset).
dataset_dir = os.path.join('/content/artificial-neural-networks-and-deep-learning-2020',
                           'MaskDataset')

# Batch size
bs = 128

# img shape: images are resized to img_h x img_w before entering the network
img_h = 32
img_w = 32
# Keras target_size is (height, width); derive it so the values cannot drift apart
img_size = (img_h, img_w)



## Split the labelled data into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the labelled rows as a test set; the fixed random_state
# makes the partition repeatable across runs.
train_df, test_df = train_test_split(
    df,
    test_size=0.25,
    random_state=42,
)

## Create generators to read images from dataset directory

In [None]:
# Folder containing every labelled image referenced by the 'filename' column.
train_img_dir = '/content/artificial-neural-networks-and-deep-learning-2020/MaskDataset/training'

# Fix the label -> index mapping once so all three generators agree on it,
# even if one of the splits happens to miss a class. Sorted order is what
# Keras would infer on its own from train_df.
class_names = sorted(train_df['category'].unique())

# Arguments shared by every labelled generator below.
flow_kwargs = dict(directory=train_img_dir,
                   x_col="filename",
                   y_col="category",
                   classes=class_names,
                   batch_size=bs,
                   seed=42,
                   class_mode="categorical",
                   target_size=img_size)

# Training subset: augmented (if enabled) and shuffled every epoch.
train_gen = train_data_gen.flow_from_dataframe(train_df,
                                               subset="training",
                                               shuffle=True,
                                               **flow_kwargs)

# Validation subset: the same rows as before, but read through a generator
# that only rescales, so validation metrics are measured on un-augmented
# images. The split fraction is copied from train_data_gen (the private
# attribute Keras itself reads when slicing the dataframe) so the
# training/validation partition of train_df stays exactly complementary.
valid_split_gen = ImageDataGenerator(rescale=1./255,
                                     validation_split=train_data_gen._validation_split)
valid_gen = valid_split_gen.flow_from_dataframe(train_df,
                                                subset="validation",
                                                shuffle=False,
                                                **flow_kwargs)

# Held-out test split produced by train_test_split. This generator was
# missing although the dataset and evaluation cells refer to `test_gen`.
test_gen = test_data_gen.flow_from_dataframe(test_df,
                                             shuffle=False,
                                             **flow_kwargs)


In [None]:
# Create Dataset objects
# ----------------------

num_classes = 3

# Every generator yields (images, one-hot labels) batches with this layout.
# The batch dimension is None because the last batch may be smaller than bs.
batch_types = (tf.float32, tf.float32)
batch_shapes = ([None, img_h, img_w, 3], [None, num_classes])

# The lambdas are evaluated lazily: each generator is only looked up when the
# corresponding dataset is first iterated.

# Training
# --------
train_dataset = tf.data.Dataset.from_generator(lambda: train_gen,
                                               output_types=batch_types,
                                               output_shapes=batch_shapes)

# Repeat
train_dataset = train_dataset.repeat()

# Validation
# ----------
valid_dataset = tf.data.Dataset.from_generator(lambda: valid_gen,
                                               output_types=batch_types,
                                               output_shapes=batch_shapes)

# Repeat
valid_dataset = valid_dataset.repeat()

# Test
# ----
test_dataset = tf.data.Dataset.from_generator(lambda: test_gen,
                                              output_types=batch_types,
                                              output_shapes=batch_shapes)

# Repeat (this previously repeated valid_dataset, so "test" evaluation
# silently ran on the validation data)
test_dataset = test_dataset.repeat()

## Test data augmentation

In [None]:
# Let's test data augmentation
# ----------------------------
import time
import matplotlib.pyplot as plt

%matplotlib inline
    
# Iterator over the training dataset; each next() call yields one batch.
iterator = iter(train_dataset)

In [None]:
# Pull the next batch from the training iterator and preview its first image.
augmented_img, target = next(iterator)
augmented_img = np.array(augmented_img[0])   # First element
augmented_img = augmented_img * 255  # denormalize back to the 0-255 range

plt.imshow(np.uint8(augmented_img))
# Render the figure (the original called plt.plot() with no data, which draws
# nothing and only echoes an empty list as the cell output).
plt.show()

## **CNN Model**

In [None]:
# Show the label -> one-hot index mapping used by the training generator.
train_gen.class_indices

## **Transfer learning**



In [None]:
# Create Model
# ------------
# Load the DenseNet201 convolutional base with ImageNet weights and without
# its classification head
from tensorflow.keras.applications import DenseNet201


densenet = DenseNet201(include_top=False,
                       weights='imagenet',
                       input_shape=(img_h, img_w, 3))
finetuning = True

if not finetuning:
    # Pure feature extraction: the whole base stays frozen
    densenet.trainable = False
else:
    freeze_until = 15 # layer from which we want to fine-tune

    # Freeze only the first `freeze_until` layers; the rest are fine-tuned
    for layer in densenet.layers[:freeze_until]:
        layer.trainable = False

# Classification head stacked on top of the base
model = tf.keras.Sequential([
    densenet,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(units=num_classes, activation='softmax'),
])


In [None]:
# Print the model as a table of layers with output shapes and parameter counts
model.summary()



## Prepare the model for training


In [None]:
# Optimization params
# -------------------

# Learning rate and optimizer
lr = 0.0001
optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)

# Loss: categorical cross-entropy, matching the one-hot labels produced by
# the class_mode="categorical" generators
loss = tf.keras.losses.CategoricalCrossentropy()

# Metrics reported during training and validation
metrics = ['accuracy']

# Compile Model
model.compile(loss=loss,
              optimizer=optimizer,
              metrics=metrics)

## Training with callbacks

The `ReduceLROnPlateau` callback lowers the learning rate when the monitored validation metric stops improving.

In [None]:
import os
from datetime import datetime


cwd = os.getcwd()

# Experiments are stored on Google Drive, which this notebook mounts at
# /content/gdrive. The previous value ('content/drive/My Drive/...') was a
# relative path with a different mount name, so checkpoints and logs ended up
# on the VM's local disk instead.
exps_dir = os.path.join('/content/gdrive/My Drive/Keras3/', 'classification_experiments')
os.makedirs(exps_dir, exist_ok=True)

# A timestamp makes every run write into its own folder
now = datetime.now().strftime('%b%d_%H-%M-%S')

model_name = 'CNN'

exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
os.makedirs(exp_dir, exist_ok=True)

callbacks = []

# Model checkpoint
# ----------------
ckpt_dir = os.path.join(exp_dir, 'ckpts')
os.makedirs(ckpt_dir, exist_ok=True)

# Keep only the weights of the epoch with the lowest validation loss
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
                                                   monitor='val_loss', verbose=1, save_best_only=True, mode='min',
                                                   save_weights_only=True)  # False to save the model directly

callbacks.append(ckpt_callback)

# Visualize Learning on Tensorboard
# ---------------------------------
tb_dir = os.path.join(exp_dir, 'tb_logs')
os.makedirs(tb_dir, exist_ok=True)

# By default shows losses and metrics for both training and validation
tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                             profile_batch=0,
                                             histogram_freq=1)  # if 1 shows weights histograms
callbacks.append(tb_callback)

# Early Stopping
# --------------
early_stop = True
es_patience = 5
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=es_patience)
    callbacks.append(es_callback)

# Learning-rate schedule
# ----------------------
# The plateau patience must be shorter than the early-stopping patience.
# With the previous value (10 > 5) training was normally stopped before the
# learning rate could ever be reduced.
lr_patience = 2
reduce_lr_loss = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1,
                                                      patience=lr_patience, verbose=1, mode='max')
callbacks.append(reduce_lr_loss)

## Fit the model

In [None]:
# The datasets repeat forever, so the length of one epoch has to be given
# explicitly: one pass over each generator.
train_steps = len(train_gen)
valid_steps = len(valid_gen)

model.fit(train_dataset,
          validation_data=valid_dataset,
          epochs=100,  #### set repeat in training dataset
          steps_per_epoch=train_steps,
          validation_steps=valid_steps,
          callbacks=callbacks)

# 1. tensorboard --logdir EXPERIMENTS_DIR --port PORT     <- from terminal
# 2. localhost:PORT   <- in your browser

## Evaluate the model

In [None]:
# Restore the best weights written by the ModelCheckpoint callback of this
# run. The checkpoint prefix is ckpt_dir/cp.ckpt; the previous hard-coded path
# pointed at a directory of an older experiment under a different mount path.
model.load_weights(os.path.join(ckpt_dir, 'cp.ckpt'))

# test_dataset repeats indefinitely, so limit evaluation to one pass over the
# test generator. Returns the loss followed by the compiled metrics.
eval_out = model.evaluate(x=test_dataset,
                          steps=len(test_gen),
                          verbose=0)
eval_out

## Test the model

In [None]:
 # Collect the names of the unlabeled competition test images. os.listdir
 # returns entries in arbitrary order, so they are sorted to make the
 # prediction/submission row order reproducible.
 test_files_df = pd.DataFrame({
     'file': sorted(os.listdir('/content/artificial-neural-networks-and-deep-learning-2020/MaskDataset/test'))
 })
 print("Loaded test files list")

## Generate output file for submission

In [None]:
# Rescale-only pipeline for the unlabeled test images
submission_datagen = ImageDataGenerator(rescale=1./255.)

# No labels are available (y_col/class_mode None), so the generator yields
# image batches only; shuffle=False keeps them aligned with generator.filenames
generator = submission_datagen.flow_from_dataframe(
    dataframe=test_files_df,
    directory="/content/artificial-neural-networks-and-deep-learning-2020/MaskDataset/test/",
    x_col="file",
    y_col=None,
    class_mode=None,
    target_size=img_size,
    batch_size=bs,
    shuffle=False,
    seed=42,
)
print('Submission generator created')

In [None]:
print("Forming submission dataframe...")
# Model.predict consumes the generator directly; predict_generator is deprecated.
class_probs = model.predict(generator)
# Index of the most probable class for every test image
y_pred = np.argmax(class_probs, axis=1)

# The network outputs one-hot positions, not labels. Translate them back with
# the mapping the training generator used, so the submission stays correct
# even if label names and indices do not coincide.
index_to_class = {index: label for label, index in train_gen.class_indices.items()}

# Create submission df (rows follow generator.filenames because the generator
# was created with shuffle=False)
submission_df = pd.DataFrame({
    'Id': generator.filenames,
    'Category': [index_to_class[int(index)] for index in y_pred]})
print(f"Submission dataframe created. Rows:{len(submission_df)}")
  
        

In [None]:
# Write the predictions to the working directory without the dataframe index
submission_path = 'submission.csv'
submission_df.to_csv(submission_path, index=False)
print("Submission completed: written submission.csv")