### Imports

In [None]:
import os
import keras
import numpy as np
from glob import glob
from tqdm import tqdm 
from keras.preprocessing.image import ImageDataGenerator
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
from keras import Sequential, Model
from keras.models import load_model
from keras.layers import Dense, GlobalAvgPool2D as GAP, Dropout
from keras.callbacks import TensorBoard, Callback, EarlyStopping,ModelCheckpoint, ReduceLROnPlateau, LambdaCallback
from tensorflow.keras.applications import ResNet50, VGG16
from sklearn.metrics import classification_report, confusion_matrix
import itertools

### GPU Memory fix for TensorFlow

In [None]:
import tensorflow as tf

# Drop any Keras state left over from earlier runs in this kernel.
tf.keras.backend.clear_session()
print("Securing from instant GPU memory overflow")
# Switch on memory growth for every GPU that TensorFlow reports.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

### Dataset path 

In [None]:
# Main path. Defaults to the original location; set the ANIMALS_ROOT
# environment variable to run the notebook on another machine.
root = os.environ.get("ANIMALS_ROOT", "C:/Users/Krzysiek/Desktop/wsb2stopienv3a/magisterka/MODELS")
ds_path = root + "/animals_raw_c"
# Class names are the sub-directories of the dataset folder. Plain files
# (thumbnail caches, notes, ...) are skipped so they are not counted as classes.
class_names = sorted(
    entry for entry in os.listdir(ds_path)
    if os.path.isdir(os.path.join(ds_path, entry))
)
n_classes = len(class_names)
print(f"Class Names: \n{class_names}")
print(f"Total Number of Classes : {n_classes}")

### Dataset visualization

In [None]:
# Number of directory entries found in each class folder, in class_names order.
class_dis = []
for name in class_names:
    class_dis.append(len(os.listdir(ds_path + f"/{name}")))
print(f"Class Distribution : \n{class_dis}")

In [None]:
# Pie chart of the per-class counts with a horizontally centred title.
fig = px.pie(names=class_names, values=class_dis, width=600)
fig.update_layout(title={'text': "Dystrybucja klas", "x": 0.5})
fig.show()

In [None]:
# Bar chart of the per-class counts; the dashed line marks the mean count.
plt.figure(figsize=(10, 5))
ax = sns.barplot(x=class_names, y=class_dis)
ax.grid()
ax.axhline(np.mean(class_dis), color='k', linestyle='--', label="Średnia il. zdjęć")
ax.legend()
plt.show()

### Split monolithic dataset

In [None]:
# The imports cell does not import split-folders, so this call raised
# NameError on a fresh kernel; import it here, next to its only use.
import splitfolders

# Split the class folders into train/val/test with an 80/10/10 ratio.
# The fixed seed keeps the split reproducible between runs.
splitfolders.ratio(ds_path, output = root+"/animals_raw_comb_processed/", seed=101, ratio=(.8, .1, .1))

### Set work path

In [None]:
# Root of the split dataset and its three partitions.
ds_f_path = root + "/animals_raw_comb_processed"

train_path = ds_f_path + '/train'
val_path = ds_f_path + '/val'
test_path = ds_f_path + '/test'
# os.listdir returns entries in arbitrary order; sort so that class_names
# keeps the deterministic order it had before being reassigned here.
class_names = sorted(os.listdir(train_path))
class_names_val = sorted(os.listdir(val_path))
class_names_test = sorted(os.listdir(test_path))

### Set dataloaders

In [None]:
batch_size = 32
img_size = 224

# Augmenting generator: used for the training images only.
datagen = ImageDataGenerator(rescale=1/255.,
                             rotation_range=20,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest')

# Plain generator (rescaling only): used for validation and test images.
test_datagen = ImageDataGenerator(rescale=1/255.)

train_ds = datagen.flow_from_directory(train_path,
                                       target_size=(img_size, img_size),
                                       batch_size=batch_size,
                                       shuffle=True,
                                       class_mode='categorical')

# Validation must not be augmented: random distortions would make val_loss
# noisy and non-repeatable, and the checkpoint / LR-schedule callbacks
# monitor exactly that value.
val_ds = test_datagen.flow_from_directory(val_path,
                                          target_size=(img_size, img_size),
                                          batch_size=batch_size,
                                          shuffle=False,
                                          class_mode='categorical')

# shuffle=False keeps the prediction order aligned with test_ds.classes.
test_ds = test_datagen.flow_from_directory(test_path,
                                           target_size=(img_size, img_size),
                                           batch_size=batch_size,
                                           shuffle=False,
                                           class_mode='categorical')

### Plot for confusion matrix

In [None]:
def plot_confusion_matrix(cm, saveloc, dpi, classes, normalize=False, title='Confusion matrix',cmap=plt.cm.Blues):
    """Draw a confusion matrix on the current figure and save it as ``cm.svg``.

    Args:
        cm: 2-D array of counts; rows are labelled as true classes and columns
            as predicted classes.
        saveloc: Where to save the figure. If it is an existing directory the
            file is written inside it; otherwise it is treated as a path
            prefix and ``"cm.svg"`` is appended to it.
        dpi: Resolution passed to ``plt.savefig``.
        classes: Tick labels, one per row/column, in matrix order.
        normalize: If True, every row is divided by its sum before plotting.
        title: Figure title.
        cmap: Matplotlib colormap used for the cells.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class without samples has a zero row total; keep that row at 0
        # instead of producing NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
        print(cm)

    # Draw after the optional normalisation so that the colours and the
    # numbers written into the cells describe the same values.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)

    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; fractions are limited to two decimals.
    cell_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # White text on dark cells, black text on light cells.
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.xlabel("Przewidziana etykieta")
    plt.ylabel("Prawdziwa etykieta")

    # A directory passed without a trailing separator used to produce a file
    # such as "<run_dir>cm.svg" next to the directory; put it inside instead.
    if os.path.isdir(saveloc):
        target = os.path.join(saveloc, "cm.svg")
    else:
        target = saveloc + "cm.svg"
    plt.savefig(target, dpi=dpi, bbox_inches='tight')

### Models definition

## ResNet50 - learning from scratch

In [None]:
# RESNET50 - SCRATCH - PREP
name = "ResNet50"
save_path = f'{root}/runs/{name}_SCRATCH'
os.makedirs(save_path, exist_ok=True)

# Randomly initialised backbone (weights=None). The input shape follows
# img_size so that it matches the images produced by the generators.
base_model = ResNet50(include_top=False, input_shape=(img_size, img_size, 3), weights=None)
# Training from scratch: the backbone has to stay trainable, otherwise the
# head would be fitted on top of frozen random features.
base_model.trainable = True

# Model: backbone + global average pooling + small dense classifier
resnet50 = Sequential([
    base_model,
    GAP(),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(n_classes, activation='softmax')
], name=name)

# Compile. The generators use class_mode='categorical' (one-hot labels),
# so the loss has to be categorical_crossentropy, not the sparse variant.
resnet50.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
# RESNET50 - SCRATCH - TRAIN
tf.keras.utils.plot_model(resnet50, to_file=save_path+'/model_plot.png', show_shapes=True, show_layer_names=True)

# Callbacks
cbs = [
    # Patience must exceed the ReduceLROnPlateau patience below; with the
    # previous value of 3 training always stopped before the learning rate
    # could be reduced, which made the LR schedule dead code.
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    # Keep only the best weights seen so far.
    ModelCheckpoint(f'{save_path}/{name}.h5', save_best_only=True),
    TensorBoard(log_dir=save_path, histogram_freq=1),
    # Halve the learning rate after 5 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
]

# Train Model
resnet50.fit(
    train_ds, validation_data=val_ds,
    epochs=30, callbacks=cbs
)

In [None]:
# RESNET50 - SCRATCH - RESULTS
# Same spelling as in the PREP cell: "RESNET50" points to a different
# directory on case-sensitive file systems, so the checkpoint was not found.
name = "ResNet50"
save_path = f'{root}/runs/{name}_SCRATCH'

# Evaluate the best checkpoint written during training on the test split.
resnet50.load_weights(f'{save_path}/{name}.h5')
y_val = test_ds.classes
y_pred = resnet50.predict(test_ds)
y_pred = np.argmax(y_pred,axis=1)

# Take the labels from the generator's own name -> index mapping so every
# row/column is labelled with the class it actually represents, instead of
# relying on a hand-written list whose order is not tied to the indices.
cm_plot_labels = sorted(test_ds.class_indices, key=test_ds.class_indices.get)
label_ids = list(range(len(cm_plot_labels)))

cr = classification_report(y_val, y_pred, labels=label_ids, target_names=cm_plot_labels)
print(cr)

cm = confusion_matrix(y_true=y_val, y_pred=y_pred, labels=label_ids)

# Trailing separator so that cm.svg is written inside the run directory.
plot_confusion_matrix(cm=cm, dpi = 200, classes=cm_plot_labels, title='Macierz błędu', saveloc=save_path + "/")

## ResNet50 - transfer learning

In [None]:
# RESNET50 - TL - PREP
name = "ResNet50"
save_path = f'{root}/runs/{name}_TL2'
os.makedirs(save_path, exist_ok=True)

# Backbone initialised with ImageNet weights.
base_model = ResNet50(include_top=False, input_shape=(img_size,img_size,3), weights='imagenet')
# The backbone stays trainable, i.e. the whole network is fine-tuned
# (the weights are NOT frozen here).
base_model.trainable = True

# Model: backbone + global average pooling + small dense classifier
resnet50t = Sequential(name=name)
resnet50t.add(base_model)
resnet50t.add(GAP())
resnet50t.add(Dense(256, activation='relu'))
resnet50t.add(Dropout(0.2))
resnet50t.add(Dense(n_classes, activation='softmax'))

# Compile with the one-hot variant of cross-entropy
resnet50t.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# RESNET50 - TL - TRAIN
# Callbacks (no early stopping in this run: training uses all epochs)
cbs = [
    # Keep only the best weights seen so far.
    ModelCheckpoint(filepath=f'{save_path}/{name}.h5', save_best_only=True),
    # TensorBoard logs go into the run directory.
    TensorBoard(log_dir=save_path, histogram_freq=1),
    # Halve the learning rate after 5 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6),
]

# Train Model
resnet50t.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=cbs,
)

In [None]:
# RESNET50 - TL - RESULTS
# Must point at the directory the PREP/TRAIN cells wrote to
# ("ResNet50_TL2"); the previous "RESNET50_TL" path did not contain the
# checkpoint produced by this run.
name = "ResNet50"
save_path = f'{root}/runs/{name}_TL2'

# Evaluate the best checkpoint written during training on the test split.
resnet50t.load_weights(f'{save_path}/{name}.h5')
y_val = test_ds.classes
y_pred = resnet50t.predict(test_ds)
y_pred = np.argmax(y_pred,axis=1)

# Take the labels from the generator's own name -> index mapping so every
# row/column is labelled with the class it actually represents, instead of
# relying on a hand-written list whose order is not tied to the indices.
cm_plot_labels = sorted(test_ds.class_indices, key=test_ds.class_indices.get)
label_ids = list(range(len(cm_plot_labels)))

cr = classification_report(y_val, y_pred, labels=label_ids, target_names=cm_plot_labels)
print(cr)

cm = confusion_matrix(y_true=y_val, y_pred=y_pred, labels=label_ids)

# Trailing separator so that cm.svg is written inside the run directory.
plot_confusion_matrix(cm=cm, dpi = 200, classes=cm_plot_labels, title='Macierz błędów', saveloc=save_path + "/")

## VGG16 - learning from scratch

In [None]:
# VGG16 - SCRATCH - PREP
name = "VGG16"
save_path = f'{root}/runs/{name}_SCRATCH'
os.makedirs(save_path, exist_ok=True)

# Randomly initialised VGG16 base (weights=None), trained from scratch.
base_model = VGG16(include_top = False,
                   weights = None,
                   input_shape = (img_size,img_size,3))

# Classification head: pooling followed by two wide dense layers with dropout.
last = base_model.output
x = GAP()(last)
x = Dense(2048, activation='relu')(x)
x = Dropout(0.45)(x)
x = Dense(2048, activation='relu')(x)
x = Dropout(0.45)(x)
# Output size follows the dataset (n_classes) instead of a hard-coded 10,
# consistent with the ResNet50 models.
x = Dense(n_classes, activation='softmax')(x)
vgg16 = Model(inputs=base_model.input, outputs=x)

# Compile
vgg16.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
# VGG16 - SCRATCH - TRAIN
# Save a diagram of the architecture next to the run artefacts.
tf.keras.utils.plot_model(
    vgg16,
    to_file=save_path+'/model_plot.png',
    show_shapes=True,
    show_layer_names=True,
)

# Callbacks (no early stopping in this run: training uses all epochs)
cbs = [
    # Keep only the best weights seen so far.
    ModelCheckpoint(filepath=f'{save_path}/{name}.h5', save_best_only=True),
    # TensorBoard logs go into the run directory.
    TensorBoard(log_dir=save_path, histogram_freq=1),
    # Halve the learning rate after 5 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6),
]

# Train Model
vgg16.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=cbs,
)

In [None]:
# VGG16 - SCRATCH - RESULTS
name = "VGG16"
save_path = f'{root}/runs/{name}_SCRATCH'

# Evaluate the best checkpoint written during training on the test split.
vgg16.load_weights(f'{save_path}/{name}.h5')
y_val = test_ds.classes
# Predict with the VGG16 model itself; this cell previously called
# resnet50.predict and therefore reported the ResNet50 results again.
y_pred = vgg16.predict(test_ds)
y_pred = np.argmax(y_pred,axis=1)

# Take the labels from the generator's own name -> index mapping so every
# row/column is labelled with the class it actually represents, instead of
# relying on a hand-written list whose order is not tied to the indices.
cm_plot_labels = sorted(test_ds.class_indices, key=test_ds.class_indices.get)
label_ids = list(range(len(cm_plot_labels)))

cr = classification_report(y_val, y_pred, labels=label_ids, target_names=cm_plot_labels)
print(cr)

cm = confusion_matrix(y_true=y_val, y_pred=y_pred, labels=label_ids)

# Trailing separator so that cm.svg is written inside the run directory.
plot_confusion_matrix(cm=cm, dpi = 200, classes=cm_plot_labels, title='Macierz błędów', saveloc=save_path + "/")

## VGG16 - transfer learning

In [None]:
# VGG16 - TL - PREP
name = "VGG16"
save_path = f'{root}/runs/{name}_TL'
os.makedirs(save_path, exist_ok=True)

# Pretrained base. Transfer learning needs the ImageNet weights: with
# weights=None the frozen base below would only hold random filters.
# NOTE(review): the generators feed images rescaled with 1/255; confirm
# whether the pretrained weights expect the model's own preprocess_input.
base_model = VGG16(include_top = False,
                   weights = 'imagenet',
                   input_shape = (img_size,img_size,3))
# Freeze the base so that only the new head is trained.
base_model.trainable = False

# Classification head: pooling followed by two wide dense layers with dropout.
last = base_model.output
x = GAP()(last)
x = Dense(2048, activation='relu')(x)
x = Dropout(0.45)(x)
x = Dense(2048, activation='relu')(x)
x = Dropout(0.45)(x)
# Output size follows the dataset (n_classes) instead of a hard-coded 10,
# consistent with the ResNet50 models.
x = Dense(n_classes, activation='softmax')(x)
vgg16t = Model(inputs=base_model.input, outputs=x)

# Compile
vgg16t.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
# VGG16 - TL - TRAIN
# Save a diagram of the architecture next to the run artefacts.
tf.keras.utils.plot_model(
    vgg16t,
    to_file=save_path+'/model_plot.png',
    show_shapes=True,
    show_layer_names=True,
)

# Callbacks (no early stopping in this run: training uses all epochs)
cbs = [
    # Keep only the best weights seen so far.
    ModelCheckpoint(filepath=f'{save_path}/{name}.h5', save_best_only=True),
    # TensorBoard logs go into the run directory.
    TensorBoard(log_dir=save_path, histogram_freq=1),
    # Halve the learning rate after 5 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6),
]

# Train Model
vgg16t.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=cbs,
)

In [None]:
# VGG16 - TL - RESULTS
name = "VGG16"
save_path = f'{root}/runs/{name}_TL'

# Evaluate the best checkpoint written during training on the test split.
vgg16t.load_weights(f'{save_path}/{name}.h5')
y_val = test_ds.classes
y_pred = vgg16t.predict(test_ds)
y_pred = np.argmax(y_pred,axis=1)

# Take the labels from the generator's own name -> index mapping so every
# row/column is labelled with the class it actually represents, instead of
# relying on a hand-written list whose order is not tied to the indices.
cm_plot_labels = sorted(test_ds.class_indices, key=test_ds.class_indices.get)
label_ids = list(range(len(cm_plot_labels)))

cr = classification_report(y_val, y_pred, labels=label_ids, target_names=cm_plot_labels)
print(cr)

cm = confusion_matrix(y_true=y_val, y_pred=y_pred, labels=label_ids)

# Trailing separator so that cm.svg is written inside the run directory.
plot_confusion_matrix(cm=cm, dpi = 200, classes=cm_plot_labels, title='Macierz błędów', saveloc=save_path + "/")