In [8]:
import os, shutil
import cv2
import zipfile

import seaborn as sns
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from tqdm import tqdm

import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [None]:
def zipdir(src_path: str, zip_file: zipfile.ZipFile):
    ''' add dirctory with relative path to the zip archive

    Args:
      src_path: path to the directory
      zip_file: zip archive
    '''
    for root, dirs, files in os.walk(src_path):
        for file in files:
            file_path = os.path.join(root, file)
            arcname = os.path.relpath(file_path, src_path)
            zip_file.write(file_path, arcname)

def archive_directory(src_dir: str, dst_file: str):
    ''' archive directory

    Args:
      src_dir: path to the directory
      dst_file: name of the archive
    '''
    with zipfile.ZipFile(dst_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
        zipdir(src_dir, zipf)

In [None]:
USE_COLAB = False


MODEL_NAME = 'Template'
SEED = 42
image_size = (224, 224)
batch_size = 32

In [None]:
dataset_dir = os.path.join('../', 'data', 'SmallPreprocessed')

if USE_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    filename = 'smallpreprocessed.zip'

    dest_path = f'{filename}_extracted'
    shutil.rmtree(dest_path, ignore_errors=True)

    with zipfile.ZipFile( os.path.join('/content/drive/MyDrive', filename), 'r') as zip_ref:
        zip_ref.extractall(dest_path)


    dataset_dir = os.path.join('/content/', dest_path, 'data', 'SmallPreprocessed')


In [9]:

datagen = ImageDataGenerator(
    preprocessing_function= tf.keras.applications.resnet.preprocess_input,
    rescale=1./255,
    horizontal_flip=True
)

train_generator = datagen.flow_from_directory(
    os.path.join(dataset_dir, 'train'),
    target_size = image_size,
    batch_size = batch_size,
    class_mode = 'categorical',
    shuffle = True
)

test_generator = datagen.flow_from_directory(
    os.path.join(dataset_dir, 'test'),
    target_size = image_size,
    batch_size = batch_size,
    class_mode = 'categorical',
    shuffle = False
)

val_generator = datagen.flow_from_directory(
    os.path.join(dataset_dir, 'val'),
    target_size = image_size,
    batch_size = batch_size,
    class_mode = 'categorical',
    shuffle = True
)

Found 12000 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.


In [None]:
class_num = list(train_generator.class_indices.keys())
class_num

In [10]:
model = keras.models.Sequential()
model.add(
    tf.keras.applications.VGG16(
        input_shape = (image_size[0], image_size[1], 3),
        include_top = False,
        pooling = 'avg',
        weights = 'imagenet'))

model.add(keras.layers.Flatten())
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(2048, activation='relu'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(1024, activation='relu'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(4, activation='softmax'))
model.layers[0].trainable = False

# Build the model with the correct input shape
model.build(input_shape=(None, image_size[0], image_size[1], 3))
model.summary()

In [11]:
checkpoint_cb = ModelCheckpoint(
    f"model_{MODEL_NAME}_checkpoint.keras",
    save_best_only = True)

early_stopping_cb = EarlyStopping(
    patience = 10,
    restore_best_weights = True
)

model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

In [12]:
EPOCHS = 20

In [14]:
hist = model.fit(
    train_generator,
    epochs = EPOCHS,
    validation_data = val_generator,
    callbacks = [checkpoint_cb, early_stopping_cb]
)

Epoch 1/2


  self._warn_if_super_not_called()
2025-05-27 16:27:33.863561: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 536870912 exceeds 10% of free system memory.
2025-05-27 16:27:34.056185: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 536870912 exceeds 10% of free system memory.
2025-05-27 16:27:34.560417: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 268435456 exceeds 10% of free system memory.
2025-05-27 16:27:34.746455: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 268435456 exceeds 10% of free system memory.


[1m  1/375[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m33:37[0m 5s/step - accuracy: 0.0625 - loss: 2.2934

2025-05-27 16:27:37.110755: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 536870912 exceeds 10% of free system memory.


[1m 18/375[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m17:53[0m 3s/step - accuracy: 0.3775 - loss: 3.5754

KeyboardInterrupt: 

In [None]:
if USE_COLAB:
    model_dir = os.path.join('/content/', 'models')
    zip_name = os.path.join('/content/', f'{MODEL_NAME}_model.zip')
    gdrive_file = os.path.join('/content/drive/MyDrive/', f'{MODEL_NAME}_model.zip')

    os.makedirs(model_dir, exist_ok=True)

    model.save(os.path.join(model_dir, f'{MODEL_NAME}_model.keras'))
    model.export(os.path.join(model_dir, MODEL_NAME))
    archive_directory(model_dir, zip_name)

    if os.path.exists(zip_name):
        os.remove(zip_name)

    archive_directory(model_dir, zip_name)

    shutil.copyfile(zip_name, gdrive_file)

    shutil.rmtree(model_dir, ignore_errors=True)
    os.remove(zip_name)

    print(f"model data was stored on google drive as {gdrive_file}")
else:
    model.save(f'../models/{MODEL_NAME}_model.keras')
    model.export(f'../models/{MODEL_NAME}_model')

In [None]:
score, acc= model.evaluate(test_generator)
print('Val Loss =', score)
print('Val Accuracy =', acc)

In [None]:
hist_=pd.DataFrame(hist.history)

In [None]:
plt.figure(figsize=(15,10))
plt.subplot(1,2,1)
plt.plot(hist_['loss'],label='Train_Loss')
plt.plot(hist_['val_loss'],label='Validation_Loss')
plt.title('Train_Loss & Validation_Loss',fontsize=20)
plt.legend()
plt.subplot(1,2,2)
plt.plot(hist_['accuracy'],label='Train_Accuracy')
plt.plot(hist_['val_accuracy'],label='Validation_Accuracy')
plt.title('Train_Accuracy & Validation_Accuracy',fontsize=20)
plt.legend()
plt.show()

In [None]:
y_test =test_generator.classes
predictions = model.predict(test_generator)
y_pred = np.argmax(predictions,axis=1)
y_test = np.ravel(y_test)
y_pred = np.ravel(y_pred)
df = pd.DataFrame({'Actual': y_test, 'Prediction': y_pred})

In [None]:
CM = confusion_matrix(y_test,y_pred)
CM_percent = CM.astype('float') / CM.sum(axis=1)[:, np.newaxis]
sns.heatmap(CM_percent,fmt='g',center = True,cbar=False,annot=True,cmap='Blues',xticklabels=class_num, yticklabels=class_num)
CM

In [None]:
ClassificationReport = classification_report(y_test,y_pred,target_names=class_num)
print('Classification Report is : ', ClassificationReport)