In [1]:

#VGG 
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [2]:
#importing lib
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.models import Model, load_model, Sequential
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization




In [3]:
def define_paths(dir):
    filepaths = []
    labels = []
    folds = os.listdir(dir)
    for fold in folds:
        foldpath = os.path.join(dir, fold)
        filelist = os.listdir(foldpath)
        for file in filelist:
            fpath = os.path.join(foldpath, file)
            filepaths.append(fpath)
            labels.append(fold)
    return filepaths, labels


def define_df(files, classes):
    Fseries = pd.Series(files, name= 'filepaths')
    Lseries = pd.Series(classes, name='labels')
    return pd.concat([Fseries, Lseries], axis= 1)


def create_df(dir):
    files, classes = define_paths(dir)
    df = define_df(files, classes)
    return df

In [4]:
#training/test
def create_gens(train_df, valid_df, test_df, batch_size):
    img_size = (224, 224)
    channels = 3
    img_shape = (img_size[0], img_size[1], channels)
    ts_length = len(test_df)
    def scalar(img):
        return img
    tr_gen = ImageDataGenerator(preprocessing_function= scalar, horizontal_flip= True)
    ts_gen = ImageDataGenerator(preprocessing_function= scalar)
    train_gen = tr_gen.flow_from_dataframe( train_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= 'rgb', shuffle= True, batch_size= 40)
    valid_gen = ts_gen.flow_from_dataframe( valid_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= 'rgb', shuffle= True, batch_size= 40)
    test_gen = ts_gen.flow_from_dataframe( test_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= 'rgb', shuffle= False, batch_size= 40)
    return train_gen, valid_gen, test_gen


dir = 'DATASET/TRAIN'

df = create_df(dir)
train_df, test_valid_df = train_test_split(df, test_size=0.2, random_state=42)
test_df, valid_df = train_test_split(test_valid_df, test_size=0.5, random_state=42)

# Get Generators
batch_size = 32
train_gen, valid_gen, test_gen = create_gens(train_df, valid_df, test_df, batch_size)

Found 1600 validated image filenames belonging to 10 classes.
Found 200 validated image filenames belonging to 10 classes.
Found 200 validated image filenames belonging to 10 classes.


In [5]:
train_df

Unnamed: 0,filepaths,labels
968,DATASET/TRAIN\EOS\EOS_05180.jpg,EOS
240,DATASET/TRAIN\BAS\BAS_00061.jpg,BAS
819,DATASET/TRAIN\EOS\EOS_05021.jpg,EOS
692,DATASET/TRAIN\EBO\EBO_26109.jpg,EBO
420,DATASET/TRAIN\BLA\BLA_11025.jpg,BLA
...,...,...
1130,DATASET/TRAIN\HAC\HAC_00140.jpg,HAC
1294,DATASET/TRAIN\LYT\LYT_19111.jpg,LYT
860,DATASET/TRAIN\EOS\EOS_05065.jpg,EOS
1459,DATASET/TRAIN\MMZ\MMZ_02071.jpg,MMZ


In [6]:
test_df

Unnamed: 0,filepaths,labels
1198,DATASET/TRAIN\HAC\HAC_00214.jpg,HAC
720,DATASET/TRAIN\EBO\EBO_26141.jpg,EBO
1381,DATASET/TRAIN\LYT\LYT_19220.jpg,LYT
63,DATASET/TRAIN\ART\ART_19079.jpg,ART
630,DATASET/TRAIN\EBO\EBO_26034.jpg,EBO
...,...,...
383,DATASET/TRAIN\BAS\BAS_00252.jpg,BAS
620,DATASET/TRAIN\EBO\EBO_26023.jpg,EBO
1364,DATASET/TRAIN\LYT\LYT_19198.jpg,LYT
1510,DATASET/TRAIN\MMZ\MMZ_02128.jpg,MMZ


In [7]:
test_df.shape

(200, 2)

In [8]:
train_df.shape

(1600, 2)

In [9]:
valid_df.shape

(200, 2)

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint

# Assuming train_gen and valid_gen are your image data generators

img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)
class_count = len(list(train_gen.class_indices.keys()))

# Create pre-trained model
base_model = tf.keras.applications.VGG16(include_top=False, weights="imagenet", input_shape=img_shape, pooling='max')
for layer in base_model.layers:
    layer.trainable = False
model = Sequential([
    base_model,
    BatchNormalization(),
    Dense(256, activation='relu'),
    Dropout(0.3),
     Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(class_count, activation='softmax')
])

optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Define filepath to save the best model
filepath = 'best_model_vgg.h5'

# Create ModelCheckpoint callback to save the best model based on validation accuracy
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
# Train the model with the added callback
history = model.fit(
    x=train_gen,
    epochs=5,
    verbose=1,
    validation_data=valid_gen,
    callbacks=[checkpoint]
)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/5


Epoch 1: val_accuracy improved from -inf to 0.17500, saving model to best_model_vgg.h5


  saving_api.save_model(


Epoch 2/5
Epoch 2: val_accuracy improved from 0.17500 to 0.29000, saving model to best_model_vgg.h5
Epoch 3/5
Epoch 3: val_accuracy improved from 0.29000 to 0.34500, saving model to best_model_vgg.h5
Epoch 4/5
Epoch 4: val_accuracy improved from 0.34500 to 0.40000, saving model to best_model_vgg.h5
Epoch 5/5
Epoch 5: val_accuracy improved from 0.40000 to 0.40500, saving model to best_model_vgg.h5


In [11]:
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, Average



model= load_model('best_model_vgg.h5')

ts_length = len(test_df)
test_batch_size = test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80]))
test_steps = ts_length // test_batch_size
train_score = model.evaluate(train_gen, steps= test_steps, verbose= 1)
valid_score = model.evaluate(valid_gen, steps= test_steps, verbose= 1)
test_score = model.evaluate(test_gen, steps= test_steps, verbose= 1)

print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Validation Loss: ", valid_score[0])
print("Validation Accuracy: ", valid_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])

Train Loss:  0.7819350361824036
Train Accuracy:  0.8125
--------------------
Validation Loss:  1.562109112739563
Validation Accuracy:  0.41874998807907104
--------------------
Test Loss:  1.5239102840423584
Test Accuracy:  0.512499988079071
