## Import libraries.

In [1]:
import os                           # For file processing.
import csv                          # For reading csv files.
import shutil
from keras.models import Model
#from keras.optimizers import Adam
from keras.optimizers import adam_v2

from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense, Dropout, Flatten
from pathlib import Path
import numpy as np

In [2]:
from google.colab import drive
drive.mount ('/content/gdrive')

!unzip gdrive/My\ Drive/data/archive.zip > /dev/null

Mounted at /content/gdrive


In [14]:
#Set directory values
#split_size=0.1
#print('split_size =',split_size)

# Get the current directory.
current_dir = os.getcwd()

# the data will be written to a working directory.
working_dir    = os.path.join(current_dir,'data')
train_data_dir = os.path.join(current_dir,'Train')
test_data_dir  = os.path.join(current_dir,'Test')
meta_data_dir  = os.path.join(current_dir,'Meta')

print('current_dir      = ',current_dir)
print('working_dir      = ',working_dir)
print('train_data_dir   = ',train_data_dir)
print('test_data_dir    = ',test_data_dir)
print('meta_data_dir    = ',meta_data_dir)
meta_csv_file = (os.path.join(current_dir,'Meta.csv'))
print('meta_csv_file    = ',meta_csv_file)


current_dir      =  /content
working_dir      =  /content/data
train_data_dir   =  /content/Train
test_data_dir    =  /content/Test
meta_data_dir    =  /content/Meta
meta_csv_file    =  /content/Meta.csv


### To properly reference the 43 classes in teh metadata, create a directory structure to allow the data generators to match the classes to the test, validation and training datasets.

In [5]:
with open(meta_csv_file,"r") as csvfile:
    r= csv.reader(csvfile,delimiter =',')
    for i,row in enumerate(r):
        if i == 0: 
            continue
        label = row[1]
        #print('label',label)
        img_name = label + '.png'
        #print('img_name',img_name)

        # Creayte the sub-folder if it does not exist
        dest = os.path.join(meta_data_dir,label)
        if not os.path.isdir(dest):
            os.makedirs(dest)
        # Copy files to the relevant working directory   
        to_move = os.path.join(meta_data_dir,img_name)
        #print('to_move',to_move)
        #print('dest',dest)
        shutil.move(to_move,dest)

In [6]:
class_subset = sorted(os.listdir(meta_data_dir))[:43] # Using only the first 10 classes
print('class_subset    = ',class_subset)

class_subset    =  ['.~lock.ClassesInformation.ods#', '.~lock.ClassesInformationStrong.ods#', '0', '1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41', '42', '5', '6', '7']


### Create a training data generator to resize the images to the VGG16 expected size and to split the traininf data into:
  -  A training set
  -  A validation set

Use an 85% to 15% split for now.

In [8]:
BATCH_SIZE = 64
# The default input size for this model is 224x224.
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
dim = (IMAGE_WIDTH, IMAGE_HEIGHT)

train_generator = ImageDataGenerator(rotation_range=90, 
                                     brightness_range=[0.1, 0.7],
                                     width_shift_range=0.5, 
                                     height_shift_range=0.5,
                                     horizontal_flip=True, 
                                     vertical_flip=True,
                                     validation_split=0.15,
                                     preprocessing_function=preprocess_input) # VGG16 preprocessing

test_generator = ImageDataGenerator(preprocessing_function=preprocess_input) # VGG16 preprocessing

### Create a training data generator to resize the images to the VGG16 expected size and to split the traininf data into:

In [9]:
#print(train_data_dir)
traingen = train_generator.flow_from_directory(train_data_dir,
                                               target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
                                               class_mode='categorical',
                                               classes=class_subset,
                                               subset='training',
                                               batch_size=BATCH_SIZE, 
                                               shuffle=True,
                                               seed=42)

/content/Train
Found 30888 images belonging to 43 classes.


### Create a training data generator to resize the images to the VGG16 expected size and to split the traininf data into:

In [10]:
validgen = train_generator.flow_from_directory(train_data_dir,
                                               target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
                                               class_mode='categorical',
                                               classes=class_subset,
                                               subset='validation',
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               seed=42)

Found 5441 images belonging to 43 classes.


In [11]:
### Create a test dataset.
test_csv_file = (os.path.join(current_dir,'Test.csv'))
print('test_csv_file    = ',test_csv_file)


test_csv_file    =  /content/Test.csv


In [17]:
#prepare_test(source_test_folder,test_csv_file)
with open(test_csv_file,"r") as csvfile:
    r= csv.reader(csvfile,delimiter =',')
    for i,row in enumerate(r):
        if i == 0: 
            continue
        label = row[-2]
        #print('label =',label)
        img_name = row[-1]
        #print('img_name =',img_name)
        # Creayte the sub-folder if it does not exist
        #print('test_data_dir =',test_data_dir)
        dest = os.path.join(test_data_dir,label)
        if not os.path.isdir(dest):
            os.makedirs(dest)
        print('dest          =',dest)

        # Copy files to the relevant working directory   
        to_move = os.path.join(current_dir,img_name)
        print('to_move       =',to_move)
        shutil.move(to_move,dest)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
dest          = /content/Test/12
to_move       = /content/Test/10130.png
dest          = /content/Test/33
to_move       = /content/Test/10131.png
dest          = /content/Test/1
to_move       = /content/Test/10132.png
dest          = /content/Test/31
to_move       = /content/Test/10133.png
dest          = /content/Test/35
to_move       = /content/Test/10134.png
dest          = /content/Test/3
to_move       = /content/Test/10135.png
dest          = /content/Test/7
to_move       = /content/Test/10136.png
dest          = /content/Test/21
to_move       = /content/Test/10137.png
dest          = /content/Test/10
to_move       = /content/Test/10138.png
dest          = /content/Test/2
to_move       = /content/Test/10139.png
dest          = /content/Test/31
to_move       = /content/Test/10140.png
dest          = /content/Test/10
to_move       = /content/Test/10141.png
dest          = /content/Test/28
to_move       = /content/Test/

In [None]:
testgen = test_generator.flow_from_directory(test_data_dir,
                                             target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
                                             class_mode=None,
                                             classes=class_subset,
                                             batch_size=1,
                                             shuffle=False,
                                             seed=42)

Found 12630 images belonging to 43 classes.


Using Pre-trained Layers for Feature Extraction
In this section, we'll demonstrate how to perform Transfer Learning without fine-tuning the pre-trained layers. Instead, we'll first use pre-trained layers to process our image dataset and extract visual features for prediction. Then we are creating a Fully-connected layer and Output layer for our image dataset. Finally, we will train these layers with backpropagation.