# Basic Image Preprocessing, Data Augmentation, CNN and Transfer Learning

In [1]:
# Silence all Python warnings (mainly TensorFlow deprecation notices) so the
# cell outputs below stay readable.
import warnings
warnings.filterwarnings("ignore")

#### Get the current working directory

In [5]:
import os
# Current working directory of the kernel; the data and output folders used
# below are resolved relative to it.
PATH = os.getcwd()

#### Define the data path

In [38]:
# Root folder that holds one sub-folder per class.
DATA_PATH = os.path.join(PATH, 'data/data')

# os.listdir() returns entries in arbitrary, platform-dependent order, yet the
# position of a folder in this list becomes its class index. Sorting makes the
# mapping deterministic (and identical to the alphanumeric order used by
# flow_from_directory); keeping only directories stops stray files such as
# .DS_Store from being treated as a class.
data_dir_list = sorted(
    entry for entry in os.listdir(DATA_PATH)
    if os.path.isdir(os.path.join(DATA_PATH, entry))
)

#### Get the list of folders inside data path

In [39]:
# Show the class folders found under DATA_PATH.
print(data_dir_list)
# Leftover experiment: uncommenting the next line drops the third folder from
# the class list.
#del data_dir_list[2]

['Cats', 'Dogs', 'Horses', 'Humans']


#### Required variables declaration and initialization

In [40]:
# Target image geometry: images are resized to img_rows x img_cols and carry
# num_channel colour channels.
img_rows, img_cols = 224, 224
num_channel = 3

# Training hyper-parameters.
num_epoch, batch_size = 10, 32

# Accumulators filled while the dataset is read from disk.
img_data_list = list()
classes_names_list = list()

#### Read the images and store them in the list

In [41]:
# One-off dependency install: OpenCV provides the `cv2` module imported in the
# next cell. Left commented out so a normal run does not touch the environment.
#!pip install opencv-python

In [42]:
import cv2

# Start from empty accumulators so re-running this cell does not append a
# second copy of the dataset.
img_data_list.clear()
classes_names_list.clear()

# Read every image of every class folder, in data_dir_list order, so that the
# images of one class form one contiguous run in img_data_list.
for dataset in data_dir_list:
    classes_names_list.append(dataset)
    print ('Loading images from {} folder\n'.format(dataset))
    class_dir = os.path.join(DATA_PATH, dataset)
    # Sorted for a reproducible sample order across platforms.
    for img in sorted(os.listdir(class_dir)):
        img_path = os.path.join(class_dir, img)
        input_img = cv2.imread(img_path)
        # cv2.imread() signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside cv2.resize().
        if input_img is None:
            raise ValueError('Could not read image file: {}'.format(img_path))
        # OpenCV decodes to BGR; convert to RGB so these arrays use the same
        # channel order as the images Keras loads in flow_from_directory().
        input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
        # cv2.resize() expects dsize as (width, height) == (cols, rows).
        input_img_resize = cv2.resize(input_img, (img_cols, img_rows))
        img_data_list.append(input_img_resize)

Loading images from Cats folder

Loading images from Dogs folder

Loading images from Horses folder

Loading images from Humans folder



#### Get the number of classes

In [43]:
# One class per dataset folder that was read by the loading loop.
num_classes = len(classes_names_list)
print(num_classes)

4


#### Image preprocessing

In [44]:
import numpy as np

# Stack the list of images into a single float32 array and scale the pixel
# values by 1/255 in place.
img_data = np.array(img_data_list).astype('float32')
np.divide(img_data, 255, out=img_data)

In [45]:
# Shape of the stacked image array; the first axis is the number of images.
print (img_data.shape)

(808, 224, 224, 3)


In [47]:
# Total number of loaded images (length of the first axis).
num_of_samples = img_data.shape[0]
# Shape of a single image; passed later as the model's input_shape.
input_shape = img_data[0].shape
print(input_shape)

(224, 224, 3)


In [30]:
# Build the integer label vector from the data itself instead of hard-coding
# "202 images per class". The loading loop appends images folder by folder in
# data_dir_list order, so class i owns one contiguous run whose length is the
# number of files in its folder.
samples_per_class = [
    len(os.listdir(os.path.join(DATA_PATH, class_dir)))
    for class_dir in data_dir_list
]
classes = np.repeat(
    np.arange(len(samples_per_class), dtype='int64'),
    samples_per_class,
)

# Every loaded image needs exactly one label; a mismatch means the folders
# changed since the images were read and the labels would be misaligned.
if classes.shape[0] != num_of_samples:
    raise ValueError(
        'Label count ({}) does not match number of loaded images ({})'.format(
            classes.shape[0], num_of_samples))

Convert class labels to numeric using one-hot encoding

In [31]:
from keras.utils import to_categorical

# Replace the integer class ids with one-hot rows that have num_classes columns.
classes = to_categorical(classes, num_classes)

Using TensorFlow backend.


#### Shuffle the dataset

In [32]:
from sklearn.utils import shuffle

# Shuffle images and labels together so each image keeps its label; the fixed
# random_state makes the permutation reproducible.
X, Y = shuffle(img_data, classes, random_state=2)

#### Split the dataset

In [33]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the shuffled samples as the test set; the seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [34]:
# Display the shape of the one-hot test labels: (test samples, classes).
y_test.shape

(324, 4)

####  Defining the model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

In [None]:
#### Build the model

# A plain stack of layers: two convolutional blocks followed by a small dense
# classifier. The Sequential container must exist before layers can be added.
model = Sequential()

# Block 1: two 3x3 convolutions with 32 filters, then 2x2 pooling and dropout.
model.add(Conv2D(32, (3,3), activation='relu', input_shape=input_shape))
model.add(Conv2D(32, (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.5))

# Block 2: same layout with 64 filters.
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.5))

# Classifier head: flatten the feature maps, one hidden dense layer, then a
# softmax with one unit per class (num_classes, not a hard-coded 4).
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

####  Compile the model

In [None]:
# Multi-class setup: categorical cross-entropy loss, RMSprop optimizer, and
# accuracy reported during training and evaluation.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=["accuracy"],
)

#### Model Summary

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts) that
# this section's heading announces, instead of dumping the raw weight arrays
# of the first layer.
model.summary()

#### Training/fit the model 

In [None]:
# Train on the training split and report loss/metrics on the held-out split
# after every epoch; the returned History object is kept in `hist`.
hist = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=num_epoch,
    verbose=1,
    validation_data=(X_test, y_test),
)

#### Evaluating the model

In [None]:
# evaluate() returns the loss followed by the compiled metrics (accuracy).
score = model.evaluate(X_test, y_test, batch_size=batch_size)

# Print the first two entries of the result with their labels.
for label, value in zip(('Test Loss:', 'Test Accuracy:'), score):
    print(label, value)

#### Predict and compute the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Softmax outputs for every test image: one row per sample, one column per class.
Y_pred = model.predict(X_test)
print(Y_pred)

In [None]:
# Predicted class id = index of the largest score in each row.
y_pred = Y_pred.argmax(axis=1)
print(y_pred)

In [None]:
# y_test is one-hot, so argmax recovers the true class ids; the first argument
# is the ground truth and the second the predictions.
print(confusion_matrix(np.argmax(y_test, axis=1), y_pred))

#### Saving and loading model and weights

In [None]:
from keras.models import model_from_json, load_model

In [None]:

# Serialize weights to HDF5
# Only the weights are written here; the architecture is not part of this file.
model.save_weights("model.h5")

In [None]:
# Load weights into new model
# load_weights() needs a network with the same architecture to load into, so
# first rebuild an untrained copy of the model from its JSON description.
loaded_model = model_from_json(model.to_json())
loaded_model.load_weights("model.h5")

In [None]:
# Save the complete model (architecture, weights and optimizer state) and load
# it back. The file uses the ".h5" extension so Keras writes and recognises it
# as HDF5 (".hdfs" is not a Keras format), and a name distinct from the
# weights-only file "model.h5" so neither overwrites the other.
model.save("model_full.h5")
loaded_model = load_model("model_full.h5")

## Image Augmentation using ImageDataGenerator class

__ImageDataGenerator__

    Generates batches of tensor image data with real-time data augmentation.

#### Create the Image Data Generator

In [6]:
from keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied on the fly to every training batch.
train_data_gen = ImageDataGenerator(
    # geometric distortions
    rotation_range=20,
    shear_range=0.5,
    zoom_range=0.4,
    # random translations, as a fraction of the image size
    width_shift_range=0.2,
    height_shift_range=0.2,
    # random mirroring along both axes
    horizontal_flip=True,
    vertical_flip=True,
    # fraction of the data reserved for validation when a subset is requested
    validation_split=0.2,
)

Using TensorFlow backend.


In [7]:
# Generator with no augmentation options set, used for the test data.
test_data_gen = ImageDataGenerator()

Path to save Augmented Images

In [None]:
# Folders that receive the augmented images written through `save_to_dir`.
TRN_AUGMENTED = os.path.join(PATH , 'Trn_Augmented_Images')
TST_AUGMENTED = os.path.join(PATH , 'Tst_Augmented_Images')

# Keras does not create `save_to_dir` itself, so make sure both folders exist;
# exist_ok keeps the cell safe to re-run.
for augmented_dir in (TRN_AUGMENTED, TST_AUGMENTED):
    os.makedirs(augmented_dir, exist_ok=True)

#### 1. Using .flow 

In [None]:
# Wrap the in-memory training arrays in an iterator that yields shuffled,
# augmented batches.
ftrain_generator = train_data_gen.flow(
    X_train, y_train, batch_size=batch_size, shuffle=True
)

In [None]:
# Same wrapping for the held-out arrays, using the generator that has no
# augmentation options set.
ftest_generator = test_data_gen.flow(
    X_test, y_test, batch_size=batch_size, shuffle=True
)

In [None]:
# Train on augmented batches. The iterators report their own number of batches
# per pass via len(), which gives integer step counts: the original expression
# X_train.shape[0]/batch_size is a float, and a fixed validation_steps=25 made
# the validation iterator loop over the small held-out set several times.
model.fit_generator(
    ftrain_generator,
    steps_per_epoch=len(ftrain_generator),
    epochs=num_epoch,
    validation_data=ftest_generator,
    validation_steps=len(ftest_generator))

#### Predict and compute the confusion matrix

In [None]:
# Class scores for the held-out images after the augmented training run.
Y_pred = model.predict(X_test)
print(Y_pred)

In [None]:
# Index of the highest score per row is the predicted class id.
y_pred = Y_pred.argmax(axis=1)
print(y_pred)

In [None]:
# True class ids (argmax of the one-hot labels) versus predicted class ids.
print(confusion_matrix(np.argmax(y_test, axis=1), y_pred))

#### 2. Using flow_from_directory()

#### Create the Image Data Generator

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Augmenting generator for images read straight from disk; unlike the
# in-memory arrays these are raw pixel values, hence the 1/255 rescale.
train_data_gen = ImageDataGenerator(
    rescale=1./255,
    # geometric distortions
    rotation_range=20,
    shear_range=0.5,
    zoom_range=0.4,
    # random translations, as a fraction of the image size
    width_shift_range=0.2,
    height_shift_range=0.2,
    # random mirroring along both axes
    horizontal_flip=True,
    vertical_flip=True,
)

In [None]:
# Test images only get the 1/255 rescale, no augmentation.
test_data_gen = ImageDataGenerator(rescale=1./255)

Read training Images in batches for Image Augmentation

In [None]:
# Stream training batches from the class sub-folders of DATA_PATH.
train_generator = train_data_gen.flow_from_directory(
    DATA_PATH,
    # how images are read and batched
    target_size=(img_rows, img_cols),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
    # every generated image is also written to disk for inspection
    save_to_dir=TRN_AUGMENTED,
    save_prefix='TrainAugmented',
    save_format='png',
)

Classes will be automatically assigned based on the folder structure

In [None]:
# Mapping from class folder name to the integer class index assigned to it.
train_generator.class_indices

In [None]:
# NOTE(review): TEST_DATA_PATH is not defined anywhere in the visible notebook.
# It has to point at a held-out folder with the same class sub-folders as
# DATA_PATH before this cell can run.
test_generator = test_data_gen.flow_from_directory(
        TEST_DATA_PATH,
        target_size=(img_rows, img_cols),
        # Same batch size as the rest of the notebook instead of a literal 32.
        batch_size=batch_size,
        class_mode='categorical',
        color_mode='rgb',
        # Keep the file order fixed: with shuffling on, the rows returned by
        # predict_generator() cannot be matched to test_generator.classes or
        # test_generator.filenames.
        shuffle=False,
        save_to_dir=TST_AUGMENTED,
        save_prefix='TestAugmented',
        save_format='png')

In [None]:
# Folder-name -> class-index mapping of the test generator; it should match
# the mapping shown for train_generator.
test_generator.class_indices

Fit the model

In [None]:
# Train from the directory-backed generator and validate on the test generator.
model.fit_generator(
    train_generator,
    epochs=num_epoch,
    validation_data=test_generator,
)

#### Evaluate the model 

Use evaluate_generator

In [None]:
# Evaluate the model on the batches produced by test_generator and keep the result.
fd_model_evaluate = model.evaluate_generator(test_generator, verbose=1)

##### Predict 

Using predict_generator:
    
    Generates predictions for the input samples from a data generator.

In [None]:
# Model outputs for the batches produced by test_generator.
fd_model_predict = model.predict_generator(test_generator, verbose=1)

In [None]:
# Predicted class id for each validation sample: the position of the largest
# score along the last axis.
np.argmax(fd_model_predict, axis=-1)

# Transfer Learning 

##### VGG Architecture

In [None]:
from IPython.display import Image
# Render the architecture diagram; expects 'vgg16.png' in the working directory.
Image(filename='vgg16.png')

In [None]:
from keras.layers import Input, Dense

In [None]:
# Custom_vgg_model_1
# Training the classifier alone
# Symbolic input tensor with the same height, width and channel count as the
# images prepared above.
image_input = Input(shape=(img_rows, img_cols, num_channel))

In [None]:
from keras.applications.vgg16 import VGG16

# Load VGG16 with ImageNet weights on top of our input tensor, keeping the
# fully connected layers. The keyword is `include_top` (lower case);
# `include_Top` is not a recognised argument and is rejected.
model = VGG16(input_tensor=image_input, include_top=True, weights='imagenet')

In [None]:
# Layer-by-layer overview of the loaded network; `model` now refers to VGG16.
model.summary()

In [None]:
# Take the output of the layer named 'fc2' and attach a new softmax layer with
# one unit per class of this dataset.
last_layer = model.get_layer('fc2').output
out = Dense(num_classes, activation='softmax', name='output')(last_layer)

In [None]:
from keras.models import Model

# New model that runs from the shared input tensor to the new softmax output.
custom_vgg_model = Model(image_input, out)
custom_vgg_model.summary()

###### custom_vgg_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [None]:
# The stated goal is to train the classifier alone, so freeze every layer
# except the newly added softmax output.
# NOTE(review): if "classifier" is meant to include the fc1/fc2 layers as
# well, leave those trainable too.
for layer in custom_vgg_model.layers[:-1]:
    layer.trainable = False

# A Keras model must be compiled before fit(); compiling after the trainable
# flags are set also makes the freeze take effect.
custom_vgg_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

custom_vgg_model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epoch, verbose=1, validation_data=(X_test, y_test))

In [None]:
# evaluate() yields the loss and the accuracy metric for the held-out split.
eval_result = custom_vgg_model.evaluate(
    X_test, y_test, batch_size=batch_size, verbose=1
)
loss, accuracy = eval_result

# Accuracy is reported as a percentage.
print("[INFO] loss={:.4f}, accuracy: {:.4f}%".format(loss, accuracy * 100))

In [None]:
# Scores of the custom VGG model for the held-out images. Despite the "train"
# in the variable name, these are predictions on X_test.
Y_train_pred = custom_vgg_model.predict(X_test)

In [None]:
# Collapse each row of scores to the index of its maximum (predicted class id).
y_train_pred = Y_train_pred.argmax(axis=1)
print(y_train_pred)

In [None]:
# True class ids of the held-out set versus the custom VGG model's predictions.
print(confusion_matrix(np.argmax(y_test, axis=1), y_train_pred))