----------------------------------------------------------------------------------------------------------
***********************************************************************************************************

# <span style="color:Purple"> Computer vision for machine learning Project: "Detecting hand gestures"</span>


#### Task 9: 

#### Author: Lynda Attouche
#### Link: https://colab.research.google.com/drive/1Wcj0j0PrfgYsSPSDUW0kEjo_NNHxTk9e?usp=sharing
*******************************************************************************************
----------------------------------------------------------------------------------------------------------


## README
* Throughout this notebook, no special commands are needed to run the code. Simply run the cells in order.


In the previous tasks, we used image frames directly to train our models and make predictions. In this task, we will instead first extract features from the images using a CNN: a VGG19 network pre-trained on ImageNet, used via transfer learning. We will then build and train a new model that takes these extracted features as its input.

## Importing libraries

In [1]:
from keras.models import Model
from keras.applications.vgg19 import VGG19
from keras.preprocessing import image
from keras.applications.vgg19 import preprocess_input
import sklearn
import matplotlib.pyplot as plt
import numpy as np
import os
import keras
from keras.models import Sequential
import tensorflow as tf
from keras.layers import Dense, Dropout
from google.colab.output import eval_js
from IPython.display import display, Javascript
from keras.models import model_from_json
import numpy as np
from PIL import Image
import io
import cv2
import time
from base64 import b64decode, b64encode
import random

import glob
import numpy as np

In [2]:
# Mount Google Drive at /content/drive; the dataset and output paths used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Folder of the image dataset (balanced, with variability)
dataset_path1 = "/content/drive/MyDrive/ComputerVision/HandGesture_1/"
# Project folder on Drive; the text files and the saved model are written here
path = "/content/drive/MyDrive/ComputerVision/"

## Creating dataset: "Features extraction"

In this section, we load the 224x224 images and feed them to VGG19 so that it can extract their features. We extract the features of the balanced dataset with variability.

In [4]:
def dataset_creation(path,dataset_path,num_dataset):
  """
  Loads the 224x224 gesture images and prepares them as VGG19 input.

  Images are read from the sub-folders 'y/', 'a/', 'e/' and 'k/' of
  dataset_path, loaded as RGB, converted to arrays and passed through the
  VGG19 preprocess_input function. Images that are not 224x224 are skipped.
  Nothing is written to disk.
  @params:
          path: unused, kept so that existing calls keep working
          dataset_path : path to the folder containing the label sub-folders
          num_dataset: unused, kept so that existing calls keep working
  @return:
          data (array): preprocessed images, shape (n_kept, 224, 224, 3)
          labels (list): label letter ('Y', 'A', 'E' or 'K') of each row of data
  """
  # (sub-folder, label letter), in the same order as before: Y, A, E, K
  label_folders = (('y', 'Y'), ('a', 'A'), ('e', 'E'), ('k', 'K'))
  # sorted() makes the row order reproducible; os.listdir order is arbitrary
  listing = [(folder, letter, sorted(os.listdir(dataset_path+folder+'/')))
             for folder, letter in label_folders]

  # Size the buffer for the worst case (every file kept) and trim it at the
  # end, so it can neither overflow nor return uninitialised rows.
  capacity = sum(len(names) for _, _, names in listing)
  data = np.empty((capacity, 224, 224, 3))
  labels = []

  for folder, letter, names in listing:
    for name in names:
      # VGG19 expects 3-channel input, so request RGB explicitly
      im = image.load_img(dataset_path+folder+'/'+name, color_mode='rgb')
      im = image.img_to_array(im)
      if im.shape[:2] != (224, 224): #keep only the 224x224 images
        continue
      data[len(labels)] = preprocess_input(im)
      labels.append(letter)

  # one row per kept image, aligned with labels
  return data[:len(labels)], labels

In [5]:
#creating dataset
# X: preprocessed images, y: their label letters
X,y = dataset_creation(path,dataset_path1,1) #Balanced dataset with variability

In [6]:
#Loading VGG model and weights
vgg_model = VGG19(weights='imagenet') 
# Feature extractor: same input as vgg_model, but its output is taken at the "fc2" layer
model_ext_ftr = Model(inputs=vgg_model.input, outputs=[vgg_model.get_layer("fc2").output]) # Get the fc2 layer instead of the prediction layer trained for ImageNet
model_ext_ftr .summary() # print model

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [7]:
#features extraction
# Run every image through the fc2 feature extractor built above (model_ext_ftr)
output_feats = model_ext_ftr.predict(np.array(X))# Extract features of our images. Size = (nb_images, 4096) 

In [8]:
# Show the shape of the extracted feature matrix
print("Shape of features",output_feats.shape)

Shape of features (672, 4096)


## Saving data in text file

Now that the features are extracted, we will save them in a text file as follows. 

In [9]:
def to_txt(output_feats,y,out_path=None):
  """
    Saves labelled features in a text file, one shuffled sample per line.

    Each line has the form "<label>,<v1>,<v2>,...,<vn>," (note the trailing
    comma). Feature values are written as decimal numbers so that their
    fractional part is kept.
    @params:
        output_feats(array): extracted features to save, one row per sample
        y(array): labels corresponding to features; only the first len(y)
                  rows of output_feats are written
        out_path(string): file to write; defaults to path+"vgg_res.txt",
                  where path is the global project folder
    @return:
          1 if saved
  """
  if out_path is None:
    out_path = path+"vgg_res.txt"

  lines = []
  for i, label in enumerate(y):
    # '%.8g' keeps the fractional part of the features ('%d' truncated them)
    values = ''.join('%.8g,' % float(v) for v in output_feats[i])
    lines.append(str(label)+','+values+'\n')

  #shuffling data in memory, then writing the file once
  random.shuffle(lines)
  with open(out_path, 'w') as f:
    f.writelines(lines)
  return 1

In [10]:
# Write the extracted features and their labels to the text file (returns 1)
to_txt(output_feats,y)

1

## Loading data

In [11]:
def load_dataset(dataset_file_path):
  """
    Loads dataset from text file

    Each non-empty line is expected to look like "<letter>,<v1>,...,<vn>,"
    (the trailing comma is optional). Blank lines are ignored.
    @param:
          dataset_file_path (string): dataset path
    @return:
          samples (array): one row of float values per line of the file
          letters (list): integer labels (A=0, E=1, K=2, Y=3)
    @raises:
          ValueError: if a line starts with a letter other than A, E, K or Y
  """
  label_to_index = {'A':0,'E':1,'K':2,'Y':3} #helps to convert string to int labels
  letters = []
  rows = []
  with open(dataset_file_path) as f:
    for line_no, raw_line in enumerate(f, start=1):
      fields = raw_line.strip().split(",")
      if fields == ['']: #blank line, e.g. at the end of the file
        continue
      if fields[-1] == '': #empty field produced by the trailing comma
        fields.pop()
      label = fields[0]
      if label not in label_to_index:
        raise ValueError("Unknown label %r on line %d of %s"
                         % (label, line_no, dataset_file_path))
      letters.append(label_to_index[label])
      rows.append(fields[1:]) #everything after the letter is the sample
  samples = np.array(rows, dtype=float)
  return samples, letters

In [12]:
# `path` already ends with "/", so concatenating '/vgg_res.txt' produced a
# doubled separator; os.path.join builds the path without it.
samples,letters = load_dataset(os.path.join(path,'vgg_res.txt'))
print('Dataset size:',len(samples))
print('Dataset shape:',samples.shape)

Dataset size: 672
Dataset shape: (672, 4096)


## Splitting dataset

In [13]:
def split(train_ratio, samples, letters):
  """
  Cuts the data into a leading training part and a trailing validation part.

  The order of the samples is kept: the first int(len(samples) * train_ratio)
  entries form the training set, the remaining ones the validation set.
  @params:
      train_ratio (float): proportion of the train set
      samples (array): images
      letters (array): labels
  @return:
      x_train,y_train: images and labels of training set
      x_val,y_val: images and labels of validation set
  """
  cut = int(len(samples) * train_ratio) #index of the first validation sample
  train_part = (samples[:cut], letters[:cut])
  val_part = (samples[cut:], letters[cut:])
  return (*train_part, *val_part)

In [14]:
#splitting
# split() keeps the sample order: the first 70% of the rows go to training, the rest to validation
train_ratio = 0.7  #here 70% of the data will be the trainset and 30% the validation one
x_train,y_train,x_val,y_val = split(train_ratio, samples, letters)

In [15]:
# four gesture classes (the loaded labels are the integers 0..3)
num_classes = 4
x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
# Rescale the inputs by a constant factor of 1/255 so that the network works with smaller values.
# NOTE(review): the inputs here are the features loaded from the text file, not raw pixel
# intensities, so this division does not by itself guarantee a [0, 1] range — confirm the intended scaling.
x_train /= 255
x_val /= 255
# convert class vectors (integer labels) to binary class matrices (one-hot),
# the label format expected by the categorical cross-entropy loss used for training
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes)

## Creating model

In [16]:
# Small fully connected classifier: 4096 input values -> 150 -> dropout -> 50 -> num_classes
model = Sequential() #sequential network
model.add(Dense(150, activation='relu', input_shape=(4096,))) #first hidden layer with 150 neurons, it takes input of shape (4096,), which is the length of one feature vector stored in the text file
#we use relu activation function to avoid gradient vanishing
model.add(Dropout(0.2)) #dropout layer with rate 0.2
model.add(Dense(50, activation='relu')) #dense layer with 50 neurons
model.add(Dense(num_classes, activation='softmax')) #output layer: one unit per class (num_classes); softmax is used because this is a multi-class classification problem

model.summary() #print a summary of the neural network

model.compile(loss='categorical_crossentropy', #categorical cross-entropy is used as loss function since the labels are one-hot vectors over several classes
              optimizer='adam', #Adam is used as the optimizer
              metrics=['accuracy']) #as metric we use accuracy (proportion of correct predictions among all the predictions)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 150)               614550    
                                                                 
 dropout (Dropout)           (None, 150)               0         
                                                                 
 dense_1 (Dense)             (None, 50)                7550      
                                                                 
 dense_2 (Dense)             (None, 4)                 204       
                                                                 
Total params: 622,304
Trainable params: 622,304
Non-trainable params: 0
_________________________________________________________________


## Model training and evaluating

In [17]:
def plot_acc_loss(str,history):
    """
    Plot accuracy and loss of a model, side by side

    The curves are looked up by metric name in history.history instead of by
    position, so the plot stays correct when the key order differs, when
    extra metrics are recorded, or when there is no validation data (only
    the training curve is drawn in that case).
    @params:
            - str: title shown above the two plots (the name shadows the
                   builtin but is kept so existing calls keep working)
            - history: object whose `history` attribute maps metric names to
                       per-epoch values (as returned by model.fit)
    @return:
            plots
    """
    records = history.history
    metric_names = list(records.keys())
    print(metric_names)

    # Name of the training accuracy metric: the usual Keras names first,
    # otherwise the first training metric that is not the loss.
    acc_name = next((k for k in ('accuracy', 'acc') if k in records), None)
    if acc_name is None:
        acc_name = next((k for k in metric_names
                         if k != 'loss' and not k.startswith('val_')), None)
    loss_name = 'loss' if 'loss' in records else None

    fig, ax = plt.subplots(1, 2, figsize=(15,5))
    fig.suptitle(str, fontsize=16)

    # (axis, title/label, training metric name); validation metrics are the
    # training names prefixed with 'val_'
    panels = ((ax[0], 'Accuracy', acc_name), (ax[1], 'Loss', loss_name))
    for axis, title, name in panels:
        legend = []
        if name is not None:
            axis.plot(records[name])
            legend.append('Train')
            if 'val_' + name in records:
                axis.plot(records['val_' + name])
                legend.append('Valid')
        axis.set_title(title)
        axis.set_ylabel(title)
        axis.set_xlabel('Epoch')
        if legend:
            axis.legend(legend, loc='upper left')
    plt.show()

In [None]:
epochs = 40
#as an optimization technique, early stopping is used with respect to the validation loss
#(made explicit here; it is the Keras default): training stops once it has not improved
#for 3 epochs, which helps to stop before the model overfits.
#restore_best_weights=True puts back the weights of the best epoch, so the evaluation below
#(and any later saving) uses the best model instead of the one from the last epoch.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                            patience=3,
                                            restore_best_weights=True)
#training model on the train set
history = model.fit(x_train, y_train,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_val, y_val),
                    callbacks=[callback]
                    )
#evaluating the model using validation set
score = model.evaluate(x_val, y_val, verbose=0)
print('Validation loss: ', score[0])
print('Validation accuracy: ', score[1])

In [None]:
plot_acc_loss('',history) #plotting the accuracy and loss curves of the training run, with an empty title

The results obtained are superior to those obtained in task 7. The model converges faster and reaches 90% accuracy. 
Thus, the feature extraction method shows a significant improvement.  

## Saving model and weights


Now that the model is generated, we save it so that we can test it live as we did in task 8.

In [20]:
# The architecture is saved as JSON and the weights as HDF5, both in the project folder.
# `path` already ends with "/", so path+"/"+name produced a doubled separator;
# os.path.join builds the same location without it.
model_json = model.to_json()
with open(os.path.join(path, "model.json"), "w") as json_file:
     json_file.write(model_json)
model.save_weights(os.path.join(path, "model_weights.h5"))
print("Saved model to disk")

Saved model to disk
