In [1]:
!unzip -q "drive/MyDrive/trainset.zip"

In [2]:
import os
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib import gridspec
from sklearn.preprocessing import LabelEncoder
from scipy.spatial.distance import cdist
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
import keras

In [3]:
def crop_image(image):
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    faces = face_cascade.detectMultiScale(image, 1.3, 5)
    biggest = 0
    if len(faces) != 0:
        for face in faces :  
            area = face[2]*face[3]
            if area > biggest :
                biggest = area
                x = face[0]
                y = face[1]
                w = face[2]
                h = face[3]
        x = 0 if x < 0 else x
        y = 0 if y < 0 else y
        r = max(w, h) / 2
        centerx = x + w / 2
        centery = y + h / 2
        nx = int(centerx - r)
        ny = int(centery - r)
        nr = int(r * 2)
        faceimg = image[ny:ny+nr, nx:nx+nr]
        faceimg = cv2.resize(faceimg,(255,255))
        
        return faceimg
    else:
        return image

In [4]:
def get_dataset():
    src = './trainset'
    X =[]
    y =[]
    X_passport =[]
    y_passport=[]
    for directory in os.listdir(src):
        try:
            directory_1 = os.path.join(src,directory)
            for d in os.listdir(directory_1):
                sub_direct = os.path.join(directory_1,d)
                for image in os.listdir(sub_direct):
                    path = os.path.join(sub_direct,image)
                    img = cv2.imread(path)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    img = cv2.resize(img,(255,255))
                    #if 'script' in image:
                      #X_passport.append(img)
                      #y_passport.append(sub_direct[16:].replace('_',''))
                    #else:
                      #X.append(img)
                      #y.append(sub_direct[16:].replace('_',''))
                    X.append(img)
                    y.append(int(sub_direct[16:20]))
        except:
            pass
    return X,y
            

In [5]:
def get_output_parameters(y):
  i = 0
  y_dict ={}
  for val in y:
    if val not in y_dict.values() :
      y_dict[i] = val
      i += 1
  for i,val in enumerate(y) :
    for key, value in y_dict.items():
      if int(val) == int(value):
        y[i] = key
  return y, y_dict

In [6]:
def get_input_faces(x):
  X=[]
  for img in x:
    cropped_image = crop_image(img)
    X.append(cropped_image)
  return X


In [7]:
def image_parameters_to_array(X):
  X = np.squeeze(np.array(X, dtype= np.float32))
  X = np.resize(X,(X.shape[0],32,32,3))
  return X

In [8]:
def output_parameters_to_array(y):
  Y = np.squeeze(np.array(y, dtype= np.int32))
  Y = np.resize(Y,(Y.shape[0],1))
  return Y

In [9]:
def feature_model():
  model = Sequential()
  model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape= X.shape[1:], padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(64, (3, 3), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(128, (3, 3), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(128, (3, 3), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(128, (3, 3), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(256, (1, 1), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(256, (1, 1), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(256, (1, 1), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(256, (1, 1), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(256, (1, 1), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(128, (3, 3), activation='relu',padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(512, (1, 1), activation= None,padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))  
  model.add(layers.Conv2D(512, (1, 1), activation= None,padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Conv2D(512, (1, 1), activation= None,padding='SAME'))
  model.add(layers.MaxPooling2D((2,2),padding='SAME'))
  model.add(layers.Flatten())
  model.add(layers.Dense(256, activation= 'relu'))
  model.add(layers.Dense(512, activation= 'relu'))
  model.add(layers.Dense(1012, activation= 'softmax'))
  opt = keras.optimizers.Adam(lr = 0.00001)
  model.compile(optimizer= opt,loss='sparse_categorical_crossentropy',metrics =['accuracy'])
  return model

In [10]:
def save_model_and_weights(model):
  !mkdir -p saved_model
  model.save('saved_model/my_model') 
  model.save_weights("model.h5")
   
  model_json = model.to_json()  # Converting the model into JSON format and storing it in "picture_model.json" file.
  with open("picture_model.json", "w") as json_file:
    json_file.write(model_json)

In [11]:
x, y = get_dataset() # Getting the Data from Trainset

In [12]:
y, y_dict = get_output_parameters(y) # Getting output Values and it's respective dictionary

In [14]:
X = get_input_faces(x) #Cropping Faces out of Images using Haarcascade_frontalface_default.xml

In [16]:
X = image_parameters_to_array(X) # Image to numpy array
Y = output_parameters_to_array(y) # Output parameter to numpy array

In [17]:
encoder = LabelEncoder() # Transforming output variable to categories
Y = encoder.fit_transform(y)

In [18]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3,random_state=42) #Splitting te available Data into training and test set

In [19]:
model = feature_model() # Getting the CNN model

In [20]:
model.fit(X_train,Y_train, epochs=100, batch_size= 32) # Training on given training set

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f35f805e160>

In [21]:
model.evaluate(X_test,Y_test) # Evaluating on Test set



[2.470165729522705, 0.4517345428466797]

In [22]:
save_model_and_weights(model) # Saving the model and it's Weights 

INFO:tensorflow:Assets written to: saved_model/my_model/assets


In [23]:
from google.colab import files
files.download('saved_model')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>