In [None]:

# Colab-only setup: attach Google Drive at /content/drive so the dataset folders
# referenced later (under /content/drive/MyDrive) are readable. force_remount=True
# re-mounts even when a previous mount is still present in this runtime.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [None]:
from __future__ import print_function, division

In [None]:
# !nvidia-smi


In [None]:
import sys, glob, subprocess
from itertools import product

import shutil
from IPython.display import Image as JImage

import numpy as np
import cv2
import dlib
from PIL import Image
from skimage import io
from matplotlib.gridspec import GridSpec
from matplotlib import pyplot as plt

import tensorflow as tf

import keras
from keras.layers import Dense
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array,array_to_img, load_img
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input, decode_predictions
# from tensorflow.keras.applications.resnet50.ResNet50 import preprocess_input, decode_predictions


In [None]:
# Number of images per batch, shared by the training and validation generators.
batch_size = 32

In [None]:
# Keras image generator: random geometric augmentation followed by the ResNet50
# input preprocessing function.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',  # pixels uncovered by a transform are filled from the nearest edge
)
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_options
)

In [None]:
# Dataset locations on the mounted Google Drive (one sub-folder per class).
train_dir = '/content/drive/MyDrive/eye_dataset/train'
test_dir = '/content/drive/MyDrive/eye_dataset/test'

In [None]:
# Training batches: images are read from the class sub-folders of `train_dir`,
# resized to 224x224, augmented by `datagen` and served in shuffled order with
# one-hot ("categorical") labels.
train_generator = datagen.flow_from_directory(
    directory=train_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
)

Found 1559 images belonging to 2 classes.


In [None]:
# Generator for validation data.
# Validation images must be scored as they are: a dedicated generator applies only
# the ResNet50 preprocessing (no random rotation/shift/shear/zoom/flip), otherwise
# every epoch would be validated on a different, randomly distorted image set.
# shuffle=False keeps batch order aligned with `validation_generator.filenames`
# and `.classes`, which is needed for any later per-image evaluation.
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
validation_generator = validation_datagen.flow_from_directory(
        test_dir,  # target directory
        target_size=(224, 224),  # all images will be resized to (224,224)
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
)

Found 864 images belonging to 2 classes.


In [None]:
# ResNet50 with ImageNet weights, including its original classification head.
# input_tensor, input_shape and pooling are left at their defaults (None), which
# yields the 224x224x3 input shown in the summary below.
resnet50 = ResNet50(weights="imagenet", include_top=True)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5


In [None]:
# Print the layer-by-layer architecture (output shapes, parameter counts and
# connections) of the pretrained network.
resnet50.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                           

In [None]:
# Swap the 1000-way ImageNet classifier for a 2-way softmax: the new Dense layer is
# attached to the output of ResNet50's second-to-last layer, and the resulting
# graph (same input as ResNet50) becomes the model that is trained below.
penultimate_features = resnet50.layers[-2].output
out = Dense(units=2, activation="softmax")(penultimate_features)
model_facex = Model(inputs=resnet50.input, outputs=out, name="EyeInTheSky")
model_facex.compile(
    optimizer='rmsprop',
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)
# No layer is frozen here, so training updates every layer of the network.

In [None]:
# Print the architecture of the re-headed model ("EyeInTheSky") to confirm the
# new output layer is wired in.
model_facex.summary()

Model: "EyeInTheSky"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                        

In [None]:
# Scratch calculation: number of full batches that fit in 486 samples.
# The trailing comma turns the cell value into a 1-tuple, hence the `(15,)` output.
# NOTE(review): 486 matches neither generator's image count reported above
# (1559 train / 864 test) -- presumably left over from an earlier dataset; confirm or delete.
486//batch_size,

(15,)

In [None]:
# Train the model for 8 epochs; every epoch makes one full pass over the training
# generator and one full pass over the validation generator.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated (it emits
# a warning on this runtime and is absent from newer Keras releases).
model_facex.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=8,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
)

  model_facex.fit_generator(train_generator,


Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7fba2f77ba30>

In [None]:
# Save the complete model (layers, training configuration and weights) to a single
# file, "FINAL1.h5", resolved relative to the current working directory.
# NOTE(review): a later cell reloads it as "/content/FINAL1.h5", so this presumably
# runs with /content as the working directory -- confirm.
model_facex.save("FINAL1.h5")

In [None]:
# Reload the saved file as a check that it can be read back. The returned model is
# not assigned to a name, so this cell only displays its repr; the prediction
# functions below call `load_model` again themselves.
keras.models.load_model("FINAL1.h5")

<keras.engine.functional.Functional at 0x7fba2ffa2650>

In [None]:
def predict_on_video(model_path, in_video_path, out_video_path):
    """
        Classifies every frame of the video at `in_video_path` as "open"/"close" and saves a copy
        annotated with the predicted label and its probability to `out_video_path` (XVID codec).

        `model_path` - path to Keras model data (containing model layer data, model config data and weights)
        `in_video_path` - path of the video to read
        `out_video_path` - path of the annotated video to write

        The whole frame is fed to the model; frames are predicted in batches of roughly one
        second of video.

        Raises IOError if the input video cannot be opened.
    """

    def label_img(img, label, loc=(3,50)): #annotates the image (in place) with the predicted label (close/open)
        return cv2.putText(img, label, loc, cv2.FONT_HERSHEY_SIMPLEX, 3.4, (0, 255, 0), 2, cv2.LINE_AA)

    def to_model_input(frame_bgr):
        # OpenCV decodes frames as BGR, while `preprocess_input` expects RGB (it does the
        # RGB->BGR flip itself), which is also what the Keras generators fed during training.
        # Without this conversion the colour channels reach the network swapped.
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(frame_rgb, (224, 224))
        return preprocess_input(resized.astype(np.float32))

    # Index -> label. Assumes the training generator mapped "close" to 0 and "open" to 1
    # (Keras orders class folders alphanumerically) -- verify with train_generator.class_indices.
    classes={1:"open",0:"close"}
    model=keras.models.load_model(model_path)

    # setup to read video
    vid = cv2.VideoCapture(in_video_path)
    if not vid.isOpened():
        vid.release()
        raise IOError("Could not open input video: %s" % in_video_path)

    vidout = None
    try:
        # setup to write video
        h,w=map(int, [vid.get(cv2.CAP_PROP_FRAME_HEIGHT), vid.get(cv2.CAP_PROP_FRAME_WIDTH)])
        fps=vid.get(cv2.CAP_PROP_FPS)
        if not fps > 0: # container reports no (or an invalid) frame rate; fall back to a common default
            fps=30.0
        frames_per_batch=max(int(fps), 1) # about 1 second of video per prediction batch
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        # keep the exact source frame rate so the output has the same duration as the input
        vidout = cv2.VideoWriter(out_video_path,fourcc, fps, (w, h), True)

        # Read until the decoder runs out of frames instead of trusting the reported frame
        # count: the count can be off, which previously either dropped trailing frames or
        # crashed on an empty batch.
        while True:
            images=list()
            for _ in range(frames_per_batch):
                code,image=vid.read()
                if not code: #end of video
                    break
                images.append(image)
            if not images:
                break

            processed_images=np.stack([to_model_input(img) for img in images], axis=0)
            preds=model.predict(processed_images, verbose=0)
            for image,pred in zip(images, preds):
                best=int(np.argmax(pred))
                label=classes[best] + " (%0.2f)" %(pred[best])
                vidout.write(label_img(image, label)) #write annotated original frame to video file

            if len(images)<frames_per_batch: #short batch means the video ended
                break
    finally:
        #close input and output video files even if prediction fails midway
        vid.release()
        if vidout is not None:
            vidout.release()
    return

# Run the whole-frame predictor on the test clip using the model saved above.
model_path, in_video_path, out_video_path = (
    "/content/FINAL1.h5",
    "/content/test_vedio2.mp4",
    "test_vedio2_labbled.avi",
)
predict_on_video(
    model_path=model_path,
    in_video_path=in_video_path,
    out_video_path=out_video_path,
)



In [None]:
_FACE_DETECTOR = None  # dlib frontal-face detector, created on first use and then reused


def _get_face_detector():
    """Returns the shared dlib frontal-face detector, building it only once."""
    global _FACE_DETECTOR
    if _FACE_DETECTOR is None:
        _FACE_DETECTOR = dlib.get_frontal_face_detector()
    return _FACE_DETECTOR


def detect_faces(image):
    """
        Using dlib library to locate faces in an image, as the model was trained on faces.

        `image` - image array handed to the dlib detector unchanged
        Returns a list of `(left, top, right, bottom)` tuples, one per detected face
        (empty list when no face is found). The image is upsampled once before detection.

        The detector object is cached between calls: this function is called once per video
        frame, and rebuilding the detector every time is wasted work.

        It would be better to use `dlib.cnn_face_detection_model_v1` but slower
        (http://dlib.net/cnn_face_detector.py.html). That detector needs
        `mmod_human_face_detector.dat` (http://dlib.net/files/mmod_human_face_detector.dat.bz2)
        and returns objects whose rectangle is under `.rect` (e.g. `x.rect.left()`).
    """
    detected_faces=_get_face_detector()(image,1)
    return [(x.left(),x.top(),x.right(),x.bottom()) for x in detected_faces]

In [None]:
# More accurate than the whole-frame predictor above, but slower:
# the face is detected in each frame and only the cropped face is fed to the model.
def predict_on_video2(model_path, in_video_path, out_video_path):
    """
        Classifies the first detected face of every frame of the video at `in_video_path` as
        "open"/"close" and saves a copy of the video annotated with the predictions to
        `out_video_path` (XVID codec). Frames without a detectable face are annotated "noface".

        `model_path` - path to Keras model data (containing model layer data, model config data and weights)
        `in_video_path` - path of the video to read
        `out_video_path` - path of the annotated video to write

        Only the cropped face is fed to the model; the full original frame is what gets
        annotated and written. Frames are processed in batches of roughly one second of video.

        Raises IOError if the input video cannot be opened.
    """

    def label_img(img, label, loc=(3,50)): #annotates the image (in place) with the predicted label
        return cv2.putText(img, label, loc, cv2.FONT_HERSHEY_SIMPLEX, 3.4, (0, 255, 0), 2, cv2.LINE_AA)

    def crop_first_face(frame_rgb):
        # Returns the first detected face as an RGB crop, or None when there is no usable face.
        face_rects=detect_faces(frame_rgb)
        if len(face_rects)==0:
            return None
        left,top,right,bottom=face_rects[0]
        frame_h,frame_w=frame_rgb.shape[:2]
        # detections may extend past the image border; clamp so the crop is never empty/out of range
        left,top=max(left,0),max(top,0)
        right,bottom=min(right,frame_w),min(bottom,frame_h)
        if right<=left or bottom<=top:
            return None
        return np.ascontiguousarray(frame_rgb[top:bottom,left:right])

    def to_model_input(image_rgb):
        # resize, then apply the ResNet50 preprocessing (expects RGB input)
        resized=cv2.resize(image_rgb,(224,224))
        return preprocess_input(resized.astype(np.float32))

    # Index -> label. Assumes the training generator mapped "close" to 0 and "open" to 1
    # (Keras orders class folders alphanumerically) -- verify with train_generator.class_indices.
    classes={1:"open",0:"close"}
    model=keras.models.load_model(model_path)

    # setup to read video
    vid = cv2.VideoCapture(in_video_path)
    if not vid.isOpened():
        vid.release()
        raise IOError("Could not open input video: %s" % in_video_path)

    vidout = None
    try:
        # setup to write video
        h,w=map(int, [vid.get(cv2.CAP_PROP_FRAME_HEIGHT), vid.get(cv2.CAP_PROP_FRAME_WIDTH)])
        fps=vid.get(cv2.CAP_PROP_FPS)
        if not fps > 0: # container reports no (or an invalid) frame rate; fall back to a common default
            fps=30.0
        frames_per_batch=max(int(fps), 1) # about 1 second of video per prediction batch
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        vidout = cv2.VideoWriter(out_video_path,fourcc, fps, (w, h), True)

        # Read until the decoder runs out of frames rather than trusting the reported frame count.
        while True:
            frames=list()
            for _ in range(frames_per_batch):
                code,frame=vid.read()
                if not code: #end of video
                    break
                frames.append(frame)
            if not frames:
                break

            # OpenCV frames are BGR; both dlib and `preprocess_input` expect RGB.
            # Face/no-face is tracked per frame of THIS batch (None == no face), so a missing
            # face in one second of video can no longer mislabel frames of later seconds.
            faces=[crop_first_face(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in frames]
            inputs=[to_model_input(face) for face in faces if face is not None]
            preds=iter(model.predict(np.stack(inputs, axis=0), verbose=0)) if inputs else iter(())

            for frame,face in zip(frames, faces):
                if face is None:
                    label="noface"
                else:
                    pred=next(preds)
                    best=int(np.argmax(pred))
                    label=classes[best] + " (%0.2f)" %(pred[best])
                # Annotate and write the ORIGINAL frame: the writer was opened for (w, h)
                # frames, so a face crop of another size would not be a valid output frame.
                vidout.write(label_img(frame, label))

            if len(frames)<frames_per_batch: #short batch means the video ended
                break
    finally:
        #close input and output video files even if detection/prediction fails midway
        vid.release()
        if vidout is not None:
            vidout.release()
    return

# Run the face-cropping predictor on the same test clip.
model_path = "FINAL1.h5"
in_video_path = "/content/test_vedio2.mp4"
out_video_path = "test_vedio2_labbled2.avi"

predict_on_video2(
    model_path=model_path,
    in_video_path=in_video_path,
    out_video_path=out_video_path,
)



In [None]:
import csv

def predict_on_video2(model_path, in_video_path, out_video_path, csv_path=None):
    """
        Classifies the first detected face of every frame of the video at `in_video_path` as
        "open"/"close", saves a copy of the video annotated with the predictions to
        `out_video_path` (XVID codec) and, optionally, writes one CSV row per frame.

        This definition replaces the earlier `predict_on_video2`; `csv_path` is optional so
        that three-argument calls keep working.

        `model_path` - path to Keras model data (containing model layer data, model config data, and weights)
        `in_video_path` - path of the video to read
        `out_video_path` - path of the annotated video to write
        `csv_path` - where to write the per-frame table with columns "Time (s)", "Label",
                     "Confidence"; no CSV is written when None

        "Confidence" is the model's probability for the reported label. Frames without a
        detectable face get the label "noface" and an empty confidence.

        Raises IOError if the input video cannot be opened.
    """

    def label_img(img, label, loc=(3, 50)):  # annotates the image (in place) with the predicted label
        return cv2.putText(img, label, loc, cv2.FONT_HERSHEY_SIMPLEX, 3.4, (0, 255, 0), 2, cv2.LINE_AA)

    def crop_first_face(frame_rgb):
        # Returns the first detected face as an RGB crop, or None when there is no usable face.
        face_rects = detect_faces(frame_rgb)
        if len(face_rects) == 0:
            return None
        left, top, right, bottom = face_rects[0]
        frame_h, frame_w = frame_rgb.shape[:2]
        # detections may extend past the image border; clamp so the crop is never empty/out of range
        left, top = max(left, 0), max(top, 0)
        right, bottom = min(right, frame_w), min(bottom, frame_h)
        if right <= left or bottom <= top:
            return None
        return np.ascontiguousarray(frame_rgb[top:bottom, left:right])

    def to_model_input(image_rgb):
        # resize, apply the ResNet50 preprocessing (expects RGB input) and add the batch axis
        resized = cv2.resize(image_rgb, (224, 224))
        return np.expand_dims(preprocess_input(resized.astype(np.float32)), axis=0)

    # Index -> label. Assumes the training generator mapped "close" to 0 and "open" to 1
    # (Keras orders class folders alphanumerically) -- verify with train_generator.class_indices.
    classes = {1: "open", 0: "close"}
    model = keras.models.load_model(model_path)

    # setup to read video
    vid = cv2.VideoCapture(in_video_path)
    if not vid.isOpened():
        vid.release()
        raise IOError("Could not open input video: %s" % in_video_path)

    predictions = []
    vidout = None
    try:
        # setup to write video
        h, w = map(int, [vid.get(cv2.CAP_PROP_FRAME_HEIGHT), vid.get(cv2.CAP_PROP_FRAME_WIDTH)])
        # Keep the exact (float) frame rate: truncating it to int makes the timestamps drift
        # and a reported rate of 0 would divide by zero below.
        fps = vid.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # container reports no (or an invalid) frame rate; fall back to a common default
            fps = 30.0
        fourcc = cv2.VideoWriter_fourcc(*"XVID")
        vidout = cv2.VideoWriter(out_video_path, fourcc, fps, (w, h), True)

        frame_count = 0
        while True:
            code, frame = vid.read()
            if not code:  # end of video
                break

            # OpenCV frames are BGR; both dlib and `preprocess_input` expect RGB.
            face = crop_first_face(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            if face is None:
                label, confidence = "noface", ""
                annotation = label
            else:
                pred = model.predict(to_model_input(face), verbose=0)[0]
                best = int(np.argmax(pred))
                # probability of the predicted class itself (not 1 - p for "close")
                label, confidence = classes[best], pred[best]
                annotation = f"{label} ({confidence:.2f})"

            # Annotate and write the ORIGINAL frame: the writer was opened for (w, h)
            # frames, so a face crop of another size would not be a valid output frame.
            vidout.write(label_img(frame, annotation))

            # Store prediction information for CSV
            predictions.append((frame_count / fps, label, confidence))
            frame_count += 1
    finally:
        # close input and output video files even if detection/prediction fails midway
        vid.release()
        if vidout is not None:
            vidout.release()

    # Save predictions to CSV file
    if csv_path is not None:
        with open(csv_path, "w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["Time (s)", "Label", "Confidence"])
            writer.writerows(predictions)

    return

# Run the face-cropping predictor again, this time also exporting per-frame predictions.
model_path, in_video_path = "FINAL1.h5", "/content/test_vedio2.mp4"
out_video_path, csv_path = "test_vedio2_labeled2.avi", "predictions.csv"

predict_on_video2(
    model_path=model_path,
    in_video_path=in_video_path,
    out_video_path=out_video_path,
    csv_path=csv_path,
)


