In [1]:
#importing required libraries
import cv2
import math
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
from keras.preprocessing import image
import numpy as np
from skimage.transform import resize

Using TensorFlow backend.


In [2]:
#step 1 - read the video, extract frames from it and save them as images

count = 0
videoFile = "F:/Datasets/doggy_dataset/testvideos/testdog3video1.mp4"
cap = cv2.VideoCapture(videoFile)
frameRate = cap.get(5) #frame rate
x=1
while(cap.isOpened()):
    frameId = cap.get(1) #current frame number
    ret, frame = cap.read()
    if (ret != True):
        break
    if (frameId % math.floor(frameRate) == 0):
        filename ="frame%d.jpg" % count;count+=1
        cv2.imwrite(filename, frame)
cap.release()
print ("Done!")

Done!


In [3]:
#step 2 - Label a few images for training the model

#label 0 - nodog, label 1 - lyingdog, label 2 - notlyingdog
data = pd.read_csv('mapping.csv')     # reading the csv file
data.head()      # printing first five rows of the file
#the mapping file contains two columns as Image_ID (Contains the name of each image) and Class (Contains corresponding class for each image)

Unnamed: 0,Image_ID,Class
0,frame0.jpg,0
1,frame1.jpg,0
2,frame2.jpg,0
3,frame3.jpg,0
4,frame4.jpg,0


In [4]:
#read images based on their names(Image_ID)
X = []
for img_name in data.Image_ID:
    img = plt.imread('' + img_name)
    X.append(img)
X = np.array(X)

In [5]:
#since there are three classes, i will one hot encode them using the to_categorical() function of keras.utils
from keras.utils import np_utils
y = data.Class
dummy_y = np_utils.to_categorical(y)    # one hot encoding Classes

In [6]:
#ResNet50 pretrained model is used to build the model and it takes an input image of shape (224 X 224 X 3)
#since the images are in a different size, it is necessary to reshape all of them
#resize() function of skimage.transform is used to do this
image = []
for i in range(0,X.shape[0]):
    a = resize(X[i], preserve_range=True, output_shape=(224,224)).astype(int)      # reshaping to 224*224*3
    image.append(a)
X = np.array(image)

In [7]:
#to make the model performe well, it requires to preprocess all the inputs before passing them to the model
#preprocess_input() function of keras.applications.resnet50 is used to do this
from keras.applications.resnet50 import preprocess_input
X = preprocess_input(X, mode='tf')

In [8]:
#a validation set is used to check the performance of the model on unseen images
#train_test_split() function of the sklearn.model_selection module is used to randomly divide images into training and validation set
from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split(X, dummy_y, test_size=0.3, random_state=42)

In [9]:
#step 3 - building the model

#import required libraries to build the model
from keras.models import Sequential
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, InputLayer, Dropout

In [10]:
#load the ResNet50 pretrained model and store it as base_model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))



In [11]:
#make predictions using this model for X_train and X_valid, get the features, and then use those features to retrain the model
X_train = base_model.predict(X_train)
X_valid = base_model.predict(X_valid)
X_train.shape, X_valid.shape

((82, 7, 7, 2048), (36, 7, 7, 2048))

In [12]:
#the shape of X_train and X_valid is (82, 7, 7, 2048), (36, 7, 7, 2048) respectively
#in order to pass it to neural network, it needs to reshape to 1-D
X_train = X_train.reshape(82, 7*7*2048)      # converting to 1-D
X_valid = X_valid.reshape(36, 7*7*2048)

In [13]:
# preprocess the images and make them zero-centered which helps the model to converge faster
train = X_train/X_train.max()      # centering the data
X_valid = X_valid/X_train.max()

In [14]:
#step 3.1 - building the model

model = Sequential()
model.add(InputLayer((7*7*2048,)))    # input layer
model.add(Dense(units=1024, activation='sigmoid')) # hidden layer
model.add(Dense(3, activation='softmax'))    # output layer

In [15]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1024)              102761472 
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 3075      
Total params: 102,764,547
Trainable params: 102,764,547
Non-trainable params: 0
_________________________________________________________________


In [16]:
#step 3.2 - compiling the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [17]:
#step 3.3 - training the model
model.fit(train, y_train, epochs=10, validation_data=(X_valid, y_valid))

Train on 82 samples, validate on 36 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x25a28635d30>

In [18]:
#training with a new video
#load the new video and extract frames from it
count = 0
videoFile = "F:/Datasets/doggy_dataset/testvideos/testdog2video2.mp4"
cap = cv2.VideoCapture(videoFile)
frameRate = cap.get(5) #frame rate
x=1
while(cap.isOpened()):
    frameId = cap.get(1) #current frame number
    ret, frame = cap.read()
    if (ret != True):
        break
    if (frameId % math.floor(frameRate) == 0):
        filename ="test%d.jpg" % count;count+=1
        cv2.imwrite(filename, frame)
cap.release()
print ("Done!")

Done!


In [19]:
#load the testing.csv file which contains the names of each extracted frame
test = pd.read_csv('testing.csv')

In [20]:
#import the images for testing and then reshape them as per the requirements of the aforementioned pretrained model
test_image = []
for img_name in test.Image_ID:
    img = plt.imread('' + img_name)
    test_image.append(img)
test_img = np.array(test_image)

In [44]:
test_y = np_utils.to_categorical(test.Class)

In [45]:
test_image = []
for i in range(0,test_img.shape[0]):
    a = resize(test_img[i], preserve_range=True, output_shape=(224,224,3)).astype(int)
    test_image.append(a)
test_image = np.array(test_image)

In [46]:
#now it needs to make changes to test images similar to the ones we did for the training images

#preprocessing the images
test_image = preprocess_input(test_image, mode='tf')
#test_image.shape

#extracting features from the images using pretrained model
test_image = base_model.predict(test_image)

#converting the images to 1-D form
test_image = test_image.reshape(62, 7, 7, 2048)

#zero centered images
test_image = test_image/test_image.max()

In [48]:
#step 4 - make predictions for the remaining images

predictions = model.predict_classes(test_image)

In [39]:
#step 5 - calculate resting time of the dog
print("Resting time of the dog = ", predictions[predictions==1].shape[0], "seconds")

Resting time of the dog = 60 seconds


In [40]:
model.save_weights('dogresting.h5')