# Face tracking using CNNs

In [3]:
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as fun
import torch.optim as optim
import pandas as pd
import os
import cv2
import gc
import torchvision.transforms as transforms
from IPython.display import Video

In [2]:
# Workaround for the duplicate-OpenMP-runtime abort ("OMP: Error #15");
# presumably required by this environment -- confirm before removing.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"= Using device {device}")
torch.cuda.is_available()

= Using device cuda


True

In [4]:
class conv_block(nn.Module):
    """Convolution followed by batch normalisation and ReLU.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride,
            padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()

        # Attribute names are part of the checkpoint's state_dict keys.
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, val):
        """Return ``relu(batchnorm(conv(val)))``."""
        convolved = self.conv(val)
        normalised = self.batchnorm(convolved)
        return fun.relu(normalised)
        

class Inception_block(nn.Module):
    """Four parallel branches whose outputs are concatenated along channels.

    Args:
        in_channels: channels of the incoming feature map.
        out1x1: output channels of the plain 1x1 branch.
        red3x3: channels after the 1x1 reduction in front of the 3x3 conv.
        out3x3: output channels of the 3x3 branch.
        red5x5: channels after the 1x1 reduction in front of the 5x5 conv.
        out5x5: output channels of the 5x5 branch.
        out1x1pool: output channels of the max-pool + 1x1 branch.

    Every branch preserves the spatial size (stride 1 with matching padding),
    so the result has ``out1x1 + out3x3 + out5x5 + out1x1pool`` channels.
    """

    def __init__(self, in_channels, out1x1, red3x3, out3x3, red5x5, out5x5, out1x1pool):
        super().__init__()

        # Branch 1: single 1x1 convolution.
        self.branch1 = conv_block(in_channels, out1x1, kernel_size=1)

        # Branch 2: 1x1 reduction, then 3x3 convolution.
        reduce_3x3 = conv_block(in_channels, red3x3, kernel_size=1)
        expand_3x3 = conv_block(red3x3, out3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, expand_3x3)

        # Branch 3: 1x1 reduction, then 5x5 convolution.
        reduce_5x5 = conv_block(in_channels, red5x5, kernel_size=1)
        expand_5x5 = conv_block(red5x5, out5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, expand_5x5)

        # Branch 4: 3x3 max-pool (stride 1), then 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        project = conv_block(in_channels, out1x1pool, kernel_size=1)
        self.branch4 = nn.Sequential(pool, project)

    def forward(self, val):
        """Run all four branches on ``val`` and concatenate on dimension 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        branch_outputs = [branch(val) for branch in branches]
        return torch.cat(branch_outputs, dim=1)

        
#GoogLeNet
class cnn_model(nn.Module):
    """GoogLeNet-style network with a 4-value regression head.

    Input is a batch of 3-channel images ``(N, 3, H, W)``; output is
    ``(N, 4)``. The inference helpers in this notebook draw those four values
    as ``(x, y, width, height)`` of a rectangle.

    Layer and attribute names define the checkpoint's state_dict keys and
    must not be renamed.
    """

    def __init__(self):
        super().__init__()

        # Stem: two plain conv blocks (each already includes BatchNorm + ReLU).
        self.conv1 = conv_block(
            in_channels=3,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3
        )

        self.conv2 = conv_block(
            in_channels=64,
            out_channels=192,
            kernel_size=3,
            stride=1,
            padding=1
        )

        # Each block's in_channels equals the summed branch outputs of the
        # block before it (e.g. 3a emits 64 + 128 + 32 + 32 = 256).
        self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)

        self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception_block(528, 256, 160, 320, 32, 128, 128)

        self.inception5a = Inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception_block(832, 384, 192, 384, 48, 128, 128)

        # 5b emits 384 + 384 + 128 + 128 = 1024 channels.
        self.fc1 = nn.Linear(
            in_features=1024,
            out_features=4
        )

    def forward(self, val):
        """Map a batch of images ``(N, 3, H, W)`` to ``(N, 4)`` predictions."""
        # conv_block already ends in ReLU, so no extra activation is applied
        # here (a second ReLU on a non-negative tensor is a no-op).
        val = self.conv1(val)
        val = fun.max_pool2d(val, kernel_size=3, stride=2)
        val = self.conv2(val)
        val = fun.max_pool2d(val, kernel_size=3, stride=2)

        val = self.inception3a(val)
        val = self.inception3b(val)
        val = fun.max_pool2d(val, kernel_size=3, stride=2)

        val = self.inception4a(val)
        val = self.inception4b(val)
        val = self.inception4c(val)
        val = self.inception4d(val)
        val = self.inception4e(val)
        val = fun.max_pool2d(val, kernel_size=3, stride=2)

        val = self.inception5a(val)
        val = self.inception5b(val)

        # Global average pooling to 1x1. For a 224x224 input the feature map
        # here is 6x6, so this equals the former avg_pool2d(kernel_size=6);
        # unlike the fixed kernel it also yields exactly 1024 features for
        # other input resolutions.
        val = fun.adaptive_avg_pool2d(val, 1)

        val = val.reshape(val.shape[0], -1)
        # Dropout is active only while the module is in training mode.
        val = fun.dropout(val, p=0.4, training=self.training)
        val = self.fc1(val)

        return val

In [5]:
model = cnn_model().to(device)
# This notebook only runs inference: eval mode disables the dropout in
# cnn_model.forward and makes BatchNorm use its stored running statistics
# instead of per-batch statistics (which are unreliable for batch size 1).
model.eval()
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('./model_google.pth', map_location=device))

<All keys matched successfully>

In [42]:
# Show the unprocessed input clip inline for reference.
Video("chess-nutz.mp4")

In [10]:
# Per-channel normalisation constants -- presumably the statistics of the
# training images; confirm against the training notebook.
# NOTE(review): frames decoded by OpenCV are in BGR channel order; verify
# that the order of these values matches what the model was trained with.
mean = [0.4344, 0.4542, 0.4789]
std = [0.2987, 0.3016, 0.3028]

# Pre-processing applied to every frame: convert to a tensor, then normalise
# each channel with the constants above.
transform_norm = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [37]:
def videoToImages(pathIn, pathOut, resize_size=224):
    """Run the detector on every frame of a video and save annotated frames.

    Each frame is resized to ``resize_size`` x ``resize_size``, passed through
    the global ``model``, and the four predicted values are drawn as a
    rectangle ``(x, y, width, height)`` in the resized frame's pixel
    coordinates. Frames are written to ``pathOut`` as ``frame<N>.jpg`` with
    ``N`` starting at 0.

    Args:
        pathIn: path of the input video file.
        pathOut: directory for the annotated frames; created if missing.
        resize_size: side length, in pixels, of the square frame fed to the
            model and saved to disk.

    Returns:
        The frame rate OpenCV reports for the input video.

    Raises:
        IOError: if the video cannot be opened.

    Side effects:
        Switches the global ``model`` to eval mode and prints one status line
        per read attempt.
    """
    if pathOut:
        # cv2.imwrite fails silently when the target directory is missing.
        os.makedirs(pathOut, exist_ok=True)

    # Inference only: disable dropout and use BatchNorm running statistics.
    model.eval()

    vidcap = cv2.VideoCapture(pathIn)
    try:
        if not vidcap.isOpened():
            raise IOError("could not open video %r" % (pathIn,))

        fps = vidcap.get(cv2.CAP_PROP_FPS)
        count = 0
        while True:
            # Read inside the loop only, so the first frame is processed too
            # (it used to be read once before the loop and thrown away).
            success, image = vidcap.read()
            print("Frame " + str(count) + ": " + str(success))
            if not success:
                break

            image = cv2.resize(image, (resize_size, resize_size))
            # unsqueeze adds the batch dimension for any resize_size, where
            # the former view(1, 3, 224, 224) only worked for 224.
            inputs = transform_norm(image).unsqueeze(0)
            with torch.no_grad():
                predict = torch.squeeze(model(inputs.to(device)).cpu()).tolist()

            x, y, w, h = (int(value) for value in predict)
            # cv2.rectangle draws onto `image` in place and returns it.
            image_pred = cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 1)

            # os.path.join keeps the path portable (was a hard-coded "\\").
            cv2.imwrite(os.path.join(pathOut, "frame%d.jpg" % count), image_pred)
            count += 1
    finally:
        # Always free the decoder handle, even if inference fails mid-way.
        vidcap.release()
    return fps

In [25]:
def imagesToVideo(pathIn, pathOut, fps):
    """Assemble ``frame<N>.jpg`` images from a directory into a video file.

    Args:
        pathIn: directory containing the frames. Only files named exactly
            ``frame<N>.jpg`` (``N`` a decimal number) are used; other files,
            including unrelated ``.jpg`` images, are ignored.
        pathOut: path of the video file to write.
        fps: frame rate of the output video.

    Raises:
        IndexError: if ``pathIn`` contains no usable frame (same exception
            type as before, now with an explanatory message).

    Frames are written in ascending numeric order of ``N``; the frame size is
    taken from the first frame.
    """
    prefix, suffix = "frame", ".jpg"

    def frame_number(name):
        # Characters between the "frame" prefix and the ".jpg" suffix.
        return name[len(prefix):-len(suffix)]

    images = [
        name for name in os.listdir(pathIn)
        if name.startswith(prefix)
        and name.endswith(suffix)
        and frame_number(name).isdecimal()
    ]
    if not images:
        raise IndexError("no frame<N>.jpg files found in %r" % (pathIn,))
    # Numeric sort so that frame10 comes after frame2.
    images.sort(key=lambda name: int(frame_number(name)))

    first_frame = cv2.imread(os.path.join(pathIn, images[0]))
    height, width = first_frame.shape[:2]

    # NOTE(review): fourcc 0 is kept from the original; whether it produces a
    # playable .mp4 depends on the OpenCV backend -- confirm on the target OS.
    video = cv2.VideoWriter(pathOut, 0, fps, (width, height))
    try:
        for image in images:
            video.write(cv2.imread(os.path.join(pathIn, image)))
    finally:
        # Finalise the file even if a frame fails to load or write.
        video.release()

    cv2.destroyAllWindows()

In [43]:
# Annotate the frames of the input clip and keep the source frame rate so the
# output video can be re-encoded at the same speed.
fps=videoToImages("chess-nutz.mp4", "video_frames")
fps

Frame 0: True
Frame 1: True
Frame 2: True
Frame 3: True
Frame 4: True
Frame 5: True
Frame 6: True
Frame 7: True
Frame 8: True
Frame 9: True
Frame 10: True
Frame 11: True
Frame 12: True
Frame 13: True
Frame 14: True
Frame 15: True
Frame 16: True
Frame 17: True
Frame 18: True
Frame 19: True
Frame 20: True
Frame 21: True
Frame 22: True
Frame 23: True
Frame 24: True
Frame 25: True
Frame 26: True
Frame 27: True
Frame 28: True
Frame 29: True
Frame 30: True
Frame 31: True
Frame 32: True
Frame 33: True
Frame 34: True
Frame 35: True
Frame 36: True
Frame 37: True
Frame 38: True
Frame 39: True
Frame 40: True
Frame 41: True
Frame 42: True
Frame 43: True
Frame 44: True
Frame 45: True
Frame 46: True
Frame 47: True
Frame 48: True
Frame 49: True
Frame 50: True
Frame 51: True
Frame 52: True
Frame 53: True
Frame 54: True
Frame 55: True
Frame 56: True
Frame 57: True
Frame 58: True
Frame 59: True
Frame 60: True
Frame 61: True
Frame 62: True
Frame 63: True
Frame 64: True
Frame 65: True
Frame 66: True
Frame

30.0

In [44]:
# Stitch the annotated frames back into a video at the source frame rate.
imagesToVideo("video_frames", "chess-nutz2.mp4", fps)

In [46]:
# Play the re-assembled clip with the predicted boxes drawn on it.
Video("chess-nutz2.mp4")

In [None]:
def streamToImages(pathOut, resize_size=224):
    """Show the default webcam live with the predicted box drawn on each frame.

    Each captured frame is resized to ``resize_size`` x ``resize_size``, passed
    through the global ``model``, and displayed in a window named ``'frame'``
    with the prediction drawn as a rectangle ``(x, y, width, height)``.
    The loop ends when the camera stops delivering frames or when ``q`` or
    ``Esc`` is pressed in the window.

    Args:
        pathOut: currently unused (frames are displayed, not saved); kept so
            existing calls keep working.
        resize_size: side length, in pixels, of the square frame fed to the
            model and displayed.

    Side effects:
        Switches the global ``model`` to eval mode; releases the camera and
        closes all OpenCV windows on exit.
    """
    # Inference only: disable dropout and use BatchNorm running statistics.
    model.eval()

    vidcap = cv2.VideoCapture(0)
    try:
        while True:
            success, image = vidcap.read()
            if not success:
                break

            image = cv2.resize(image, (resize_size, resize_size))
            # unsqueeze adds the batch dimension for any resize_size, where
            # the former view(1, 3, 224, 224) only worked for 224.
            inputs = transform_norm(image).unsqueeze(0)
            with torch.no_grad():
                predict = torch.squeeze(model(inputs.to(device)).cpu()).tolist()

            x, y, w, h = (int(value) for value in predict)
            # cv2.rectangle draws onto `image` in place and returns it.
            image_pred = cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 1)

            cv2.imshow('frame', image_pred)

            # waitKey(1) lets the window repaint without blocking; the former
            # waitKey(0) paused on every frame until a key was pressed.
            key = cv2.waitKey(1) & 0xFF
            if key in (ord('q'), 27):  # 27 == Esc
                break
    finally:
        # Free the webcam and close the window once, after the loop, instead
        # of destroying the window after every frame.
        vidcap.release()
        cv2.destroyAllWindows()
    

# Start the live webcam demo (opens capture device 0 and an OpenCV window).
streamToImages("stream_frames")




In [12]:
# No cell in this notebook defines `vid`, so an unconditional `vid.release()`
# raises NameError on a fresh kernel. Release it only if such a capture object
# is actually present in the session.
if "vid" in globals():
    vid.release()
# Destroy all the windows
cv2.destroyAllWindows()