In [1]:
import torch
import torch.nn as nn
import torchvision as tv
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F

from typing import Tuple
from pathlib import Path
from mtcnn.mtcnn import MTCNN

import re
import cv2

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)




In [2]:
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomVerticalFlip(p=1),
    transforms.Resize((64, 64)),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((64, 64)),
])

In [3]:
def find_base(input_string):
    letters = re.findall(r'\D', input_string)
    return ''.join(letters)

In [4]:
class CustomFolderFaces(Dataset):
    def __init__(self,root:str,transform=None,unique:bool = False,cv2_color=cv2.COLOR_BGR2GRAY, take_length=False,length=100) -> None:
        self.root = root
        self.transform = transform
        self.cv2_color=cv2_color
        self.unique = unique
        self.take_length = take_length
        self.length=length    
        self.paths = sorted(list(map(str,Path(self.root).glob("*.jpg"))))
        self.unique_paths = list(set(find_base(i.split(".")[0])+'0.jpg' for i in self.paths))
        
    def __len__(self) -> int:
        if(self.take_length):
            return self.length
        else:
            return len(self.paths) if not self.unique else len(self.unique_paths)
    
    def __getitem__(self, index:int) -> Tuple[torch.Tensor,str]:
        
        if not self.unique:
            pths = self.paths
        else:
            pths = self.unique_paths
            
        img1_path = pths[index]
        name= str(Path(img1_path).stem)
        img1 = cv2.imread(img1_path)
        if self.cv2_color:
            img1 = cv2.cvtColor(img1,self.cv2_color)
        if self.transform:
            img1 = self.transform(img1)
        return img1,name


test_dataset_p = CustomFolderFaces('./dataset',train_transform,unique=True)
test_dataloader_p = DataLoader(test_dataset_p, batch_size=1, shuffle=True)

In [5]:
class ContrastiveLoss(nn.Module):
    """
    Contrastive loss function.
    Based on:
    """

    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, x0, x1, y):
        # euclidian distance
        diff = x0 - x1
        dist_sq = torch.sum(torch.pow(diff, 2), 1)
        dist = torch.sqrt(dist_sq)

        mdist = self.margin - dist
        dist = torch.clamp(mdist, min=0.0)
        loss = y * dist_sq + (1 - y) * torch.pow(dist, 2)
        loss = torch.sum(loss) / 2.0 / x0.size()[0]
        return loss

In [6]:
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        # Setting up the Sequential of CNN Layers
        self.cnn1 = nn.Sequential(
            nn.Conv2d(1, 96, kernel_size=5,stride=1),
            nn.ReLU(inplace=True),
#             nn.LocalResponseNorm(5,alpha=0.0001,beta=0.75,k=2),
            nn.MaxPool2d(3, stride=2),
            nn.BatchNorm2d(96),
            
            nn.Conv2d(96, 256, kernel_size=5,stride=1,padding=2),
            nn.ReLU(inplace=True),
#             nn.LocalResponseNorm(5,alpha=0.0001,beta=0.75,k=2),
            nn.MaxPool2d(3, stride=2),
            nn.BatchNorm2d(256),
            nn.Dropout2d(p=0.3),

            nn.Conv2d(256,384 , kernel_size=3,stride=1,padding=1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(384,256 , kernel_size=3,stride=1,padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),
        )
        # Defining the fully connected layers
        self.fc1 = nn.Sequential(
            nn.LazyLinear(1024),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),
            
            nn.Linear(1024, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128,2)
        )
        
    def forward_once(self, x):
        # Forward pass 
        output = self.cnn1(x)
        output = output.view(output.size()[0], -1)
        output = self.fc1(output)
        return output

    def forward(self, input1, input2):
        # forward pass of input 1
        output1 = self.forward_once(input1)
        # forward pass of input 2
        output2 = self.forward_once(input2)
        return output1, output2

In [7]:
class Network(nn.Module):
    def __init__(self, emb_dim=128):
        super(Network, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            nn.PReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Dropout(0.3),
            nn.Conv2d(32, 64, 5),
            nn.PReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Dropout(0.3)
        )
        
        self.fc = nn.Sequential(
            nn.LazyLinear(512),
            nn.PReLU(),
            nn.Linear(512, 2)
        )
        
    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1)
        x = self.fc(x)
        # x = nn.functional.normalize(x)
        return x

In [8]:
model = SiameseNetwork()
model.load_state_dict(torch.load("model.pt",map_location=torch.device('cpu')))
# model = Network()
# model.load_state_dict(torch.load("model_triplet.pt",map_location=torch.device('cpu')))



<All keys matched successfully>

In [9]:
def prediction_contrastive(input:torch.Tensor)->Tuple:
    x0 = test_transform(input)
    x0 = x0.unsqueeze(0)
    distances=[]
    img_names=[]
    for i, data in enumerate(test_dataloader_p):
        x1,name = data
        # Example batch is a list containing 3x1 images&labels [0] - [8,1,64,64], [1] - [8,1,64,64],[2] - [8,]
        # If the label is 0, it means that it is not the same person, label is 1, same person in both images
        
        with torch.no_grad():
            model.eval()
            # x0,x1 = x0.cuda(),x1.cuda()
            out1,out2 = model(x0,x1)
            res = F.pairwise_distance(out1, out2)
            # x0,x1 = x0.cpu(),x1.cpu()
            distances.append(res)
            img_names.append(name)
    
    idx = torch.argmin(torch.tensor(distances))
    # print(idx,distances,img_names)
    return distances[idx],img_names[idx]

def prediction_triplet(input:torch.Tensor)->Tuple:
    x0 = test_transform(input)
    x0 = x0.unsqueeze(0)
    distances=[]
    img_names=[]
    for i, data in enumerate(test_dataloader_p):
        x1,name = data
        # Example batch is a list containing 3x1 images&labels [0] - [8,1,64,64], [1] - [8,1,64,64],[2] - [8,]
        # If the label is 0, it means that it is not the same person, label is 1, same person in both images
        
        with torch.no_grad():
            model.eval()
            # x0,x1 = x0.cuda(),x1.cuda()
            out1= model(x0)
            out2= model(x1)
            res = F.pairwise_distance(out1, out2)
            # x0,x1 = x0.cpu(),x1.cpu()
            distances.append(res)
            img_names.append(name)
    
    idx = torch.argmin(torch.tensor(distances))
    # print(idx,distances,img_names)
    return distances[idx],img_names[idx]

In [10]:
cap = cv2.VideoCapture('ankith1.jpg')

# width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))   # float `width`
# height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# fps = cap.get(cv2.CAP_PROP_FPS)
# print(width, height,fps) 
colors = [(0, 255, 0), (0, 0, 255), (255, 0, 0)]

while cap.isOpened():
    # read frame
    _, frame = cap.read()

    # # resize the frame for portrait video
    # frame = cv2.resize(frame, (64,64))

    # convert to RGB
    frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cv2.imshow('test',frame)


    # detector is defined above, otherwise uncomment
    detector = MTCNN()
    # detect faces in the image
    faces = detector.detect_faces(frame)
    if len(faces)!=0:
        for i,face in enumerate(faces): # for each face found in single frame
            x, y, w, h = face['box']
            img = cv2.rectangle(frame, (x, y), (x+w, y+h), colors[i%len(colors)], 2)
            
            dist,name = prediction_contrastive(frame_gray[y:y+h,x:x+w])
            # dist,name = prediction_triplet(frame_gray[y:y+h,x:x+w])

            cv2.putText(frame, f'{name} - {dist}', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
        cv2.imshow('test',frame)

    if cv2.waitKey(1) == ord('q'):
            break
        
cap.release()
cv2.destroyAllWindows()








error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'
