# Notebook cell In [2]:
import os
from os import walk
from pathlib import Path
import numpy as np
import cv2 as cv
from matplotlib import cm
import tkinter as tk
from tkinter import ttk
import tkinter.messagebox as msgbox
from tkinter import font
from PIL import ImageTk, Image
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torchvision.io import read_image
from torch.utils.data import DataLoader

# Class to store the loaded Siamese Network model
class SiameseNet(nn.Module):
    """Fully connected head that scores how similar two feature vectors are.

    The two feature vectors are concatenated along dim 1, passed through a
    ReLU and three linear layers, and squashed by a sigmoid so the returned
    score lies in (0, 1).

    The attribute names ``fc1``/``fc2``/``fc3`` must stay as they are: the
    trained model is restored from a checkpoint that refers to them.
    """

    def __init__(self):
        super().__init__()
        # fc1 takes 128 * 2 inputs, i.e. the concatenation of the two feature
        # vectors (128 values each when both vectors have the same width).
        self.fc1 = nn.Linear(in_features=128*2, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=1024)
        self.fc3 = nn.Linear(in_features=1024, out_features=1)

    def forward(self, x1, x2):
        """Return the similarity score of ``x1`` and ``x2``.

        Both inputs are batched feature vectors; they are joined along
        dim 1, so the result has one score per batch row.
        """
        hidden = F.relu(torch.cat([x1, x2], dim=1))
        # Two hidden layers, each followed by a ReLU (dropout stays disabled).
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        logits = self.fc3(hidden)
        return torch.sigmoid(logits)

# Preprocessing applied to every image before it is fed to the feature network:
# resize, convert to a tensor, then normalise each channel with mean 0.5 / std 0.5.
recog_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


# Model to detect human face in preprocessing step
face_cascade = cv.CascadeClassifier('haarcascade_frontalface_default.xml')

# Pick the device first so the checkpoints can be mapped onto it while loading.
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both files are looked up relative to the current working directory.
# map_location makes a checkpoint that was saved on a GPU loadable on a
# CPU-only machine (without it torch.load tries to restore CUDA tensors).
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code - only load checkpoints from a trusted source.
# NOTE(review): these files hold whole pickled modules; recent PyTorch releases
# default to weights_only=True and may need weights_only=False here - confirm
# against the installed version.
resNet = torch.load("saved_best_resNet34.pt", map_location=device)
siameseNet = torch.load("saved_best_siameseNet.pt", map_location=device)

# Inference only: move both networks to the device and switch to eval mode.
siameseNet = siameseNet.to(device)
siameseNet.eval()
resNet = resNet.to(device)
resNet.eval()

class Application(tk.Tk):
    """Main window of the face recognition app.

    Owns the shared state used by the pages: the camera handle (``cam``),
    the most recently captured image (``captured_image``), the features of
    all registered faces (``registered``) with their labels
    (``registered_label``), and the page frames (``frames``).
    """

    def __init__(self, *args, **kwargs):
        tk.Tk.__init__(self, *args, **kwargs)
        self.title('Face Recognition App')

        # Store features of registered faces
        self.registered_label = []
        self.registered = []

        # Directory that stores registered faces. os.path.join keeps the path
        # portable and avoids the invalid "\R" escape of a hand-built string.
        path = os.path.join(os.getcwd(), "Registered Faces")

        # Create the folder on first start (no error if it already exists)
        os.makedirs(path, exist_ok=True)

        # Go to the directory: registered faces are loaded from and saved to
        # the current working directory from here on.
        os.chdir(path)

        # Get the features of all registered faces
        self.initialize_registered_faces()

        # Stored captured image
        self.captured_image = None

        # Initialise camera
        self.cam = cv.VideoCapture(0)

        # Create a container
        container = tk.Frame(self)
        container.pack(side = "top", fill = "both", expand = True)
        container.grid_rowconfigure(0, weight = 1)
        container.grid_columnconfigure(0, weight = 1)

        # Build every page once and stack them in the same grid cell.
        # (The loop variable is deliberately not called F, which would shadow
        # the module-level alias of torch.nn.functional.)
        self.frames = {}
        for page_class in (RecognisePage, AddNewPersonPage, StoreNewPersonImage):
            page = page_class(container, self)
            self.frames[page_class] = page
            page.grid(row = 0, column = 0, sticky ="nsew")
        self.show_frame(RecognisePage)

    # Switch frame
    def show_frame(self, cont):
        """Raise the page whose class is ``cont`` and remember it in ``self.frame``."""
        self.frame = cont
        frame = self.frames[cont]
        frame.tkraise()

    def initialize_registered_faces(self):
        """Rebuild ``registered`` / ``registered_label`` from the image files on disk.

        Every readable image below the current working directory is run
        through the feature network; the file name without extension becomes
        the label. Files that cannot be opened as images are skipped.
        """
        labels = []
        features = []
        # Inference only: no_grad avoids keeping an autograd graph per face.
        with torch.no_grad():
            for dirpath, _dirnames, filenames in walk(os.getcwd()):
                for filename in filenames:
                    # Join with dirpath so files inside sub-folders resolve too.
                    file_path = os.path.join(dirpath, filename)
                    try:
                        with Image.open(file_path) as raw:
                            img = raw.convert('RGB')
                    except OSError:
                        # Not a readable image (e.g. a stray system file).
                        continue
                    img = recog_transform(img).unsqueeze(0).to(device)
                    labels.append(Path(filename).stem)
                    features.append(resNet(img))
        # Swap in the new lists only once they are complete.
        self.registered_label = labels
        self.registered = features
        
class RecognisePage(tk.Frame):
    """Landing page: live camera preview annotated with the recognition result."""

    def __init__(self, parent, controller):
        tk.Frame.__init__(self, parent)
        self.controller = controller

        # Camera handle shared with the other pages
        self.cam = controller.cam

        # Create tkinter label to show captured image
        self.cam_capture = ttk.Label(self)
        self.cam_capture.grid(row = 0, column = 1, padx = 10, pady = 10)
        self.Camera()

        # Add New Person Button
        style = ttk.Style()
        style.configure('my.TButton', font=("Verdana", 20))
        AddPersonBtn = ttk.Button(self, text ="Add New Person", style='my.TButton', command = lambda : controller.show_frame(AddNewPersonPage))
        AddPersonBtn.grid(row = 1, column = 1, padx = 10, pady = 10)

    def _best_match(self, feature):
        """Return ``(label, score)`` of the registered face most similar to ``feature``.

        Returns ``("", 0)`` when no face is registered. On equal scores the
        later registered face wins.
        """
        best_label = ""
        best_score = 0
        for label, registered in zip(self.controller.registered_label, self.controller.registered):
            score = siameseNet(feature, registered).item()
            if score >= best_score:
                best_score = score
                best_label = label
        return best_label, best_score

    def Camera(self):
        """Grab one frame, annotate detected faces, show it and reschedule itself."""
        ok, frame = self.cam.read()
        if not ok or frame is None:
            # No frame available (camera busy or disconnected): keep the loop
            # alive and try again instead of crashing on a None frame.
            self.stream = self.cam_capture.after(100, self.Camera)
            return

        # Preprocess the camera capture:
        # - Resize if the captured image is larger than window size
        # - Flip the captured image horizontally
        # - Change image from BGR to RGB format
        width = frame.shape[1]
        if(width > 650):
            height = frame.shape[0]
            scale = width/650
            frame = cv.resize(frame, (650, int(height/scale)))
        frame = cv.flip(frame, 1)
        frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)

        # Detect human face
        faces = face_cascade.detectMultiScale(frame, 1.1, 4)
        # Inference only: no_grad avoids building an autograd graph every frame.
        with torch.no_grad():
            for (x, y, w, h) in faces:
                # The whole frame (not the face crop) is embedded; registered
                # images are stored as whole camera frames as well.
                whole = Image.fromarray(frame.astype('uint8'), 'RGB')
                whole = recog_transform(whole).unsqueeze(0).to(device)
                feature = resNet(whole)
                best_label, best_score = self._best_match(feature)

                cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
                if(best_score > 0.7):
                    frame = cv.putText(frame, best_label, (0, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv.LINE_AA)
                    frame = cv.putText(frame, str(round(best_score,3)), (0, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv.LINE_AA)
                else:
                    frame = cv.putText(frame, "Unknown", (0, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv.LINE_AA)

        # Display the image
        frame = Image.fromarray(frame)
        imgtk = ImageTk.PhotoImage(image = frame)
        # Keep a reference on the label so the image is not garbage collected
        self.cam_capture.imgtk = imgtk
        self.cam_capture.configure(image=imgtk)
        # Loop
        self.stream = self.cam_capture.after(100, self.Camera)

        
# second window frame page1
class AddNewPersonPage(tk.Frame):
    """Page with a live camera preview from which a new face can be captured."""

    def __init__(self, parent, controller):
        tk.Frame.__init__(self, parent)
        self.controller = controller

        # Last raw frame successfully read from the camera (None until then)
        self.last_image = None

        # Camera handle shared with the other pages
        self.cam = controller.cam

        # Create tkinter label to show captured image
        self.cam_capture = ttk.Label(self)
        self.cam_capture.grid(row = 0, column = 1, padx = 10, pady = 10)
        self.Camera()

        # Define word style
        style = ttk.Style()
        style.configure('my.TButton', font=("Verdana", 20))

        # Capture image and register new face
        CaptureBtn = ttk.Button(self, text ="Capture", style='my.TButton', command = lambda : self.Capture(controller))
        CaptureBtn.grid(row = 1, column = 1, padx = 10, pady = 10)

        # Back to recognise page
        BackBtn = ttk.Button(self, text ="Back", style='my.TButton', command = lambda : controller.show_frame(RecognisePage))
        BackBtn.grid(row = 2, column = 1, padx = 10, pady = 10)

    def Camera(self):
        """Grab one frame, remember it, show it and reschedule itself."""
        ok, frame = self.cam.read()
        if not ok or frame is None:
            # No frame available: keep the previous last_image, keep the loop
            # alive and try again instead of crashing on a None frame.
            self.stream = self.cam_capture.after(100, self.Camera)
            return

        # Keep the raw (unresized, unflipped, BGR) frame for Capture
        self.last_image = frame
        # Preprocess the camera capture:
        # - Resize if the captured image is larger than window size
        # - Flip the captured image horizontally
        # - Change image from BGR to RGB format
        width = frame.shape[1]
        if(width > 650):
            height = frame.shape[0]
            scale = width/650
            frame = cv.resize(frame, (650, int(height/scale)))
        frame = cv.flip(frame, 1)
        frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)

        # Display the image
        frame = Image.fromarray(frame)
        imgtk = ImageTk.PhotoImage(image = frame)
        # Keep a reference on the label so the image is not garbage collected
        self.cam_capture.imgtk = imgtk
        self.cam_capture.configure(image=imgtk)
        # Loop
        self.stream = self.cam_capture.after(100, self.Camera)

    def Capture(self, controller):
        """Hand the last camera frame to the controller and open the naming page."""
        if self.last_image is None:
            # The camera has not delivered a frame yet; nothing to capture.
            msgbox.showerror(title = "Failed", message = "No camera image available!")
            return
        controller.captured_image = self.last_image
        controller.show_frame(StoreNewPersonImage)
        
# third window frame page2
class StoreNewPersonImage(tk.Frame):
    """Page that shows the captured image and asks for the person's name."""

    # Characters that must not appear in the name because it becomes a file name
    _INVALID_NAME_CHARS = '<>:"/\\|?*'

    def __init__(self, parent, controller):
        tk.Frame.__init__(self, parent)
        self.controller = controller

        # Create tkinter label to show captured image
        self.imglabel = ttk.Label(self)
        self.imglabel.grid(row = 0, column = 1, columnspan=5, padx = 10, pady = 10)

        # Define word style
        style = ttk.Style()
        style.configure('my.TButton', font=("Verdana", 20))

        # Enter text label
        EnterNameLabel = ttk.Label(self, text ="Enter name:", font=("Verdana", 20))
        EnterNameLabel.grid(row = 1, column = 2, padx = 10, pady = 10)

        # Input to get name
        self.inputtxt = tk.Text(self, height = 1, width = 30)
        # The Tk option value is the string "none"; passing Python's None is
        # silently ignored by tkinter and leaves wrapping switched on.
        self.inputtxt.configure(wrap="none")
        self.inputtxt.grid(row = 1, column = 4, padx = 10, pady = 10)

        # Press enter key to submit
        self.inputtxt.bind('<Return>', self.Submit)

        # Back to capture image page
        BackBtn = ttk.Button(self, text ="Back", style='my.TButton', command = lambda : controller.show_frame(AddNewPersonPage))
        BackBtn.grid(row = 2, column = 2, padx = 10, pady = 10)

        # Button to add Person
        AddBtn = ttk.Button(self, text ="Add", style='my.TButton', command = lambda : self.Add(controller))
        AddBtn.grid(row = 2, column = 4, padx = 10, pady = 10)

    def tkraise(self, aboveThis=None):
        """Refresh the preview with the controller's captured image, then raise the page."""
        frame = self.controller.captured_image
        # Nothing captured yet: raise the page without touching the preview.
        if frame is not None:
            width = frame.shape[1]
            if(width > 650):
                height = frame.shape[0]
                scale = width/650
                frame = cv.resize(frame, (650, int(height/scale)))
            frame = cv.flip(frame, 1)
            frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)

            frame = Image.fromarray(frame)
            imgtk = ImageTk.PhotoImage(image = frame)
            # Keep a reference on the label so the image is not garbage collected
            self.imglabel.imgtk = imgtk
            self.imglabel.configure(image=imgtk)

        # Call the real .tkraise
        super().tkraise(aboveThis)

    def Add(self, controller):
        """Save the captured image as ``<name>.png`` and register the new face.

        Shows an error box and stays on the page when the name is empty or
        unusable as a file name, or when the image cannot be written.
        """
        # Get name
        inputName = self.inputtxt.get("1.0","end-1c").strip()

        # Check user input
        if (inputName == ""):
            msgbox.showerror(title = "Failed", message = "Please Input Name!")
            return
        # The name is used as a file name, so it must not contain path
        # separators or other characters that are invalid in file names.
        if any(ch in self._INVALID_NAME_CHARS for ch in inputName):
            msgbox.showerror(title = "Failed", message = "Name contains invalid characters!")
            return

        # Save image first; only report success if it was really written.
        image = controller.captured_image
        if image is None or not cv.imwrite(inputName + ".png", image):
            msgbox.showerror(title = "Failed", message = "Could not save the image!")
            return

        msgbox.showinfo(title = "Success", message = "Face Added Successfully")
        self.inputtxt.delete("1.0","end")

        controller.show_frame(RecognisePage)
        controller.initialize_registered_faces()

    def Submit(self, event):
        """Handle the Return key in the name field like a click on "Add"."""
        self.Add(self.controller)
        # Stop the default Text binding from inserting a newline.
        return "break"

# Start the GUI. The camera is released even when the event loop exits with
# an error, so the device does not stay locked for other programs.
app = Application()
try:
    app.mainloop()
finally:
    app.cam.release()

# Recorded cell output:
# FileNotFoundError: [Errno 2] No such file or directory: 'saved_best_resNet34.pt'