# import files and directories

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import uuid
from PIL import Image
import torch
import torchvision.transforms.functional as F
import random
from torch.utils.data import ConcatDataset, DataLoader, SubsetRandomSampler
from torchvision import transforms
from torch import nn
from torch.utils.data import Dataset
from torch import flatten

In [2]:
POS_PATH = os.path.join("..",'data','positive')
NEG_PATH = os.path.join("..",'data','negetive')
ANC_PATH = os.path.join("..",'data','anchor')

In [4]:
# add directories
os.makedirs(POS_PATH)
os.makedirs(NEG_PATH)
os.makedirs(ANC_PATH)

# Untar Labelled Faces in the Wild Dataset

In [7]:
!tar -xf lfw.tgz

In [8]:
for directory in os.listdir('lfw'):
  for file in os.listdir(os.path.join('lfw',directory)):
    EX_PATH = os.path.join('lfw',directory, file)
    NEW_PATH = os.path.join(NEG_PATH, file)
    os.replace(EX_PATH, NEW_PATH)

In [3]:
# Establish a connection to the webcam
cap = cv2.VideoCapture(0)
while cap.isOpened(): 
    ret, frame = cap.read()
   
    # Cut down frame to 250x250px
    frame = frame[120:120+250,200:200+250, :]
    
    # Collect anchors 
    if cv2.waitKey(1) & 0XFF == ord('a'):
        # Create the unique file path 
        imgname = os.path.join(ANC_PATH, '{}.jpg'.format(uuid.uuid1()))
        # Write out anchor image
        cv2.imwrite(imgname, frame)
    
    # Collect positives
    if cv2.waitKey(1) & 0XFF == ord('p'):
        # Create the unique file path 
        imgname = os.path.join(POS_PATH, '{}.jpg'.format(uuid.uuid1()))
        # Write out positive image
        cv2.imwrite(imgname, frame)
    
    # Show image back to screen
    cv2.imshow('Image Collection', frame)
    
    # Breaking gracefully
    if cv2.waitKey(1) & 0XFF == ord('q'):
        break
        
# Release the webcam
cap.release()
# Close the image show frame
cv2.destroyAllWindows()

# Data Augmentation

In [3]:
import torchvision.transforms.functional as F
import random
def data_aug(img):
    flag = [0,0,0,0]

    data = []
    while not all(x == 1 for x in flag):
        if flag[0] == 0 and random.randint(0,100) / 100 < 0.5:
            img = F.adjust_brightness(img, brightness_factor=1.1)
            data.append(img)
            flag[0] = 1
        
        if flag[1] == 0 and random.randint(0,100) / 100 < 0.5:
            img = F.adjust_contrast(img, contrast_factor=torch.empty(1).uniform_(0.6, 1).item())
            data.append(img)
            flag[1] = 1
            
        if flag[2] == 0 and random.randint(0,100) / 100 < 0.5:
            img = F.hflip(img)
            data.append(img)
            flag[2] = 1
            
        if flag[3] == 0 and random.randint(0,100) / 100 < 0.5:
            img = F.adjust_saturation(img, saturation_factor=torch.empty(1).uniform_(0.9, 1).item())
            data.append(img)
            flag[3] = 1
        
        
    return data


In [25]:
for file_name in os.listdir(os.path.join(POS_PATH)):
    img_path = os.path.join(POS_PATH, file_name)
    img = Image.open(img_path).convert('RGB')
    img_tensor = F.to_tensor(img)
    augmented_images = data_aug(img_tensor)
    for i, image in enumerate(augmented_images):
        image = F.to_pil_image(image)
        image.save(os.path.join(POS_PATH, '{}.jpg'.format(uuid.uuid1())))

# Load Data in Dataloader

In [3]:
class MergeImageDataset(Dataset):
    def __init__(self,ANC_PATH  ,POS_PATH, NEG_PATH, types ,transform = None):
        
        self.POS_PATH = POS_PATH
        self.NEG_PATH = NEG_PATH
        self.ANC_PATH = ANC_PATH
        
        self.types = types
        
        self.transform = transform
        
        self.POS_IMG = os.listdir(POS_PATH)
        self.NEG_IMG = os.listdir(NEG_PATH)
        self.ANC_IMG = os.listdir(ANC_PATH)
        
    def __len__(self):
        return len(self.ANC_IMG)

    def __getitem__(self, idx):
        
        anc_dir  = os.path.join(self.ANC_PATH, self.ANC_IMG[idx])
        anc_image = Image.open(anc_dir).convert('RGB')
        
        if self.types == 1: 
            pos_dir  = os.path.join(self.POS_PATH, self.POS_IMG[idx])
            pos_image = Image.open(pos_dir).convert('RGB')

        if self.types == 0: 
            neg_dir  = os.path.join(self.NEG_PATH, self.NEG_IMG[idx])
            neg_image = Image.open(neg_dir).convert('RGB')
        
        data = [anc_image, pos_image if self.types == 1 else neg_image, torch.ones(1) if self.types == 1 else torch.zeros(1) ]

        if self.transform:
            data[0] = self.transform(data[0])
            data[1] = self.transform(data[1])
        
        return data

In [4]:
transform = transforms.Compose([transforms.Resize(224),
                                transforms.ToTensor()])

In [5]:
positive = MergeImageDataset(ANC_PATH, POS_PATH, NEG_PATH, types = 1, transform = transform)
negtive = MergeImageDataset(ANC_PATH, POS_PATH, NEG_PATH, types = 0, transform = transform)

In [6]:
data = ConcatDataset([negtive,positive])

In [7]:
# Split the individual datasets into training and validation parts
num_samples = len(data)
indices = list(range(num_samples))
np.random.shuffle(indices)
split = int(np.floor(0.2 * num_samples))  # Use 20% of data for validation
train_indices, val_indices = indices[split:], indices[:split]

# Create samplers for the training and validation parts
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)


In [8]:
# Create data loaders for the training and validation parts
train_loader = DataLoader(data, batch_size=32, sampler=train_sampler)
val_loader = DataLoader(data, batch_size=32, sampler=val_sampler)

# Build Model

In [9]:
for i in train_loader:
    print(i[0].shape)
    break

torch.Size([32, 3, 224, 224])


In [65]:
class Embedding(nn.Module):
    def __init__(self):
            super(Embedding, self).__init__()

            self.conv_1 = nn.Conv2d(3, 64,kernel_size=(10, 10))
            self.relu_1 = nn.ReLU(inplace = True)
            self.maxpool_1 = nn.MaxPool2d(64, (2,2), padding = (1,1))
            
            self.conv_2 =  nn.Conv2d(64, 128,kernel_size=(7, 7))
            self.relu_2 =  nn.ReLU()
            self.maxpool_2 = nn.MaxPool2d(64, (2,2))
            
            self.conv_3 =  nn.Conv2d(128, 128,kernel_size=(4, 4))
            self.relu_3 =  nn.ReLU()
            self.maxpool_3 =  nn.MaxPool2d(64, (2,2), padding = (1,1))
            
            self.conv_4 =  nn.Conv2d(128, 256,kernel_size=(4, 4))
            self.relu_4 =  nn.ReLU()       
            
            self.Flatten = nn.Flatten()
            self.linear = nn.Linear(256 * 6 * 6, 4096)
            self.sigmoid = nn.Sigmoid()
        
    def forward(self,x):
        y = self.conv_1(x)
        y = self.relu_1(y)
        y = self.maxpool_1(y)
        
        
        y = self.maxpool_2(self.relu_2(self.conv_2(y)))
        y = self.maxpool_3(self.relu_3(self.conv_3(y)))
        y = self.relu_4(self.conv_4(y))
        return self.sigmoid(self.linear(self.Flatten(y)))
        

In [63]:
from torchsummary import summary

model = Embedding()

# Specify the input size
input_size = (3, 100, 100)

# Move the model to the desired device (e.g., CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
inputs = torch.randn(input_size).to(device)
# Print the model summary
summary(model, inputs)

RuntimeError: Failed to run torchsummary. See above stack traces for more details. Executed layers up to: [Conv2d: 1-1, ReLU: 1-2, MaxPool2d: 1-3, Conv2d: 1-4, ReLU: 1-5]

In [66]:
model = Embedding()

# Specify the input size
input_size = (3, 100, 100)

# Move the model to the desired device (e.g., CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
inputs = torch.randn(input_size).to(device)

print(inputs.shape)
for name, layer in model.named_children():
    i = 1
    print(f"layer {i} / {name}")
    x = layer(inputs)
    print(f"{name}: {x.shape}")
    i = i + 1

torch.Size([3, 100, 100])
layer 1 / conv_1
conv_1: torch.Size([64, 91, 91])
layer 1 / relu_1
relu_1: torch.Size([3, 100, 100])
layer 1 / maxpool_1
maxpool_1: torch.Size([3, 20, 20])
layer 1 / conv_2


RuntimeError: Given groups=1, weight of size [128, 64, 7, 7], expected input[1, 3, 100, 100] to have 64 channels, but got 3 channels instead