# Imports & Constants

<font size="4">Imports </font>

In [2]:
import os
import warnings
import zipfile
from glob import glob

import torch
import torch.nn as nn

<font size="4">Constants</font>

In [7]:
# All dataset paths are resolved relative to the directory the kernel was started in.
current_working_directory: str = os.getcwd()
# Root folder that the downloaded archive is extracted into.
DATA_BASE_DIRECTORY: str = os.path.join(current_working_directory, 'data')

# Acquiring & Handling Data

<font size="6">Acquiring_Data</font>

In [9]:
!pip install -q gdown

# 1. Download from Google Drive using gdown (extract the file ID from your link)
file_id = "1p1wjaqpTh_5RHfJu4vUh8JJCdKwYMHCp"
zip_path = "dataset.zip"
!gdown {file_id} -O {zip_path}

# 2. Extract the zip
os.makedirs(DATA_BASE_DIRECTORY, exist_ok=True)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(DATA_BASE_DIRECTORY)

updated_dir_location: str = os.path.join(DATA_BASE_DIRECTORY, 'lfw2', 'lfw2')

Downloading...
From (original): https://drive.google.com/uc?id=1p1wjaqpTh_5RHfJu4vUh8JJCdKwYMHCp
From (redirected): https://drive.google.com/uc?id=1p1wjaqpTh_5RHfJu4vUh8JJCdKwYMHCp&confirm=t&uuid=2345476c-676a-4ecd-8a71-b9001afa67a4
To: /content/dataset.zip
100% 104M/104M [00:01<00:00, 77.2MB/s] 
lfw2


<font size="4">Checking data was acquired successfully</font>

In [16]:
def loads_files_paths_to_memory(base_directory: str) -> dict[str, list[str]]:
    """Index every ``.jpg`` file below ``base_directory`` by the folder it lives in.

    The tree is walked recursively. Files that sit directly in ``base_directory``
    are ignored; for every other folder, the folder name is used as the person
    name and each ``.jpg`` file inside it is recorded under that name.

    Args:
        base_directory: Root folder of the extracted dataset.

    Returns:
        A mapping from person name (the name of the containing folder) to the
        sorted list of full paths of that person's images.

    Raises:
        ValueError: If no ``.jpg`` file was found anywhere below ``base_directory``.

    Warns:
        UserWarning: For every file that is not a ``.jpg``; such files are skipped.
    """
    images: dict[str, list[str]] = {}
    for root, _subdirs, files in os.walk(base_directory):
        if root == base_directory:
            # Files placed directly in the root do not belong to any person folder.
            continue
        person_name: str = os.path.basename(root)
        # os.walk yields files in arbitrary order; sort for reproducible results.
        for file in sorted(files):
            if not file.lower().endswith('.jpg'):
                # Warn instead of raising so one stray file does not abort the scan.
                warnings.warn(f"File {file} is not a .jpg file. Continuing...", stacklevel=2)
                continue
            images.setdefault(person_name, []).append(os.path.join(root, file))
    if len(images) < 1:
        raise ValueError(f"No images were found in {base_directory}, aborting...")
    return images

# Scan the extracted dataset starting at the data root and keep the helper's result.
images_paths = loads_files_paths_to_memory(DATA_BASE_DIRECTORY)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
root is: /content/data/lfw2/lfw2/lfw2/Tim_Curry, subdirs: [], files: ['Tim_Curry_0001.jpg', 'Tim_Curry_0002.jpg']
root is: /content/data/lfw2/lfw2/lfw2/Patrik_Kristiansson, subdirs: [], files: ['Patrik_Kristiansson_0001.jpg']
root is: /content/data/lfw2/lfw2/lfw2/Stacey_Dales-Schuman, subdirs: [], files: ['Stacey_Dales-Schuman_0001.jpg']
root is: /content/data/lfw2/lfw2/lfw2/Allan_Wagner, subdirs: [], files: ['Allan_Wagner_0001.jpg']
root is: /content/data/lfw2/lfw2/lfw2/Jean_Chretien, subdirs: [], files: ['Jean_Chretien_0031.jpg', 'Jean_Chretien_0040.jpg', 'Jean_Chretien_0021.jpg', 'Jean_Chretien_0025.jpg', 'Jean_Chretien_0008.jpg', 'Jean_Chretien_0038.jpg', 'Jean_Chretien_0027.jpg', 'Jean_Chretien_0024.jpg', 'Jean_Chretien_0030.jpg', 'Jean_Chretien_0048.jpg', 'Jean_Chretien_0036.jpg', 'Jean_Chretien_0050.jpg', 'Jean_Chretien_0032.jpg', 'Jean_Chretien_0019.jpg', 'Jean_Chretien_0009.jpg', 'Jean_Chretien_0007.jpg', 'Jean_C

NameError: name 'images_encountered' is not defined

<font size="4">Loading all images to memory</font>

In [None]:
def load_images_to_memory(base_directory: str) -> list:
    """Placeholder for loading the dataset images found under ``base_directory``.

    NOTE(review): this is an unfinished stub. It only creates an empty list and
    then falls off the end, so it currently returns ``None`` rather than the
    annotated ``list``, and ``base_directory`` is not read at all.
    TODO: implement the loading logic and return ``images``.
    """
    # Accumulator for the loaded images; nothing is appended to it yet.
    images = []


<font size="4">Organizing According to example-validation-train</font>

<font size="6">Handling_Data</font>

<font size="6">Creating Network</font>

In [5]:
class ConvNet(nn.Module):
    """Siamese convolutional network that scores the similarity of two images.

    Both inputs go through the same convolutional trunk and the same first
    fully-connected layer (shared weights). The element-wise absolute difference
    of the two 4096-dimensional embeddings is then mapped by a final linear layer
    and a sigmoid to a single score in the range (0, 1).

    The first fully-connected layer expects ``256 * 6 * 6`` features. With the
    kernel sizes used here (10, 7, 4, 4) and three 2x2 max-pools, that corresponds
    to an input of 105x105 pixels: 105 -> 96 -> 48 -> 42 -> 21 -> 18 -> 9 -> 6.
    Inputs of any other spatial size will fail in ``fc1``.

    Args:
        in_channels: Number of channels of the input images (1 for grayscale,
            3 for RGB). Defaults to 1.
    """

    def __init__(self, in_channels: int = 1):
        super().__init__()
        self.cnn = nn.Sequential(
            # Only this first layer depends on the number of image channels.
            nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=10),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=7),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4),
            nn.ReLU(),
        )

        self.fc1 = nn.Sequential(
            nn.Flatten(),                         # -> [256*6*6 = 9216]
            nn.Linear(256*6*6, 4096),
            nn.Sigmoid()
        )

        self.fc2 = nn.Sequential(
            nn.Linear(4096, 1),
            nn.Sigmoid()  # similarity score
        )

    def forward_once(self, x):
        """Embed one batch of images into 4096-dimensional feature vectors.

        Args:
            x: Tensor of shape ``(batch, in_channels, 105, 105)``.

        Returns:
            Tensor of shape ``(batch, 4096)``.
        """
        x = self.cnn(x)
        x = self.fc1(x)
        return x

    def forward(self, input1, input2):
        """Score how similar each image in ``input1`` is to its partner in ``input2``.

        Args:
            input1: First batch of images, shape ``(batch, in_channels, 105, 105)``.
            input2: Second batch of images, same shape as ``input1``.

        Returns:
            Tensor of shape ``(batch, 1)`` with sigmoid outputs in (0, 1).
        """
        out1 = self.forward_once(input1)
        out2 = self.forward_once(input2)
        # Element-wise L1 distance between the two embeddings.
        diff = torch.abs(out1 - out2)
        similarity_score = self.fc2(diff)
        return similarity_score



# asd = ConvNet()