In [10]:
# Mount Google Drive at /content/drive so that files stored in Drive are reachable
# from this Colab runtime through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Root of the course dataset inside the mounted Drive; the training and grading
# splits live in sibling sub-folders underneath it.
root_path = "/content/drive/MyDrive/Colab Notebooks/COEN240_TA/data"
train_path = f"{root_path}/train"
grade_path = f"{root_path}/grade"

In [3]:
pip install facenet-pytorch


Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m36.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet-pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (



In [4]:
from sklearn.svm import SVC
import numpy as np
from tqdm import tqdm
import torch
from facenet_pytorch import InceptionResnetV1
import pickle
# from dataset import TestDataset
from typing import List
import numpy as np
import pandas as pd
from pathlib import Path
from torchvision.transforms import transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms
from PIL import Image, ImageDraw

In [5]:
# Prefer the GPU when the runtime exposes one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Batching and image geometry: images are first resized to STARTING_SHAPE and the
# tensors handed to the network have spatial size INPUT_SHAPE.
BATCH_SIZE = 5
STARTING_SHAPE = (250, 250)
INPUT_SHAPE = (224, 224)

# Training hyper-parameters.
LEARNING_RATE = 0.001
NUM_EPOCHS = 1

# Training split: image folder and the CSV that indexes it.
TRAINING_DATA_PATH = train_path
TRAINING_CSV_FILE = f"{train_path}/train.csv"

# Grading split: image folder and the CSV that indexes it.
TESTING_DATA_PATH = grade_path
TESTING_CSV_FILE = f"{grade_path}/solution.csv"

In [6]:

# Deterministic pipeline used for evaluation and inference: resize straight to the
# network input size and convert to a tensor.
# NOTE(review): no normalisation step is applied in either pipeline, and the input is
# 224x224 -- confirm this matches what the pretrained embedding network expects.
test_transforms = transforms.Compose([
    transforms.Resize(size=INPUT_SHAPE),
    transforms.ToTensor(),
])

# Augmenting pipeline used for training: resize, then random crop / brightness /
# rotation / flip, a 3x3 Gaussian blur, and finally conversion to a tensor.
train_transforms = transforms.Compose([
    transforms.Resize(size=STARTING_SHAPE),
    transforms.RandomResizedCrop(size=INPUT_SHAPE, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
    transforms.ColorJitter(brightness=0.5),
    transforms.RandomRotation(degrees=50),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.GaussianBlur(kernel_size=3),
    transforms.ToTensor(),
])

class ClassDataset(Dataset):
    """Labelled image dataset indexed by a CSV file.

    The CSV is read with pandas; column 0 holds the image path relative to
    ``root_dir`` and column 1 holds the label.

    Args:
        csv_file: Path of the CSV index.
        root_dir: Directory the image paths in the CSV are relative to.
        transform: Optional callable applied to the PIL image before it is returned.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = Path(root_dir)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is always converted to 3-channel RGB so that grayscale, RGBA or
        palette files yield tensors with the same channel count as every other sample.
        """
        img_name = self.root_dir / self.data.iloc[idx, 0]
        # Open inside a context manager so the file handle is released promptly;
        # convert() returns an independent, fully loaded copy.
        with Image.open(img_name) as img:
            image = img.convert("RGB")
        label = self.data.iloc[idx, 1]
        if self.transform is not None:
            image = self.transform(image)
        return image, label

class TestDataset(Dataset):
    """Unlabelled dataset wrapping a list of in-memory image arrays.

    Args:
        image_list: Sequence of arrays accepted by ``PIL.Image.fromarray``.
        transform: Callable applied to each PIL image; defaults to ``test_transforms``.
    """

    def __init__(self, image_list, transform=test_transforms):
        self.image_list = image_list
        self.transform = transform

    def __len__(self):
        """Return the number of wrapped images."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Return ``(image, "None")`` for the image at ``idx``.

        The array is converted to a 3-channel RGB image first, so grayscale or RGBA
        inputs produce tensors with the same channel count as RGB ones.
        """
        image = Image.fromarray(self.image_list[idx]).convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, "None"  # Placeholder label: the string "None", not the None object

def get_data_loaders():
    """Build the training and testing ``DataLoader`` objects.

    Both loaders wrap a ``ClassDataset`` and use ``BATCH_SIZE``; the training one reads
    the training CSV/folder with ``train_transforms`` and the testing one reads the
    testing CSV/folder with ``test_transforms``.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """

    def _make_loader(csv_path, image_dir, pipeline):
        # One dataset per split, wrapped with the shared batch size.
        dataset = ClassDataset(csv_file=csv_path, root_dir=image_dir, transform=pipeline)
        return DataLoader(dataset, batch_size=BATCH_SIZE)

    return (
        _make_loader(TRAINING_CSV_FILE, TRAINING_DATA_PATH, train_transforms),
        _make_loader(TESTING_CSV_FILE, TESTING_DATA_PATH, test_transforms),
    )


In [7]:

class DeepFacedSVM:
    """Face classifier: InceptionResnetV1 embeddings fed into an RBF-kernel SVM.

    The embedding network is created with ``pretrained='vggface2'`` and kept in eval
    mode; only the SVM is fitted, and only the SVM is written by :meth:`dump`.
    """

    def __init__(self, device='cpu'):
        """Create the embedding network on ``device`` and an unfitted ``SVC``.

        Args:
            device: Anything accepted by ``torch.device`` (e.g. ``'cpu'``, ``'cuda'``).
        """
        self.device = torch.device(device)
        self.feature_extractor = InceptionResnetV1(pretrained='vggface2').eval().to(self.device)
        self.svm_classifier = SVC(kernel='rbf')

    def train(self, train_loader, num_epochs):
        """Fit the SVM on embeddings of ``train_loader`` and print the training accuracy.

        Args:
            train_loader: Iterable yielding ``(images, labels)`` batches.
            num_epochs: Number of passes over ``train_loader`` whose embeddings are
                collected before fitting.
        """
        train_features, train_labels = self._extract_features(train_loader, num_epochs)
        self.svm_classifier.fit(train_features, train_labels)
        train_accuracy = self.svm_classifier.score(train_features, train_labels)
        print(f"Train Accuracy: {train_accuracy}")

    def test(self, test_loader):
        """Score the fitted SVM on a single pass over ``test_loader`` and print it."""
        test_features, test_labels = self._extract_features(test_loader, 1)
        test_accuracy = self.svm_classifier.score(test_features, test_labels)
        print(f"Test Accuracy: {test_accuracy}")

    def dump(self, filename):
        """Pickle the SVM classifier (not the embedding network) to ``filename``."""
        with open(filename, 'wb') as file:
            pickle.dump(self.svm_classifier, file)
        print(f"Model saved as {filename}")

    @classmethod
    def load_from_pickle(cls, filename, device='cpu'):
        """Create an instance whose SVM is restored from ``filename``.

        Args:
            filename: Path of a pickle previously written by :meth:`dump`.
            device: Device for the embedding network; defaults to ``'cpu'``.

        Returns:
            A new instance with ``svm_classifier`` replaced by the unpickled object.
        """
        # SECURITY: pickle.load can execute arbitrary code. Only load files that were
        # produced by this notebook / a trusted source.
        with open(filename, 'rb') as file:
            svm_classifier = pickle.load(file)
        instance = cls(device=device)
        instance.svm_classifier = svm_classifier
        return instance

    def _extract_features(self, loader, num_epochs):
        """Run every batch of ``loader`` through the embedding network.

        Args:
            loader: Iterable yielding ``(images, batch_labels)``; ``batch_labels`` may be
                a tensor or a plain sequence.
            num_epochs: Number of complete passes over ``loader``.

        Returns:
            ``(features, labels)``: the row-stacked embeddings and a 1-D label array
            with one entry per embedding row.

        Raises:
            ValueError: If no batch was produced at all.
        """
        features = []
        labels = []

        for _ in range(num_epochs):
            for images, batch_labels in tqdm(loader):
                images = images.to(self.device)

                # Inference only: no gradients are needed for the frozen extractor.
                with torch.no_grad():
                    embeddings = self.feature_extractor(images)

                features.append(embeddings.cpu().numpy())
                if torch.is_tensor(batch_labels):
                    batch_labels = batch_labels.cpu().numpy()
                labels.extend(batch_labels)

        if not features:
            raise ValueError("No batches were produced by the loader; cannot extract features.")

        # Labels are kept 1-D: scikit-learn expects y of shape (n_samples,) and emits a
        # DataConversionWarning when handed a column vector.
        return np.vstack(features), np.asarray(labels).ravel()

    def inference(self, image_list: List[np.ndarray], filenames: List[str]) -> dict:
        """Predict a label for every image and persist the predictions as CSV.

        The predictions are written to ``prediction.csv`` in the working directory and
        to ``prediction.csv`` under the module-level ``grade_path``; the first rows are
        also printed.

        Args:
            image_list: Image arrays, one per file.
            filenames: Names matching ``image_list`` position by position.

        Returns:
            Mapping of filename to predicted label.

        Raises:
            ValueError: If ``image_list`` and ``filenames`` differ in length.
        """
        if len(image_list) != len(filenames):
            raise ValueError(
                f"Got {len(image_list)} images but {len(filenames)} filenames; they must match."
            )

        inference_dataset: TestDataset = TestDataset(image_list)
        inference_loader: DataLoader = DataLoader(inference_dataset, batch_size=1, shuffle=False)
        test_features, _ = self._extract_features(inference_loader, 1)
        labels: np.ndarray = self.svm_classifier.predict(test_features)

        # Generate dictionary of predictions with filenames as keys
        predictions = dict(zip(filenames, labels))

        predictions_df = pd.DataFrame(list(predictions.items()), columns=['filename', 'prediction'])
        predictions_df.to_csv('prediction.csv', index=False)
        print(predictions_df.head())
        predictions_df.to_csv(grade_path+'/prediction.csv', index=False)
        return predictions

In [8]:
from pathlib import Path
from torchvision.transforms import transforms

def save_images_with_labels(images, labels, folder_path, epoch):
    """Write each image to ``folder_path`` as a PNG named after its epoch, label and index.

    The folder (including missing parents) is created when absent. Files are named
    ``epoch_<epoch>_label_<label>_image_<index>.png``.

    Args:
        images: Indexable collection of images accepted by ``transforms.ToPILImage``.
        labels: Indexable collection of labels, aligned with ``images``.
        folder_path: Destination directory.
        epoch: Value embedded in every file name.
    """
    out_dir = Path(folder_path)
    out_dir.mkdir(parents=True, exist_ok=True)

    to_pil = transforms.ToPILImage()
    for idx, tensor_image in enumerate(images):
        target = out_dir / f'epoch_{epoch}_label_{labels[idx]}_image_{idx}.png'
        to_pil(tensor_image).save(target)

In [11]:
train_loader, test_loader = get_data_loaders()

# Build the model on the device chosen in the configuration cell; without this the
# constructor's default ('cpu') is used even when a GPU is available.
deepfaced_svm = DeepFacedSVM(device=device)
deepfaced_svm.train(train_loader, NUM_EPOCHS)
deepfaced_svm.test(test_loader)
# Only the fitted SVM is persisted; the embedding network is re-created on load.
deepfaced_svm.dump("deepfaced_svm_model.pkl")

  0%|          | 0.00/107M [00:00<?, ?B/s]

100%|██████████| 86/86 [01:18<00:00,  1.10it/s]
  y = column_or_1d(y, warn=True)


Train Accuracy: 0.9836829836829837


100%|██████████| 1/1 [00:00<00:00,  2.05it/s]

Test Accuracy: 1.0
Model saved as deepfaced_svm_model.pkl





In [12]:
import os
def load_images_from_folder(folder_path, return_filenames=False):
    """Load every ``.png`` / ``.jpg`` / ``.jpeg`` file in ``folder_path`` as an RGB array.

    Files are visited in sorted name order. A file that cannot be opened is reported
    with ``print`` and skipped.

    Args:
        folder_path: Directory to scan (non-recursive).
        return_filenames: When true, also return the names of the files that were
            actually loaded, aligned index-for-index with the images.

    Returns:
        A list of image arrays, or ``(images, filenames)`` when ``return_filenames``
        is true.
    """
    images = []
    filenames = []

    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(('.png', '.jpg', '.jpeg')): # Add more extensions if needed
            continue
        img_path = os.path.join(folder_path, filename)
        try:
            # The context manager closes the file; convert() forces the pixel data to
            # be read and guarantees three channels.
            with Image.open(img_path) as img:
                img_array = np.array(img.convert("RGB"))
        except Exception as e:
            print(f"Error loading {filename}: {e}")
            continue
        images.append(img_array)
        filenames.append(filename)

    if return_filenames:
        return images, filenames
    return images

folder_path = grade_path
grade_path1=grade_path + "/solution.csv"
# Take images and names from the same pass so a skipped (unreadable) file can never
# shift the filename/image pairing.
images, image_filenames = load_images_from_folder(folder_path, return_filenames=True)
deepfaced_svm = DeepFacedSVM.load_from_pickle("deepfaced_svm_model.pkl")
predictions = deepfaced_svm.inference(images, image_filenames)

100%|██████████| 1/1 [00:00<00:00,  3.24it/s]

       filename prediction
0  0220_18.jpeg  zotaharsh





In [13]:
def calculate_accuracy(ground_truths, predictions):
    """Return the fraction of ground-truth entries whose prediction matches.

    Args:
        ground_truths: Mapping of sample id to the expected label.
        predictions: Mapping of sample id to the predicted label.

    Returns:
        Number of matching samples divided by the number of ground truths.

    Raises:
        ValueError: If the two mappings differ in size, or if there are no ground
            truths (accuracy would be undefined).
    """
    if len(ground_truths) != len(predictions):
        raise ValueError("The number of predictions does not match the number of ground truths.")
    if not ground_truths:
        raise ValueError("Cannot compute accuracy: no ground truths were provided.")

    # A sample id absent from `predictions` is never counted as correct, even when
    # its ground-truth value happens to be None.
    correct_predictions = sum(
        1
        for sample_id, ground_truth in ground_truths.items()
        if sample_id in predictions and predictions[sample_id] == ground_truth
    )
    return correct_predictions / len(ground_truths)

def grade_predictions(predictions, grade_path):
    """Score predictions against ``solution.csv`` found in ``grade_path``.

    Args:
        predictions: Mapping of filename to predicted label. When ``None``, the
            predictions are read from ``prediction.csv`` in ``grade_path`` instead.
        grade_path: Directory containing ``solution.csv`` (columns ``filename`` and
            ``label``) and, when needed, ``prediction.csv`` (columns ``filename`` and
            ``prediction``).

    Returns:
        The accuracy computed by ``calculate_accuracy``.
    """
    # Resolve both files from the argument rather than from a notebook-level global,
    # so the function works on a fresh kernel and for any directory.
    grade_dir = Path(grade_path)

    solution_df = pd.read_csv(grade_dir / "solution.csv")
    ground_truths = {str(key): value for key, value in zip(solution_df['filename'], solution_df['label'])}

    if predictions is None:
        prediction_df = pd.read_csv(grade_dir / "prediction.csv")
        predictions = dict(zip(prediction_df['filename'], prediction_df['prediction']))

    return calculate_accuracy(ground_truths, predictions)

# Example usage: grade the predictions produced by the inference cell against the
# solution file in the grading directory, then report the result as a percentage.
accuracy = grade_predictions(predictions=predictions, grade_path=grade_path)
print(f"Prediction Accuracy: {accuracy * 100}%")

Prediction Accuracy: 100.0%
