In [1]:
%%capture

! pip install transformers pytorch-lightning --quiet

In [2]:
import torch
import pandas as pd
import os
from PIL import Image
import pytorch_lightning as pl
import torch.nn as nn
from torch.utils.data import DataLoader
from torchmetrics import Accuracy
from torchvision.transforms.functional import pil_to_tensor
from transformers import ViTFeatureExtractor
import numpy as np
import ast
import json

In [3]:
# Comment if not on google colab
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load the data

In [4]:
#Constants
data_dir = '/content/drive/MyDrive/private_test_data' #Directory of dataset
bboxes_dir = '/content/bbox_result.csv' #CSV file with bbox values
model_dir = '/content/drive/MyDrive/race_model 2024-01-17 11_48_33.239102.pth' #Path to model

We create a custom `Dataset` class that **crops images by their bbox values, resize and convert them to `tensors`**.

In [5]:
# Creating a custom dataset class
class ImageDataset(torch.utils.data.Dataset):
	def __init__(self, dir, bboxes_dir, transform=None):
		self.data_dir = dir
		self.bboxes = pd.read_csv(bboxes_dir)
		self.images = os.listdir(dir)
		self.transform = transform
		self.new_size = (128,128)

	# Defining the length of the dataset
	def __len__(self):
		return len(self.bboxes['file_name'])

	# Defining the method to get an item from the dataset
	def __getitem__(self, index):
		image_path = os.path.join(self.data_dir, self.bboxes['file_name'][index])
		image = Image.open(image_path).convert('RGB')

		bbox = self.bboxes['bbox'][index]
		bbox = ast.literal_eval(bbox)
		image = image.crop((bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]))
		image = image.resize(self.new_size)
		image = pil_to_tensor(image)

		# Applying the transform
		if self.transform:
			image = self.transform(image)

		return (image, self.bboxes['file_name'][index], self.bboxes['bbox'][index])


In [6]:
ds = ImageDataset(data_dir, bboxes_dir)

## Init Collator, Feature Extractor and Loaders

In [7]:
class ImageClassificationCollator:
    def __init__(self, feature_extractor):
        self.feature_extractor = feature_extractor

    def __call__(self, batch):
        encodings = self.feature_extractor([x[0] for x in batch], return_tensors='pt')
        #encodings['labels'] = torch.tensor([x[1] for x in batch], dtype=torch.long)
        encodings['ids'] = np.array([x[1] for x in batch])
        encodings['bboxes'] = np.array([x[2] for x in batch])
        return encodings

In [8]:
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
collator = ImageClassificationCollator(feature_extractor)

dataloader = DataLoader(ds, batch_size=16, collate_fn=collator)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



## Load the model

Include the model class definition when training so `Torch` can load it.

In [9]:
class Classifier(pl.LightningModule):

    def __init__(self, skintone_model, model, lr: float = 2e-5, **kwargs):
        #super().__init__()
        super(Classifier, self).__init__()
        self.save_hyperparameters('lr', *list(kwargs))

        self.model = model
        self.skintone_model = skintone_model
        self.output = nn.Linear(skintone_model.config.num_labels + model.config.num_labels ,model.config.num_labels)
        #self.forward = self.model.forward
        self.val_acc = Accuracy(
            task='multiclass' if model.config.num_labels > 2 else 'binary',
            num_classes=model.config.num_labels
        )

    def forward(self, **batch):
        skintone = self.skintone_model(**batch).logits
        outputs = self.model(**batch).logits

        x = torch.cat((skintone.detach(),outputs), dim=1)
        x = self.output(x)
        return x

    def training_step(self, batch, batch_idx):
        outputs = self(**batch)
        criterion = nn.CrossEntropyLoss()
        pred = outputs.softmax(1)
        loss = criterion(pred, batch['labels'])

        self.log(f"train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        outputs = self(**batch)
        criterion = nn.CrossEntropyLoss()
        pred = outputs.softmax(1)
        loss = criterion(pred, batch['labels'])
        self.log(f"val_loss", loss)
        acc = self.val_acc(outputs.argmax(1), batch['labels'])
        self.log(f"val_acc", acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)

In [10]:
model = torch.load(model_dir)
model.eval();

## Running

In [11]:
id2label = {'0': 'Caucasian', '1': 'Mongoloid', '2': 'Negroid'}
results = pd.DataFrame(columns=['file_name','bbox', 'race'])
missing = []
for batch in iter(dataloader):
  ids = batch.pop('ids')
  bboxes = batch.pop('bboxes')
  #bboxes = np.apply_along_axis(bbox_to_str, 1, bboxes)

  outputs = model(**batch).argmax(1).numpy().astype(str)
  outputs = np.array(list(map(lambda x: id2label[x], outputs.astype(str))))
  outputs = np.column_stack([ids,bboxes,outputs])
  results = pd.concat([results, pd.DataFrame(outputs, columns=['file_name','bbox','race'])])

KeyboardInterrupt: 

In [12]:
results.head()

Unnamed: 0,file_name,bbox,race
0,private_test_img_4884.jpg,"[646.840087890625, 0.0, 728.2833862304688, 666.0]",Caucasian
1,private_test_img_154.jpg,"[194.48736572265625, 166.12384033203125, 583.6...",Caucasian
2,private_test_img_3133.jpg,"[740.8070068359375, 426.7799987792969, 404.810...",Caucasian
3,private_test_img_81.jpg,"[231.55035400390625, 163.39627075195312, 621.5...",Caucasian
4,private_test_img_1724.jpg,"[974.1666870117188, 472.03411865234375, 151.52...",Mongoloid


In [13]:
results.shape

(4464, 3)

## Save to csv

In [14]:
results.to_csv('race_result.csv',index=False)