# Loading and Using the Model
**Skintone prediction**

In [None]:
import torch
import pandas as pd
import os
from PIL import Image
from torch.utils.data import DataLoader
from torchvision.transforms.functional import pil_to_tensor
from transformers import ViTFeatureExtractor,ViTForImageClassification
import numpy as np
import ast
import json

In [None]:
# Uncomment the two lines below when running on Google Colab
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


## Load the data

In [None]:
# Paths used throughout the notebook -- adjust them to your environment

# Directory that holds the images of the dataset
data_dir = './drive/MyDrive/public_test/public_test'

# CSV file listing the image file names together with their bbox values
bboxes_dir = '/content/bbox_0.75_publictest.csv'

# Saved model weights (a state dict that is loaded with torch.load further below)
model_dir = '/content/drive/MyDrive/model_weights2024-01-17 15_32_49.090930.pth'

We create a custom `Dataset` class that **crops each image to its bbox, resizes the crop, and converts it to a `tensor`**.

In [None]:
# Custom dataset that yields bbox-cropped, resized image tensors
class ImageDataset(torch.utils.data.Dataset):
    """Dataset of image crops described by a CSV of bounding boxes.

    The CSV must provide a ``file_name`` column (path relative to ``dir``) and
    a ``bbox`` column holding a Python-literal string that is read as
    ``[left, top, width, height]``.

    Args:
        dir: Directory that contains the image files.
        bboxes_dir: Path of the CSV file with the ``file_name``/``bbox`` columns.
        transform: Optional callable applied to the image tensor of each sample.
    """

    def __init__(self, dir, bboxes_dir, transform=None):
        self.data_dir = dir
        self.bboxes = pd.read_csv(bboxes_dir)
        # Directory listing; not used by the dataset itself, but kept because
        # it is a public attribute of the original class.
        self.images = os.listdir(dir)
        self.transform = transform
        # Every crop is resized to this (width, height) before tensor conversion
        self.new_size = (128, 128)

    def __len__(self):
        """Return the number of rows in the bbox CSV (one sample per row)."""
        return len(self.bboxes['file_name'])

    def __getitem__(self, index):
        """Return ``(image_tensor, file_name, bbox_string)`` for row ``index``.

        ``index`` is positional. ``bbox_string`` is the raw, unparsed value of
        the ``bbox`` column so it can be written back out unchanged.
        """
        # Use positional lookups for both columns so they can never disagree
        # (the original mixed label-based ``[]`` with positional ``.iloc``).
        file_name = self.bboxes['file_name'].iloc[index]
        bbox_string = self.bboxes['bbox'].iloc[index]

        image_path = os.path.join(self.data_dir, file_name)
        # The context manager guarantees the file handle is released;
        # ``convert`` returns an independent, fully loaded image.
        with Image.open(image_path) as source:
            image = source.convert('RGB')

        # bbox is stored as [left, top, width, height]; PIL expects
        # (left, upper, right, lower).
        left, top, width, height = ast.literal_eval(bbox_string)[:4]
        image = image.crop((left, top, left + width, top + height))
        image = image.resize(self.new_size)
        image = pil_to_tensor(image)

        # Applying the transform
        if self.transform:
            image = self.transform(image)

        return (image, file_name, bbox_string)


In [None]:
# Build the dataset from the image directory and the bbox CSV (no extra transform)
ds = ImageDataset(dir=data_dir, bboxes_dir=bboxes_dir)

## Init Collator, Feature Extractor and Loaders

In [None]:
class ImageClassificationCollator:
    """Collate ``(image, file_name, bbox)`` samples into a single batch.

    The images are passed through ``feature_extractor``; the file names and
    bbox values are attached to the resulting encodings as numpy arrays under
    the keys ``'ids'`` and ``'bboxes'``. No ``'labels'`` entry is added.
    """

    def __init__(self, feature_extractor):
        # Callable invoked as feature_extractor(images, return_tensors='pt')
        self.feature_extractor = feature_extractor

    def __call__(self, batch):
        images, file_names, bbox_values = [], [], []
        for sample in batch:
            images.append(sample[0])
            file_names.append(sample[1])
            bbox_values.append(sample[2])

        encoded = self.feature_extractor(images, return_tensors='pt')
        encoded['ids'] = np.array(file_names)
        encoded['bboxes'] = np.array(bbox_values)
        return encoded

In [None]:
# Preprocessor that belongs to the ViT checkpoint; the collator applies it batch-wise.
# NOTE(review): recent transformers releases deprecate ViTFeatureExtractor in
# favour of ViTImageProcessor -- confirm against the installed version.
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
collator = ImageClassificationCollator(feature_extractor=feature_extractor)

# Batches of 8 samples, assembled by the custom collator
dataloader = DataLoader(dataset=ds, collate_fn=collator, batch_size=8)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



## Load the model

In [None]:
# Label maps with string ids: the inference loop further below looks up
# `id2label` with the stringified class index, so the keys must stay strings.
id2label = {
    str(position): name
    for position, name in enumerate(('dark', 'light', 'mid-dark', 'mid-light'))
}
label2id = {name: key for key, name in id2label.items()}

# Pretrained ViT backbone with a classification head sized to the four classes
model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    id2label=id2label,
    label2id=label2id,
    num_labels=len(label2id),
)

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Load the saved weights into the model.
# map_location='cpu' lets a checkpoint that was saved from GPU tensors load on
# a CPU-only runtime; inference in this notebook runs on the CPU anyway.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
model.load_state_dict(torch.load(model_dir, map_location='cpu'))
# Switch to evaluation mode; the trailing ';' hides the module repr in the cell output
model.eval();

## Running

In [None]:
# Run the classifier over every batch and collect one result row per image.
batch_frames = []  # one DataFrame per batch, concatenated once after the loop
missing = []       # `ids` arrays of the batches whose prediction failed

# Gradients are not needed for inference; disabling them avoids building the
# autograd graph for every forward pass.
with torch.no_grad():
    # Errors raised while a batch is being loaded (e.g. an unreadable image)
    # happen in the iteration itself and are not caught below.
    for batch in dataloader:
        # The collator attaches these bookkeeping arrays; remove them so that
        # only the model inputs are forwarded to the model.
        ids = batch.pop('ids')
        bboxes = batch.pop('bboxes')

        try:
            # argmax over the logits selects the same class as argmax over softmax
            class_ids = model(**batch).logits.argmax(1).numpy()
            skintones = [id2label[str(class_id)] for class_id in class_ids]
        except Exception as err:
            # Best effort: remember the failed batch, report why, and carry on
            missing.append(ids)
            print(f'Prediction failed for {list(ids)}: {err!r}')
            continue

        batch_frames.append(
            pd.DataFrame({'file_name': ids, 'bbox': bboxes, 'skintone': skintones})
        )

# Concatenate once (instead of growing the frame inside the loop) and renumber
# the rows so that the index labels are unique.
if batch_frames:
    results = pd.concat(batch_frames, ignore_index=True)
else:
    results = pd.DataFrame(columns=['file_name', 'bbox', 'skintone'])

In [None]:
# Preview the first five predictions
results.head(n=5)

Unnamed: 0,file_name,bbox,skintone
0,49074608.jpg,"[340.65594482421875, 800.713623046875, 302.258...",light
1,63701630.jpg,"[779.4932861328125, 273.1918640136719, 379.562...",mid-light
2,50869636.jpg,"[660.21044921875, 199.97251892089844, 193.4159...",light
3,81768353.jpg,"[795.3341064453125, 319.72607421875, 424.87719...",mid-light
4,66790091.jpg,"[623.74560546875, 389.8661804199219, 331.65948...",light


In [None]:
# Sanity check: (number of result rows, number of columns)
results.shape

(2213, 3)

## Save to csv

In [None]:
# Write the predictions to disk; the DataFrame index is not written to the file
results.to_csv('skintone_result.csv', index=False)