In [1]:
import pandas as pd

# Loading the pre-processed collection table, indexed by the 'id' column
ind_df = pd.read_csv(
    'data/indigenous_collection_processed.csv',
    index_col='id',
)
print(f'Dataframe columns: \n{ind_df.columns}')

Dataframe columns: 
Index(['url', 'thumbnail', 'creation_date', 'modification_date',
       'numero_do_item', 'tripticos', 'categoria', 'nome_do_item',
       'nome_do_item_dic', 'colecao', 'coletor', 'doador', 'modo_de_aquisicao',
       'data_de_aquisicao', 'ano_de_aquisicao', 'data_de_confeccao', 'autoria',
       'nome_etnico', 'descricao', 'dimensoes', 'funcao', 'materia_prima',
       'tecnica_confeccao', 'descritor_tematico', 'descritor_comum',
       'numero_de_pecas', 'itens_relacionados', 'responsavel_guarda',
       'inst_detentora', 'povo', 'autoidentificacao', 'lingua',
       'estado_de_origem', 'geolocalizacao', 'pais_de_origem', 'exposicao',
       'referencias', 'disponibilidade', 'qualificacao', 'historia_adm',
       'notas_gerais', 'observacao', 'conservacao', 'image_path'],
      dtype='object')


In [2]:
from IPython.core.magic import register_cell_magic

# Creating skip cell command
@register_cell_magic
def skip(line, cell):
    """Cell magic ``%%skip``: ignore the cell contents and do nothing.

    Both the magic's argument line and the cell body are discarded, so a cell
    that starts with ``%%skip`` has no effect when it is run.
    """
    return None

# Image Clustering

Clustering experiments with image feature extractors. The idea is to fine-tune some pre-trained models on our dataset and then remove the last layer of the model to cluster on the embedding space projections.

## Dataset Preparation

For fine-tuning the model on our dataset, we are going to try a few different labels and study how they affect the generated embedding space. For now, we focus on *povo* and *categoria*.

In [3]:
from PIL import Image

# Filtering out corrupted images
corrupted_images = []
for index, image_path in ind_df['image_path'].dropna().items():
    try:
        # Image.open is lazy (it only identifies the file), so verify() is needed to
        # actually check the file contents; the context manager closes the file handle
        with Image.open(image_path) as image:
            image.verify()
    except Exception:
        # Any failure to open/verify marks the image as unusable for this row
        corrupted_images.append(image_path)
        ind_df.loc[index, 'image_path'] = pd.NA
print(f'{len(corrupted_images)} corrupted images')

# Creating 'image_path_br' column: same file stem as 'image_path', but pointing to a
# '.png' file under 'data/br_images/'
has_image = ind_df['image_path'].notna()
ind_df['image_path_br'] = ind_df['image_path'].values
ind_df.loc[has_image, 'image_path_br'] = ind_df.loc[has_image, 'image_path'].apply(
    lambda path: f"data/br_images/{path.split('/')[-1].split('.')[0]}.png")

# Preparing labels for dataset training
label_column = 'povo' # 'categoria', 'povo', 'ano_de_aquisicao'
# Integer id per distinct value of the label column, in order of first appearance
name_to_num = {c: i for i, c in enumerate(ind_df[label_column].unique())}
# Maps each 'image_path_br' path to the integer id of its label
labels = {row['image_path_br']: name_to_num[row[label_column]]
          for _, row in ind_df.loc[ind_df['image_path_br'].notna()].iterrows()}

1 corrupted images


In [4]:
import os
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Getting the proper device: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Creating the ImageDataset class and the DataLoader object to avoid loading all the images
# simultaneously and running out of memory
class ImageDataset(Dataset):
    """Dataset that lazily loads the images found in a directory.

    Args:
        image_dir: Directory scanned (non-recursively) for '.png', '.jpg' and
            '.jpeg' files.
        labels: Mapping from image path to label. Keys must equal
            ``os.path.join(image_dir, filename)`` to be found.
        transform: Optional callable applied to each loaded RGB PIL image.

    Each item is an ``(image, label)`` pair; ``label`` is -1 when the image path
    has no entry in ``labels``.
    """

    def __init__(self, image_dir, labels, transform=None):
        self.image_dir = image_dir
        # os.listdir returns entries in arbitrary order; sorting keeps the index -> file
        # mapping (and therefore any seeded split of this dataset) reproducible
        self.image_files = sorted(f for f in os.listdir(image_dir)
                                  if f.endswith(('.png', '.jpg', '.jpeg')))
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Number of image files found in ``image_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load the idx-th image as RGB, apply the transform and look up its label."""
        image_path = os.path.join(self.image_dir, self.image_files[idx])
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        # -1 marks images without a known label
        label = self.labels.get(image_path, -1)
        return image, label

# Pre-processing applied to every image: tensor conversion, resize to 224x224 and
# per-channel normalization with mean 0.5 and std 0.5
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
]
transform = transforms.Compose(preprocessing_steps)
dataset = ImageDataset("data/br_images/", labels, transform=transform)

## ViT Base Patch-16

### Pre-trained Embedding Space

In [5]:
%%skip

# Projecting data onto the off-the-shelf pre-trained embedding space from ViT
import numpy as np
from transformers import ViTImageProcessor, ViTModel
from tqdm import tqdm

# Loading model and moving it to the selected device
model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k').to(device)

# Getting data
# NOTE(review): with shuffle=True the order of the computed embeddings does not follow
# the order of dataset.image_files -- confirm this is intended
dataloader = DataLoader(
    dataset,
    batch_size=512,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)

# Function iterating over the data to compute the embedding projections
def get_embeddings(model, dataloader):
    """Compute the ViT 'pooler_output' embedding of every batch in ``dataloader``.

    Args:
        model: Either a model whose output can be indexed with 'pooler_output',
            or a wrapper exposing such a model as its ``vit`` attribute (as
            ViTClassifier does); in the latter case the wrapped backbone is used,
            because the wrapper itself only returns logits.
        dataloader: Iterable yielding ``(batch_images, batch_labels)`` pairs; the
            labels are ignored.

    Returns:
        A list with one CPU tensor of embeddings per batch, in iteration order.

    Side effects:
        Puts the (backbone) model in eval mode.
    """
    backbone = getattr(model, 'vit', model)
    backbone.eval()

    image_embeddings = []
    with torch.no_grad():
        for batch_images, _ in tqdm(dataloader, desc="Computing embeddings"):
            outputs = backbone(batch_images.to(device))

            # Do I get the last_hidden_state of CLS token or the pooler_output?
            # embeddings = outputs['last_hidden_state'][:, 0, :]
            embeddings = outputs['pooler_output']
            # Moved to the CPU so the accumulated embeddings do not stay on the device
            image_embeddings.append(embeddings.cpu())
    return image_embeddings

# Join the per-batch embedding tensors along the first axis into a single NumPy array
image_embeddings = np.concatenate(get_embeddings(model, dataloader), axis=0)

In [6]:
%%skip

# Computing data projection: 2D TriMap projection of the pre-trained ViT embeddings
import trimap

proj_trimap = trimap.TRIMAP(
    n_dims=2,
    n_inliers=12,
    n_outliers=6,
    n_random=3,
    weight_temp=0.5,
    lr=0.1,
    apply_pca=True,
)
vanilla_vit = proj_trimap.fit_transform(image_embeddings)

### Fine-tuning Embedding Space

In [7]:
# REMEMBER TO REMOVE THIS PART AFTERWARDS

import numpy as np
from transformers import ViTImageProcessor, ViTModel
from tqdm import tqdm
import trimap

In [8]:
# Creating our own ViT classifier head for fine-tuning
import torch.nn as nn
import torch.optim as optim

class ViTClassifier(nn.Module):
    """Pre-trained ViT backbone followed by a single linear classification layer.

    Args:
        num_classes: Number of output logits of the classification layer.
        pretrained_name: Checkpoint identifier passed to ``ViTModel.from_pretrained``;
            defaults to the checkpoint used throughout this notebook.

    Attributes are documented inline: ``vit`` is the backbone and ``classifier``
    the linear head sized from the backbone's ``config.hidden_size``.
    """

    def __init__(self, num_classes, pretrained_name='google/vit-base-patch16-224-in21k'):
        super().__init__()
        # Backbone whose 'pooler_output' is used as the image embedding
        self.vit = ViTModel.from_pretrained(pretrained_name)
        # Linear head mapping the embedding to one logit per class
        self.classifier = nn.Linear(self.vit.config.hidden_size, num_classes)

    def forward(self, x):
        """Return the classification logits for the batch of images ``x``."""
        outputs = self.vit(x)

        # Do I get the last_hidden_state of CLS token or the pooler_output?
        # embeddings = outputs['last_hidden_state'][:, 0, :]
        embeddings = outputs['pooler_output']

        return self.classifier(embeddings)

In [9]:
from torchmetrics.classification import Accuracy, Precision

# Training function
def train_loop(model, num_classes, dataloader, epochs=20,
               val_loader=None, loss_fn=None, optimizer=None):
    """Fine-tune ``model`` and evaluate it on a validation set after every epoch.

    Args:
        model: Module mapping a batch of images to class logits.
        num_classes: Number of classes, used to configure the metrics and to size
            the per-class precision history.
        dataloader: Iterable of ``(batch_images, batch_labels)`` training batches.
        epochs: Maximum number of epochs.
        val_loader: Iterable of validation batches. Defaults to the notebook-level
            ``val_dataloader``.
        loss_fn: Loss called as ``loss_fn(logits, labels)``. Defaults to the
            notebook-level ``criterion``.
        optimizer: Optimizer updating ``model``. Defaults to the notebook-level
            ``opt``.

    Returns:
        Tuple ``(losses, accuracies, class_precisions)``: per-epoch summed training
        loss, per-epoch validation accuracy and, for each class, the per-epoch
        validation precision.
    """
    # Fall back to the notebook-level objects so that existing calls keep working
    if val_loader is None:
        val_loader = val_dataloader
    if loss_fn is None:
        loss_fn = criterion
    if optimizer is None:
        optimizer = opt

    losses = []
    accuracies = []
    class_precisions = [[] for _ in range(num_classes)]

    # Early-stopping set up
    best_val_acc = 0
    patience = max(2, int(0.05*epochs))
    patience_counter = 0
    tolerance = 0.01

    acc_metric = Accuracy(task="multiclass", num_classes=num_classes).to(device)
    prec_metric = Precision(task="multiclass", num_classes=num_classes,
                            average=None).to(device)

    for epoch in tqdm(range(epochs), desc="Training model", leave=True):
        model.train()
        epoch_loss = 0.0  # sum (not mean) of the batch losses of this epoch
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            logits = model(batch_images)
            loss = loss_fn(logits, batch_labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Stored as a plain Python float (a float16 round-trip would quantize the value
        # and overflow to inf for large losses)
        losses.append(epoch_loss)

        # Validation set for early-stopping on metrics that are not directly optimized
        model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for batch_images, batch_labels in val_loader:
                # Validation batches must live on the same device as the model/metrics
                batch_images = batch_images.to(device)
                batch_labels = batch_labels.to(device)

                logits = model(batch_images)
                all_preds.append(torch.argmax(logits, dim=1))
                all_labels.append(batch_labels)

            all_preds = torch.cat(all_preds)
            all_labels = torch.cat(all_labels)

            val_acc = acc_metric(all_preds, all_labels).item()
            val_prec = prec_metric(all_preds, all_labels).tolist()

        accuracies.append(val_acc)
        for i, prec in enumerate(val_prec):
            class_precisions[i].append(prec)

        # Logged before the early-stopping check so the last epoch is also reported
        tqdm.write((f'Epoch {epoch+1}, Loss: {epoch_loss:.4f}, '
                    f'Validation Accuracy: {val_acc:.4f}'))

        # Early-stopping check
        # NOTE(review): an epoch within `tolerance` of the best accuracy neither resets
        # nor increments the counter, so a plateau never stops training -- confirm intent
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0

        elif best_val_acc-val_acc > tolerance:
            patience_counter += 1
            if patience_counter >= patience:
                tqdm.write("Early-stopping training.")
                break

    return losses, accuracies, class_precisions

Training model:   1%|       | 1/100 [04:33<7:30:53, 273.26s/it]

Epoch 1, Loss: 1106.7620862722397


Training model:   2%|▏      | 2/100 [09:03<7:22:57, 271.19s/it]

Epoch 2, Loss: 682.0712050795555


Training model:   3%|▏      | 3/100 [13:31<7:16:29, 269.99s/it]

Epoch 3, Loss: 423.51108530163765


Training model:   4%|▎      | 4/100 [17:56<7:08:59, 268.12s/it]

Epoch 4, Loss: 244.93481071293354


Training model:   5%|▎      | 5/100 [22:21<7:02:46, 267.02s/it]

Epoch 5, Loss: 133.12320867925882


Training model:   6%|▍      | 6/100 [26:47<6:57:36, 266.56s/it]

Epoch 6, Loss: 74.50854260474443


Training model:   7%|▍      | 7/100 [31:12<6:52:34, 266.18s/it]

Epoch 7, Loss: 39.70178194437176


Training model:   8%|▌      | 8/100 [35:38<6:47:54, 266.03s/it]

Epoch 8, Loss: 23.51130084041506


Training model:   9%|▋      | 9/100 [40:04<6:43:24, 265.98s/it]

Epoch 9, Loss: 13.998039931757376


Training model:  10%|▌     | 10/100 [44:29<6:38:35, 265.72s/it]

Epoch 10, Loss: 9.269791238941252


Training model:  11%|▋     | 11/100 [48:54<6:33:42, 265.42s/it]

Epoch 11, Loss: 6.521042430307716


Training model:  12%|▋     | 12/100 [53:19<6:28:58, 265.21s/it]

Epoch 12, Loss: 4.623428250430152


Training model:  13%|▊     | 13/100 [57:44<6:24:36, 265.25s/it]

Epoch 13, Loss: 3.5104437932604924


Training model:  14%|▌   | 14/100 [1:02:09<6:19:56, 265.07s/it]

Epoch 14, Loss: 2.945191613282077


Training model:  15%|▌   | 15/100 [1:06:34<6:15:38, 265.16s/it]

Epoch 15, Loss: 67.57821324456017


Training model:  16%|▋   | 16/100 [1:10:59<6:11:16, 265.19s/it]

Epoch 16, Loss: 24.187576212454587


Training model:  17%|▋   | 17/100 [1:15:24<6:06:48, 265.16s/it]

Epoch 17, Loss: 7.673260277835652


Training model:  18%|▋   | 18/100 [1:19:50<6:02:41, 265.38s/it]

Epoch 18, Loss: 2.3075701690395363


Training model:  19%|▊   | 19/100 [1:24:15<5:58:00, 265.20s/it]

Epoch 19, Loss: 2.0234777142759413


Training model:  20%|▊   | 20/100 [1:28:40<5:53:39, 265.24s/it]

Epoch 20, Loss: 1.940120876795845


Training model:  21%|▊   | 21/100 [1:33:05<5:49:02, 265.10s/it]

Epoch 21, Loss: 1.306014057714492


Training model:  22%|▉   | 22/100 [1:37:30<5:44:31, 265.02s/it]

Epoch 22, Loss: 1.3107390307995956


Training model:  23%|▉   | 23/100 [1:41:56<5:40:40, 265.46s/it]

Epoch 23, Loss: 38.9049574767414


Training model:  24%|▉   | 24/100 [1:46:21<5:35:51, 265.16s/it]

Epoch 24, Loss: 17.384929275140166


Training model:  25%|█   | 25/100 [1:50:46<5:31:25, 265.14s/it]

Epoch 25, Loss: 3.154495512484573


Training model:  26%|█   | 26/100 [1:55:11<5:26:57, 265.11s/it]

Epoch 26, Loss: 2.124788953922689


Training model:  27%|█   | 27/100 [1:59:36<5:22:37, 265.17s/it]

Epoch 27, Loss: 2.448073112696875


Training model:  28%|█   | 28/100 [2:04:01<5:18:11, 265.16s/it]

Epoch 28, Loss: 3.648298494226765


Training model:  29%|█▏  | 29/100 [2:08:27<5:13:48, 265.19s/it]

Epoch 29, Loss: 11.853724699496524


Training model:  30%|█▏  | 30/100 [2:12:51<5:09:07, 264.97s/it]

Epoch 30, Loss: 14.2432885003509


Training model:  31%|█▏  | 31/100 [2:17:16<5:04:46, 265.03s/it]

Epoch 31, Loss: 3.7777347871742677


Training model:  32%|█▎  | 32/100 [2:21:41<5:00:12, 264.89s/it]

Epoch 32, Loss: 6.403127712314017


Training model:  33%|█▎  | 33/100 [2:26:06<4:55:53, 264.98s/it]

Epoch 33, Loss: 10.14333004981745


Training model:  34%|█▎  | 34/100 [2:30:31<4:51:22, 264.88s/it]

Epoch 34, Loss: 6.341247495263815


Training model:  35%|█▍  | 35/100 [2:34:56<4:47:09, 265.07s/it]

Epoch 35, Loss: 3.7394473184249364


Training model:  36%|█▍  | 36/100 [2:39:21<4:42:37, 264.97s/it]

Epoch 36, Loss: 11.23506930487929


Training model:  37%|█▍  | 37/100 [2:43:47<4:38:26, 265.18s/it]

Epoch 37, Loss: 2.4975830617768224


Training model:  38%|█▌  | 38/100 [2:48:12<4:33:59, 265.16s/it]

Epoch 38, Loss: 1.9040524677548092


Training model:  39%|█▌  | 39/100 [2:52:37<4:29:34, 265.16s/it]

Epoch 39, Loss: 1.2415674035437405


Training model:  40%|█▌  | 40/100 [2:57:02<4:25:05, 265.09s/it]

Epoch 40, Loss: 0.8575672517763451


Training model:  41%|█▋  | 41/100 [3:01:27<4:20:35, 265.01s/it]

Epoch 41, Loss: 0.7519402327816351


Training model:  42%|█▋  | 42/100 [3:05:52<4:16:17, 265.13s/it]

Epoch 42, Loss: 0.7512947954965057


Training model:  43%|█▋  | 43/100 [3:10:17<4:11:55, 265.18s/it]

Epoch 43, Loss: 0.7757628343460965


Training model:  44%|█▊  | 44/100 [3:14:43<4:07:35, 265.28s/it]

Epoch 44, Loss: 0.7183772459902684


Training model:  45%|█▊  | 45/100 [3:19:09<4:03:18, 265.43s/it]

Epoch 45, Loss: 0.7762741249098326


Training model:  46%|█▊  | 46/100 [3:23:33<3:58:41, 265.22s/it]

Epoch 46, Loss: 0.6612264084615163


Training model:  47%|█▉  | 47/100 [3:27:58<3:54:10, 265.09s/it]

Epoch 47, Loss: 0.7017418253599317


Training model:  48%|█▉  | 48/100 [3:32:24<3:49:47, 265.15s/it]

Epoch 48, Loss: 0.7741451970905473


Training model:  49%|█▉  | 49/100 [3:36:49<3:45:20, 265.12s/it]

Epoch 49, Loss: 0.6838422860892024


Training model:  50%|██  | 50/100 [3:41:14<3:40:59, 265.19s/it]

Epoch 50, Loss: 3.724436603632057


Training model:  51%|██  | 51/100 [3:45:39<3:36:32, 265.14s/it]

Epoch 51, Loss: 43.46592753258301


Training model:  52%|██  | 52/100 [3:50:04<3:32:05, 265.12s/it]

Epoch 52, Loss: 5.4184298063628376


Training model:  53%|██  | 53/100 [3:54:29<3:27:36, 265.04s/it]

Epoch 53, Loss: 2.4749388908676337


Training model:  54%|██▏ | 54/100 [3:58:54<3:23:16, 265.14s/it]

Epoch 54, Loss: 0.7032253124489216


Training model:  55%|██▏ | 55/100 [4:03:19<3:18:49, 265.09s/it]

Epoch 55, Loss: 0.5909842712717364


Training model:  56%|██▏ | 56/100 [4:07:44<3:14:22, 265.07s/it]

Epoch 56, Loss: 0.5805632082410739


Training model:  57%|██▎ | 57/100 [4:12:10<3:10:03, 265.20s/it]

Epoch 57, Loss: 0.581200719345361


Training model:  58%|██▎ | 58/100 [4:16:35<3:05:42, 265.30s/it]

Epoch 58, Loss: 0.5720634800600237


Training model:  59%|██▎ | 59/100 [4:21:01<3:01:21, 265.41s/it]

Epoch 59, Loss: 0.590335943561513


Training model:  60%|██▍ | 60/100 [4:25:27<2:56:58, 265.46s/it]

Epoch 60, Loss: 0.5960298385180067


Training model:  61%|██▍ | 61/100 [4:29:52<2:52:37, 265.59s/it]

Epoch 61, Loss: 0.5661173572370899


Training model:  62%|██▍ | 62/100 [4:34:18<2:48:12, 265.59s/it]

Epoch 62, Loss: 0.6212568792107049


Training model:  63%|██▌ | 63/100 [4:38:44<2:43:49, 265.67s/it]

Epoch 63, Loss: 0.6096626746257243


Training model:  64%|██▌ | 64/100 [4:43:08<2:39:12, 265.36s/it]

Epoch 64, Loss: 0.5529065449554764


Training model:  65%|██▌ | 65/100 [4:47:34<2:34:44, 265.27s/it]

Epoch 65, Loss: 0.5981217518383346


Training model:  66%|██▋ | 66/100 [4:51:59<2:30:19, 265.27s/it]

Epoch 66, Loss: 0.573319471150171


Training model:  67%|██▋ | 67/100 [4:56:24<2:25:50, 265.15s/it]

Epoch 67, Loss: 0.5932676561569679


Training model:  68%|██▋ | 68/100 [5:00:49<2:21:28, 265.27s/it]

Epoch 68, Loss: 0.5786407347877685


Training model:  69%|██▊ | 69/100 [5:05:14<2:17:03, 265.26s/it]

Epoch 69, Loss: 0.6661948120690795


Training model:  70%|██▊ | 70/100 [5:09:40<2:12:35, 265.19s/it]

Epoch 70, Loss: 36.15616097455313


Training model:  71%|██▊ | 71/100 [5:14:04<2:08:06, 265.04s/it]

Epoch 71, Loss: 17.655871852097334


Training model:  72%|██▉ | 72/100 [5:18:29<2:03:41, 265.06s/it]

Epoch 72, Loss: 2.9922120133705903


Training model:  73%|██▉ | 73/100 [5:22:55<1:59:19, 265.17s/it]

Epoch 73, Loss: 0.9442345785791986


Training model:  74%|██▉ | 74/100 [5:27:20<1:54:56, 265.24s/it]

Epoch 74, Loss: 0.5539465547408327


Training model:  75%|███ | 75/100 [5:31:45<1:50:30, 265.21s/it]

Epoch 75, Loss: 0.5110765849385643


Training model:  76%|███ | 76/100 [5:36:11<1:46:06, 265.29s/it]

Epoch 76, Loss: 0.5360656025004573


Training model:  77%|███ | 77/100 [5:40:36<1:41:41, 265.28s/it]

Epoch 77, Loss: 0.5350302212464157


Training model:  78%|███ | 78/100 [5:45:02<1:37:19, 265.43s/it]

Epoch 78, Loss: 0.5062463817448588


Training model:  79%|███▏| 79/100 [5:49:27<1:32:51, 265.32s/it]

Epoch 79, Loss: 0.5267671894325758


Training model:  80%|███▏| 80/100 [5:53:52<1:28:24, 265.23s/it]

Epoch 80, Loss: 0.5102711971048848


Training model:  81%|███▏| 81/100 [5:58:17<1:23:58, 265.16s/it]

Epoch 81, Loss: 0.5277219625349971


Training model:  82%|███▎| 82/100 [6:02:42<1:19:34, 265.23s/it]

Epoch 82, Loss: 0.5417623401917808


Training model:  83%|███▎| 83/100 [6:07:08<1:15:10, 265.34s/it]

Epoch 83, Loss: 0.5496685110629187


Training model:  84%|███▎| 84/100 [6:11:34<1:10:47, 265.46s/it]

Epoch 84, Loss: 0.542251444348949


Training model:  85%|███▍| 85/100 [6:15:59<1:06:20, 265.37s/it]

Epoch 85, Loss: 0.5284906592496554


Training model:  86%|███▍| 86/100 [6:20:24<1:01:56, 265.43s/it]

Epoch 86, Loss: 0.545947688015076


Training model:  87%|█████▏| 87/100 [6:24:50<57:31, 265.48s/it]

Epoch 87, Loss: 0.5564359660384071


Training model:  88%|█████▎| 88/100 [6:29:15<53:04, 265.40s/it]

Epoch 88, Loss: 0.565048103524532


Training model:  89%|█████▎| 89/100 [6:33:40<48:37, 265.24s/it]

Epoch 89, Loss: 0.5336749647321994


Training model:  90%|█████▍| 90/100 [6:38:06<44:13, 265.38s/it]

Epoch 90, Loss: 0.5640550963107671


Training model:  91%|█████▍| 91/100 [6:42:31<39:49, 265.46s/it]

Epoch 91, Loss: 0.5836756804128527


Training model:  92%|█████▌| 92/100 [6:46:57<35:23, 265.45s/it]

Epoch 92, Loss: 0.6111725107602979


Training model:  93%|█████▌| 93/100 [6:51:22<30:58, 265.52s/it]

Epoch 93, Loss: 44.02104978986608


Training model:  94%|█████▋| 94/100 [6:55:48<26:32, 265.47s/it]

Epoch 94, Loss: 6.223761160072172


Training model:  95%|█████▋| 95/100 [7:00:14<22:07, 265.55s/it]

Epoch 95, Loss: 2.179058180670836


Training model:  96%|█████▊| 96/100 [7:04:38<17:41, 265.35s/it]

Epoch 96, Loss: 0.6687080589181278


Training model:  97%|█████▊| 97/100 [7:09:03<13:15, 265.21s/it]

Epoch 97, Loss: 0.48848807318427134


Training model:  98%|█████▉| 98/100 [7:13:28<08:50, 265.10s/it]

Epoch 98, Loss: 0.48489707658882253


Training model:  99%|█████▉| 99/100 [7:17:54<04:25, 265.42s/it]

Epoch 99, Loss: 0.4746606232802151


Training model: 100%|█████| 100/100 [7:22:20<00:00, 265.40s/it]

Epoch 100, Loss: 0.4756871135250549





In [None]:
from torch.utils.data import random_split

# Creating training and validation datasets (85% / 15%, fixed seed for reproducibility)
train_size = int(0.85*len(dataset))
val_size = len(dataset)-train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size],
                                          generator=torch.Generator().manual_seed(42))

batch_size = 32
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=0, pin_memory=True)
# The validation metrics do not depend on the sample order, so no shuffling is needed
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=0, pin_memory=True)

# Training set-up and execution
# One output per label id in name_to_num, so every id produced for `label_column`
# is a valid class index
num_classes = len(name_to_num)
model = ViTClassifier(num_classes).to(device)
criterion = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=5e-5, weight_decay=0)
epochs = 100

# Train on the training split only; the validation split is read inside train_loop
# through `val_dataloader`. Keeping the histories avoids dumping them as cell output.
losses, accuracies, class_precisions = train_loop(model, num_classes, train_dataloader, epochs)

In [None]:
# Computing image embeddings
# NOTE: get_embeddings is defined in the pre-trained embedding cell, which is currently
# marked with %%skip; that definition must be executed before this cell.
# A dedicated, non-shuffled loader over the whole dataset keeps the embeddings in the
# same order as dataset.image_files.
embedding_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                                  num_workers=0, pin_memory=True)

# The classifier wrapper returns logits only, so the embeddings are taken from its
# fine-tuned backbone (model.vit), which exposes 'pooler_output'
model.eval()
image_embeddings = np.concatenate(get_embeddings(model.vit, embedding_dataloader), axis=0)

# Computing data projection
proj_trimap = trimap.TRIMAP(n_dims=2, n_inliers=12, n_outliers=6, n_random=3,
                            weight_temp=0.5, lr=0.1, apply_pca=True)
povo_vit = proj_trimap.fit_transform(image_embeddings)

### Visualizing and Comparing Projections

In [None]:
# Visualizing resulting projections
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(8, 4))
fig.suptitle('Comparing Projections of ViT Models')

# One panel per projection: vanilla ViT on the left, ViT fine-tuned on 'povo' on the right
panels = [
    (vanilla_vit, "Vanilla ViT"),
    (povo_vit, "ViT Fine-Tuned on Povo"),
]
for ax, (projection, panel_title) in zip(axes, panels):
    ax.scatter(projection[:, 0], projection[:, 1], c='b')
    ax.set_title(panel_title)
    # Axis labels and ticks are hidden for both panels
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_xticks([])
    ax.set_yticks([])

fig.tight_layout()
plt.show()