In [None]:
import os
import accelerate
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.utils import shuffle
from tqdm import tqdm
from PIL import Image

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader
from torchvision import datasets

from transformers import AutoImageProcessor, ViTForImageClassification, TrainingArguments, Trainer
from datasets import load_metric

from utils import count_parameters
from Preprocessor_Vision_transformers import AugmentedCustomImageDataset, CustomImageDataset, resize_data
from utils import predict_classes

In [None]:
# Notebook-wide settings shared by the cells below.

# Side length (in pixels) of the square input images and the matching (height, width) pair.
size = 224
default_inputsize = (size, size)

# Number of target classes.
nclasses = 250

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Load the pretrained ViT checkpoint together with its matching image processor.
checkpoint = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

# Size the classification head explicitly: without `num_labels` the head width
# comes from the checkpoint config (the library default is 2 labels), which does
# not match the `nclasses` categories the model is fine-tuned on.
model = ViTForImageClassification.from_pretrained(checkpoint, num_labels=nclasses)
model = model.to(device)

In [None]:
# Inspect the size of the loaded model with the project helper from `utils`.
# NOTE(review): presumably reports the number of (trainable) parameters — confirm in utils.count_parameters.
count_parameters(model)

In [None]:
# Build the preprocessing transform shared by the train, validation and test datasets.
# NOTE(review): resize_data is a project helper; presumably it resizes images to
# `default_inputsize` — confirm in Preprocessor_Vision_transformers.
data_transforms = resize_data(default_inputsize)

In [None]:
# Index the training folder; each entry of `imgs` is a (file path, label) pair.
image_data = datasets.ImageFolder('data_sketches/data_sketches/train_images')

# Split the pairs into two parallel lists (one progress bar per pass).
train_data_path = [sample[0] for sample in tqdm(image_data.imgs)]
train_labels = [sample[1] for sample in tqdm(image_data.imgs)]

# Shuffle paths and labels together with a fixed seed so the order is reproducible.
n_train = len(train_data_path)
train_data_path, train_labels = shuffle(
    train_data_path,
    train_labels,
    random_state=0,
    n_samples=n_train,
)

# Wrap the shuffled samples in the augmenting dataset and display its size.
train_dataset = AugmentedCustomImageDataset(
    data_path=train_data_path,
    labels=train_labels,
    transform=data_transforms,
)
len(train_dataset)

In [None]:
# Index the validation folder; each entry of `imgs` is a (file path, label) pair.
image_data = datasets.ImageFolder('data_sketches/data_sketches/val_images')

# Keep the class names reported by the folder index for later use.
images_classes = image_data.classes

# Split the pairs into two parallel lists (one progress bar per pass).
val_data_path = [sample[0] for sample in tqdm(image_data.imgs)]
val_labels = [sample[1] for sample in tqdm(image_data.imgs)]

# Validation uses the plain (non-augmenting) dataset; display its size.
val_dataset = CustomImageDataset(
    data_path=val_data_path,
    labels=val_labels,
    transform=data_transforms,
)
len(val_dataset)

In [None]:
## Do the same for the train set after the split

In [None]:
# Accuracy metric reported by the Trainer at every evaluation.
metric = load_metric("accuracy")


def compute_metrics(p):
    """Score one evaluation pass with the accuracy metric.

    Args:
        p: Object exposing ``predictions`` (per-class scores, reduced to a
            class index with an argmax over axis 1) and ``label_ids``
            (the reference labels).

    Returns:
        Whatever ``metric.compute`` returns for these predictions and references.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    return metric.compute(predictions=predicted_classes, references=p.label_ids)

Prepare the Hugging Face trainer: the following parameters change depending on the experiment.

In [None]:
# Hyper-parameters for this experiment. Evaluation, checkpointing and logging all
# happen once per epoch; to run them more often, switch the three strategies to
# "steps" and set eval_steps / save_steps / logging_steps (earlier runs used
# 94 / 94 / 22, i.e. roughly 4 evaluations and 4 logs per epoch).
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
  output_dir="./expert1",
  per_device_train_batch_size=64,
  evaluation_strategy="epoch",
  save_strategy="epoch",
  logging_strategy="epoch",
  warmup_ratio=0.1,
  num_train_epochs=12,
  lr_scheduler_type='linear',
  # fp16 mixed precision needs a GPU; enabling it unconditionally makes this cell
  # fail on the CPU fallback that `device` explicitly allows.
  fp16=torch.cuda.is_available(),
  learning_rate=1e-4,
  weight_decay=0.02,
  # Keep every field produced by the custom datasets instead of letting the
  # Trainer drop the ones it does not recognise.
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='tensorboard',
)

In [None]:
# Assemble the Trainer: the model to fine-tune, the hyper-parameters defined in
# `training_args`, the datasets for each phase and the accuracy callback.
trainer = Trainer(
    args=training_args,
    model=model,
    # data
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # evaluation
    compute_metrics=compute_metrics,
)

In [None]:
# Fine-tune the model with the Trainer assembled from `training_args`,
# `train_dataset` and `val_dataset`.
trainer.train()

In [None]:
# Reload the fine-tuned weights from the Trainer's output directory rather than a
# hard-coded absolute path, so the cell follows `output_dir` wherever the notebook runs.
model_dir = training_args.output_dir

# The Trainer writes its per-epoch checkpoints into `checkpoint-*` sub-folders, so
# the top-level directory only holds a loadable model once it is saved explicitly.
# An already exported model is left untouched.
if not os.path.isfile(os.path.join(model_dir, "config.json")):
    trainer.save_model(model_dir)

model = ViTForImageClassification.from_pretrained(model_dir)
model = model.to(device)

Prepare the test data

In [None]:
# Index the test folder; each entry of `imgs` is a (file path, label) pair.
image_data = datasets.ImageFolder('data_sketches/data_sketches/test_images')

# Split the pairs into two parallel lists (one progress bar per pass).
test_data_path = [sample[0] for sample in tqdm(image_data.imgs)]
test_labels = [sample[1] for sample in tqdm(image_data.imgs)]

# Keep the loader unshuffled so predictions stay aligned with `test_data_path`.
test_dataset = CustomImageDataset(
    data_path=test_data_path,
    labels=test_labels,
    transform=data_transforms,
)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=64)

In [None]:
# Run the fine-tuned model over the test loader with the project helper, then
# store the result on disk so it can be reused without re-running inference.
# NOTE(review): predict_classes presumably returns one class index per test
# image, in loader order — confirm in utils.predict_classes.
test_pred = predict_classes(model, test_dataloader, device)
np.save('test_pred_expert1.npy', test_pred)

In [None]:
# Build the submission table: one row per test image, identified by its file
# name without directory or extension, with the predicted class index.
# The predictions come from `test_pred` (the name produced by the inference
# cell), and only the two columns that are actually filled are created.
kaggle_tab = pd.DataFrame({
    # basename() copes with the platform's path separator; the text before the
    # first dot of the file name is kept as the id.
    'Id': [os.path.basename(path).split('.')[0] for path in test_data_path],
    # NOTE(review): assumes test_pred holds one class index per entry of
    # test_data_path, in the same order — a length mismatch raises here.
    'Category': np.asarray(test_pred).astype(int),
})

kaggle_tab.head()