## Data Preparation

In [1]:
from datasets import load_dataset
import numpy as np

import torch
from transformers import AutoImageProcessor
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

import evaluate

  from .autonotebook import tqdm as notebook_tqdm


### Load Dataset

In [2]:
dataset = load_dataset("imagefolder", data_dir="./datasets/chest_xray")

Resolving data files: 100%|██████████| 5216/5216 [00:00<00:00, 39818.67it/s]
Resolving data files: 100%|██████████| 624/624 [00:00<00:00, 312097.03it/s]


In [3]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 5216
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 16
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 624
    })
})


### Setup Labels

In [4]:
labels = labels = dataset["train"].features["label"].names
print(labels)

['NORMAL', 'PNEUMONIA']


In [5]:
label2id, id2label = dict(), dict()

for i, label in enumerate(labels):
  label2id[i] = label
  id2label[label] = i

In [6]:
print(label2id)
print(id2label)

{0: 'NORMAL', 1: 'PNEUMONIA'}
{'NORMAL': 0, 'PNEUMONIA': 1}


### Transforming Data

In [7]:
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

In [8]:
size = (image_processor.size["height"], image_processor.size["width"])
resizer = RandomResizedCrop(size)
normalize = Normalize(image_processor.image_mean, image_processor.image_std)

In [9]:
_transforms = Compose([resizer, ToTensor(), normalize])

In [10]:
def transforms(examples):
  examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
  del examples["image"]
  return examples

In [11]:
dataset  = dataset.with_transform(transforms)

In [12]:
print(dataset['train'])

Dataset({
    features: ['image', 'label'],
    num_rows: 5216
})


### Preparing metrics for the model

In [13]:
accuracy = evaluate.load("accuracy")

In [14]:
def compute_metrics(eval_pred):
  predictions = np.argmax(eval_pred.predictions, axis=1)
  return accuracy.compute(predictions=predictions, references=eval_pred.label_ids)

### Setting Up Model

In [15]:
from transformers import AutoModelForImageClassification
from transformers.models.vit.modeling_vit import ViTForImageClassification

base_model = AutoModelForImageClassification.from_pretrained(
  "google/vit-base-patch16-224",
  # num_labels=len(labels),
  # id2label=id2label,
  # label2id=label2id
)

type(base_model)

transformers.models.vit.modeling_vit.ViTForImageClassification

### Adding additional layer to pretrained model

In [30]:
from torch import nn

class MyCompositeModel(nn.Module):
  def __init__(self, my_pretrained_model):
    super().__init__()
    
    self.pretrained = my_pretrained_model
    self.my_new_layers = nn.Sequential(
      nn.Linear(1000, 100),
      nn.ReLU(),
      nn.Linear(100, 2)
    )
  
  def forward(self, x):
    x = self.pretrained(x).logits
    x = self.my_new_layers(x)
    return x
  
my_extended_model = MyCompositeModel(my_pretrained_model=base_model)

In [32]:
my_extended_model(dataset['test'][0]['pixel_values'][None, ...])
# dataset['test'][0]['pixel_values'][None, ...]

tensor([[0.1788, 0.0120]], grad_fn=<AddmmBackward0>)

### Training The Model

In [None]:
from transformers import TrainingArguments
from transformers import Trainer
from transformers import DefaultDataCollator

In [None]:
training_args = TrainingArguments(
  output_dir = "pneumonia_model",
  evaluation_strategy="epoch",
  save_strategy="epoch",
  learning_rate=5e-5,
  per_device_train_batch_size=12,
  per_device_eval_batch_size=12,
  num_train_epochs=1,
  load_best_model_at_end=True,
  metric_for_best_model="accuracy",
  remove_unused_columns=False,
)

In [None]:
trainer = Trainer(
  model=my_extended_model,
  args=training_args,
  data_collator=DefaultDataCollator(),
  train_dataset=dataset["train"],
  eval_dataset=dataset["test"],
  tokenizer=image_processor,
  compute_metrics=compute_metrics
)

In [None]:
trainer.train()

In [None]:
trainer.evaluate()

In [None]:
dataset_test_part = dataset['test'].shuffle(seed=1)[:16]
dataset_test_part['label']

In [None]:
model.to('cpu')

for i in range(0, 16):  
  image = dataset_test_part["pixel_values"][i]
  
  pred = model(image[None, ...])
  
  logits = pred.logits.detach().numpy()[0]
  pred_class = np.argmax(logits)
  
  print(logits, pred_class)

###