## Data Preparation

In [1]:
from datasets import load_dataset
import numpy as np

import evaluate

  from .autonotebook import tqdm as notebook_tqdm


### Load Dataset

In [2]:
dataset = load_dataset("imagefolder", data_dir="./datasets/chest_xray")

Resolving data files: 100%|██████████| 5216/5216 [00:00<00:00, 17589.43it/s]
Resolving data files: 100%|██████████| 624/624 [00:00<00:00, 312134.25it/s]


In [3]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 5216
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 16
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 624
    })
})


### Setup Labels

In [4]:
labels = labels = dataset["train"].features["label"].names
print(labels)

['NORMAL', 'PNEUMONIA']


In [5]:
label2id, id2label = dict(), dict()

for i, label in enumerate(labels):
  label2id[i] = label
  id2label[label] = i

In [6]:
print(label2id)
print(id2label)

{0: 'NORMAL', 1: 'PNEUMONIA'}
{'NORMAL': 0, 'PNEUMONIA': 1}


### Transforming Data

In [7]:
from transformers import AutoImageProcessor
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

In [8]:
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

In [9]:
size = (image_processor.size["height"], image_processor.size["width"])
resizer = RandomResizedCrop(size)
normalize = Normalize(image_processor.image_mean, image_processor.image_std)

In [10]:
_transforms = Compose([resizer, ToTensor(), normalize])

In [11]:
def transforms(examples):
  examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
  del examples["image"]
  return examples

In [12]:
dataset  = dataset.with_transform(transforms)

In [13]:
print(dataset['train'])

Dataset({
    features: ['image', 'label'],
    num_rows: 5216
})


### Preparing metrics for the model

In [14]:
accuracy = evaluate.load("accuracy")

In [15]:
def compute_metrics(eval_pred):
  predictions = np.argmax(eval_pred.predictions, axis=1)
  return accuracy.compute(predictions=predictions, references=eval_pred.label_ids)

### Setting Up Model

In [16]:
from transformers import AutoModelForImageClassification

model = AutoModelForImageClassification.from_pretrained(
  "google/vit-base-patch16-224-in21k",
  num_labels=len(labels),
  id2label=id2label,
  label2id=label2id
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
model = model.to("cuda")

In [18]:
model.device

device(type='cuda', index=0)

### Training The Model

In [19]:
from transformers import TrainingArguments
from transformers import Trainer
from transformers import DefaultDataCollator

In [20]:
training_args = TrainingArguments(
  output_dir = "pneumonia_model",
  evaluation_strategy="epoch",
  save_strategy="epoch",
  learning_rate=5e-5,
  per_device_train_batch_size=4,
  per_device_eval_batch_size=4,
  num_train_epochs=2,
  load_best_model_at_end=True,
  metric_for_best_model="accuracy",
  remove_unused_columns=False,
)

In [21]:
trainer = Trainer(
  model=model,
  args=training_args,
  data_collator=DefaultDataCollator(),
  train_dataset=dataset["train"],
  eval_dataset=dataset["test"],
  tokenizer=image_processor,
  compute_metrics=compute_metrics
)

In [22]:
trainer.train()

 19%|█▉        | 501/2608 [01:33<05:57,  5.89it/s]

{'loss': 0.327, 'learning_rate': 4.0414110429447856e-05, 'epoch': 0.38}


 38%|███▊      | 1001/2608 [03:07<04:33,  5.87it/s]

{'loss': 0.2297, 'learning_rate': 3.0828220858895703e-05, 'epoch': 0.77}


                                                   
 50%|█████     | 1304/2608 [04:17<03:38,  5.96it/s]

{'eval_loss': 0.7434412837028503, 'eval_accuracy': 0.8205128205128205, 'eval_runtime': 13.2522, 'eval_samples_per_second': 47.087, 'eval_steps_per_second': 11.772, 'epoch': 1.0}


 58%|█████▊    | 1501/2608 [04:54<03:36,  5.11it/s]  

{'loss': 0.2254, 'learning_rate': 2.1242331288343557e-05, 'epoch': 1.15}


 77%|███████▋  | 2001/2608 [06:20<01:38,  6.14it/s]

{'loss': 0.1748, 'learning_rate': 1.1656441717791411e-05, 'epoch': 1.53}


 96%|█████████▌| 2501/2608 [07:46<00:18,  5.81it/s]

{'loss': 0.1489, 'learning_rate': 2.070552147239264e-06, 'epoch': 1.92}


                                                   
100%|██████████| 2608/2608 [08:17<00:00,  6.32it/s]

{'eval_loss': 0.4634515345096588, 'eval_accuracy': 0.8910256410256411, 'eval_runtime': 12.1155, 'eval_samples_per_second': 51.504, 'eval_steps_per_second': 12.876, 'epoch': 2.0}


100%|██████████| 2608/2608 [08:19<00:00,  5.23it/s]

{'train_runtime': 499.0813, 'train_samples_per_second': 20.902, 'train_steps_per_second': 5.226, 'train_loss': 0.21640620970287205, 'epoch': 2.0}





TrainOutput(global_step=2608, training_loss=0.21640620970287205, metrics={'train_runtime': 499.0813, 'train_samples_per_second': 20.902, 'train_steps_per_second': 5.226, 'train_loss': 0.21640620970287205, 'epoch': 2.0})

In [60]:
dataset_val = dataset['validation'][0:10]

In [65]:
image = dataset_val["pixel_values"][0]

In [72]:
from transformers import pipeline

classifier = pipeline("image-classification", image_processor=image_processor, feature_extractor="google/vit-base-patch16-224-in21k", model=model)
# classifier(image)

###