## Setup Device & Environment

In [1]:
from datasets import load_dataset
import numpy as np

import torch
from transformers import AutoImageProcessor
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

import evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda')

device

device(type='cuda')

## Data Preparation

### Load Dataset

In [3]:
raw_dataset = load_dataset("imagefolder", data_dir="./datasets/chest_xray")

Resolving data files: 100%|██████████| 5216/5216 [00:00<00:00, 16922.60it/s]
Resolving data files: 100%|██████████| 624/624 [00:00<?, ?it/s]


In [4]:
print(raw_dataset)

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 5216
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 16
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 624
    })
})


### Setup Labels

In [5]:
labels = labels = raw_dataset["train"].features["label"].names
print(labels)

['NORMAL', 'PNEUMONIA']


In [6]:
label2id, id2label = dict(), dict()

for i, label in enumerate(labels):
  label2id[i] = label
  id2label[label] = i

In [7]:
print(label2id)
print(id2label)

{0: 'NORMAL', 1: 'PNEUMONIA'}
{'NORMAL': 0, 'PNEUMONIA': 1}


### Transforming Data

In [8]:
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

In [9]:
size = (image_processor.size["height"], image_processor.size["width"])
resizer = RandomResizedCrop(size)
normalize = Normalize(image_processor.image_mean, image_processor.image_std)

In [10]:
_transforms = Compose([resizer, ToTensor(), normalize])

In [11]:
def transforms(examples):
  examples["image"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
  # del examples["image"]
  return examples

In [12]:
dataset = raw_dataset.with_transform(transforms)

In [13]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 5216
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 16
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 624
    })
})


### Preparing metrics for the model

In [14]:
accuracy = evaluate.load("accuracy")

In [15]:
def compute_metrics(eval_pred):
  predictions = np.argmax(eval_pred.predictions, axis=1)
  return accuracy.compute(predictions=predictions, references=eval_pred.label_ids)

## Loading Best Model

In [16]:
from own_model import CompositeModel
from training_own import get_model_params
from torch import nn

# own_layer = nn.Sequential(
#   nn.LayerNorm(1000),
#   nn.Linear(1000, 64),
#   nn.ReLU(),
#   nn.Linear(64, 128),
#   nn.Dropout(0.2),
#   nn.ReLU(),
#   nn.Linear(128, 64),
#   nn.Dropout(0.4),
#   nn.ReLU(),
#   nn.Linear(64, 2),
# )

own_layer = nn.Sequential(
  nn.LayerNorm(1000),
  nn.Linear(1000, 48),
  # nn.Dropout(0.2),
  # nn.ReLU(),
  # nn.Linear(48, 64),
  # nn.Dropout(0.4),
  nn.ReLU(),
  nn.Linear(48, 2),
)

model = CompositeModel(own_layer)
model.eval()
get_model_params(model.additional_layers)

50146

## Training Setup

In [17]:
from torchmetrics import Accuracy

training_loader = torch.utils.data.DataLoader(dataset['train'], batch_size=16, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset['test'], batch_size=16, shuffle=True)
validation_loader = torch.utils.data.DataLoader(dataset['test'], batch_size=64, shuffle=True)

# Loss Function
loss_fn = torch.nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.Adam(model.additional_layers.parameters(), lr=1e-4)

# Accuracy Metric

accuracy_metric = Accuracy(task='multiclass', num_classes=2).to(device)

## Training Model

In [18]:
from training_own import train_one_epoch, train_many_epochs, evaluate_model

N_EXAMPLES = 25

testing_fragment = dataset['test'].shuffle(seed=1)[:N_EXAMPLES]

# evaluate_model(model, testing_fragment=testing_fragment)

In [19]:
train_many_epochs(
  1,
  model=model,
  training_loader=training_loader,
  validation_loader=validation_loader,
  optimizer=optimizer,
  loss_fn=loss_fn,
  accuracy_metric=accuracy_metric,
  cuda_device=device,
  epoch_index=0,
  logging_frequency=100
)

EPOCH 1:
 batch 100 loss: 0.5289985927939415 vloss: 0.6147621870040894 training_accuracy: 0.7400000095367432 validation accuracy 0.635937511920929
 batch 200 loss: 0.4496798272430897 vloss: 0.5426210761070251 training_accuracy: 0.7906249761581421 validation accuracy 0.7166666388511658
 batch 300 loss: 0.37393304727971555 vloss: 0.5712288022041321 training_accuracy: 0.8374999761581421 validation accuracy 0.6703125238418579
LOSS train 0.37393304727971555 valid 0.5090877413749695 ACCURACY trai 0.7927530407905579 validation 0.7479166388511658


In [20]:
N_EXAMPLES = 50

testing_fragment = dataset['test'].shuffle(seed=1)[:N_EXAMPLES]

evaluate_model(model, testing_fragment=testing_fragment)

0.78

###