In [7]:
! pip install -q transformers datasets Pillow matplotlib torchvision==0.14.1+cpu torch==1.13.1 "optimum-graphcore>=0.6.0, <0.7.0"  -f https://download.pytorch.org/whl/torch_stable.html
! pip install -q examples-utils[common]@git+https://github.com/graphcore/examples-utils@latest_stable

!apt -qq install git-lfs
!git config --global credential.helper store

[0mLoading extensions from ~/.ipython/extensions is deprecated. We recommend managing extensions like any other Python packages, in site-packages.
In order to improve usability and support for future users, Graphcore would like to collect information about the applications and code being run in this notebook. The following information will be anonymised before being sent to Graphcore: 
	- User progression through the notebook 
	- Notebook details: number of cells, code being run and the output of the cells 
	- Environment details 

You can disable logging at any time by running `%unload_ext gc_logger` from any cell. 

git-lfs is already the newest version (2.9.2-1).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.


In [8]:
import os
# Switch the working directory to the project checkout; later cells use
# relative paths ("./Datasets/...", "./Checkpoints/...") and import "src.*".
# NOTE(review): hard-coded absolute path — only valid where the project lives
# at exactly this location.
os.chdir("/notebooks/Imagenette/Imagenette2Challenge")

In [9]:
import importlib
from transformers import AutoConfig, ResNetForImageClassification, AutoModelForImageClassification
import torch
import torch.nn as nn
import torchvision
from datasets import load_dataset
import torchvision.transforms as transforms
import poptorch
from tqdm import tqdm

import src.utils.helpermethods as helpermethods

In [10]:
# Directory handed to IPUConfig as `executable_cache_dir`: the root comes from
# POPLAR_EXECUTABLE_CACHE_DIR (falling back to /tmp/exe_cache/) with an
# "image_classification" sub-directory appended.
executable_cache_dir = "/".join(
    [os.getenv("POPLAR_EXECUTABLE_CACHE_DIR", "/tmp/exe_cache/"), "image_classification"]
)
# Dataset root, overridable through DATASETS_DIR; defaults to the working directory.
dataset_dir = os.environ.get("DATASETS_DIR", "./")

In [11]:
# Alternative backbone, currently disabled: ResNet-50 with its final linear
# layer replaced by a 10-way classifier.
# model_name = "microsoft/resnet-50"
# pretrained_model = ResNetForImageClassification.from_pretrained(model_name)
# pretrained_model.classifier[1] = nn.Linear(in_features=2048, out_features=10)

# Active backbone: ViT-Base (patch 16, 224x224) loaded from the in21k checkpoint.
# num_labels=10 requests a 10-class head; the load log reports the classifier
# weights as newly initialised, so the model must be fine-tuned before use.
model_name = "google/vit-base-patch16-224-in21k"
pretrained_model = AutoModelForImageClassification.from_pretrained(
    model_name, num_labels=10)


Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# Total number of trainable parameters (those with requires_grad set),
# rendered with "." as the thousands separator, e.g. 85.806.346.
num_params = "{:,d}".format(
    sum(weight.numel() for weight in pretrained_model.parameters() if weight.requires_grad)
).replace(",", ".")
print(num_params)

85.806.346


In [13]:
from optimum.graphcore import IPUTrainingArguments, IPUTrainer, IPUConfig

In [14]:
# Per-device batch size used for both training and evaluation in IPUTrainingArguments.
micro_batch_size = 1 
# Number of micro-batches accumulated per optimizer step; the training log
# reports a total training batch size of 32 with these settings.
gradient_accumulation_steps = 32

In [15]:

# Training configuration for the IPU trainer.
# NOTE(review): in the recorded run the loss becomes NaN from epoch ~2.45
# onwards and eval accuracy drops to ~0.10; only the epoch-2 checkpoint is
# usable. Learning rate / precision settings should be revisited — confirm the
# root cause before re-running all 10 epochs.
args = IPUTrainingArguments(
    # The two adjacent string literals are concatenated by Python into a single
    # path: "./Checkpoints/<model_name>-finetuned-imagenette".
    output_dir="./Checkpoints/"
    f"{model_name}-finetuned-imagenette",
    remove_unused_columns=False,
    # Evaluate and checkpoint once per epoch.
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=micro_batch_size,
    per_device_eval_batch_size=micro_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    num_train_epochs=10,
    warmup_ratio=0.1,
    logging_steps=10,
    # After training, reload the checkpoint with the best "accuracy" value
    # (the key returned by compute_metrics).
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    n_ipu=4,
    dataloader_drop_last=True,
    push_to_hub=False,
    # Hub upload is disabled; the id below is a leftover template (note the
    # "eurosat" suffix does not match this dataset).
    # model_hub_id = f"username-or-organization/{model_name}-finetuned-eurosat"
)

In [16]:
# import optimum.graphcore as optimum_graphcore
# ipu_config = optimum_graphcore.IPUConfig.from_pretrained(
#     "Graphcore/vit-base-ipu", executable_cache_dir=executable_cache_dir
# )
# print(ipu_config) 

In [17]:
# Number of layers placed on each of the four IPUs of a replica.
# `resnet_layers` belongs to the disabled ResNet-50 variant and is not used below.
resnet_layers = [3,4,6,3]
# 12 ViT encoder layers split evenly: 3 per IPU (see the "Device Allocation" log).
vit_layers = [3,3,3,3]

# NOTE(review): the recorded training run diverges to NaN during epoch 3.
# Half-precision partials (`enable_half_partials=True`) are one plausible
# contributor — confirm before changing precision settings.
ipu_config = IPUConfig(
    auto_loss_scaling=True,
    executable_cache_dir=executable_cache_dir,
    replication_factor=1,
    inference_replication_factor=1,
    # Reuse the notebook-level setting instead of repeating the literal 32, so
    # this value cannot drift from the one passed to IPUTrainingArguments
    # (the trainer logs "Overriding IPU config: gradient_accumulation_steps=32").
    gradient_accumulation_steps=gradient_accumulation_steps,
    layers_per_ipu=vit_layers,
    inference_layers_per_ipu=vit_layers,
    ipus_per_replica=4,
    inference_ipus_per_replica=4,
    optimizer_state_offchip=True,
    replicated_tensor_sharding=False,
    matmul_proportion=0.2,
    inference_matmul_proportion=0.2,
    enable_half_partials=True,
)


In [18]:
# Project-relative locations of the train/validation index files.
base_path = "./"
# Dataset implementation is loaded by module name and handed to
# helpermethods.load_dataset in the next cell.
CustomDataset_module = importlib.import_module("src.dataLoaders.CustomDatasetRaw")
file_training = base_path + "Datasets/imagenette2/train.txt"
file_validation = base_path + "Datasets/imagenette2/val.txt"
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise per channel.
# NOTE(review): these mean/std values are the usual ImageNet statistics; the
# ViT checkpoint's own preprocessor config may specify different values — confirm.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)


In [19]:
# Build the training and validation datasets from their index files using the
# project helper; both share the same preprocessing transform.
# The training log reports 9296 training and 3856 evaluation examples.
train_ds = helpermethods.load_dataset(
        CustomDataset_module, file_training, transform,
    )
val_ds = helpermethods.load_dataset(
        CustomDataset_module, file_validation, transform,
    )

In [20]:
def compute_metrics(eval_pred):
    """Return top-1 accuracy for one evaluation pass.

    Args:
        eval_pred: A pair ``(logits, labels)``. ``logits`` holds one row of
            class scores per sample and ``labels`` one class index per sample;
            both are converted to float tensors before comparison.

    Returns:
        ``{"accuracy": value}`` where ``value`` is the number of samples whose
        highest-scoring class equals the label, divided by the number of labels.
    """
    raw_scores, raw_targets = eval_pred
    scores = torch.tensor(raw_scores).float()
    targets = torch.tensor(raw_targets).float()
    # Index of the best-scoring class per row, kept as an (N, 1) column.
    top1 = torch.topk(scores, k=1, dim=1).indices
    target_column = targets.view(-1, 1).expand_as(top1)
    hits = torch.eq(top1, target_column).sum().item()
    return {"accuracy": float(hits) / targets.size(0)}


In [21]:
def custom_data_collator(batch):
    """Collate dataset items into the keyword inputs passed to the model.

    Args:
        batch: Sequence of items where ``item[0]`` is the image tensor and
            ``item[1]`` the label tensor.

    Returns:
        Dict with ``"pixel_values"`` (images stacked along a new first
        dimension) and ``"labels"`` (labels stacked the same way).
    """
    image_tensors = []
    label_tensors = []
    for sample in batch:
        image_tensors.append(sample[0])
        label_tensors.append(sample[1])
    collated = {
        "pixel_values": torch.stack(image_tensors),
        "labels": torch.stack(label_tensors),
    }
    return collated


In [22]:
# Wire the model, IPU configuration, training arguments, datasets, metric
# function and collator into the trainer. Constructing it prints the
# layer-to-IPU allocation shown in the output below.
trainer = IPUTrainer(
    pretrained_model,
    ipu_config,
    args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
    # Produces the {"pixel_values", "labels"} batches fed to the model.
    data_collator= custom_data_collator,
)

Overriding IPU config: gradient_accumulation_steps=32
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------


In [23]:
# Run fine-tuning (10 epochs with evaluation and a checkpoint after each one).
# NOTE(review): in the recorded output the loss turns NaN during epoch 3; the
# trainer then reloads checkpoint-580 (epoch 2, accuracy 0.9907) as the best model.
train_results = trainer.train()
# rest is optional but nice to have
# Persist the final (best) model plus the training metrics and trainer state.
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Compiling Model...
Graph compilation: 100%|██████████| 100/100 [00:08<00:00]
Compiled/Loaded model in 22.184907204005867 secs
***** Running training *****
  Num examples = 9296
  Num epochs = 10
  Instantaneous batch size per device = 1
  Total training batch size (w. parallel, distributed & accumulation) = 32
  Gradient accumulation steps = 32
  Total optimization steps = 2900


  0%|          | 0/2900 [00:00<?, ?it/s]

{'loss': 2.2662, 'learning_rate': 1.724137931034483e-06, 'epoch': 0.03}
{'loss': 2.2666, 'learning_rate': 3.448275862068966e-06, 'epoch': 0.07}
{'loss': 2.2687, 'learning_rate': 5.172413793103448e-06, 'epoch': 0.1}
{'loss': 2.2443, 'learning_rate': 6.896551724137932e-06, 'epoch': 0.14}
{'loss': 2.2086, 'learning_rate': 8.620689655172414e-06, 'epoch': 0.17}
{'loss': 2.1273, 'learning_rate': 1.0344827586206897e-05, 'epoch': 0.21}
{'loss': 2.1348, 'learning_rate': 1.206896551724138e-05, 'epoch': 0.24}
{'loss': 2.101, 'learning_rate': 1.3793103448275863e-05, 'epoch': 0.28}
{'loss': 2.0103, 'learning_rate': 1.5517241379310346e-05, 'epoch': 0.31}
{'loss': 1.8209, 'learning_rate': 1.7241379310344828e-05, 'epoch': 0.34}
{'loss': 1.7312, 'learning_rate': 1.896551724137931e-05, 'epoch': 0.38}
{'loss': 1.4977, 'learning_rate': 2.0689655172413793e-05, 'epoch': 0.41}
{'loss': 1.3434, 'learning_rate': 2.2413793103448276e-05, 'epoch': 0.45}
{'loss': 1.2613, 'learning_rate': 2.413793103448276e-05, 'ep

---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Compiling Model...

Graph compilation:   0%|          | 0/100 [00:00<?][A
Graph compilation: 100%|██████████| 100/100 [00:02<00:00][A
Compiled/Loaded model in 11.808537313714623 secs
***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': 0.3251953125, 'eval_accuracy': 0.9758817427385892, 'eval_runtime': 40.4202, 'eval_samples_per_second': 95.398, 'eval_steps_per_second': 95.398, 'epoch': 1.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-290
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-290/ipu_config.json


{'loss': 0.2822, 'learning_rate': 4.980842911877395e-05, 'epoch': 1.03}
{'loss': 0.4417, 'learning_rate': 4.96168582375479e-05, 'epoch': 1.07}
{'loss': 0.2093, 'learning_rate': 4.9425287356321845e-05, 'epoch': 1.1}
{'loss': 0.2091, 'learning_rate': 4.9233716475095786e-05, 'epoch': 1.14}
{'loss': 0.1913, 'learning_rate': 4.904214559386973e-05, 'epoch': 1.17}
{'loss': 0.1854, 'learning_rate': 4.885057471264368e-05, 'epoch': 1.21}
{'loss': 0.1932, 'learning_rate': 4.865900383141763e-05, 'epoch': 1.24}
{'loss': 0.2075, 'learning_rate': 4.846743295019157e-05, 'epoch': 1.28}
{'loss': 0.1604, 'learning_rate': 4.827586206896552e-05, 'epoch': 1.31}
{'loss': 0.1593, 'learning_rate': 4.8084291187739464e-05, 'epoch': 1.34}
{'loss': 0.1646, 'learning_rate': 4.789272030651341e-05, 'epoch': 1.38}
{'loss': 0.1486, 'learning_rate': 4.770114942528736e-05, 'epoch': 1.41}
{'loss': 0.1456, 'learning_rate': 4.7509578544061307e-05, 'epoch': 1.45}
{'loss': 0.3918, 'learning_rate': 4.7318007662835254e-05, 'epo

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': 0.1446533203125, 'eval_accuracy': 0.9906639004149378, 'eval_runtime': 41.3665, 'eval_samples_per_second': 93.216, 'eval_steps_per_second': 93.216, 'epoch': 2.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-580
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-580/ipu_config.json


{'loss': 0.1058, 'learning_rate': 4.4252873563218394e-05, 'epoch': 2.03}
{'loss': 0.1065, 'learning_rate': 4.406130268199234e-05, 'epoch': 2.07}
{'loss': 0.1112, 'learning_rate': 4.386973180076628e-05, 'epoch': 2.1}
{'loss': 0.1068, 'learning_rate': 4.367816091954024e-05, 'epoch': 2.14}
{'loss': 0.1106, 'learning_rate': 4.348659003831418e-05, 'epoch': 2.17}
{'loss': 0.1197, 'learning_rate': 4.3295019157088125e-05, 'epoch': 2.21}
{'loss': 0.0973, 'learning_rate': 4.3103448275862066e-05, 'epoch': 2.24}
{'loss': 0.1104, 'learning_rate': 4.291187739463602e-05, 'epoch': 2.28}
{'loss': 0.1053, 'learning_rate': 4.272030651340996e-05, 'epoch': 2.31}
{'loss': 0.1068, 'learning_rate': 4.252873563218391e-05, 'epoch': 2.34}
{'loss': 0.1096, 'learning_rate': 4.2337164750957856e-05, 'epoch': 2.38}
{'loss': 0.1042, 'learning_rate': 4.21455938697318e-05, 'epoch': 2.41}
{'loss': nan, 'learning_rate': 4.195402298850575e-05, 'epoch': 2.45}
{'loss': nan, 'learning_rate': 4.17624521072797e-05, 'epoch': 2.4

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': nan, 'eval_accuracy': 0.10217842323651452, 'eval_runtime': 39.8111, 'eval_samples_per_second': 96.857, 'eval_steps_per_second': 96.857, 'epoch': 3.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-870
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-870/ipu_config.json


{'loss': nan, 'learning_rate': 3.869731800766284e-05, 'epoch': 3.03}
{'loss': nan, 'learning_rate': 3.850574712643678e-05, 'epoch': 3.07}
{'loss': nan, 'learning_rate': 3.831417624521073e-05, 'epoch': 3.1}
{'loss': nan, 'learning_rate': 3.8122605363984674e-05, 'epoch': 3.14}
{'loss': nan, 'learning_rate': 3.793103448275862e-05, 'epoch': 3.17}
{'loss': nan, 'learning_rate': 3.773946360153257e-05, 'epoch': 3.21}
{'loss': nan, 'learning_rate': 3.7547892720306517e-05, 'epoch': 3.24}
{'loss': nan, 'learning_rate': 3.735632183908046e-05, 'epoch': 3.28}
{'loss': nan, 'learning_rate': 3.716475095785441e-05, 'epoch': 3.31}
{'loss': nan, 'learning_rate': 3.697318007662835e-05, 'epoch': 3.34}
{'loss': nan, 'learning_rate': 3.67816091954023e-05, 'epoch': 3.38}
{'loss': nan, 'learning_rate': 3.659003831417625e-05, 'epoch': 3.41}
{'loss': nan, 'learning_rate': 3.6398467432950195e-05, 'epoch': 3.45}
{'loss': nan, 'learning_rate': 3.620689655172414e-05, 'epoch': 3.48}
{'loss': nan, 'learning_rate': 3.

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': nan, 'eval_accuracy': 0.10217842323651452, 'eval_runtime': 50.5834, 'eval_samples_per_second': 76.231, 'eval_steps_per_second': 76.231, 'epoch': 4.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-1160
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-1160/ipu_config.json


{'loss': nan, 'learning_rate': 3.314176245210728e-05, 'epoch': 4.03}
{'loss': nan, 'learning_rate': 3.295019157088123e-05, 'epoch': 4.07}
{'loss': nan, 'learning_rate': 3.275862068965517e-05, 'epoch': 4.1}
{'loss': nan, 'learning_rate': 3.256704980842912e-05, 'epoch': 4.14}
{'loss': nan, 'learning_rate': 3.2375478927203066e-05, 'epoch': 4.17}
{'loss': nan, 'learning_rate': 3.218390804597701e-05, 'epoch': 4.21}
{'loss': nan, 'learning_rate': 3.1992337164750954e-05, 'epoch': 4.24}
{'loss': nan, 'learning_rate': 3.180076628352491e-05, 'epoch': 4.28}
{'loss': nan, 'learning_rate': 3.160919540229885e-05, 'epoch': 4.31}
{'loss': nan, 'learning_rate': 3.1417624521072797e-05, 'epoch': 4.34}
{'loss': nan, 'learning_rate': 3.1226053639846744e-05, 'epoch': 4.38}
{'loss': nan, 'learning_rate': 3.103448275862069e-05, 'epoch': 4.41}
{'loss': nan, 'learning_rate': 3.084291187739464e-05, 'epoch': 4.45}
{'loss': nan, 'learning_rate': 3.065134099616858e-05, 'epoch': 4.48}
{'loss': nan, 'learning_rate': 

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': nan, 'eval_accuracy': 0.10217842323651452, 'eval_runtime': 51.0226, 'eval_samples_per_second': 75.574, 'eval_steps_per_second': 75.574, 'epoch': 5.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-1450
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-1450/ipu_config.json


{'loss': nan, 'learning_rate': 2.7586206896551727e-05, 'epoch': 5.03}
{'loss': nan, 'learning_rate': 2.739463601532567e-05, 'epoch': 5.07}
{'loss': nan, 'learning_rate': 2.720306513409962e-05, 'epoch': 5.1}
{'loss': nan, 'learning_rate': 2.7011494252873566e-05, 'epoch': 5.14}
{'loss': nan, 'learning_rate': 2.681992337164751e-05, 'epoch': 5.17}
{'loss': nan, 'learning_rate': 2.662835249042146e-05, 'epoch': 5.21}
{'loss': nan, 'learning_rate': 2.6436781609195405e-05, 'epoch': 5.24}
{'loss': nan, 'learning_rate': 2.624521072796935e-05, 'epoch': 5.28}
{'loss': nan, 'learning_rate': 2.6053639846743293e-05, 'epoch': 5.31}
{'loss': nan, 'learning_rate': 2.5862068965517244e-05, 'epoch': 5.34}
{'loss': nan, 'learning_rate': 2.5670498084291188e-05, 'epoch': 5.38}
{'loss': nan, 'learning_rate': 2.5478927203065132e-05, 'epoch': 5.41}
{'loss': nan, 'learning_rate': 2.5287356321839083e-05, 'epoch': 5.45}
{'loss': nan, 'learning_rate': 2.5095785440613027e-05, 'epoch': 5.48}
{'loss': nan, 'learning_ra

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': nan, 'eval_accuracy': 0.10217842323651452, 'eval_runtime': 39.9384, 'eval_samples_per_second': 96.549, 'eval_steps_per_second': 96.549, 'epoch': 6.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-1740
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-1740/ipu_config.json


{'loss': nan, 'learning_rate': 2.203065134099617e-05, 'epoch': 6.03}
{'loss': nan, 'learning_rate': 2.183908045977012e-05, 'epoch': 6.07}
{'loss': nan, 'learning_rate': 2.1647509578544062e-05, 'epoch': 6.1}
{'loss': nan, 'learning_rate': 2.145593869731801e-05, 'epoch': 6.14}
{'loss': nan, 'learning_rate': 2.1264367816091954e-05, 'epoch': 6.17}
{'loss': nan, 'learning_rate': 2.10727969348659e-05, 'epoch': 6.21}
{'loss': nan, 'learning_rate': 2.088122605363985e-05, 'epoch': 6.24}
{'loss': nan, 'learning_rate': 2.0689655172413793e-05, 'epoch': 6.28}
{'loss': nan, 'learning_rate': 2.049808429118774e-05, 'epoch': 6.31}
{'loss': nan, 'learning_rate': 2.0306513409961685e-05, 'epoch': 6.34}
{'loss': nan, 'learning_rate': 2.0114942528735632e-05, 'epoch': 6.38}
{'loss': nan, 'learning_rate': 1.992337164750958e-05, 'epoch': 6.41}
{'loss': nan, 'learning_rate': 1.9731800766283527e-05, 'epoch': 6.45}
{'loss': nan, 'learning_rate': 1.9540229885057475e-05, 'epoch': 6.48}
{'loss': nan, 'learning_rate'

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': nan, 'eval_accuracy': 0.10217842323651452, 'eval_runtime': 37.6214, 'eval_samples_per_second': 102.495, 'eval_steps_per_second': 102.495, 'epoch': 7.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-2030
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-2030/ipu_config.json


{'loss': nan, 'learning_rate': 1.6475095785440615e-05, 'epoch': 7.03}
{'loss': nan, 'learning_rate': 1.628352490421456e-05, 'epoch': 7.07}
{'loss': nan, 'learning_rate': 1.6091954022988507e-05, 'epoch': 7.1}
{'loss': nan, 'learning_rate': 1.5900383141762454e-05, 'epoch': 7.14}
{'loss': nan, 'learning_rate': 1.5708812260536398e-05, 'epoch': 7.17}
{'loss': nan, 'learning_rate': 1.5517241379310346e-05, 'epoch': 7.21}
{'loss': nan, 'learning_rate': 1.532567049808429e-05, 'epoch': 7.24}
{'loss': nan, 'learning_rate': 1.5134099616858237e-05, 'epoch': 7.28}
{'loss': nan, 'learning_rate': 1.4942528735632185e-05, 'epoch': 7.31}
{'loss': nan, 'learning_rate': 1.475095785440613e-05, 'epoch': 7.34}
{'loss': nan, 'learning_rate': 1.4559386973180078e-05, 'epoch': 7.38}
{'loss': nan, 'learning_rate': 1.4367816091954022e-05, 'epoch': 7.41}
{'loss': nan, 'learning_rate': 1.417624521072797e-05, 'epoch': 7.45}
{'loss': nan, 'learning_rate': 1.3984674329501916e-05, 'epoch': 7.48}
{'loss': nan, 'learning_r

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': nan, 'eval_accuracy': 0.10217842323651452, 'eval_runtime': 39.6576, 'eval_samples_per_second': 97.232, 'eval_steps_per_second': 97.232, 'epoch': 8.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-2320
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-2320/ipu_config.json


{'loss': nan, 'learning_rate': 1.091954022988506e-05, 'epoch': 8.03}
{'loss': nan, 'learning_rate': 1.0727969348659005e-05, 'epoch': 8.07}
{'loss': nan, 'learning_rate': 1.053639846743295e-05, 'epoch': 8.1}
{'loss': nan, 'learning_rate': 1.0344827586206897e-05, 'epoch': 8.14}
{'loss': nan, 'learning_rate': 1.0153256704980842e-05, 'epoch': 8.17}
{'loss': nan, 'learning_rate': 9.96168582375479e-06, 'epoch': 8.21}
{'loss': nan, 'learning_rate': 9.770114942528738e-06, 'epoch': 8.24}
{'loss': nan, 'learning_rate': 9.578544061302683e-06, 'epoch': 8.28}
{'loss': nan, 'learning_rate': 9.386973180076629e-06, 'epoch': 8.31}
{'loss': nan, 'learning_rate': 9.195402298850575e-06, 'epoch': 8.34}
{'loss': nan, 'learning_rate': 9.00383141762452e-06, 'epoch': 8.38}
{'loss': nan, 'learning_rate': 8.812260536398467e-06, 'epoch': 8.41}
{'loss': nan, 'learning_rate': 8.620689655172414e-06, 'epoch': 8.45}
{'loss': nan, 'learning_rate': 8.429118773946362e-06, 'epoch': 8.48}
{'loss': nan, 'learning_rate': 8.2

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': nan, 'eval_accuracy': 0.10217842323651452, 'eval_runtime': 40.7446, 'eval_samples_per_second': 94.638, 'eval_steps_per_second': 94.638, 'epoch': 9.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-2610
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-2610/ipu_config.json


{'loss': nan, 'learning_rate': 5.3639846743295025e-06, 'epoch': 9.03}
{'loss': nan, 'learning_rate': 5.172413793103448e-06, 'epoch': 9.07}
{'loss': nan, 'learning_rate': 4.980842911877395e-06, 'epoch': 9.1}
{'loss': nan, 'learning_rate': 4.789272030651342e-06, 'epoch': 9.14}
{'loss': nan, 'learning_rate': 4.5977011494252875e-06, 'epoch': 9.17}
{'loss': nan, 'learning_rate': 4.406130268199233e-06, 'epoch': 9.21}
{'loss': nan, 'learning_rate': 4.214559386973181e-06, 'epoch': 9.24}
{'loss': nan, 'learning_rate': 4.022988505747127e-06, 'epoch': 9.28}
{'loss': nan, 'learning_rate': 3.8314176245210725e-06, 'epoch': 9.31}
{'loss': nan, 'learning_rate': 3.6398467432950196e-06, 'epoch': 9.34}
{'loss': nan, 'learning_rate': 3.448275862068966e-06, 'epoch': 9.38}
{'loss': nan, 'learning_rate': 3.2567049808429117e-06, 'epoch': 9.41}
{'loss': nan, 'learning_rate': 3.0651340996168583e-06, 'epoch': 9.45}
{'loss': nan, 'learning_rate': 2.8735632183908046e-06, 'epoch': 9.48}
{'loss': nan, 'learning_rate

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

{'eval_loss': nan, 'eval_accuracy': 0.10217842323651452, 'eval_runtime': 37.4203, 'eval_samples_per_second': 103.046, 'eval_steps_per_second': 103.046, 'epoch': 10.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-2900
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-2900/ipu_config.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/checkpoint-580 (score: 0.9906639004149378).
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1

{'train_runtime': 2163.0657, 'train_samples_per_second': 42.902, 'train_steps_per_second': 1.341, 'train_loss': nan, 'epoch': 10.0}


Saving model checkpoint to ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette
---------- Device Allocation -----------
Embedding  --> IPU 0
Encoder 0  --> IPU 0
Encoder 1  --> IPU 0
Encoder 2  --> IPU 0
Encoder 3  --> IPU 1
Encoder 4  --> IPU 1
Encoder 5  --> IPU 1
Encoder 6  --> IPU 2
Encoder 7  --> IPU 2
Encoder 8  --> IPU 2
Encoder 9  --> IPU 3
Encoder 10 --> IPU 3
Encoder 11 --> IPU 3
Head       --> IPU 3
---------------------------------------
Configuration saved in ./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/ipu_config.json


***** train metrics *****
  epoch                    =       10.0
  train_loss               =        nan
  train_runtime            = 0:36:03.06
  train_samples_per_second =     42.902
  train_steps_per_second   =      1.341


In [28]:
# Evaluate the trainer's current model on the validation set.
# NOTE(review): the recorded output reports epoch 1.0 / accuracy 0.9839 and the
# cell's execution count (28) is out of sequence, so it was presumably produced
# by a different run than the training log above — re-run after training to confirm.
metrics = trainer.evaluate()
# some nice to haves:
# Log the evaluation metrics and write them to the output directory.
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

***** Running Evaluation *****
  Num examples = 3856
  Batch size = 1


  0%|          | 0/3856 [00:00<?, ?it/s]

***** eval metrics *****
  epoch                   =        1.0
  eval_accuracy           =     0.9839
  eval_loss               =     0.3167
  eval_runtime            = 0:00:52.71
  eval_samples_per_second =     73.141
  eval_steps_per_second   =     73.141


## Inference



In [24]:
import os
os.chdir("/notebooks/Imagenette/Imagenette2Challenge")
import importlib
import torch
from tqdm import tqdm as tqdm
import src.utils.helpermethods as helpermethods
from torchvision import transforms
from transformers import AutoConfig, AutoModelForImageClassification

# Load the model configuration
config = AutoConfig.from_pretrained("./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/config.json")
# Load the model
model = AutoModelForImageClassification.from_pretrained("./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/pytorch_model.bin", config=config)
model.eval()
# Human-readable class names, indexed by class id.
# presumably in the same order as the dataset's label encoding — verify against the dataset module
labels = ['tench', 'English springer', 'cassette player', 'chain saw', 'church', 'French horn', 'garbage truck', 'gas pump', 'golf ball', 'parachute']

# Same preprocessing as used for training.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

base_path = "./"
CustomDataset_module = importlib.import_module("src.dataLoaders.CustomDatasetRaw")
file_training = base_path + "Datasets/imagenette2/train.txt"
file_validation = base_path + "Datasets/imagenette2/val.txt"

val_dataset = helpermethods.load_dataset(
    CustomDataset_module,
    file_validation,
    transform,
)
# Accuracy does not depend on sample order, so the loader is not shuffled.
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=32, shuffle=False, num_workers=6
)

correct_predictions = 0
total_predictions = 0

with torch.no_grad():
    # The loop variable is named `targets` so it does not overwrite the
    # `labels` list of class names defined above.
    for inputs, targets in tqdm(val_loader):
        outputs = model(inputs).logits
        _, predicted = torch.topk(outputs, k=1, dim=1)  # adjust k as needed
        correct = torch.eq(predicted, targets.view(-1, 1).expand_as(predicted)).sum().item()
        correct_predictions += correct
        total_predictions += targets.size(0)
accuracy = correct_predictions / total_predictions
print(f'Accuracy : {accuracy:.2f}')


100%|██████████| 121/121 [06:44<00:00,  3.34s/it]


Accuracy : 0.99


In [6]:
import random
# Class names indexed by class id.
labels = ['tench', 'English springer', 'cassette player', 'chain saw', 'church', 'French horn', 'garbage truck', 'gas pump', 'golf ball', 'parachute']
num_samples = 5
# Draw distinct random validation items; never ask for more than the dataset holds.
indices = random.sample(range(len(val_dataset)), min(num_samples, len(val_dataset)))
samples = [val_dataset[i] for i in indices]

with torch.no_grad():
    for image, target in samples:
        # Add a batch dimension, run the model and take the best-scoring class id.
        pred_idx = torch.argmax(model(image.unsqueeze(0)).logits).item()
        # Separate names for the tensor label and its text form, so no
        # variable changes type half-way through the loop body.
        true_name = labels[target.item()]
        pred_name = labels[pred_idx]
        print(f'Label : {true_name}, pred : {pred_name}')

Label : parachute, pred : parachute
Label : golf ball, pred : golf ball
Label : gas pump, pred : gas pump
Label : tench, pred : tench
Label : church, pred : church


In [6]:
# Setup for the IPU inference cell: repeats the working-directory change and
# the imports used by earlier cells, and additionally imports poptorch.
import os
os.chdir("/notebooks/Imagenette/Imagenette2Challenge")
import importlib
import torch
from tqdm import tqdm as tqdm
import src.utils.helpermethods as helpermethods
from torchvision import transforms
from transformers import AutoConfig, AutoModelForImageClassification
import poptorch


def compute_accuracy_IPU(output: torch.Tensor, labels: torch.Tensor, device_iterations: int) -> float:
    """Count correct top-1 predictions in one batch of model outputs.

    Args:
        output: Class scores with the classes on the last dimension; all
            leading dimensions are treated as samples.
        labels: One class index per sample, in any shape with the same number
            of elements as there are samples in ``output``.
        device_iterations: Kept for backward compatibility. Predictions and
            labels are both flattened, so the count no longer depends on how
            the samples were split across device iterations.

    Returns:
        The number of samples whose highest-scoring class equals the label,
        as a float.

    Raises:
        ValueError: If the number of labels differs from the number of samples.
    """
    # Flatten both sides to 1-D. The previous (device_iterations, -1, 1) view
    # only lined up with the (N, 1) predictions when device_iterations was 1.
    predicted = output.argmax(dim=-1).reshape(-1)
    flat_labels = labels.reshape(-1)
    if predicted.numel() != flat_labels.numel():
        raise ValueError(
            f"got {predicted.numel()} predictions but {flat_labels.numel()} labels"
        )
    correct = torch.eq(predicted, flat_labels).sum().item()
    return float(correct)

# NOTE(review): the recorded run of this cell failed during Poplar compilation
# with "Out of memory on tile 0". Here the model is compiled with batch_size=32
# and no multi-IPU layer placement, unlike the 4-IPU layout used for training —
# presumably a smaller batch or a pipelined model is required; confirm.
opts = poptorch.Options()
opts.deviceIterations(1)

# Load the model configuration
config = AutoConfig.from_pretrained("./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/config.json")
# Load the model
model = AutoModelForImageClassification.from_pretrained("./Checkpoints/google/vit-base-patch16-224-in21k-finetuned-imagenette/pytorch_model.bin", config=config)
model.eval()

# Class names indexed by class id.
labels = ['tench', 'English springer', 'cassette player', 'chain saw', 'church', 'French horn', 'garbage truck', 'gas pump', 'golf ball', 'parachute']

# Same preprocessing as used for training.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

base_path = "./"
CustomDataset_module = importlib.import_module("src.dataLoaders.CustomDatasetRaw")
file_training = base_path + "Datasets/imagenette2/train.txt"
file_validation = base_path + "Datasets/imagenette2/val.txt"

val_dataset = helpermethods.load_dataset(
    CustomDataset_module,
    file_validation,
    transform,
)
# drop_last=True discards the final partial batch, so accuracy is computed over
# full batches only. Shuffling is disabled so the discarded samples (and hence
# the reported accuracy) are the same on every run.
val_loader = poptorch.DataLoader(options=opts, dataset=val_dataset, batch_size=32, shuffle=False, drop_last=True)

poptorch_inference_model = poptorch.inferenceModel(model, options=val_loader.options)

# Start the counters from zero in this cell, so the result does not depend on
# (or add to) values left behind by the CPU inference cell.
correct_predictions = 0
total_predictions = 0

with torch.no_grad():
    # `targets` rather than `labels`, so the class-name list above is not overwritten.
    for inputs, targets in tqdm(val_loader):
        outputs = poptorch_inference_model(inputs)
        # The CPU inference cell reads `.logits` from this model's output. What
        # the poptorch wrapper returns is not visible from this notebook, so
        # accept a dict-like output, a tuple/list, or a bare tensor — TODO confirm.
        if isinstance(outputs, dict):
            logits = outputs["logits"]
        elif isinstance(outputs, (tuple, list)):
            logits = outputs[0]
        else:
            logits = outputs
        _, predicted = torch.topk(logits, k=1, dim=1)  # adjust k as needed
        correct = torch.eq(predicted, targets.view(-1, 1).expand_as(predicted)).sum().item()
        correct_predictions += correct
        total_predictions += targets.size(0)

accuracy = correct_predictions / total_predictions
print(f'Accuracy : {accuracy:.2f}')

  0%|          | 0/120 [00:00<?, ?it/s]
Graph compilation:   0%|          | 0/100 [00:00<?][A
Graph compilation:   3%|▎         | 3/100 [00:01<00:54][A
Graph compilation:   4%|▍         | 4/100 [00:01<00:40][A
Graph compilation:   6%|▌         | 6/100 [00:08<02:56][A
Graph compilation:   7%|▋         | 7/100 [00:14<04:19][A
Graph compilation:  17%|█▋        | 17/100 [00:14<00:50][A
Graph compilation:  21%|██        | 21/100 [00:15<00:42][A
Graph compilation:  24%|██▍       | 24/100 [00:17<00:39][A
Graph compilation:  26%|██▌       | 26/100 [00:17<00:36][A
Graph compilation:  28%|██▊       | 28/100 [00:19<00:37][A
Graph compilation:  29%|██▉       | 29/100 [00:19<00:36][A
Graph compilation:  30%|███       | 30/100 [00:31<02:47][A
Graph compilation:  31%|███       | 31/100 [00:44<04:55][A
Graph compilation:  32%|███▏      | 32/100 [00:49<05:00][A
Graph compilation:  33%|███▎      | 33/100 [00:49<03:49][A
Graph compilation:  34%|███▍      | 34/100 [00:49<02:52][A
Graph com

Error: In unknown:0: 'popart_exception': /opt/jenkins/workspace/poplar/poplar_ci_ubuntu_20_04_unprivileged/popart/willow/src/popx/irlowering.cpp:3523 Out of memory on tile 0: 773304 bytes used but tiles only have 638976 bytes of memory
Error raised in:
  [0] popart::Session::prepareDevice: Poplar compilation
  [1] Compiler::compileAndPrepareDevice
  [2] LowerToPopart::compile
  [3] compileWithManualTracing
