In [1]:
model_checkpoint = "google/vit-base-patch16-224-in21k"
model_checkpoint

'google/vit-base-patch16-224-in21k'

In [2]:
import os 
import torch
from peft import PeftModel, LoraConfig, get_peft_model
from transformers import AutoModelForImageClassification


def print_model_size(path):
    """Print the combined size of the files stored directly inside ``path``.

    Args:
        path: Directory to measure, e.g. the folder a model was saved to.

    Only regular files at the top level of ``path`` are counted;
    sub-directories are neither descended into nor counted themselves.
    The total is printed in megabytes (1 MB = 1e6 bytes) with two decimals.
    """
    # The context manager closes the directory handle as soon as we are done
    # instead of leaving that to the garbage collector.
    with os.scandir(path) as entries:
        size = sum(entry.stat().st_size for entry in entries if entry.is_file())

    # ``.2f`` is fixed-point with two decimals. A bare ``.2`` means "two
    # significant digits" and renders e.g. 12.34 as ``1.2e+01``.
    print(f"Model size: {size / 1e6:.2f} MB")


def print_trainable_parameters(model, label):
    """Print how many of ``model``'s parameters are trainable.

    Args:
        model: Object exposing ``named_parameters()``; each yielded parameter
            must provide ``numel()`` and ``requires_grad``.
        label: Text placed at the start of the printed line.

    The line shows the trainable count, the total count, and the trainable
    share as a percentage with two decimals.
    """
    parameters, trainable = 0, 0

    for _, p in model.named_parameters():
        count = p.numel()
        parameters += count
        if p.requires_grad:
            trainable += count

    # A model without any parameters would otherwise raise ZeroDivisionError.
    percentage = 100 * trainable / parameters if parameters else 0.0

    print(f"{label} trainable parameters: {trainable:,}/{parameters:,} ({percentage:.2f}%)")


def split_dataset(dataset, test_size=0.1, seed=None):
    """Split ``dataset`` into a training part and a test part.

    Args:
        dataset: Object exposing ``train_test_split(test_size=..., seed=...)``
            that returns a mapping with ``"train"`` and ``"test"`` entries.
        test_size: Forwarded to ``train_test_split``; defaults to 0.1.
        seed: Forwarded to ``train_test_split``. Pass an integer to get the
            same split on every run; ``None`` keeps the unseeded behaviour.

    Returns:
        A ``(train, test)`` tuple.
    """
    dataset_splits = dataset.train_test_split(test_size=test_size, seed=seed)
    # Look the splits up by key so the result order never depends on the
    # insertion order of the returned mapping.
    return dataset_splits["train"], dataset_splits["test"]
    

def create_label_mappings(dataset):
    """Build lookup tables between label names and their integer ids.

    Args:
        dataset: Object whose ``features["label"].names`` lists the label
            names; a name's position in that list is used as its id.

    Returns:
        A ``(label2id, id2label)`` tuple of dictionaries.
    """
    id2label = dict(enumerate(dataset.features["label"].names))
    # Invert the id -> name table to obtain name -> id.
    label2id = {name: index for index, name in id2label.items()}
    return label2id, id2label

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from datasets import load_dataset

# Food dataset: only the first 10,000 examples of the train split are loaded.
dataset1 = load_dataset(
    "food101",
    split="train[:10000]",
)

# Optional second dataset with pictures of cats and dogs (currently disabled).
# Its label column is named "labels", so it must be renamed to "label" before
# the same helper code can be reused for both datasets.
# dataset2 = load_dataset("microsoft/cats_vs_dogs", split="train", trust_remote_code=True)
# dataset2 = dataset2.rename_column("labels", "label")

# Separate each dataset into a training part and a held-out test part.
(dataset1_train, dataset1_test) = split_dataset(dataset1)
# dataset2_train, dataset2_test = split_dataset(dataset2)

Downloading readme: 100%|██████████| 10.5k/10.5k [00:00<00:00, 22.8MB/s]
Downloading data: 100%|██████████| 490M/490M [00:43<00:00, 11.1MB/s] 
Downloading data: 100%|██████████| 464M/464M [00:38<00:00, 12.0MB/s] 
Downloading data: 100%|██████████| 472M/472M [00:39<00:00, 12.1MB/s] 
Downloading data: 100%|██████████| 464M/464M [00:38<00:00, 12.1MB/s] 
Downloading data: 100%|██████████| 475M/475M [00:43<00:00, 11.0MB/s] 
Downloading data: 100%|██████████| 470M/470M [00:42<00:00, 11.2MB/s] 
Downloading data: 100%|██████████| 478M/478M [00:39<00:00, 12.0MB/s] 
Downloading data: 100%|██████████| 486M/486M [00:40<00:00, 11.9MB/s] 
Downloading data: 100%|██████████| 423M/423M [00:34<00:00, 12.1MB/s] 
Downloading data: 100%|██████████| 413M/413M [00:38<00:00, 10.9MB/s] 
Downloading data: 100%|██████████| 426M/426M [00:35<00:00, 12.1MB/s] 
Generating train split: 100%|██████████| 75750/75750 [00:02<00:00, 27537.62 examples/s]
Generating validation split: 100%|██████████| 25250/25250 [00:00<00:0

In [4]:
# Label-name <-> class-id lookup tables for the food dataset.
(dataset1_label2id, dataset1_id2label) = create_label_mappings(dataset=dataset1)
# dataset2_label2id, dataset2_id2label = create_label_mappings(dataset2)

In [8]:
# Everything that describes the first model, gathered in one dictionary:
# its data splits, its label lookup tables, the epoch count and a path.
model1 = dict(
    train_data=dataset1_train,
    test_data=dataset1_test,
    label2id=dataset1_label2id,
    id2label=dataset1_id2label,
    epochs=5,
    path="./lora-model1",
)

In [10]:
from transformers import AutoImageProcessor

# Load the image processor published with the chosen checkpoint, requesting
# the fast implementation.
image_processor = AutoImageProcessor.from_pretrained(
    model_checkpoint,
    use_fast=True,
)

In [13]:
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    Resize,
    ToTensor,
)

# Image -> tensor pipeline. The target size and the normalization statistics
# are read from the image processor rather than hard-coded here.
preprocess_pipeline = Compose(
    [
        Resize(size=image_processor.size["height"]),
        CenterCrop(size=image_processor.size["height"]),
        ToTensor(),
        Normalize(
            mean=image_processor.image_mean,
            std=image_processor.image_std,
        ),
    ]
)

def preprocess(batch):
    """Add a ``"pixel_values"`` entry to ``batch`` and return the batch.

    Args:
        batch: Mapping whose ``"image"`` entry is an iterable of images that
            support ``convert("RGB")``.

    Returns:
        The same ``batch`` object, with ``"pixel_values"`` holding the result
        of ``preprocess_pipeline`` for each image, in the original order.
    """
    pixel_values = []
    for image in batch["image"]:
        # Convert to RGB before handing the image to the pipeline.
        rgb_image = image.convert("RGB")
        pixel_values.append(preprocess_pipeline(rgb_image))

    batch["pixel_values"] = pixel_values
    return batch

In [14]:
# Register the same preprocessing function on both data splits.
model1["train_data"].set_transform(transform=preprocess)
model1["test_data"].set_transform(transform=preprocess)