# Image Classification

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# Presumably needed for the `push_to_hub` steps further down — confirm the
# token used has write access.
from huggingface_hub import notebook_login
notebook_login()

## Load dataset

In [None]:
# Load a slice of the "food101" dataset: the first 5000 rows of its train split.
from datasets import load_dataset
food = load_dataset("food101", split="train[:5000]")

## Prepare the dataset

In [None]:
# Split the loaded slice into "train" and "test" subsets, holding out 20% as test.
# NOTE(review): no seed is passed, so the split presumably differs between
# runs — confirm, and pass a seed if reproducibility matters.
food = food.train_test_split(test_size=0.2)

In [None]:
# Show the first training record to inspect what an example looks like.
food["train"][0]

In [None]:
# Build the two lookup tables that are later handed to the model:
# class name -> id and id -> class name. Ids are stored as strings in both.
labels = food["train"].features["label"].names
label2id = {name: str(idx) for idx, name in enumerate(labels)}
id2label = {str(idx): name for idx, name in enumerate(labels)}

In [None]:
# Sanity check: print each "id name" pair of the mapping on its own line.
for class_id, class_name in id2label.items():
    print(class_id, class_name)

In [None]:
# Load the image processor that ships with the pretrained checkpoint. Its
# `size`, `image_mean` and `image_std` attributes are read further down to
# build the input transforms.
from transformers import AutoImageProcessor
model_checkpoint = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)

## Configs

In [None]:
# Training configuration, gathered in one place. These names are consumed by
# the `TrainingArguments(...)` call and by the inference cells.
# The output name is a plain string: the original f-prefix had no placeholders.
model_output_name = "VIT-food101-image-classifier"  # used as output_dir and reloaded for inference
eval_and_save_strat = "epoch"  # one value shared by the evaluation and save strategies
learn_rate = 5e-5
batch_size = 16  # used for both the train and eval per-device batch size
grad_accum_steps = 4
train_epochs = 5
warmup_ratio = 0.1
log_steps = 10
metric_for_best_mod = "accuracy"  # matches the metric loaded for `compute_metrics`

In [None]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

# Per-image preprocessing built from the image processor's own settings:
# random resized crop -> tensor conversion -> normalization.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Crop size: the single "shortest_edge" value when the processor defines one,
# otherwise a (height, width) pair.
if "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
else:
    size = (image_processor.size["height"], image_processor.size["width"])

_transforms = Compose(
    [
        RandomResizedCrop(size),
        ToTensor(),
        normalize,
    ]
)


In [None]:
def transforms(examples):
    """Turn a batch's "image" entries into model-ready "pixel_values".

    Each image is converted to RGB and run through `_transforms`; the results
    are stored under "pixel_values". The "image" key is then removed and the
    same (mutated) batch object is returned.
    """
    rgb_images = (picture.convert("RGB") for picture in examples["image"])
    examples["pixel_values"] = list(map(_transforms, rgb_images))
    del examples["image"]
    return examples

In [None]:
# Attach `transforms` to the dataset so examples are preprocessed when accessed.
# NOTE(review): `food` holds both the "train" and "test" subsets, so the random
# crop inside `_transforms` presumably also runs on the evaluation data, making
# eval scores non-deterministic — confirm, and consider a deterministic
# transform for the "test" subset.
food = food.with_transform(transforms)

In [None]:
# Create the library's default data collator; it is passed to the Trainer below.
from transformers import DefaultDataCollator
data_collator = DefaultDataCollator()

## Get evaluation metrics

In [None]:
# Load the "accuracy" metric; `compute_metrics` below calls its `compute` method.
import evaluate
accuracy = evaluate.load("accuracy")

In [None]:
import numpy as np

def compute_metrics(eval_pred):
    """Score evaluation outputs with the accuracy metric.

    `eval_pred` unpacks into (model scores, reference labels). The predicted
    class for each row is the index of its highest score along axis 1.
    Returns whatever `accuracy.compute` produces for those predictions.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_classes, references=references)

## Train the image classifier

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer
# Load the pretrained checkpoint for image classification, sized to the
# number of dataset labels and carrying both label lookup tables.
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint, 
    num_labels=len(labels), 
    id2label=id2label, 
    label2id=label2id
)

In [None]:
# Training hyperparameters for the Trainer, filled in from the config cell.
# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm which spelling the installed version accepts.
train_args = TrainingArguments(
    output_dir=model_output_name,
    # Presumably kept False so the raw "image" column survives until
    # `transforms` turns it into "pixel_values" — confirm.
    remove_unused_columns=False, 
    # Evaluation and checkpoint saving share one schedule value.
    evaluation_strategy=eval_and_save_strat,
    save_strategy=eval_and_save_strat, 
    learning_rate=learn_rate, 
    # The same batch size is used for training and evaluation.
    per_device_train_batch_size=batch_size, 
    per_device_eval_batch_size=batch_size, 
    gradient_accumulation_steps=grad_accum_steps, 
    num_train_epochs=train_epochs, 
    warmup_ratio=warmup_ratio,
    logging_steps=log_steps,
    # Best-model selection is driven by `metric_for_best_mod`.
    load_best_model_at_end=True,
    metric_for_best_model=metric_for_best_mod,
    push_to_hub=True
)

In [None]:
# Wire the model, arguments, collator, both dataset subsets and the metric
# function into a Trainer.
# NOTE(review): the image processor is passed through the `tokenizer`
# argument; newer transformers versions presumably expect `processing_class`
# instead — confirm against the installed version.
trainer = Trainer(
    model=model, 
    args=train_args, 
    data_collator=data_collator, 
    train_dataset=food["train"],
    eval_dataset=food["test"],
    tokenizer=image_processor, 
    compute_metrics=compute_metrics
)

In [None]:
# Run the fine-tuning loop configured above.
trainer.train()

In [None]:
# Upload the trained model to the Hugging Face Hub.
trainer.push_to_hub()

## Inference: try the fine-tuned model on a validation image

In [None]:
# Load the first 10 rows of the "food101" validation split and keep the first
# image as the inference sample.
# Index by row first, then by column: `ds["image"][0]` would decode every
# image in the column just to keep one.
ds = load_dataset("food101", split="validation[:10]")
image = ds[0]["image"]

In [None]:
from transformers import pipeline
# Build an image-classification pipeline from `model_output_name` and run it
# on the sample image; as the cell's last expression, the result is shown.
classifier = pipeline("image-classification", model=model_output_name)
classifier(image)

In [None]:
from transformers import AutoImageProcessor
import torch

# Manual inference, step 1: reload the image processor from
# `model_output_name` (this rebinds `image_processor`) and convert the sample
# image into PyTorch tensors.
image_processor = AutoImageProcessor.from_pretrained(model_output_name)
inputs = image_processor(image, return_tensors="pt")

In [None]:
from transformers import AutoModelForImageClassification

# Manual inference, step 2: reload the model from `model_output_name` (this
# rebinds `model`) and run a forward pass with gradient tracking disabled.
model = AutoModelForImageClassification.from_pretrained(model_output_name)
with torch.no_grad():
    logits = model(**inputs).logits

In [None]:
# Manual inference, step 3: take the index of the largest logit along the last
# dimension as a Python number and look up its class name in the model config.
predicted_label = logits.argmax(-1).item()
model.config.id2label[predicted_label]