## Imports

In [None]:
import evaluate
import torch
from datasets import load_dataset
from transformers import (
    AutoFeatureExtractor,
    AutoImageProcessor,
    AutoModelForImageClassification,
    Trainer,
    TrainingArguments,
    pipeline,
)

## Definitions

In [6]:
# Tunable configuration for the whole notebook.
DATASET_PATH = './dataset'                   # directory handed to load_dataset(..., data_dir=...)
MODEL_NAME = "google/vit-base-patch16-224"   # pretrained checkpoint to fine-tune
NUMBER_OF_CLASSES = 7                        # size of the new classification head

# Vision "feature extractors" are deprecated in transformers in favour of image
# processors, so load the preprocessor through AutoImageProcessor. The variable
# keeps its original name because later cells call it, save it and pass it to
# the inference pipeline under this name.
feature_extractor = AutoImageProcessor.from_pretrained(MODEL_NAME)

## Load Dataset

In [None]:
# Load the images found under DATASET_PATH with the generic "imagefolder" builder.
dataset = load_dataset("imagefolder", data_dir=DATASET_PATH)

# Hold out 20% of the loaded "train" split for validation.
# A fixed seed makes the split identical on every Restart & Run All; without it
# the held-out images (and therefore the reported accuracy) change on each run.
# NOTE(review): if the classes are imbalanced, consider also passing
# stratify_by_column="label" - confirm the label column is a ClassLabel first.
SPLIT_SEED = 42
dataset = dataset["train"].train_test_split(test_size=0.2, seed=SPLIT_SEED)
train_dataset = dataset["train"]
val_dataset = dataset["test"]

## Define a Preprocessing Pipeline

In [None]:
def preprocess_data(batch):
    """Turn a batch of images into model inputs.

    Every entry of ``batch["image"]`` is converted to RGB and the resulting
    list is passed to the module-level ``feature_extractor`` with
    ``return_tensors="pt"``. The values of ``batch["label"]`` are attached to
    the result under the key ``"labels"``, the name the Trainer expects.

    Args:
        batch: mapping with an ``"image"`` sequence (objects exposing
            ``.convert``) and a ``"label"`` sequence.

    Returns:
        Whatever ``feature_extractor`` returned, with a ``"labels"`` entry added.
    """
    rgb_images = []
    for picture in batch["image"]:
        rgb_images.append(picture.convert("RGB"))

    encoded = feature_extractor(images=rgb_images, return_tensors="pt")
    # "label" -> "labels" for Trainer compatibility
    encoded["labels"] = batch["label"]
    return encoded

In [None]:
# Run preprocess_data over both splits in batches; the raw "image" column is
# dropped afterwards.
map_options = {"batched": True, "remove_columns": ["image"]}
train_dataset = train_dataset.map(preprocess_data, **map_options)
val_dataset = val_dataset.map(preprocess_data, **map_options)

# Expose only the model inputs and targets, returned as PyTorch tensors
for split in (train_dataset, val_dataset):
    split.set_format(type="torch", columns=["pixel_values", "labels"])

## Define the Model and Its Parameters

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

num_classes = NUMBER_OF_CLASSES  # Number of road sign classes

# Load the pretrained checkpoint with a classification head of `num_classes`
# outputs. ignore_mismatched_sizes=True is presumably needed because the
# checkpoint's own head has a different size - TODO confirm.
model = AutoModelForImageClassification.from_pretrained(
    MODEL_NAME, num_labels=num_classes, ignore_mismatched_sizes=True
)
model = model.to(device)

## Define Training Arguments and Performance Metrics

In [60]:
# Evaluation metric (accuracy), loaded once through the `evaluate` library.
# compute_metrics below calls accuracy.compute(...) on every evaluation pass.
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level ``accuracy`` metric.

    Args:
        eval_pred: a pair that unpacks into ``(logits, labels)``; ``logits``
            must support ``.argmax(axis=-1)``.

    Returns:
        The result of ``accuracy.compute`` for the arg-max class of each row of
        ``logits`` against ``labels``.
    """
    logits, labels = eval_pred
    # Predicted class = index of the largest logit along the last axis
    predicted_ids = logits.argmax(axis=-1)
    scores = accuracy.compute(predictions=predicted_ids, references=labels)
    return scores

# Training arguments
# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases; the old keyword was deprecated and later removed, so passing it
# there raises TypeError. Use whichever name the installed TrainingArguments
# dataclass declares so the cell runs on both old and new versions.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",            # checkpoints are written here
    save_strategy="epoch",             # checkpoint once per epoch ...
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    # Reload the best checkpoint when training finishes. No metric_for_best_model
    # is given, so the Trainer's default selection criterion applies.
    load_best_model_at_end=True,
    # ... and evaluate once per epoch too; the Trainer requires the save and
    # evaluation strategies to match when load_best_model_at_end is set.
    **{_eval_strategy_key: "epoch"},
)

# Trainer: ties together the model, the hyper-parameters, both dataset splits
# and the metric callback defined earlier in this cell.
trainer_options = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": val_dataset,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_options)

## Train / Fine-Tune the Model

In [None]:
# Run fine-tuning with the Trainer configured above; the call's return value is
# the last expression of the cell, so the notebook displays it.
trainer.train()

## Save The Model

In [None]:
# Write the model and then its preprocessor into the same directory so the
# folder can be loaded again later by path.
for artifact in (model, feature_extractor):
    artifact.save_pretrained("fine_tuned_model")

## Load the Fine-Tuned Model and Use It for Classification

In [8]:
# Load fine-tuned model
model_path = "fine_tuned_model"
classifier = pipeline("image-classification", model=model_path, feature_extractor=feature_extractor)

# Human-readable names for the generic "LABEL_<id>" strings the saved model emits.
# Named `label_names` rather than `map` so the builtin `map` is not shadowed for
# the rest of the kernel session.
label_names = {
    "LABEL_0": "Keep Left",
    "LABEL_1": "Keep Right",
    "LABEL_2": "No Entry",
    "LABEL_3": "Pedestrian crossing",
    "LABEL_4": "Stop Sign",
    "LABEL_5": "Turn Left",
    "LABEL_6": "Turn Right"
}

# Predict on an image
image_path = "./stop.jpg"
predictions = classifier(image_path)

# Report the first prediction returned by the pipeline. Fall back to the raw
# label when it is missing from the table instead of raising KeyError.
top_label = predictions[0]["label"]
print(f"Predicted class: {label_names.get(top_label, top_label)}")


Device set to use cpu


Predicted class: Stop Sign
