In [None]:
!pip install transformers[torch]


In [None]:
import transformers
from torchvision import datasets, transforms
import torch

In [None]:
import gc

# Collect unreachable Python objects first, so that any CUDA tensors they were
# holding are handed back to PyTorch's caching allocator; only then ask the
# allocator to release its unused cached blocks.
gc.collect()
torch.cuda.empty_cache()

In [None]:
from datasets import load_dataset

# Locations of the cleaned CSV files; both are read with the "csv" builder and
# both are requested with split='train'.
TRAIN_CSV = 'C:/Users/train_images/cleaned_train_dataset.csv'
TEST_CSV = "C:/Users/test_images/cleaned_test_dataset.csv"

train_dataset = load_dataset("csv", data_files=TRAIN_CSV, split='train')
test_dataset = load_dataset("csv", data_files=TEST_CSV, split='train')

In [None]:
# Show the distinct 'Label' values of the training split, then of the test split.
for split in (train_dataset, test_dataset):
    print(set(split['Label']))

In [None]:
# Distinct training labels in sorted order, so the list is the same on every run
# (iterating a bare set gives no ordering guarantee).
classes = sorted(set(train_dataset['Label']))
print(len(classes)) # classes: 62

In [None]:
from transformers import ViTImageProcessor

model_name = 'google/vit-base-patch16-224'

# Extra keyword arguments forwarded to from_pretrained alongside the checkpoint name.
processor_overrides = dict(num_channels=3, image_mean=0.5, image_std=0.5)
image_processor = ViTImageProcessor.from_pretrained(model_name, **processor_overrides)

# Bare expression: the notebook displays the resulting processor.
image_processor

In [None]:
def preprocess(batch):
    """Turn a batch of dataset rows into model-ready inputs.

    Opens every file listed in ``batch['Filename']``, converts it to RGB and
    runs the resulting images through the notebook-level ``image_processor``.

    Args:
        batch: Mapping with a ``'Filename'`` list of image paths and a
            ``'Label'`` entry (presumably one label per file; confirm against
            the CSV schema).

    Returns:
        The processor output, requested as PyTorch tensors, with the batch's
        labels attached under the ``'label'`` key.
    """
    # Imported here so the function does not rely on a later cell having run.
    from PIL import Image

    images = []
    for image_name in batch['Filename']:
        # The context manager closes the file handle; convert() yields an
        # independent in-memory image that remains usable afterwards.
        with Image.open(image_name) as image:
            images.append(image.convert('RGB'))
    inputs = image_processor(images, return_tensors='pt')

    inputs['label'] = batch['Label']

    return inputs


In [None]:
import PIL
from PIL import Image

# Smoke test: push the first training image through the processor and inspect the result.
sample_path = train_dataset[0]['Filename']
print(sample_path)

# Convert to RGB exactly as preprocess() does, so grayscale / RGBA / palette files
# are handled here the same way as during training. The context manager closes
# the file once the converted copy has been made.
with Image.open(sample_path) as opened:
    image = opened.convert('RGB')
example = image_processor(image, return_tensors='pt')
print(example)

In [None]:
# Attach preprocess as the transform of both splits (training first, then test).
prepared_train, prepared_test = (
    split.with_transform(preprocess) for split in (train_dataset, test_dataset)
)


In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

In [None]:
def collate_fn(batch):
    """Merge per-example dicts into a single batch dict.

    Every example must provide a 'pixel_values' tensor and a 'label'. The
    tensors are stacked along a new leading dimension; the labels are gathered
    into one tensor stored under 'labels'.
    """
    pixel_tensors = [example['pixel_values'] for example in batch]
    stacked_pixels = torch.stack(pixel_tensors)

    label_values = [example['label'] for example in batch]
    return dict(pixel_values=stacked_pixels, labels=torch.tensor(label_values))

In [None]:
pip install evaluate

In [None]:
import numpy as np
import evaluate

accuracy_metric = evaluate.load("accuracy")


def compute_metric(p):
    """Compute accuracy for one evaluation pass.

    `p` must expose `predictions` (per-class scores; the arg-max over axis 1 is
    taken as the predicted class) and `label_ids` (the reference labels).
    Returns whatever `accuracy_metric.compute` returns for those two inputs.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    return accuracy_metric.compute(predictions=predicted_classes, references=p.label_ids)

In [None]:
from transformers import TrainingArguments, AdamW, get_linear_schedule_with_warmup

# Fine-tuning configuration for the ViT classifier.
training_args = TrainingArguments(
    output_dir='./ModelOutput4',
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,  # 16 x 2 = 32 samples per optimizer step, per device
    num_train_epochs=5,
    evaluation_strategy='steps',
    save_steps=100,  # kept equal to eval_steps so checkpoints line up with evaluations
    eval_steps=100,
    # Mixed precision needs a CUDA device; enable it only when one is present so the
    # notebook still runs on the CPU fallback chosen for `device`.
    fp16=torch.cuda.is_available(),
    logging_steps=10,
    # 2e-3 is far too aggressive for fine-tuning a pretrained ViT and is consistent
    # with the near-chance accuracy noted for this run; 2e-4 is a conventional start.
    learning_rate=2e-4,
    save_total_limit=2,
    remove_unused_columns=False,  # the preprocess transform still needs the 'Filename' column
    push_to_hub=False,
    load_best_model_at_end=True,
    # NOTE(review): to actually resume, pass resume_from_checkpoint to trainer.train();
    # confirm whether this argument alone has any effect on Trainer.
    resume_from_checkpoint='./ModelOutput4',
)


In [None]:
from transformers import ViTForImageClassification
from torch import nn

# Load the pretrained checkpoint with one output per entry of `classes`;
# ignore_mismatched_sizes is set so a differently sized head does not abort loading.
num_labels = len(classes)
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    ignore_mismatched_sizes=True,
)

# Move the model to the selected device (bare expression: the notebook displays it).
model.to(device)

In [None]:
from transformers import Trainer

# Gather the Trainer arguments in one place, then construct the trainer.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metric,
    train_dataset=prepared_train,
    eval_dataset=prepared_test,
    tokenizer=image_processor,  # the image processor is supplied through the `tokenizer` argument
)
trainer = Trainer(**trainer_kwargs)

In [None]:
# Alternative, to continue from a saved checkpoint:
#   train_results = trainer.train(resume_from_checkpoint=True)
train_results = trainer.train()

# Log and persist the training metrics, then save the trainer state.
train_metrics = train_results.metrics
trainer.log_metrics("train", train_metrics)
trainer.save_metrics("train", train_metrics)
trainer.save_state()
# accuracy around :- 0.0X (X -> random number)

In [None]:
# Evaluate on the prepared test split, then log and persist the resulting metrics.
metrics = trainer.evaluate(prepared_test)
for record_metrics in (trainer.log_metrics, trainer.save_metrics):
    record_metrics("eval", metrics)

Inference on a single test image


In [None]:
# Pick one test example and load its image.
image_name = test_dataset[779]['Filename']
# Convert to RGB, as preprocess() does for the training data, so non-RGB files
# (grayscale, RGBA, palette) reach the processor with three channels; the
# context manager closes the file after the converted copy is made.
# NOTE: `input` shadows the built-in of the same name; the name is kept because
# the following cell reads it.
with Image.open(image_name) as opened_image:
    input = opened_image.convert('RGB')
label = test_dataset[779]['Label']
# Index -> class-name lookup (alphabetically sorted names).
# NOTE(review): this list holds 58 names, while the class-count cell's comment
# mentions 62 classes — confirm the mapping matches the integer labels in the CSV.
test_labels = sorted(['office booths', 'bar furniture', 'appliances', 'kitchen', 'textile & rugs', 'office storage', 'office tables', 'bathroom', 'appliances & media', 'decorative accessories', 'showers & bathtubs', 'stairs & railings', 'tables', 'window treatment', 'botanical', 'washbasin', 'furniture components & accessories', 'people & pets', 'toilets & bidets', 'kitchen & dining furniture', 'structure', 'office partitions', 'transport', 'windows', 'landscapes', 'shapes', 'office furniture', 'bathroom storage', 'outdoor lighting', 'feature walls', 'lighting systems', 'bathroom furniture', 'lifestyle', 'food, drink, crockery', 'concept light', 'sofas and arm chairs', 'storage & organization', 'toilet & bidet', 'bathroom faucet', 'accessories', 'doors', 'outdoor furniture', 'building', 'office chairs', 'sports & hobbies', 'molding & millwork', 'bedroom furniture', 'bath accessories', 'ceiling designs', 'furniture', 'lamps', 'storage furniture', 'gardening & structure', 'sofas & arm chairs', 'lighting', 'kids furniture', 'wall decor', 'fireplace & services'])
label = test_labels[label]
print(image_name, input, label)

In [None]:
# Put the model in evaluation mode so train-only behaviour (e.g. dropout) is off,
# regardless of which cells were run before this one.
model.eval()

# Convert to RGB so the processor receives three channels whatever the file's mode.
features = image_processor(input.convert('RGB'), return_tensors='pt').to(device)
with torch.no_grad():  # no gradients are needed for a forward-only pass
    logits = model(**features).logits

print(logits)
# Highest-scoring class index, then its name from the lookup list.
predicted_label = logits.argmax(-1).item()
print(predicted_label)
predicted_label = test_labels[predicted_label]
print(predicted_label)