In [1]:
!pip install transformers

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
!pip show transformers


Name: transformers
Version: 4.52.3
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /work/m4le/.local/lib/python3.11/site-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: 


In [19]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.optim import AdamW          
from transformers import (
    AutoImageProcessor,
    ViTForImageClassification,
    get_linear_schedule_with_warmup,
)
from tqdm.auto import tqdm


In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True # Tells CUDA's cuDNN library to auto-tune and pick the fastest convolution algorithms for the imgsize

use_amp = True
scaler = torch.amp.GradScaler('cuda') if use_amp else None

In [35]:
# Make slit dataset
import os
import shutil
from sklearn.model_selection import train_test_split

dataset_dir = "full/"
train_dir = "train/"
valid_dir = "valid/"
test_dir = "test/"

train_ratio = 0.7
valid_ratio = 0.15
test_ratio = 0.15

for dir_path in [train_dir, valid_dir, test_dir]:
    os.makedirs(dir_path, exist_ok=True)

classes = [d for d in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, d)) and d != '.ipynb_checkpoints']

for class_name in classes:
    for dir_path in [train_dir, valid_dir, test_dir]:
        os.makedirs(os.path.join(dir_path, class_name), exist_ok=True)

    # get all images files in the fill directores
    origin_class_path = os.path.join(dataset_dir, class_name)
    images = [f for f in os.listdir(origin_class_path) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'))]

    # splitting images into train, valid, test
    train_images, temp_images = train_test_split(images, train_size=train_ratio, random_state=42)
    valid_images, test_images = train_test_split(temp_images, train_size=test_ratio/(valid_ratio + test_ratio), random_state=42)

    # move images to respective directories
    for img in train_images:
        shutil.copy(os.path.join(dataset_dir, class_name, img), os.path.join(train_dir, class_name, img))
    for img in valid_images:
        shutil.copy(os.path.join(dataset_dir, class_name, img), os.path.join(valid_dir, class_name, img))
    for img in test_images:
        shutil.copy(os.path.join(dataset_dir, class_name, img), os.path.join(test_dir, class_name, img))

    print(f"Created train ({len(train_images)} images), valid ({len(valid_images)} images), and test ({len(test_images)} images) datasets for class '{class_name}'.")

Created train (675 images), valid (145 images), and test (145 images) datasets for class 'Kitchen'.
Created train (3143 images), valid (674 images), and test (674 images) datasets for class 'Exterior'.
Created train (873 images), valid (187 images), and test (188 images) datasets for class 'Bedroom'.
Created train (810 images), valid (174 images), and test (174 images) datasets for class 'Dinning'.
Created train (891 images), valid (191 images), and test (191 images) datasets for class 'Livingroom'.
Created train (424 images), valid (91 images), and test (91 images) datasets for class 'Bathroom'.


In [36]:
# Prepare datasets and dataloaders
train_dir = "train/"
valid_dir = "valid/"
test_dir = "test/"

processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
size = processor.size["height"]

train_tfms = transforms.Compose([
    transforms.Resize((size,size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=processor.image_mean,
                         std=processor.image_std),
    # transforms.Lambda(lambda t: t.half()),
])
valid_tfms = transforms.Compose([
    transforms.Resize((size,size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=processor.image_mean,
                         std=processor.image_std),
    # transforms.Lambda(lambda t: t.half()),
])
test_tfms = valid_tfms

train_ds = datasets.ImageFolder(train_dir, transform=train_tfms)
valid_ds = datasets.ImageFolder(valid_dir, transform=valid_tfms)
test_ds = datasets.ImageFolder(test_dir, transform=test_tfms)

train_loader = DataLoader(train_ds, batch_size=24, shuffle=True, num_workers=0, pin_memory=True)
valid_loader = DataLoader(valid_ds, batch_size=24, shuffle=False, num_workers=0, pin_memory=True)
test_loader = DataLoader(test_ds, batch_size=24, shuffle=False, num_workers=0, pin_memory=True)

In [37]:
# Initial model,optimizer, scheduler
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=len(train_ds.classes),
    id2label={i:lab for i, lab in enumerate(train_ds.classes)},
    label2id={lab:i for i, lab in enumerate(train_ds.classes)},
    ignore_mismatched_sizes=True       
).to(device)

model = model.float().to(device)

optim = AdamW(model.parameters(), lr=5e-5, weight_decay=0.01)

num_epochs = 5
total_steps = num_epochs * len(train_loader)
sched = get_linear_schedule_with_warmup(
    optim,
    num_warmup_steps=int(0.1 * total_steps),
    num_training_steps=total_steps,
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([6]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [38]:
# training loop
def train_epoch():
    model.train()
    losses = []
    loop = tqdm(train_loader, desc="Train")
    for imgs, labels in loop:
        imgs, labels = imgs.to(device), labels.to(device)
        optim.zero_grad() 

        # forward + backward,with optional AMP
        with torch.amp.autocast("cuda"):
            outputs = model(pixel_values=imgs, labels=labels)
            loss = outputs.loss
        scaler.scale(loss).backward()
        scaler.step(optim)
        scaler.update()
        sched.step()
        losses.append(loss.item())
        loop.set_postfix(loss=sum(losses)/len(losses))
        
    return sum(losses)/ len(losses)


In [39]:
# Evaluation Loop
@torch.no_grad()
def eval_epoch(loader=valid_loader):
    model.eval()
    correct, total = 0, 0
    loop = tqdm(loader, desc="Eval")

    for imgs, labels in loop:
        imgs, labels = imgs.to(device), labels.to(device)
        with torch.amp.autocast("cuda") if use_amp else torch.no_grad():
            logits = model(pixel_values=imgs).logits
        preds = logits.argmax(-1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        loop.set_postfix(acc=correct/total)
    return correct/total
        

In [40]:
best_accuracy = 0.0
num_epochs = 10
for epoch in range(1, num_epochs+1):
    print(f"\n=== Epoch {epoch}/{num_epochs} ===")
    train_loss = train_epoch()
    val_acc = eval_epoch()
    print(f"Train loss: {train_loss:.4f}  |  Val acc: {val_acc:.4f}")

    # if val_acc > best_accuracy:
    #     best_acc = val_acc
    #     model.save_pretrained("vit-house-best")
    #     processor.save_pretrained("vit-house-best")
    #     print(" Saved new best model")



=== Epoch 1/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.4281  |  Val acc: 0.9590

=== Epoch 2/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0852  |  Val acc: 0.9569

=== Epoch 3/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0277  |  Val acc: 0.9651

=== Epoch 4/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0156  |  Val acc: 0.9699

=== Epoch 5/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0113  |  Val acc: 0.9706

=== Epoch 6/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0069  |  Val acc: 0.9706

=== Epoch 7/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0075  |  Val acc: 0.9706

=== Epoch 8/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0070  |  Val acc: 0.9706

=== Epoch 9/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0068  |  Val acc: 0.9706

=== Epoch 10/10 ===


Train:   0%|          | 0/284 [00:00<?, ?it/s]

Eval:   0%|          | 0/61 [00:00<?, ?it/s]

Train loss: 0.0066  |  Val acc: 0.9706
