In [55]:
# Google Drive locations: the course folder, and the dataset archive stored inside it.
output_dir = "/content/drive/MyDrive/CS331_Advanced_CV"
zip_file = output_dir + "/HoaVietNam.zip"

In [56]:
zip_file = "/content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam.zip"
output_dir = "/content/drive/MyDrive/CS331_Advanced_CV"

!mkdir -p "$output_dir"
!unzip -o "$zip_file" -d "$output_dir"


Archive:  /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam.zip
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0031.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0032.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0033.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0034.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0035.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0036.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0037.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0038.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0039.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Cuc/0040.jpg  
  inflating: /content/drive/MyDrive/CS331_Advanced_CV/HoaVietNam/test/Dao/0031.jpg  

In [57]:
!pip install torch timm



In [58]:
# Root of the extracted dataset: the "HoaVietNam" folder inside the extraction folder.
data_path = f"{output_dir}/HoaVietNam"

In [59]:
# Sanity check: list the dataset root in long format (-o omits the group column).
!ls -o "$data_path"

total 8
drwx------ 7 root 4096 May  6 15:06 test
drwx------ 7 root 4096 May  6 15:07 train


# Import the libraries

In [60]:
import os
import cv2
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset
from timm import create_model
from PIL import Image

# Data Preparation

In [61]:
# The dataset root holds one folder per split; build both paths in one pass.
data_train_path, data_test_path = (
    os.path.join(data_path, split_name) for split_name in ("train", "test")
)


In [62]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalize each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics; confirm
# they match the preprocessing the pretrained checkpoint expects.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [63]:
def data_extractor(data_dir):
  """Load every image under `data_dir` into memory together with its class index.

  `data_dir` is expected to contain one sub-directory per class. Class indices are
  assigned by enumerating the sub-directory names in sorted order, so the same
  folder layout always yields the same label mapping.

  Args:
      data_dir: Path of a split folder whose sub-directories are the classes.

  Returns:
      A tuple `(images, labels)` of two equally long lists: RGB PIL images and
      the integer class index of each image.

  Notes:
      - Entries of `data_dir` that are not directories are ignored instead of
        being treated as classes.
      - Files that cannot be opened or decoded as images are skipped with a
        printed message instead of aborting the whole load.
      - All images are kept in memory at once.
  """
  images = []
  labels = []

  # Only sub-directories are classes; a stray file next to them would otherwise
  # shift the label indices and then fail in os.listdir below.
  class_names = sorted(
      entry for entry in os.listdir(data_dir)
      if os.path.isdir(os.path.join(data_dir, entry))
  )
  for class_idx, class_name in enumerate(class_names):
      class_dir = os.path.join(data_dir, class_name)
      # Sorted so the sample order does not depend on the file system.
      for img_file in sorted(os.listdir(class_dir)):
          img_path = os.path.join(class_dir, img_file)
          try:
              # convert() returns an independent, fully loaded image, so the
              # underlying file can be closed as soon as the block exits.
              with Image.open(img_path) as img_handle:
                  img = img_handle.convert("RGB")
          except OSError as err:
              # Covers unidentifiable/truncated images and nested directories.
              print(f"Skipping unreadable image {img_path}: {err}")
              continue

          images.append(img)
          labels.append(class_idx)
  return images, labels



In [64]:
# Load both splits into memory: each call returns (list of images, list of class indices).
train_split = data_extractor(data_dir=data_train_path)
test_split = data_extractor(data_dir=data_test_path)
train_images, train_labels = train_split
test_images, test_labels = test_split


In [65]:
# `data_extractor` assigns label indices by enumerating the class folders in sorted
# order. os.listdir returns entries in arbitrary order, so sort here as well;
# otherwise class_names[i] may not be the class that label i stands for.
# Non-directory entries are left out so they are not counted as classes.
class_names = sorted(
    entry for entry in os.listdir(data_train_path)
    if os.path.isdir(os.path.join(data_train_path, entry))
)
num_classes = len(class_names)
print("Class names: ", class_names)
print("Num of class: ", num_classes)

Class names:  ['Cuc', 'Dao', 'Lan', 'Mai', 'Tho']
Num of class:  5


# Create Custom Dataset

In [66]:
class CustomDataset(Dataset):
    """Dataset over two parallel in-memory sequences: images and their labels.

    Args:
        images: Indexable collection of samples.
        labels: Indexable collection of labels, aligned with `images`.
        transform: Optional callable applied to a sample each time it is fetched.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        """Number of samples, taken from the length of `images`."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return `(sample, label)` at `idx`, with the transform applied if one is set."""
        sample, target = self.images[idx], self.labels[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

In [67]:
# Wrap each split's in-memory images and labels; both share the same preprocessing.
train_dataset = CustomDataset(
    images=train_images,
    labels=train_labels,
    transform=transform,
)
test_dataset = CustomDataset(
    images=test_images,
    labels=test_labels,
    transform=transform,
)

In [68]:
# Batches of 64 for both splits; only the training loader shuffles its samples.
train_loader, test_loader = (
    DataLoader(split_dataset, batch_size=64, shuffle=reshuffle)
    for split_dataset, reshuffle in ((train_dataset, True), (test_dataset, False))
)

# Model

In [69]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(device)

# ViT-Base (16x16 patches, 224x224 input) created with pretrained weights and
# `num_classes` outputs, then moved to the chosen device.
model = create_model(
    'vit_base_patch16_224',
    pretrained=True,
    num_classes=num_classes,
).to(device)


cuda


In [70]:
# Loss: cross-entropy between the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters (nothing is frozen) with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Training

In [75]:
def train(model, train_loader, criterion, optimzer, device, epochs=10):
    """Train `model` in place and print the mean loss and accuracy of every epoch.

    Args:
        model: Network to train; it is switched to training mode.
        train_loader: Iterable of `(images, labels)` tensor batches.
        criterion: Loss callable invoked as `criterion(outputs, labels)`.
        optimzer: Optimizer that updates the parameters of `model`. The
            misspelled name is kept so existing keyword callers keep working.
        device: Device the batches are moved to before the forward pass.
        epochs: Number of passes over `train_loader`.

    Raises:
        ValueError: If `train_loader` yields no samples.
    """
    # Use the optimizer that was passed in. The body previously referred to a
    # notebook-global `optimizer`, which silently ignored this argument.
    optimizer = optimzer

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct, total, num_batches = 0, 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # zero the gradient
            optimizer.zero_grad()

            # forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # backward pass
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Index of the highest score per sample is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        if total == 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("train_loader yielded no samples; nothing to train on")

        print(f"Epoch [{epoch + 1}/{epochs}] \tLoss: {running_loss/num_batches} \tAccuracy: {(100 * correct/total):.2f}%")

In [76]:
# Fine-tune for 5 epochs; per-epoch loss and accuracy are printed by train().
train(model, train_loader, criterion, optimizer, device, epochs=5)

Epoch [1/5] 	Loss: 0.020869992417283358 	Accuracy: 98.67%
Epoch [2/5] 	Loss: 0.014135573315434158 	Accuracy: 99.33%
Epoch [3/5] 	Loss: 0.02426503400201909 	Accuracy: 99.33%
Epoch [4/5] 	Loss: 0.006379417306743562 	Accuracy: 100.00%
Epoch [5/5] 	Loss: 0.00012323108094278724 	Accuracy: 100.00%


In [77]:
def evaluate(model, test_loader, device):
    """Measure the classification accuracy of `model` over `test_loader`.

    The model is switched to evaluation mode (and left in it), and gradients
    are disabled while the batches are processed.

    Args:
        model: Network to evaluate.
        test_loader: Iterable of `(images, labels)` tensor batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Accuracy as a percentage in the range 0-100. The same value is also printed.

    Raises:
        ValueError: If `test_loader` yields no samples.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            # Index of the highest score per sample is the predicted class.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("test_loader yielded no samples; accuracy is undefined")

    accuracy = 100 * correct / total
    print(f'Accuracy on test data: {accuracy:.2f}%')
    return accuracy



In [78]:
# Report accuracy on the held-out test split. The trailing semicolon suppresses any
# echoed return value, so the cell shows only the printed line.
evaluate(model, test_loader, device);

Accuracy on test data: 96.00%
