In [None]:
from google.colab import files

# Open Colab's file picker so the user can upload their kaggle.json API token.
# The result is bound to a name on purpose: a bare `files.upload()` as the last
# expression makes the notebook echo the returned dict -- i.e. the raw bytes of
# the credential file -- into the saved cell output.
uploaded = files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [None]:
# Install the uploaded API token under ~/.kaggle, where the `kaggle` CLI used
# in the next cell is expected to look for it.
# -p: do not fail if the directory already exists (keeps the cell re-runnable).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# 600 = read/write for the owner only, so the token is not readable by others.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the garbage-classification dataset archive (-d <owner>/<dataset>)
# into /content (-p <target directory>).
!kaggle datasets download -d asdasdasasdas/garbage-classification -p /content

Dataset URL: https://www.kaggle.com/datasets/asdasdasasdas/garbage-classification
License(s): copyright-authors
Downloading garbage-classification.zip to /content
 85% 70.0M/82.0M [00:00<00:00, 719MB/s]
100% 82.0M/82.0M [00:00<00:00, 697MB/s]


In [None]:
import os
import zipfile

# Archive fetched by the Kaggle CLI, and the folder it gets unpacked into.
zip_path = "/content/garbage-classification.zip"
extract_path = "/content/garbage"

# Unpack the whole archive; the context manager closes the file afterwards.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

# List the top-level entries so the image folder can be located by eye.
extracted_entries = os.listdir(extract_path)
print(extracted_entries)


['zero-indexed-files.txt', 'Garbage classification', 'one-indexed-files-notrash_val.txt', 'one-indexed-files.txt', 'garbage classification', 'one-indexed-files-notrash_test.txt', 'one-indexed-files-notrash_train.txt']


In [None]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split


class PatchEmbedding(nn.Module):
    """Cut an image into non-overlapping square patches and embed each one.

    A convolution whose kernel size and stride both equal ``patch_size``
    projects every patch to an ``embed_dim``-dimensional vector.

    Attributes:
        patch_size: Side length of a patch, in pixels.
        n_patches: ``(img_size // patch_size) ** 2``, the number of patches
            produced for a square ``img_size`` input.
        proj: The strided convolution performing the projection.
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=256):
        super().__init__()
        patches_per_side = img_size // patch_size
        self.patch_size = patch_size
        self.n_patches = patches_per_side * patches_per_side
        self.proj = nn.Conv2d(
            in_chans,
            embed_dim,
            kernel_size=patch_size,
            stride=patch_size,
        )

    def forward(self, x):
        """Embed a batch of images; the result has the patch axis second."""
        # Conv output is (B, embed_dim, H', W'): collapse the spatial grid into
        # one axis, then swap so each row is one patch's embedding.
        return self.proj(x).flatten(2).transpose(1, 2)


class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Args:
        dim: Channel size of the input and output tokens.
        n_heads: Number of attention heads; must divide ``dim`` evenly.
        qkv_bias: Whether the fused query/key/value projection has a bias.

    Raises:
        ValueError: If ``n_heads`` is not positive or does not divide ``dim``.
            Without this check the module would be built successfully and only
            fail later, inside ``forward``, with an opaque reshape error.
    """

    def __init__(self, dim, n_heads=8, qkv_bias=True):
        super().__init__()
        if n_heads <= 0 or dim % n_heads != 0:
            raise ValueError(
                f"dim ({dim}) must be divisible by a positive n_heads ({n_heads})"
            )
        self.n_heads = n_heads
        self.head_dim = dim // n_heads
        # Scale logits by 1/sqrt(head_dim) before the softmax.
        self.scale = self.head_dim ** -0.5

        # A single linear layer produces queries, keys and values at once.
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x):
        """Apply self-attention to ``x`` of shape ``(B, N, C)``; same shape out."""
        B, N, C = x.shape
        # (B, N, 3*C) -> (B, N, 3, heads, head_dim) -> (3, B, heads, N, head_dim)
        qkv = self.qkv(x).reshape(B, N, 3, self.n_heads, self.head_dim)
        q, k, v = qkv.permute(2, 0, 3, 1, 4)

        # Token-to-token similarity per head: (B, heads, N, N).
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)

        # Weighted sum of values, then merge the heads back into C channels.
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        return self.proj(x)


class MLP(nn.Module):
    """Two-layer feed-forward network: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        in_features: Size of the input's last dimension.
        hidden_features: Width of the hidden layer; falls back to
            ``in_features`` when falsy (``None`` or ``0``).
        out_features: Size of the output's last dimension; falls back to
            ``in_features`` when falsy.
        drop: Dropout probability used after both linear layers.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, drop=0.):
        super().__init__()
        if not out_features:
            out_features = in_features
        if not hidden_features:
            hidden_features = in_features

        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.act = nn.GELU()
        # One Dropout module is shared by both dropout sites in forward().
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """Run ``x`` through the expand -> activate -> project pipeline."""
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))


class TransformerEncoderBlock(nn.Module):
    """One encoder layer: self-attention and an MLP, each in a residual branch.

    LayerNorm is applied to the input of each branch (before attention and
    before the MLP), and the branch output is added back onto the stream.

    Args:
        dim: Token channel size.
        n_heads: Number of attention heads.
        mlp_ratio: Hidden width of the MLP as a multiple of ``dim``.
        drop: Dropout probability used inside the MLP.
    """

    def __init__(self, dim, n_heads, mlp_ratio=4.0, drop=0.):
        super().__init__()
        mlp_hidden = int(dim * mlp_ratio)

        self.norm1 = nn.LayerNorm(dim)
        self.attn = Attention(dim, n_heads)
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = MLP(dim, mlp_hidden, drop=drop)

    def forward(self, x):
        """Return ``x`` after the attention and MLP residual updates."""
        attn_out = self.attn(self.norm1(x))
        x = x + attn_out
        mlp_out = self.mlp(self.norm2(x))
        return x + mlp_out


class VisionTransformer(nn.Module):
    """Vision Transformer image classifier.

    The image is split into patch embeddings, a learnable class token is
    prepended, learnable position embeddings are added, and the sequence is
    passed through ``depth`` encoder blocks. The class token's final,
    layer-normalised state is fed to a linear classification head.

    Args:
        img_size: Side length of the (square) input images.
        patch_size: Side length of each patch.
        in_chans: Number of input image channels.
        num_classes: Size of the output logits vector.
        embed_dim: Token channel size.
        depth: Number of encoder blocks.
        n_heads: Attention heads per block.
        mlp_ratio: MLP hidden width as a multiple of ``embed_dim``.
        drop: Dropout probability (position dropout and MLP dropout).
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=6,
                 embed_dim=256, depth=8, n_heads=8, mlp_ratio=4.0, drop=0.):
        super().__init__()
        self.patch_embed = PatchEmbedding(img_size, patch_size, in_chans, embed_dim)
        # One extra sequence slot for the class token.
        seq_len = self.patch_embed.n_patches + 1

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, seq_len, embed_dim))
        self.pos_drop = nn.Dropout(p=drop)

        encoder_layers = []
        for _ in range(depth):
            encoder_layers.append(
                TransformerEncoderBlock(embed_dim, n_heads, mlp_ratio, drop)
            )
        self.blocks = nn.ModuleList(encoder_layers)
        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)

        # Replace the zero placeholders with small truncated-normal values.
        for param in (self.pos_embed, self.cls_token):
            nn.init.trunc_normal_(param, std=0.02)

    def forward(self, x):
        """Map a batch of images to class logits of shape ``(B, num_classes)``."""
        batch = x.shape[0]
        tokens = self.patch_embed(x)

        # Prepend one copy of the class token per sample, then add positions.
        cls = self.cls_token.expand(batch, -1, -1)
        tokens = torch.cat((cls, tokens), dim=1) + self.pos_embed
        tokens = self.pos_drop(tokens)

        for layer in self.blocks:
            tokens = layer(tokens)

        tokens = self.norm(tokens)
        # Classify from the class token, which sits at sequence index 0.
        cls_out = tokens[:, 0]
        return self.head(cls_out)



def load_garbage_dataset(data_dir="./garbage", img_size=224, batch_size=64,
                         split_ratio=(0.7, 0.15, 0.15), seed=42):
    """Build train/val/test loaders from an ImageFolder-style directory.

    Random augmentation (flip, rotation, colour jitter) is applied to the
    training split only. Validation and test images are just resized and
    normalised, so evaluation metrics are deterministic for a given model.

    Args:
        data_dir: Root folder containing one sub-directory per class.
        img_size: Images are resized to ``(img_size, img_size)``.
        batch_size: Batch size used by all three loaders.
        split_ratio: ``(train, val, test)`` fractions. Only the first two are
            used to size the splits; the test split receives the remainder.
        seed: Seed for the split permutation so the partition is reproducible.
            Pass ``None`` to draw from torch's global RNG instead.

    Returns:
        ``(train_loader, val_loader, test_loader, num_classes)``.

    Raises:
        ValueError: If the train and val fractions together exceed the dataset.
    """
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    train_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        normalize,
    ])
    eval_transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        normalize,
    ])

    # Two views over the same files. ImageFolder enumerates classes and files
    # in sorted order, so index i refers to the same image in both views.
    train_view = ImageFolder(root=data_dir, transform=train_transform)
    eval_view = ImageFolder(root=data_dir, transform=eval_transform)

    n_total = len(train_view)
    n_train = int(split_ratio[0] * n_total)
    n_val = int(split_ratio[1] * n_total)
    if n_train + n_val > n_total:
        raise ValueError(
            f"split_ratio {split_ratio} assigns more than the {n_total} available images"
        )

    # Shuffle the indices once, then carve out disjoint contiguous slices.
    generator = None if seed is None else torch.Generator().manual_seed(seed)
    order = torch.randperm(n_total, generator=generator).tolist()
    train_idx = order[:n_train]
    val_idx = order[n_train:n_train + n_val]
    test_idx = order[n_train + n_val:]

    train_set = torch.utils.data.Subset(train_view, train_idx)
    val_set = torch.utils.data.Subset(eval_view, val_idx)
    test_set = torch.utils.data.Subset(eval_view, test_idx)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    print("Classes:", train_view.classes)
    print("Total images:", n_total)

    return train_loader, val_loader, test_loader, len(train_view.classes)



def _evaluate_accuracy(model, loader, device):
    """Return top-1 accuracy in percent of ``model`` over ``loader`` (NaN if empty)."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            predicted = model(imgs).argmax(dim=1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    # An empty split (possible on very small datasets) must not crash the run.
    return 100. * correct / total if total else float("nan")


def train_vit_on_garbage(data_dir="./garbage", epochs=10, batch_size=64, lr=3e-4, device="cuda" if torch.cuda.is_available() else "cpu"):
    """Train a Vision Transformer on the garbage dataset and report accuracy.

    Prints the mean training loss and validation accuracy after every epoch,
    and the test accuracy once training has finished.

    Args:
        data_dir: Root of the ImageFolder-style dataset.
        epochs: Number of passes over the training split.
        batch_size: Batch size for all data loaders.
        lr: AdamW learning rate.
        device: Device to train on; the default is chosen once, when the
            function is defined, based on CUDA availability at that moment.

    Returns:
        The trained ``VisionTransformer``, left in eval mode.
    """
    train_loader, val_loader, test_loader, num_classes = load_garbage_dataset(data_dir, img_size=224, batch_size=batch_size)

    model = VisionTransformer(img_size=224, patch_size=16, in_chans=3, num_classes=num_classes,
                              embed_dim=256, depth=8, n_heads=8, mlp_ratio=4.0).to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=0.05)
    criterion = nn.CrossEntropyLoss()

    n_batches = len(train_loader)
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(imgs), labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Mean of the per-batch losses; NaN rather than a crash if no batches ran.
        avg_loss = total_loss / n_batches if n_batches else float("nan")
        val_acc = _evaluate_accuracy(model, val_loader, device)

        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}, Val Acc: {val_acc:.2f}%")

    test_acc = _evaluate_accuracy(model, test_loader, device)
    print(f"Final Test Accuracy: {test_acc:.2f}%")

    return model


In [None]:
# Folder handed to the loader as the ImageFolder root; it sits two levels
# below the extraction directory.
data_dir = "/content/garbage/Garbage classification/Garbage classification"

# Train for 20 epochs with batches of 64; learning rate and device keep the
# function's defaults.
model = train_vit_on_garbage(data_dir=data_dir, epochs=20, batch_size=64)


Classes: ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
Total images: 2527
Epoch 1/20 - Loss: 1.7211, Val Acc: 40.11%
Epoch 2/20 - Loss: 1.4730, Val Acc: 38.79%
Epoch 3/20 - Loss: 1.4713, Val Acc: 39.58%
Epoch 4/20 - Loss: 1.4193, Val Acc: 45.91%
Epoch 5/20 - Loss: 1.3734, Val Acc: 46.17%
Epoch 6/20 - Loss: 1.3383, Val Acc: 50.40%
Epoch 7/20 - Loss: 1.2889, Val Acc: 53.30%
Epoch 8/20 - Loss: 1.2125, Val Acc: 52.77%
Epoch 9/20 - Loss: 1.2499, Val Acc: 56.20%
Epoch 10/20 - Loss: 1.1465, Val Acc: 57.52%
Epoch 11/20 - Loss: 1.1200, Val Acc: 55.41%
Epoch 12/20 - Loss: 1.0853, Val Acc: 61.21%
Epoch 13/20 - Loss: 1.0720, Val Acc: 58.58%
Epoch 14/20 - Loss: 1.0009, Val Acc: 61.21%
Epoch 15/20 - Loss: 1.0152, Val Acc: 57.26%
Epoch 16/20 - Loss: 0.9960, Val Acc: 60.42%
Epoch 17/20 - Loss: 0.9437, Val Acc: 62.27%
Epoch 18/20 - Loss: 0.9019, Val Acc: 62.53%
Epoch 19/20 - Loss: 0.8978, Val Acc: 61.74%
Epoch 20/20 - Loss: 0.8927, Val Acc: 62.01%
Final Test Accuracy: 62.11%
