In [None]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import timm  # PyTorch Image Models

# --------------------- Dataset ---------------------

class ImageArrayDataset(Dataset):
    """Dataset of ``(image, label)`` pairs backed by image and ``.npy`` files.

    Args:
        samples: Sequence of ``(img_path, npy_path)`` pairs.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.

    Each item is ``(image, label)`` where ``label`` is the integer index of
    the largest entry in the array stored at ``npy_path`` (presumably a
    one-hot or per-class score vector -- confirm against the data).
    """

    def __init__(self, samples, transform=None):
        self.samples = samples
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load and return the ``(image, label)`` pair at position ``idx``."""
        img_path, npy_path = self.samples[idx]
        # Open inside a context manager so the underlying file handle is
        # released deterministically instead of waiting for garbage
        # collection; convert() returns an independent, fully loaded copy,
        # so the image stays usable after the file is closed.
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = int(np.argmax(np.load(npy_path)))
        return image, label

# --------------------- Training ---------------------

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy, n_samples)``.

    When ``optimizer`` is given the model is put in training mode and its
    weights are updated after every batch; otherwise the model is put in
    evaluation mode and gradients are disabled. If the loader yields no
    samples, the loss and accuracy are ``nan`` and ``n_samples`` is 0.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, correct, total = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # Weight the batch loss by the batch size so the final mean is
            # per-sample even when the last batch is smaller.
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += batch_size

    if total == 0:
        # Avoid ZeroDivisionError on an empty loader.
        return float('nan'), float('nan'), 0
    return loss_sum / total, correct / total, total


def train(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=10):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        scheduler: LR scheduler stepped once per epoch with the validation
            accuracy (e.g. ``ReduceLROnPlateau`` with ``mode='max'``).
        device: Device the batches are moved to.
        epochs: Number of epochs to run.

    Returns:
        A list with one dict per epoch holding ``train_loss``, ``train_acc``,
        ``val_loss`` and ``val_acc``. Metrics of an empty loader are ``nan``.
    """
    history = []
    for epoch in range(epochs):
        train_loss, train_acc, _ = _run_epoch(
            model, train_loader, criterion, device, optimizer)
        val_loss, val_acc, n_val = _run_epoch(
            model, val_loader, criterion, device)

        # Without validation samples there is no metric to drive the
        # scheduler; stepping on nan would count as a "bad" epoch.
        if n_val:
            scheduler.step(val_acc)

        print(f"Epoch {epoch+1}: "
              f"Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.4f}, "
              f"Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.4f}")

        history.append({
            'train_loss': train_loss,
            'train_acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc,
        })
    return history

# --------------------- Main ---------------------

def main(folder_path=r"C:\Users\huang\Downloads\Engineering Projects\Genesys Lab\v5"):
    """Fine-tune a pretrained ViT-tiny on the image/``.npy`` pairs in a folder.

    Every ``.jpg``/``.png`` file in ``folder_path`` that has a sibling
    ``<name>.npy`` file becomes one sample. Samples are shuffled with a fixed
    seed, split 80/20 into train/validation, and used to train a 5-class
    ``vit_tiny_patch16_224`` model whose weights are then written to
    ``vit_tiny_5class.pth`` in the current working directory.

    Args:
        folder_path: Directory holding the images and their ``.npy`` labels.

    Raises:
        ValueError: If fewer than two labelled images are found, since both
            the training and the validation split must be non-empty.
    """
    batch_size = 16
    epochs = 20
    val_split = 0.2
    seed = 42

    random.seed(seed)
    torch.manual_seed(seed)

    # 1. Collect samples
    image_exts = ('.jpg', '.png')
    all_samples = []
    # os.listdir returns entries in arbitrary order; sort so the seeded
    # shuffle below yields the same split on every run and machine.
    for fname in sorted(os.listdir(folder_path)):
        # Compare case-insensitively so e.g. "IMG_001.JPG" is not skipped.
        if not fname.lower().endswith(image_exts):
            continue
        base = os.path.splitext(fname)[0]
        npy_path = os.path.join(folder_path, base + '.npy')
        if os.path.exists(npy_path):
            all_samples.append((os.path.join(folder_path, fname), npy_path))

    if len(all_samples) < 2:
        # Fail early with a clear message instead of an obscure error from
        # the DataLoader or the training loop.
        raise ValueError(
            f"Need at least 2 image/.npy pairs in {folder_path!r}, "
            f"found {len(all_samples)}"
        )

    random.shuffle(all_samples)
    split_idx = int(len(all_samples) * (1 - val_split))
    train_samples = all_samples[:split_idx]
    val_samples = all_samples[split_idx:]

    # 2. Transforms with ImageNet normalization
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Keep original ViT input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet mean
                             std=[0.229, 0.224, 0.225]),  # ImageNet std
    ])

    train_dataset = ImageArrayDataset(train_samples, transform=transform)
    val_dataset = ImageArrayDataset(val_samples, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # 3. Load smaller pretrained ViT for faster training
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = timm.create_model('vit_tiny_patch16_224', pretrained=True, num_classes=5).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    # mode='max' because the scheduler is stepped with validation accuracy.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=2, factor=0.5)

    train(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs)
    torch.save(model.state_dict(), 'vit_tiny_5class.pth')
    print("Model weights saved to vit_tiny_5class.pth")

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

model.safetensors:   0%|          | 0.00/22.9M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Epoch 1: Train Loss = 1.5286, Train Acc = 0.4713, Val Loss = 1.2457, Val Acc = 0.4318
Epoch 2: Train Loss = 1.0240, Train Acc = 0.5920, Val Loss = 1.0499, Val Acc = 0.5909
Epoch 3: Train Loss = 0.7889, Train Acc = 0.6782, Val Loss = 1.2054, Val Acc = 0.6136
Epoch 4: Train Loss = 0.5094, Train Acc = 0.7874, Val Loss = 1.2610, Val Acc = 0.5682
Epoch 5: Train Loss = 0.3358, Train Acc = 0.8736, Val Loss = 1.6784, Val Acc = 0.5000
Epoch 6: Train Loss = 0.2269, Train Acc = 0.9253, Val Loss = 1.9426, Val Acc = 0.4318
Epoch 7: Train Loss = 0.1126, Train Acc = 0.9713, Val Loss = 1.8374, Val Acc = 0.5000
Epoch 8: Train Loss = 0.0446, Train Acc = 1.0000, Val Loss = 1.8736, Val Acc = 0.5682
Epoch 9: Train Loss = 0.0215, Train Acc = 1.0000, Val Loss = 1.9981, Val Acc = 0.5682
Epoch 10: Train Loss = 0.0113, Train Acc = 1.0000, Val Loss = 2.0672, Val Acc = 0.5227
Epoch 11: Train Loss = 0.0085, Train Acc = 1.0000, Val Loss = 2.0660, Val Acc = 0.5682
Epoch 12: Train Loss = 0.0046, Train Acc = 1.0000, V