In [1]:
import timm
import torch
import zipfile, os
from PIL import Image
from pathlib import Path
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Check CUDA availability
print(torch.cuda.is_available())  # Should return True
print(torch.version.cuda)         # Should match the installed CUDA version

True
11.8


In [3]:
local_zip = os.path.join(os.getcwd(), 'GCD.zip')
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall(os.getcwd())
zip_ref.close()

In [4]:
train_dir = os.path.join(os.getcwd(), 'GCD', 'train')
test_dir = os.path.join(os.getcwd(), 'GCD', 'test')

In [5]:
# Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),           # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

In [6]:
# Custom dataset class
class CustomDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.class_names = os.listdir(data_dir)

        for label, class_name in enumerate(self.class_names):
            class_dir = os.path.join(data_dir, class_name)
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                self.images.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [7]:
# Data loaders
train_dataset = CustomDataset(data_dir=train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

test_dataset = ImageFolder(root=test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [8]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
# Function for training and evaluation
def train_and_evaluate_model(model_name, model, train_loader, test_loader, num_epochs=10):
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    print(f"Training model: {model_name}")
    
    # Training loop
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        epoch_loss = 0  # To accumulate loss for the epoch
        correct_predictions = 0
        total_samples = 0
        all_targets = []
        all_preds = []
        
        progress_bar = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]")  # Progress bar

        for inputs, targets in progress_bar:
            inputs, targets = inputs.to(device), targets.to(device)  # Move to device

            optimizer.zero_grad()  # Zero the gradients
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, targets)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights

            # Update progress bar description with the current loss
            epoch_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())
            
            # Calculate accuracy
            _, preds = torch.max(outputs, dim=1)
            correct_predictions += (preds == targets).sum().item()
            total_samples += targets.size(0)
            all_targets.extend(targets.tolist())
            all_preds.extend(preds.tolist())
        
        avg_loss = epoch_loss / len(train_loader)
        accuracy = correct_predictions / total_samples * 100
        print(f"Epoch [{epoch+1}/{num_epochs}], Avg Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
        
        # Evaluate the model on the test set after each epoch
        model.eval()  # Set model to evaluation mode
        correct_predictions = 0
        total_samples = 0
        all_targets = []
        all_preds = []

        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, dim=1)
                correct_predictions += (preds == targets).sum().item()
                total_samples += targets.size(0)
                all_targets.extend(targets.tolist())
                all_preds.extend(preds.tolist())

        test_accuracy = correct_predictions / total_samples * 100
        print(f"Test Accuracy for {model_name} after epoch {epoch+1}: {test_accuracy:.2f}%")

In [10]:
# Models to train
models = [
    ('maxvit_tiny_tf_224.in1k', timm.create_model('maxvit_tiny_tf_224.in1k', pretrained=True, num_classes=7)),
    ('maxit_tiny_rw_224.sw_in1k', timm.create_model('maxit_tiny_rw_224.sw_in1k', pretrained=True, num_classes=7)),
    ('maxvit_small_tf_224.in1k', timm.create_model('maxvit_small_tf_224.in1k', pretrained=True, num_classes=7))
]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [11]:
# Train and evaluate each model
for model_name, model in models:
    train_and_evaluate_model(model_name, model, train_loader, test_loader, num_epochs=2)

Training model: maxvit_small_tf_224.in1k


Epoch [1/2]: 100%|██████████| 1250/1250 [49:39<00:00,  2.38s/it, loss=0.649] 


Epoch [1/2], Avg Loss: 0.9495, Accuracy: 63.85%
Test Accuracy for maxvit_small_tf_224.in1k after epoch 1: 63.87%


Epoch [2/2]: 100%|██████████| 1250/1250 [48:20<00:00,  2.32s/it, loss=0.135] 


Epoch [2/2], Avg Loss: 0.6506, Accuracy: 75.35%
Test Accuracy for maxvit_small_tf_224.in1k after epoch 2: 68.00%
Training model: maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k


Epoch [1/2]: 100%|██████████| 1250/1250 [2:31:01<00:00,  7.25s/it, loss=nan]   


Epoch [1/2], Avg Loss: nan, Accuracy: 45.01%
Test Accuracy for maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k after epoch 1: 8.33%


Epoch [2/2]: 100%|██████████| 1250/1250 [2:33:26<00:00,  7.36s/it, loss=nan] 


Epoch [2/2], Avg Loss: nan, Accuracy: 7.75%
Test Accuracy for maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k after epoch 2: 8.33%
Training model: maxvit_large_tf_224.in1k


Epoch [1/2]:   0%|          | 1/1250 [00:17<6:03:23, 17.46s/it, loss=2.04]


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 10.36 GiB is allocated by PyTorch, and 454.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)