In [None]:
# Summary

## 1. Dataset Preparation
    1. Split
    2. Transformations
## 2. Training Techniques
    1. Data Augmentation
    2. Learning Rate Scheduling
    3. Regularization
        1. Weight Decay
        2. Dropout
        3. Batch Normalization
## 3. ViT
## 4. EfficientNet
## 5. Performance
## 6. Analysis

# 1. Datasets

In [16]:
data_path = '/Users/adamlee/Downloads/Deepest/dataset'

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import STL10

# Define the Mixup transform
class MixupTransform:
    def __init__(self, alpha):
        self.alpha = alpha

    def __call__(self, image, label):
        lam = torch.distributions.beta.Beta(self.alpha, self.alpha).sample().item()
        batch_size = image.size()[0]
        index = torch.randperm(batch_size)
        mixed_image = lam * image + (1 - lam) * image[index, :]
#         mixed_label = lam * label + (1 - lam) * label[index, :]
        mixed_label = lam * label.unsqueeze(1) + (1 - lam) * label[index, :].unsqueeze(1)
        return mixed_image, mixed_label

# Define the transforms
train_transform = transforms.Compose([
#     transforms.RandomCrop(96, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    MixupTransform(alpha=1.0)
])

test_transform = transforms.Compose([
#     transforms.Resize(96),
#     transforms.CenterCrop(96),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Load the dataset and apply the transforms
train_set = STL10(root=data_path, split='test', download=True, transform=train_transform)
test_set = STL10(root=data_path, split='train', download=True, transform=test_transform)

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to /Users/adamlee/Downloads/Deepest/dataset/stl10_binary.tar.gz


 19%|████▏                 | 506658816/2640397119 [05:04<2:32:21, 233409.31it/s]

In [32]:
test_set

Dataset STL10
    Number of datapoints: 5000
    Root location: /Users/adamlee/Downloads/Deepest/dataset
    Split: train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
           )

In [20]:
# Split the test set into test and validation sets
num_test = len(test_set)
indices = list(range(num_test))
split = int(num_test * 0.8)
test_idx, val_idx = indices[:split], indices[split:]
testset = torch.utils.data.Subset(test_set, test_idx)
valset = torch.utils.data.Subset(test_set, val_idx)

In [33]:
# Create data loaders for batching
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)
val_loader = torch.utils.data.DataLoader(valset, batch_size=32, shuffle=True)

In [39]:
first_batch = next(iter(val_loader))
print(first_batch)

[tensor([[[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          ...,
          [ 0.9988,  1.0331,  1.0331,  ...,  1.0331,  1.0159,  1.0502],
          [ 0.9646,  1.0159,  1.0159,  ...,  1.0673,  1.0502,  1.0331],
          [ 0.9817,  0.9988,  1.0159,  ...,  1.0673,  1.0673,  1.0844]],

         [[-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          ...,
          [ 1.1681,  1.2031,  1.1856,  ...,  1.2381,  1.2206,  1.2381],
          [ 1.1155,  1.1681,  1.1681,  ...,  1.2381,  1.2206,  1.2031],
          [ 1.1331,  1.1506,  1.1681,  ...,  1.2206,  1.2206,  1.2381]],

         [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
          [-1.8044, -1.8044, 

# 2. ViT

ViT-L/16

ViT (Samll)
layers: 4
hidden size: 256
heads: 6

In [24]:
import torch
import torch.nn as nn
from torchvision.models import ResNet

class ViT(nn.Module):
    def __init__(self, num_classes=10, img_size=96, patch_size=16, num_channels=3, emb_dim=256, num_heads=4, num_layers=4):
        super(ViT, self).__init__()

        assert img_size % patch_size == 0, "Image size must be divisible by patch size"

        self.patch_size = patch_size
        self.num_patches = (img_size // patch_size) ** 2
        self.emb_dim = emb_dim

        self.patch_embed = nn.Conv2d(num_channels, emb_dim, kernel_size=patch_size, stride=patch_size)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, emb_dim))

        self.position_embed = nn.Parameter(torch.zeros(1, 1 + self.num_patches, emb_dim))
        self.position_dropout = nn.Dropout(p=0.1)

        self.layers = nn.ModuleList()
        for _ in range(num_layers):
            layer = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=num_heads)
            self.layers.append(layer)

        self.norm = nn.LayerNorm(emb_dim)
        self.fc = nn.Linear(emb_dim, num_classes)

    def forward(self, x):
        B, C, H, W = x.shape
        x = self.patch_embed(x).flatten(2).transpose(1, 2)

        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        x = x + self.position_embed
        x = self.position_dropout(x)

        for layer in self.layers:
            x = layer(x)

        x = self.norm(x[:, 0])
        x = self.fc(x)

        return x


# 3. EfficientNet

In [None]:
from efficientnet_pytorch import EfficientNet

# Define the model
num_classes = 10
efficient_net = EfficientNet.from_pretrained('efficientnet-b0', num_classes=num_classes)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
efficient_net.to(device)

num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = efficient_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}, loss: {running_loss / len(trainloader):.4f}")

# Test the model
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = efficient_net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {correct / total:.4f}")

# 5. Training

In [25]:
model = ViT().to(device)

In [28]:
from torch.utils.tensorboard import SummaryWriter

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

num_epochs = 100

writer = SummaryWriter()

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_acc = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        train_acc += (preds == labels).sum().item()

    train_loss /= len(train_loader)
    train_acc /= len(train_dataset)

    model.eval()
    val_loss = 0.0
    val_acc = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            test_acc += (preds == labels).sum().item()

    val_loss /= len(val_loader)
    val_acc /= len(valset)

    print(f"Epoch [{epoch + 1}/{num_epochs}]")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc * 100:.2f}%")
    print(f"Validataion Loss: {val_loss:.4f}, Validataion Acc: {val_acc * 100:.2f}%")
    
    # Log to TensorBoard
    writer.add_scalar('Loss/train', train_loss, epoch)
    writer.add_scalar('Accuracy/validation', val_acc, epoch)

writer.close()


TypeError: __call__() missing 1 required positional argument: 'label'

In [None]:
# Test performance

model.eval()
test_loss = 0.0
test_acc = 0.0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)

        test_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        test_acc += (preds == labels).sum().item()

test_loss /= len(test_loader)
test_acc /= len(test_dataset)

print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc * 100:.2f}%")

# 4. Analysis