In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from torch.utils.tensorboard import SummaryWriter
import torch.utils.checkpoint as checkpoint

import matplotlib.pyplot as plt

writer = SummaryWriter('runs/mednext_cifar10')

def gelu(x):
    """Implementation of the gelu activation function."""
    return x * torch.sigmoid(1.702 * x)


# Questions to ask:
# 1. I trained my model with 64-sized batch. Now it requires 64-sized input, meaning 64 images in total.
# 2. How to understand this diagram.

In [29]:
#Kernel size k x k x k
# 1. Depthwise Convolution with kernel size k × k × k
# 2. Normalization, with C output channels
#       We use channel-wise GroupNorm for stability with small
#       batches, instead of the original LayerNorm
class DepthwiseConv3d(nn.Module):
    def __init__(self, in_channels, kernel_size, padding=0, bias=False):
        super(DepthwiseConv3d, self).__init__()
        self.depthwise_conv = nn.Conv3d(in_channels, in_channels, 
                                        kernel_size=kernel_size, padding=padding, 
                                        groups=in_channels, bias=bias)
        self.norm = nn.GroupNorm(num_groups=in_channels, num_channels=in_channels)

    def forward(self, x):
        out = self.depthwise_conv(x)
        out = self.norm(out)
        return out


# Expansion layer contains:
# 1. An overcomplete Convolution Layer with CR output channels,
#       where R is the expansion ratio (expansion_ratio)
# 

class ExpansionLayer(nn.Module):
    def __init__(self, in_channels, expansion_ratio, kernel_size, stride, padding):
        super(ExpansionLayer, self).__init__()
        self.conv = nn.Conv3d(in_channels, in_channels * expansion_ratio, 
                              kernel_size, stride=stride, padding=padding, 
                              groups=in_channels)
        self.norm = nn.GroupNorm(in_channels, in_channels)
        self.activation = nn.GELU()

    def forward(self, x):
        out = self.conv(x)
        out = self.norm(out)
        out = self.activation(out)
        return out
    
# Compression layer:
# 1. 1×1×1 kernel and and C output channels performing channel-wise 
#       compression of the feature maps.
# 2. 
class CompressionLayer(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(CompressionLayer, self).__init__()
        self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)

class MedNext(nn.Module):
    def __init__(self, num_classes=10):
        super(MedNext, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.dwconv1 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, groups=16)
        self.bn2 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1)
        self.bn3 = nn.BatchNorm2d(32)
        self.dwconv2 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, groups=32)
        self.bn4 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.bn5 = nn.BatchNorm2d(64)
        self.dwconv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, groups=64)
        self.bn6 = nn.BatchNorm2d(64)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = nn.functional.relu(x)
        x = self.dwconv1(x)
        x = self.bn2(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = self.bn3(x)
        x = nn.functional.relu(x)
        x = self.dwconv2(x)
        x = self.bn4(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = self.bn5(x)
        x = nn.functional.relu(x)
        x = self.dwconv3(x)
        x = self.bn6(x)
        x = nn.functional.relu(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [30]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MedNext().to(device)

if torch.cuda.is_available():
    device = torch.cuda.get_device_name(0)
    print('My GPU is correctly configured. GPU device name:', device)
else:
    print('No GPU available')

# Define hyperparameters
batch_size = 64
learning_rate = 0.0001
num_epochs = 2


train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())


transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor()])

# train_dataset = datasets.ImageFolder(root='./train', transform=transform)
# test_dataset = datasets.ImageFolder(root='./train', transform=transform)

# Define dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Load from checkpoint ==================================================================

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MedNext().to(device)

# Load checkpoint
checkpoint = torch.load('checkpoint.pth')

# Extract relevant data from checkpoint
epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
loss = checkpoint['loss']

# Load from checkpoint ==================================================================

for epoch in range(num_epochs):
    
    running_loss = 0.0
    
    for i, (images, labels) in enumerate(train_loader):
        
        # move the data to the GPU. Gives an error otherwise lol
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            writer.add_scalar('training loss', running_loss / 100, epoch * len(train_loader) + i)
            running_loss = 0.0
            
    # save checkpoint
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': running_loss,
    },  'checkpoint.pth')
    
    # validation loop
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))
    writer.add_scalar('validation accuracy', 100 * correct / total, epoch)


My GPU is correctly configured. GPU device name: NVIDIA GeForce RTX 3080 Laptop GPU
Files already downloaded and verified
Files already downloaded and verified
[1,   100] loss: 0.388
[1,   200] loss: 0.409
[1,   300] loss: 0.403
[1,   400] loss: 0.393
[1,   500] loss: 0.400
[1,   600] loss: 0.409
[1,   700] loss: 0.390
Accuracy of the network on the 10000 test images: 71 %
[2,   100] loss: 0.403
[2,   200] loss: 0.411
[2,   300] loss: 0.388
[2,   400] loss: 0.397
[2,   500] loss: 0.395
[2,   600] loss: 0.396
[2,   700] loss: 0.398
Accuracy of the network on the 10000 test images: 71 %


In [65]:
first_batch, label = next(iter(train_loader))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
first_batch = first_batch.to(device)


model.eval()

with torch.no_grad():
    outputs = model(first_batch)

predicted_class = torch.argmax(outputs[0])
print (predicted_class)
print (label[0])

tensor(8, device='cuda:0')
tensor(8)


In [52]:
for i, (images, labels) in enumerate(train_loader):
        
        # move the data to the GPU. Gives an error otherwise lol
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        images = images.to(device)
        labels = labels.to(device)
        print (len(labels))
        print (labels[0])

        # Forward pass
        outputs = model(images)

64
tensor(5, device='cuda:0')
64
tensor(0, device='cuda:0')
64
tensor(2, device='cuda:0')
64
tensor(6, device='cuda:0')
64
tensor(1, device='cuda:0')
64
tensor(1, device='cuda:0')
64
tensor(2, device='cuda:0')
64
tensor(1, device='cuda:0')
64
tensor(8, device='cuda:0')
64
tensor(0, device='cuda:0')
64
tensor(9, device='cuda:0')
64
tensor(2, device='cuda:0')
64
tensor(7, device='cuda:0')
64
tensor(8, device='cuda:0')
64
tensor(9, device='cuda:0')
64
tensor(2, device='cuda:0')
64
tensor(7, device='cuda:0')
64
tensor(3, device='cuda:0')
64
tensor(8, device='cuda:0')
64
tensor(4, device='cuda:0')
64
tensor(0, device='cuda:0')
64
tensor(0, device='cuda:0')
64
tensor(0, device='cuda:0')
64
tensor(3, device='cuda:0')
64
tensor(7, device='cuda:0')
64
tensor(1, device='cuda:0')
64
tensor(6, device='cuda:0')
64
tensor(2, device='cuda:0')
64
tensor(2, device='cuda:0')
64
tensor(4, device='cuda:0')
64
tensor(4, device='cuda:0')
64
tensor(3, device='cuda:0')
64
tensor(9, device='cuda:0')
64
tensor(

KeyboardInterrupt: 

In [1]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image = image.to(device)
model.eval()

with torch.no_grad():
    outputs = model(image)


NameError: name 'torch' is not defined