In [None]:
import warnings
warnings.filterwarnings("ignore") 

In [None]:
import os

# OpenMP thread count plus the KMP_* affinity / block-time knobs, written into
# the process environment in one call.
# NOTE(review): presumably set in this early cell so the values are in place
# before torch is imported further down - confirm.
os.environ.update({
    'OMP_NUM_THREADS': "128",
    'KMP_AFFINITY': "granularity=fine,compact,1,0",
    'KMP_BLOCKTIME': "0",
})

In [None]:
import time
import logging
import numpy as np
import torch
import torch.nn as nn
import torch._dynamo
import torch._inductor
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torch.utils.data.sampler import SubsetRandomSampler
from torch.profiler import profile, record_function, ProfilerActivity

In [None]:
def build_data_loader(data_dir, batch_size, random_seed=42, valid_size=0.1, shuffle=True, test=False):
    """Build the CIFAR-10 training and test data loaders.

    Args:
        data_dir: Root directory for the dataset; it is downloaded there if
            not already present.
        batch_size: Number of samples per batch for both loaders.
        random_seed: Seed of the generator that drives training-set shuffling,
            so the batch order is reproducible.
        valid_size: Unused; accepted for backward compatibility.
        shuffle: Whether the training set is reshuffled at every epoch.
        test: Unused; accepted for backward compatibility.

    Returns:
        tuple: ``(train_loader, test_loader)``.
    """
    transform = transforms.Compose([transforms.ToTensor()])
    # NOTE(review): these only attach attributes to the Compose object; the
    # pipeline itself contains just ToTensor, so no resize/crop step appears
    # to be applied - confirm whether a real 224x224 resize was intended.
    transform.crop_size = 224
    transform.resize_size = 224

    train_dataset = datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transform)
    test_dataset = datasets.CIFAR10(root=data_dir, train=False, download=True, transform=transform)

    # Dedicated, seeded generator so shuffling does not depend on global RNG state.
    shuffle_generator = torch.Generator()
    shuffle_generator.manual_seed(random_seed)

    # Shuffling belongs on the training data; evaluation order does not affect
    # accuracy, so the test loader iterates in a fixed order.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        generator=shuffle_generator,
    )
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return (train_loader, test_loader)

In [None]:
def train(model, data_loader, num_epochs, criterion, optimizer, device):
    """Train ``model`` for ``num_epochs`` passes over ``data_loader``.

    Args:
        model: Module to optimize; it is switched to training mode.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        num_epochs: Number of full passes over ``data_loader``.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer already bound to the model's parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        list[int]: Duration of each epoch, truncated to whole seconds.

    Prints one summary line per epoch with the loss of the last batch
    (``nan`` when ``data_loader`` yielded no batches).
    """
    # CUDA kernels are queued asynchronously; without a sync the timer could
    # stop before the epoch's work has actually finished.
    sync_cuda = torch.cuda.is_available() and torch.device(device).type == "cuda"

    model.train()
    epoch_time = []
    for epoch in range(num_epochs):
        loss = None
        start = time.perf_counter()
        for images, labels in data_loader:
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if sync_cuda:
            torch.cuda.synchronize()
        elapsed_time = int(time.perf_counter() - start)
        epoch_time.append(elapsed_time)

        # An empty loader leaves no loss to report; avoid failing on it.
        last_loss = loss.item() if loss is not None else float("nan")
        print ('Epoch [{}/{}], Loss: {:.4f}, time: {} seconds'.format(epoch+1, num_epochs, last_loss, elapsed_time))
    return epoch_time

In [None]:
def test(model, test_loader, device):
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

    accuracy = 100 * (correct / total)
    print('Accuracy of the network on the {} test images: {} %'.format(10000, accuracy))
    return accuracy

In [None]:
import torch
import warnings

# Flag whether the visible CUDA device reports one of the compute capabilities
# (7, 0), (8, 0) or (9, 0) - the V100 / A100 / H100 named in the notice below.
gpu_ok = False
if torch.cuda.is_available():
    device_cap = torch.cuda.get_device_capability()
    if device_cap in ((7, 0), (8, 0), (9, 0)):
        gpu_ok = True

if not gpu_ok:
    # The notebook installs a blanket "ignore" warning filter in its first
    # cell, which would silently drop this notice; force it through locally
    # without disturbing the global filter list.
    with warnings.catch_warnings():
        warnings.simplefilter("always")
        warnings.warn("GPU is not NVIDIA V100, A100, or H100. Speedup numbers may be lower than expected.")

In [None]:
# General parameters
data_dir = '/tmp'
# Fall back to the CPU so the notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hyperparameters
lr = 0.00001
weight_decay = 0.005
batch_size = 64
num_epochs = 50
criterion = nn.CrossEntropyLoss()
# Optimizer class, not an instance: a fresh optimizer is built for every backend.
optimizer = torch.optim.Adam

# CIFAR-10 dataset
train_loader, test_loader = build_data_loader(data_dir=data_dir, batch_size=batch_size)

# Model
# NOTE(review): the pretrained classifier head is used unchanged; confirm its
# output size is what is wanted for the 10 CIFAR-10 classes.
weights = models.DenseNet121_Weights.DEFAULT
net = models.densenet121(weights=weights)
net = net.to(device)

In [None]:
backends = torch._dynamo.list_backends()
results = []

# torch.compile wraps `net` rather than copying it, so all compiled models share
# the same parameters. Snapshot the starting weights and restore them for each
# backend; otherwise every backend would continue from the weights trained by
# the previous one and the time/accuracy numbers would not be comparable.
initial_state = {name: tensor.detach().clone() for name, tensor in net.state_dict().items()}

for backend in backends:
    torch._dynamo.reset()
    net.load_state_dict(initial_state)
    print('Compiling model with backend {}'.format(backend))
    mode = "reduce-overhead" if backend == "inductor" else None
    try:
        model = torch.compile(net, backend=backend, mode=mode)
        opt = optimizer(model.parameters(), lr, weight_decay=weight_decay)
        start = time.perf_counter()
        epoch_time = train(model, train_loader, num_epochs, criterion, opt, device)
        end = time.perf_counter()
        training_time = int(end - start)
        print('Training time of {}: {} seconds'.format(backend, training_time))
        accuracy = test(model, test_loader, device)
    except Exception as exc:
        # A listed backend may be unusable on this machine (for example, a
        # missing optional dependency); report it and keep benchmarking the
        # remaining backends instead of losing the results gathered so far.
        print('Backend {} failed and was skipped: {!r}'.format(backend, exc))
        continue
    results.append([backend, training_time, epoch_time, accuracy])
print(results)