In [1]:
import time
import logging
import numpy as np
import torch
import torch.nn as nn
import torch._dynamo
import torch._inductor
from torchvision import datasets
from torchvision import transforms
from torchvision import models
from torch.utils.data.sampler import SubsetRandomSampler
from torch.profiler import profile, record_function, ProfilerActivity

In [2]:
def build_data_loader(data_dir, batch_size, random_seed=42, valid_size=0.1, shuffle=True, test=False):

    transform = transforms.Compose([transforms.ToTensor()])
    
    train_dataset = datasets.FashionMNIST(root=data_dir, train=True, download=True, transform=transform)
    valid_dataset = datasets.FashionMNIST(root=data_dir, train=True, download=True, transform=transform)
    test_dataset = datasets.FashionMNIST(root=data_dir, train=False, download=True, transform=transform)
  
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, sampler=valid_sampler)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle)

    return (train_loader, valid_loader, test_loader)

In [3]:
def train(model, data_loader, valid_loader, num_epochs, criterion, optimizer, device):
    total_steps = len(train_loader)
    for epoch in range(num_epochs):
        for step, (images, labels) in enumerate(train_loader):  
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
        
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
        
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #print ('Step [{}/{}], Loss: {:.4f}'.format(step+1, total_steps, loss.item()))
               
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
            

In [4]:
def validate(model, valid_loader, device):
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
    
    print('Accuracy of the network on the {} validation images: {:.2f} %'.format(5000, 100 * correct / total)) 

In [5]:
def test(model, test_loader, device):
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

    print('Accuracy of the network on the {} test images: {} %'.format(10000, 100 * correct / total))  

In [6]:
class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))

        self.fc1 = nn.Linear(64*7*7, 512)
        
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [7]:
import os
os.environ['TORCH_COMPILE_DEBUG'] = "1"
torch._dynamo.config.log_level = logging.DEBUG
torch._dynamo.config.verbose = True
#torch._dynamo.config.log_level = logging.INFO
torch._dynamo.config.output_code = True
#torch._dynamo.config.cache_size_limit = 1
#torch.set_default_device(device)

In [8]:
# General parameters
data_dir = '/tmp'
num_classes = 10
device = "cpu"

# Hyperparameters
max_lr = 0.00001
weight_decay = 0.005
batch_size = 64
num_epochs = 1
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam

# FashionMNIST dataset 
train_loader, valid_loader, test_loader = build_data_loader(data_dir=data_dir, batch_size=batch_size)

# Models
torch._dynamo.reset()
eager_model = CNN().to(device)
graph_model = torch.compile(CNN().to(device), backend="inductor", fullgraph=True)

# Model selection
model = graph_model

# Optimizer
optimizer = optimizer(model.parameters(), max_lr, weight_decay=weight_decay)

In [None]:
start = time.time()
train(model, train_loader, valid_loader, num_epochs, criterion, optimizer, device)
end = time.time()
print('Training time: {} seconds'.format(int(end - start)))
test(model, test_loader, device)

[2023-05-31 08:10:23,781] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /opt/conda/lib/python3.10/contextlib.py
[2023-05-31 08:10:23,782] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /opt/conda/lib/python3.10/contextlib.py
[2023-05-31 08:10:23,783] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /opt/conda/lib/python3.10/contextlib.py
[2023-05-31 08:10:23,783] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /opt/conda/lib/python3.10/contextlib.py
[2023-05-31 08:10:23,783] torch._dynamo.eval_frame: [DEBUG] skipping enable_dynamic /opt/conda/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py
[2023-05-31 08:10:23,786] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo start tracing forward
[2023-05-31 08:10:23,787] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /tmp/ipykernel_1878944/3432332073.py:19
[2023-05-31 08:10:23,787] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST self []
[2023-05-31 08:10:23,787] torch._dynamo.symbolic_conv

Epoch [1/1], Loss: 0.7764
Training time: 20 seconds


[2023-05-31 08:10:44,897] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo start tracing forward
[2023-05-31 08:10:44,898] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /tmp/ipykernel_1878944/3432332073.py:19
[2023-05-31 08:10:44,899] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST self []
[2023-05-31 08:10:44,899] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_ATTR layer1 [NNModuleVariable()]
[2023-05-31 08:10:44,900] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST x [NNModuleVariable()]
[2023-05-31 08:10:44,900] torch._dynamo.symbolic_convert: [DEBUG] TRACE CALL_FUNCTION 1 [NNModuleVariable(), TensorVariable()]
[2023-05-31 08:10:44,906] torch._dynamo.symbolic_convert: [DEBUG] TRACE STORE_FAST out [TensorVariable()]
[2023-05-31 08:10:44,906] torch._dynamo.symbolic_convert: [DEBUG] TRACE starts_line /tmp/ipykernel_1878944/3432332073.py:20
[2023-05-31 08:10:44,906] torch._dynamo.symbolic_convert: [DEBUG] TRACE LOAD_FAST self []
[2023-05-31 08:10

In [None]:
activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA]
prof = profile(activities=activities, with_stack=True, record_shapes=True, on_trace_ready=torch.profiler.tensorboard_trace_handler('./log/cnn/'))
        
input_sample, _ = next(iter(train_loader))

prof.start()
model(input_sample.to(device))
prof.stop()

#prof.export_chrome_trace("./cnn_trace.json")
print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=10))