# End-to-end training in a GPU environment with FFCV

In [1]:
from ffcv.loader import Loader, OrderOption
from ffcv.fields.decoders import NDArrayDecoder, FloatDecoder
from ffcv.transforms import ToTensor, ToDevice
import time
import os
import numpy as np
import torch
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
# Target device: the model is moved here and the FFCV pipelines' ToDevice
# stages place every batch here.
device = 'cuda'



In [2]:
class Model(nn.Module):
    """Small two-stage CNN that regresses a single scalar per image.

    Architecture: (3x3 conv -> ReLU -> 2x2 max-pool) twice, followed by a
    200-unit hidden linear layer and a 1-unit output layer.

    Args:
        image_size: Spatial size of the input images, either a single int
            for square images or a ``(height, width)`` pair. Defaults to
            500, which yields the original ``32 * 125 * 125`` flattened
            feature size.

    Raises:
        ValueError: If the image is too small to survive both pooling
            stages (either side shorter than 4 pixels).
    """

    def __init__(self, image_size=500):
        super().__init__()
        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # The padded 3x3 convolutions keep the spatial size; each 2x2/stride-2
        # pool halves it (rounding down), hence the two floor divisions.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"image_size {image_size!r} is too small for two 2x2 pooling stages"
            )
        self.fc1 = nn.Linear(32 * pooled_height * pooled_width, 200)
        self.fc2 = nn.Linear(200, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return predictions of shape ``(batch, 1)`` for a batch of images.

        ``x`` is expected as ``(batch, 3, height, width)``; a single unbatched
        ``(3, height, width)`` image is treated as a batch of one.
        """
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        # Flatten everything except the batch axis. Unlike view(-1, N), a
        # wrongly sized input now fails loudly in fc1 instead of silently
        # being reinterpreted as a larger batch.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the network and move its parameters to the target device.
model = Model().to(device)

# Adam over all model parameters, trained against a mean-squared-error loss.
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
criterion = nn.MSELoss()

In [3]:
# Image field: decode, convert to a tensor, move to the target device, then
# apply the torchvision augmentations on the already-transferred batch.
image_pipeline = [
    NDArrayDecoder(),
    ToTensor(),
    ToDevice(torch.device(device), non_blocking=True),
    transforms.RandomVerticalFlip(0.5),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(90),
    transforms.RandomCrop((500, 500)),
]

# Label field: decode the float target and move it to the same device.
label_pipeline = [
    FloatDecoder(),
    ToTensor(),
    ToDevice(torch.device(device), non_blocking=True),
]

# FFCV loader over the training .beton file, wired to the two pipelines above.
loader_preprocess = Loader(
    "data/train_data.beton",
    batch_size=48,
    num_workers=8,
    order=OrderOption.RANDOM,  # Truly random shuffle
    pipelines={
        "image": image_pipeline,
        "label": label_pipeline,
    },
)

In [4]:
# Train for a fixed number of epochs and report wall-clock time per epoch.
num_epochs = 10

# CUDA work is queued asynchronously, so the host clock is only meaningful
# once the device has finished everything submitted so far. Drain the queue
# before each timestamp that closes a measurement.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start_time = time.perf_counter()  # monotonic; unaffected by system clock changes
for epoch in range(num_epochs):
    epoch_start = time.perf_counter()
    for images, labels in loader_preprocess:
        # Scale pixel values to [0, 1]; presumably the stored images are
        # 8-bit (0-255) -- confirm against how the .beton file was written.
        images = images.float() / 255.0

        outputs = model(images)
        # Match the model's (batch, 1) output shape for the MSE loss.
        labels = labels.view(-1, 1).float()
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Wait for the epoch's last kernels so they are charged to this epoch.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    epoch_end = time.perf_counter()
    epoch_time = epoch_end - epoch_start
    print(f"Epoch {epoch+1} done in {epoch_time} seconds.")
end_time = time.perf_counter()

total_time = end_time - start_time

print(f"Total time taken: {total_time} seconds")
print(f"Total time per epoch: {total_time/num_epochs} seconds")

Epoch 1 done in 20.68252992630005 seconds.
Epoch 2 done in 15.581626653671265 seconds.
Epoch 3 done in 14.77998161315918 seconds.
Epoch 4 done in 15.00122618675232 seconds.
Epoch 5 done in 15.144744634628296 seconds.
Epoch 6 done in 15.31307339668274 seconds.
Epoch 7 done in 15.636549949645996 seconds.
Epoch 8 done in 15.72204041481018 seconds.
Epoch 9 done in 15.008957147598267 seconds.
Epoch 10 done in 15.26915693283081 seconds.
Total time taken: 158.14104866981506 seconds
Total time per epoch: 15.814104866981506 seconds
