# Preprocessing in CPU environment with FFCV

In [1]:
from ffcv.loader import Loader, OrderOption
from ffcv.fields.decoders import NDArrayDecoder, FloatDecoder
from ffcv.transforms import ToTensor
import time
import os
import numpy as np
import torch
from torchvision import transforms



In [2]:
# Pipeline for the "image" field: FFCV decoder first, then tensor conversion,
# then the torchvision augmentations in the order they are listed.
# NOTE(review): these torchvision transforms run on whatever ToTensor() emits;
# if that is a whole batch, each random draw is shared across the batch -- confirm.
image_pipeline = [
    NDArrayDecoder(),
    ToTensor(),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=90),
    transforms.RandomCrop(size=(500, 500)),
]

# Pipeline for the "label" field: float decoder followed by tensor conversion.
label_pipeline = [FloatDecoder(), ToTensor()]

# Loader over the .beton file, randomly ordered, 48 samples per batch, 8 workers.
loader_preprocess = Loader(
    "data/train_data.beton",
    batch_size=48,
    num_workers=8,
    order=OrderOption.RANDOM,
    pipelines={
        "image": image_pipeline,
        "label": label_pipeline,
    },
)

In [3]:
# Throughput benchmark: iterate the whole loader `num_epochs` times and report
# the time per epoch, the total time, and the mean time per epoch. No model is
# trained; the only per-batch work is the float conversion / scaling below.
num_epochs = 10

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() is wall-clock time and can jump if the system
# clock is adjusted while the benchmark is running.
start_time = time.perf_counter()
for epoch in range(num_epochs):
    epoch_start = time.perf_counter()
    for images, labels in loader_preprocess:
        # Cast to float and divide by 255 so the scaling cost is included in the
        # measurement (assumes 8-bit pixel values -- TODO confirm). The result is
        # intentionally discarded; labels are loaded but not used.
        images = images.float() / 255.0

    epoch_end = time.perf_counter()
    epoch_time = epoch_end - epoch_start
    print(f"Epoch {epoch+1} done in {epoch_time} seconds.")
end_time = time.perf_counter()

total_time = end_time - start_time

print(f"Total time taken: {total_time} seconds")
# The value printed here is the mean over all epochs (total / num_epochs).
print(f"Total time per epoch: {total_time/num_epochs} seconds")

Epoch 1 done in 153.1041705608368 seconds.
Epoch 2 done in 146.23926663398743 seconds.
Epoch 3 done in 154.82725048065186 seconds.
Epoch 4 done in 157.18064951896667 seconds.
Epoch 5 done in 151.49009561538696 seconds.
Epoch 6 done in 159.79161310195923 seconds.
Epoch 7 done in 178.50044131278992 seconds.
Epoch 8 done in 139.15274596214294 seconds.
Epoch 9 done in 123.39749646186829 seconds.
Epoch 10 done in 127.07772946357727 seconds.
Total time taken: 1490.7638936042786 seconds
Total time per epoch: 149.07638936042787 seconds
