In [33]:
from torchvision.models import resnet34
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import torch



https://www.reddit.com/r/MachineLearning/comments/kvs1ex/d_here_are_17_ways_of_making_pytorch_training/
I tried some of the tricks from this post, timing a single epoch. One epoch takes ~17 seconds with CUDA on my GPU (RTX 3070).
Setting num_workers and pin_memory made performance worse with resnet34; this may be different with other models.
It could also be because our dataset is not that large; these tricks might help with larger datasets.
The same goes for setting `torch.backends.cudnn.benchmark = True`.
Increasing the batch_size to 128 sometimes reduced the epoch time from ~17 seconds to ~16 seconds, but not always.

In [34]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tunable settings for this cell, kept in one place instead of being repeated.
DATA_DIR = "./Colorectal Cancer"  # root folder handed to ImageFolder
BATCH_SIZE = 64
SPLIT_SEED = 0

dataset = ImageFolder(DATA_DIR, transform=ToTensor())

# 80% / 10% / 10% train / validation / test split.
# A dedicated, explicitly seeded generator keeps the partition identical across
# kernel restarts; without it the split is drawn from the (unseeded) global RNG
# and samples move between the train and test sets from run to run.
train_set, validation_set, test_set = torch.utils.data.random_split(
    dataset,
    [0.8, 0.1, 0.1],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training data needs to be reshuffled every epoch; evaluation does
# not depend on sample order, so those loaders iterate in a fixed order.
train_loader = DataLoader(train_set, shuffle=True, batch_size=BATCH_SIZE)
validation_loader = DataLoader(validation_set, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_set, shuffle=False, batch_size=BATCH_SIZE)
print("Data loaded")

Data loaded


In [35]:

# Seed the global RNG *before* the model is built: resnet34 is created without
# pre-trained weights here, so its parameters are randomly initialised and are
# only reproducible if the seed is set first.
torch.manual_seed(0)

# Let cuDNN auto-tune its convolution algorithms.
# NOTE(review): the timing notes in this notebook report no speed-up from this
# flag on this dataset -- confirm whether it should stay enabled.
torch.backends.cudnn.benchmark = True

# ResNet-34 with a 3-class output layer, moved to the selected device.
model = resnet34(num_classes=3).to(device)

# SGD with momentum and L2 weight decay over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.001)
loss = torch.nn.CrossEntropyLoss()

num_epoch = 10

Using Automatic Mixed Precision (AMP) improves performance significantly:
the epoch time drops from ~17 seconds to ~13 seconds.

In [36]:
# Mixed precision is only used when training on CUDA. With `enabled=False` both
# the gradient scaler and autocast act as pass-throughs, so the same loop also
# runs (in full precision) when `device` fell back to the CPU.
# NOTE(review): recent PyTorch releases deprecate the `torch.cuda.amp` spellings
# in favour of `torch.amp` -- switch once the minimum torch version is known.
use_amp = device.type == "cuda"
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

# Put the model in training mode explicitly so this cell behaves the same even
# if a previously executed cell switched it to eval().
model.train()

for epoch in range(num_epoch):

    # One pass over the training data, one mini-batch at a time.
    for features, labels in train_loader:
        features = features.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss computation under autocast.
        with torch.cuda.amp.autocast(enabled=use_amp):
            y_pred = model(features)
            l = loss(y_pred, labels)

        # Backpropagate the scaled loss, let the scaler drive the optimizer
        # step, then update the scale factor for the next iteration.
        scaler.scale(l).backward()
        scaler.step(optimizer)
        scaler.update()

        # .item() turns the 0-dim loss tensor into a plain Python float.
        print(f"minibatch loss: {l.item()}")
    print(f"Epoch {epoch}: last batch loss: {l.item()}")

minibatch loss: 1.0968856811523438
minibatch loss: 1.0917892456054688
minibatch loss: 1.07843017578125
minibatch loss: 1.1226272583007812
minibatch loss: 1.2240753173828125
minibatch loss: 1.1814818382263184
minibatch loss: 1.1292850971221924
minibatch loss: 1.0041580200195312
minibatch loss: 0.9169168472290039
minibatch loss: 0.9315519332885742
minibatch loss: 0.9014134407043457
minibatch loss: 0.8045858144760132
minibatch loss: 0.9114318490028381
minibatch loss: 0.884507417678833
minibatch loss: 0.6950580477714539
minibatch loss: 0.9178089499473572
minibatch loss: 0.8630834221839905
minibatch loss: 0.8506239652633667
minibatch loss: 0.756601095199585
minibatch loss: 0.8579224944114685
minibatch loss: 0.7405933737754822
minibatch loss: 0.8551340699195862
minibatch loss: 0.6445809006690979
minibatch loss: 0.4879162907600403
minibatch loss: 0.6637753248214722
minibatch loss: 1.525828242301941
minibatch loss: 1.0996928215026855
minibatch loss: 0.9228406548500061
minibatch loss: 0.4315136

In [37]:
# Optional: uncomment to release PyTorch's unused cached GPU memory after training.
# torch.cuda.empty_cache()