# Setup

In [None]:
!pip install torchinfo

In [None]:

import torch
from torch import nn
from torchvision import datasets, transforms
import random
import torchvision
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
import tqdm
import os
from pathlib import Path
import matplotlib.pyplot as plt
import time
import torchinfo
from torchinfo import summary
from tqdm import tqdm
from torchvision import models
from google.colab import drive
from torch.optim import lr_scheduler


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

Set `image_path` below to the dataset folder; it must contain a `train` and a `test` sub-folder.

In [None]:
# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

# "Path" is a placeholder for the dataset root; the train/test splits are
# expected as its direct sub-folders.
image_path = Path("Path")
train_path, test_path = image_path / "train", image_path / "test"

Transformations setting

In [None]:
def _fixed_rotation_transform(angle):
    """Build a pipeline: resize to 224x224, rotate by exactly `angle` degrees, convert to tensor.

    The rotation range passed to RandomRotation is (angle, angle), so the
    sampled angle is always `angle`.
    """
    return transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.RandomRotation((angle, angle)),
        transforms.ToTensor()
    ])


# Training pipeline: random augmentation followed by tensor conversion.
data_transform_1 = transforms.Compose([
    transforms.RandomRotation((0,360)),                                        # any angle
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),           # crop 80-100% of the area
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=.2,contrast=.1, saturation=.2, hue=.1),
    transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 1.0)),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor()
])

# Test pipeline without rotation: resize and tensor conversion only.
data_transform_2 = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor()
])

# Test pipelines with a fixed rotation of 90, 180 and 270 degrees.
data_transform_3 = _fixed_rotation_transform(90)
data_transform_4 = _fixed_rotation_transform(180)
data_transform_5 = _fixed_rotation_transform(270)

# The training set reads from train_path with the augmenting pipeline.
train_data = datasets.ImageFolder(root=train_path,
                                  transform=data_transform_1,
                                  target_transform=None)

# Four views of the same test folder, one per test pipeline
# (unrotated, 90, 180 and 270 degrees).
test_data_1, test_data_2, test_data_3, test_data_4 = (
    datasets.ImageFolder(root=test_path, transform=test_transform)
    for test_transform in (data_transform_2,
                           data_transform_3,
                           data_transform_4,
                           data_transform_5)
)

Dataloader setting

In [None]:
BATCH_SIZE = 32

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(dataset=train_data,
                              batch_size=BATCH_SIZE,
                              num_workers=2,
                              shuffle=True)

# One loader per test view. Evaluation only aggregates metrics, so the sample
# order is irrelevant; shuffle=False keeps the batch composition (and therefore
# the per-batch averaged accuracy) identical from run to run.
test_dataloader_1 = DataLoader(dataset=test_data_1,
                               batch_size=BATCH_SIZE,
                               num_workers=2,
                               shuffle=False)

test_dataloader_2 = DataLoader(dataset=test_data_2,
                               batch_size=BATCH_SIZE,
                               num_workers=2,
                               shuffle=False)

test_dataloader_3 = DataLoader(dataset=test_data_3,
                               batch_size=BATCH_SIZE,
                               num_workers=2,
                               shuffle=False)

test_dataloader_4 = DataLoader(dataset=test_data_4,
                               batch_size=BATCH_SIZE,
                               num_workers=2,
                               shuffle=False)

# The training cell iterates over `test_dataloader_list` during evaluation;
# it was never defined, which raised a NameError on a fresh kernel.
test_dataloader_list = [
    test_dataloader_1,
    test_dataloader_2,
    test_dataloader_3,
    test_dataloader_4,
]

Here you can load a trained model

In [None]:
# "Path" is a placeholder for the file holding a saved state dict.
LOAD_PATH = Path("Path")

# Rebuild the same architecture the checkpoint was saved from: the NVIDIA
# ResNet-50 with its final layer replaced by a 30-output linear head.
model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_resnet50', pretrained=True)
model.fc = nn.Linear(in_features=2048, out_features=30, bias=True)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only runtime.
model.load_state_dict(torch.load(LOAD_PATH, map_location=device))
model.to(device)

Use this cell instead if you don't have a trained model yet.

In [None]:
# Fetch the pretrained NVIDIA ResNet-50 from torch.hub.
model = torch.hub.load(
    'NVIDIA/DeepLearningExamples:torchhub',
    'nvidia_resnet50',
    pretrained=True,
)
# Swap the final fully connected layer for a fresh 2048 -> 30 head.
model.fc = nn.Linear(2048, 30, bias=True)
model.to(device)

Here is a summary of the model's structure.

In [None]:
# Layer-by-layer overview for a single 3-channel 224x224 input.
summary(model, input_size=(1, 3, 224, 224))

# Training

Choose the loss function, the optimizer, and the learning-rate scheduler.

In [None]:
# Cross-entropy loss on the raw model outputs.
loss_fn = nn.CrossEntropyLoss()

# AdamW over all model parameters.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

# Multiply the learning rate by 0.794 after every 5 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.794)

train_loss = 0
train_acc = 0
model.train()

In [None]:
s_num = 0      # suffix of the next checkpoint file; incremented after every save
epochs = 50

# Per-epoch history.
l1 = []  # train loss
l2 = []  # train accuracy
l3 = []  # test loss, averaged over all test dataloaders
l4 = []  # test accuracy, averaged over all test dataloaders

# The checkpoint directory does not change between epochs, so create it once
# before training starts instead of on every iteration.
MODEL_PATH = Path("Path")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

for epoch in tqdm(range(epochs)):

    # Re-created every epoch: after the loop these hold the last epoch only.
    train_loss_list = []
    train_acc_list = []
    test_loss_list = []
    test_acc_list = []

    # ---- Train -------------------------------------------------------------
    model.train()

    train_loss, train_acc = 0, 0

    for X, y in train_dataloader:

        # Send data to the target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss
        loss = loss_fn(y_pred, y)
        # Accumulate a plain Python float. Adding the tensor itself keeps it
        # attached to the autograd graph and leaves tensors (not numbers) in
        # the history lists and in the final printout.
        train_loss += loss.item()

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # 6. Accuracy of this batch (argmax over logits; softmax would not
        #    change which class is the largest)
        y_pred_class = y_pred.argmax(dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    # Average of the per-batch values
    train_loss = train_loss / len(train_dataloader)
    train_acc = train_acc / len(train_dataloader)
    train_loss_list.append(train_loss)
    train_acc_list.append(train_acc)

    # ---- Test --------------------------------------------------------------
    model.eval()

    with torch.inference_mode():
        # test_dataloader_list: the test dataloaders to evaluate; each one
        # contributes one loss value and one accuracy value per epoch.
        for test_dataloader in test_dataloader_list:

            test_loss, test_acc = 0, 0

            for X, y in test_dataloader:

                # Send data to the target device
                X, y = X.to(device), y.to(device)

                # 1. Forward pass
                test_pred_logits = model(X)

                # 2. Calculate and accumulate loss
                loss = loss_fn(test_pred_logits, y)
                test_loss += loss.item()

                # 3. Calculate and accumulate accuracy
                test_pred_labels = test_pred_logits.argmax(dim=1)
                test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))

            test_loss = test_loss / len(test_dataloader)
            test_acc = test_acc / len(test_dataloader)

            test_loss_list.append(test_loss)
            test_acc_list.append(test_acc)

    # ---- Record this epoch -------------------------------------------------
    l1.append(train_loss_list[0])
    l2.append(train_acc_list[0])
    l3.append(sum(test_loss_list)/len(test_loss_list))
    l4.append(sum(test_acc_list)/len(test_acc_list))

    scheduler.step()

    # ---- Checkpoint (one file per epoch) -----------------------------------
    MODEL_NAME = "Name" + str(s_num)
    MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

    print(f"Saving model to: {MODEL_SAVE_PATH}")
    torch.save(obj=model.state_dict(),
               f=MODEL_SAVE_PATH)
    s_num += 1

# Metrics of the final epoch (the full history is in l1..l4).
print("Train Loss", train_loss_list)
print("Train Acc", train_acc_list)
print("Test Loss", sum(test_loss_list)/len(test_loss_list))
print("Test Acc", sum(test_acc_list)/len(test_acc_list))

# Testing:

Build a confusion matrix from the model's predictions on the test data.

In [None]:
# 30 x 30 grid of zero counters; each row is its own list object.
M = [[0 for _ in range(30)] for _ in range(30)]

In [None]:
# Dataset evaluated by the confusion-matrix cell below. test_data_1 is the
# test folder read through data_transform_2 (resize + ToTensor, no rotation).
test_data = test_data_1

In [None]:
model.eval()

# Class names come from the training set.
# NOTE(review): assumes the test folder has the same class sub-folders, so the
# label indices of test_data match class_names - confirm.
class_names = train_data.classes
# Number of correctly classified samples per class.
pred_dict = {element: 0 for element in class_names}

# NOTE: M is created in an earlier cell; re-run that cell before re-running
# this one, otherwise the counts accumulate across runs.

# Pure inference: disable autograd bookkeeping for the forward passes.
with torch.inference_mode():
    for i in tqdm(range(len(test_data))):
        # Index the dataset once per sample; every test_data[i] lookup runs
        # the dataset's loading and transform again.
        image, label = test_data[i]

        # Add a batch dimension of size 1 and move the image to the device.
        sample = torch.unsqueeze(image, dim=0).to(device)
        pred_logit = model(sample)

        # Softmax is monotonic, so the largest logit is also the most
        # probable class; no need to materialise the probabilities.
        pred_label = pred_logit.argmax(dim=1).item()

        # Rows are the true label, columns the predicted label.
        M[label][pred_label] += 1

        if pred_label == label:
            pred_dict[class_names[label]] += 1

print(pred_dict)

# Total number of correct predictions over all classes.
total_sum = sum(pred_dict.values())

print(total_sum)

In [None]:
# Print the confusion matrix one row per line
# (row index = true label, column index = predicted label).
for row in M:
    print(row)