In [1]:
# Load environment variables
import os
from dotenv import load_dotenv
load_dotenv()
IMAGES_PATH = os.getenv('IMAGES_PATH')
ROOT_DATA_PATH = os.getenv('ROOT_DATA_PATH')
BATCH_SIZE= int(os.getenv('BATCH_SIZE'))
EPOCHS = int(os.getenv('EPOCHS'))
LR = float(os.getenv('LR'))
PATCH_SIZE=int(os.getenv('PATCH_SIZE'))
DROPOUT=float(os.getenv('DROPOUT'))
ATTENTION_DROPOUT=float(os.getenv('ATTENTION_DROPOUT'))
HIDDEN_DIM=int(os.getenv('HIDDEN_DIM'))
MLP_DIM=int(os.getenv('MLP_DIM'))
NUM_HEADS=int(os.getenv('NUM_HEADS'))
NUM_LAYERS=int(os.getenv('NUM_LAYERS'))


In [2]:
from preprocess.data_process import get_dataloaders

train_loader, val_loader, test_loader = get_dataloaders()

Train Set Size: 6000
Test Set Size: 500
Val Set Size: 200


In [3]:
import torch
from torchvision.models import VisionTransformer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VisionTransformer(image_size=80, patch_size=PATCH_SIZE, num_layers=NUM_LAYERS, 
                                       num_heads=NUM_HEADS, hidden_dim=HIDDEN_DIM, 
                                       mlp_dim=MLP_DIM, dropout=DROPOUT,
                                       attention_dropout=ATTENTION_DROPOUT,
                                       num_classes=2)
loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

In [4]:
from torchinfo import summary

summary(model, input_size=(64, 3, 80, 80), device='cuda')

Layer (type:depth-idx)                        Output Shape              Param #
VisionTransformer                             [64, 2]                   768
├─Conv2d: 1-1                                 [64, 768, 8, 8]           231,168
├─Encoder: 1-2                                [64, 65, 768]             49,920
│    └─Dropout: 2-1                           [64, 65, 768]             --
│    └─Sequential: 2-2                        [64, 65, 768]             --
│    │    └─EncoderBlock: 3-1                 [64, 65, 768]             7,087,872
│    │    └─EncoderBlock: 3-2                 [64, 65, 768]             7,087,872
│    │    └─EncoderBlock: 3-3                 [64, 65, 768]             7,087,872
│    │    └─EncoderBlock: 3-4                 [64, 65, 768]             7,087,872
│    │    └─EncoderBlock: 3-5                 [64, 65, 768]             7,087,872
│    │    └─EncoderBlock: 3-6                 [64, 65, 768]             7,087,872
│    └─LayerNorm: 2-3                      

In [5]:
input, label = next(iter(train_loader))
input

tensor([[[[0.2570, 0.2608, 0.2743,  ..., 0.2534, 0.2729, 0.2812],
          [0.2671, 0.2700, 0.2787,  ..., 0.2575, 0.2705, 0.2753],
          [0.2811, 0.2847, 0.2871,  ..., 0.2717, 0.2702, 0.2661],
          ...,
          [0.2559, 0.2574, 0.2608,  ..., 0.2618, 0.2625, 0.2610],
          [0.2523, 0.2559, 0.2639,  ..., 0.2640, 0.2601, 0.2580],
          [0.2501, 0.2543, 0.2643,  ..., 0.2661, 0.2607, 0.2588]],

         [[0.2814, 0.2837, 0.2935,  ..., 0.3048, 0.3189, 0.3235],
          [0.2902, 0.2916, 0.2974,  ..., 0.3067, 0.3153, 0.3171],
          [0.3020, 0.3034, 0.3043,  ..., 0.3139, 0.3103, 0.3053],
          ...,
          [0.2902, 0.2913, 0.2945,  ..., 0.2950, 0.2921, 0.2889],
          [0.2876, 0.2912, 0.2992,  ..., 0.2961, 0.2893, 0.2860],
          [0.2863, 0.2910, 0.3010,  ..., 0.2970, 0.2892, 0.2862]],

         [[0.2130, 0.2203, 0.2347,  ..., 0.2275, 0.2381, 0.2420],
          [0.2194, 0.2265, 0.2374,  ..., 0.2304, 0.2354, 0.2362],
          [0.2273, 0.2345, 0.2405,  ..., 0

In [6]:
input = input.to(device)
output = model(input)
output

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0

In [7]:
label

tensor([1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0.,
        1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 1., 0.,
        1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 0.,
        1., 1., 1., 1., 1., 0., 1., 1., 0., 0.], dtype=torch.float64)

In [9]:
label = label.to(device)

In [12]:
loss = loss_fn(output, label)
loss

ValueError: Target size (torch.Size([64])) must be the same as input size (torch.Size([64, 2]))

In [15]:
loss.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [9]:
from timeit import default_timer as timer
from tqdm import tqdm
from utilities import train_step, val_step
import pandas as pd
train_start_time = timer()

print(f"Training on Device: {device}")
model.to(device)

train_loss, train_acc = 0, 0
test_loss, test_acc = 0, 0

# Create two empty dataframes to store loss and accuracy values
train_metrics = pd.DataFrame(columns=["train_loss", "train_acc"])
val_metrics = pd.DataFrame(columns=["val_loss", "val_acc"])
test_metrics = pd.DataFrame(columns=["test_loss", "test_acc"])
print("-----------Training starting----------")
for epoch in tqdm(range(EPOCHS)):
    if epoch % 10 == 0:
        print(f"Epoch {epoch + 1}/{EPOCHS}")
    
    # Train the model
    train_loss, train_acc = train_step(model, loss_fn, optimizer, device, train_loader)
    train_metrics['train_loss'].append(train_loss)
    train_metrics['train_acc'].append(train_acc)

    val_loss, val_acc, min_val_loss = val_step(model, loss_fn, device, min_val_loss, val_loader)
    val_metrics['val_loss'].append(val_loss)
    val_metrics['val_acc'].append(val_acc)
print("-----------Training finished----------")

train_end_time = timer()

total_train_time = train_end_time - train_start_time

print(f"Training completed in {total_train_time:.2f} seconds")

Training on Device: cuda
-----------Training starting----------


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1/10


  0%|          | 0/10 [00:00<?, ?it/s]


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [None]:
from utilities import test_step

# Testing
test_start_time = timer()

print("-----------Testing starting----------")

test_step(model, loss_fn, device, test_loader)

print("-----------Testing finished----------")

test_end_time = timer()

total_test_time = test_end_time - test_start_time

print(f"Testing completed in {total_test_time:.2f} seconds")