# PyTorch 3D ResNet

In [1]:
import torch
from tqdm import tqdm
from time import time
from sklearn.metrics import accuracy_score, r2_score, precision_score, recall_score, f1_score
import pandas as pd

from torch.utils.data import DataLoader, Subset, random_split

from models.ThreeDResNet import get_3dResNet, get_resnet_transformer
from colorVideoDataset import ColorVideoDataset



In [2]:
# Notebook-wide configuration shared by every cell below.
# Model produced by the project-local factory in models.ThreeDResNet.
MODEL = get_3dResNet()
# Dataset rooted at './colors' (presumably a directory of labelled videos — confirm against ColorVideoDataset).
DATASET = ColorVideoDataset('./colors')
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Using cache found in C:\Users\Admin/.cache\torch\hub\facebookresearch_pytorchvideo_main


Net(
  (blocks): ModuleList(
    (0): ResNetBasicStem(
      (conv): Conv3d(3, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
      (norm): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU()
      (pool): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=[0, 1, 1], dilation=1, ceil_mode=False)
    )
    (1): ResStage(
      (res_blocks): ModuleList(
        (0): ResBlock(
          (branch1_conv): Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(np.int64(1), np.int64(1), np.int64(1)), bias=False)
          (branch1_norm): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (branch2): BottleneckBlock(
            (conv_a): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
            (norm_a): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act_a): ReLU()
            (conv_b): Conv3d(64, 64, kern

## Train, Test, and Validation Sets

In [3]:
# Seed the split so the train/test/val partition is identical after every
# Restart & Run All. An unseeded random_split produces a different partition
# per run, which makes metrics from separate runs incomparable.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

# 80% of the samples go to training; the remaining 20% are held out.
num_train = int(0.8 * len(DATASET))
train_dataset, holdout_dataset = random_split(
    DATASET,
    [num_train, len(DATASET) - num_train],
    generator=split_generator,
)

# The held-out 20% is divided evenly into test and validation sets.
num_test = int(0.5 * len(holdout_dataset))
test_dataset, val_dataset = random_split(
    holdout_dataset,
    [num_test, len(holdout_dataset) - num_test],
    generator=split_generator,
)

In [4]:
def get_dataloader(dataset, subset_ratio : float | None = 0.1, batch_size : int = 2, shuffle : bool = True):
    """Wrap ``dataset`` in a DataLoader that applies the ResNet video transform per sample.

    Args:
        dataset: Indexable dataset whose items unpack as ``(video, label, extra)``;
            the third element is ignored.
        subset_ratio: Fraction of the dataset to keep, taken from the *first*
            ``int(len(dataset) * subset_ratio)`` indices (not a random sample).
            ``None`` uses the whole dataset.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles every epoch. Defaults to ``True``
            (the previous hard-coded behaviour); pass ``False`` for validation
            and test loaders when a stable order is wanted.

    Returns:
        A ``DataLoader`` yielding ``(videos, labels)`` where both are stacked tensors
        and ``labels`` has dtype ``torch.long``.

    Raises:
        ValueError: If ``subset_ratio`` is given but lies outside ``[0, 1]``.
    """
    if subset_ratio is not None and not 0.0 <= subset_ratio <= 1.0:
        # A ratio above 1 would otherwise only fail later, with an IndexError
        # in the middle of iteration.
        raise ValueError(f"subset_ratio must be in [0, 1] or None, got {subset_ratio}")

    transform = get_resnet_transformer()

    def collate_fn(batch):
        videos = []
        labels = []
        for video, label, _ in batch:
            # Swap the first two axes before the transform
            # (presumably (T, C, H, W) -> (C, T, H, W) — confirm against ColorVideoDataset).
            video = video.permute(1, 0, 2, 3)
            video = transform({"video": video})["video"]
            videos.append(video)
            # as_tensor accepts ints, numpy scalars and existing tensors without
            # the copy-construct warning that torch.tensor(tensor) emits.
            labels.append(torch.as_tensor(label, dtype=torch.long))

        return torch.stack(videos), torch.stack(labels)

    if subset_ratio is not None:
        num_samples = int(len(dataset) * subset_ratio)
        dataset = Subset(dataset, list(range(num_samples)))

    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)

In [5]:
def infer_loop(model, dataloader, criterion) -> tuple:
    """Evaluate ``model`` on every batch of ``dataloader`` without updating weights.

    The model is switched to eval mode and left in that mode on return.

    Args:
        model: Module mapping a batch of videos to per-class scores.
        dataloader: Iterable yielding ``(videos, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        ``(accuracy, avg_loss)`` where ``accuracy`` is computed over all samples
        and ``avg_loss`` is the mean of the per-batch losses.
    """
    running_loss = 0.0
    all_preds = []
    all_labels = []
    model.eval()
    # Evaluation needs no gradients; disabling autograd avoids building a graph
    # for every batch, which cuts memory use and speeds up the forward pass.
    with torch.no_grad():
        for videos, labels in dataloader:
            videos, labels = videos.to(DEVICE), labels.to(DEVICE)

            outputs = model(videos)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            all_preds.extend(outputs.argmax(dim=1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # sklearn's signature is (y_true, y_pred); accuracy is symmetric, but keeping
    # the documented order matches the other metric calls in this notebook.
    val_accuracy = accuracy_score(all_labels, all_preds)
    avg_loss = running_loss / len(dataloader)

    return (val_accuracy, avg_loss)

In [None]:
def training_loop(model, train_dataloader, val_dataloader : DataLoader | None = None, epochs=5, learning_rate=1e-4):
    """Train ``model`` with Adam and cross-entropy loss, collecting per-epoch metrics.

    Args:
        model: Module to train; it is moved to ``DEVICE``.
        train_dataloader: Iterable yielding ``(videos, labels)`` training batches.
        val_dataloader: Optional loader evaluated once after the final epoch.
            When ``None``, validation is skipped and the ``val_*`` lists stay empty.
        epochs: Number of passes over ``train_dataloader``.
        learning_rate: Learning rate passed to Adam.

    Returns:
        Dict of metric name -> list. ``time_per_batch`` has one entry per training
        batch, the training metrics have one entry per epoch, and ``val_accuracy`` /
        ``val_loss`` have at most one entry. The lists therefore differ in length.
    """
    results_dict = {
        'time_per_batch': [],
        'time_per_epoch': [],
        'train_accuracy': [],
        'train_loss': [],
        'val_accuracy': [],
        'val_loss': [],
        'precision': [],
        'recall': [],
        'f1': [],
        'r2': [],
    }
    model = model.to(DEVICE)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    epoch_pbar = tqdm(range(epochs), desc="Training Epochs")

    for _ in epoch_pbar:
        # Re-assert train mode every epoch so the loop stays correct even if
        # something switches the model to eval mode between epochs.
        model.train()
        running_loss = 0.0
        epoch_start = time()
        all_preds = []
        all_labels = []

        for videos, labels in train_dataloader:
            batch_start = time()
            videos, labels = videos.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(videos)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss.item() copies the value to the host, so the timestamp taken
            # after it includes the device work for this batch.
            running_loss += loss.item()
            results_dict['time_per_batch'].append(time() - batch_start)

            # Store predictions and labels for metrics
            all_preds.extend(outputs.argmax(dim=1).cpu().detach().numpy())
            all_labels.extend(labels.cpu().detach().numpy())

        epoch_time = time() - epoch_start
        results_dict['time_per_epoch'].append(epoch_time)

        # Calculate metrics
        results_dict['train_accuracy'].append(accuracy_score(all_labels, all_preds))
        results_dict['precision'].append(precision_score(all_labels, all_preds, average='weighted', zero_division=0))
        results_dict['recall'].append(recall_score(all_labels, all_preds, average='weighted', zero_division=0))
        results_dict['f1'].append(f1_score(all_labels, all_preds, average='weighted', zero_division=0))
        # NOTE(review): r2 treats class indices as numeric values, which is rarely
        # meaningful for classification; kept so the result keys stay unchanged.
        results_dict['r2'].append(r2_score(all_labels, all_preds))

        avg_loss = running_loss / len(train_dataloader)
        results_dict['train_loss'].append(avg_loss)
        epoch_pbar.set_postfix(avg_loss=f"{avg_loss:.4f}")

    # val_dataloader is optional; iterating None inside infer_loop would raise.
    if val_dataloader is not None:
        val_acc, val_loss = infer_loop(model, val_dataloader, criterion)
        results_dict['val_accuracy'].append(val_acc)
        results_dict['val_loss'].append(val_loss)

    return results_dict

In [7]:
# Smoke test: draw a single batch from the first 10% of DATASET and show the
# shapes of the stacked video and label tensors.
dataloader = get_dataloader(DATASET, batch_size=16, subset_ratio=0.1)
sample_videos, sample_labels = next(iter(dataloader))
print(sample_videos.shape, sample_labels.shape)
# Drop the references so the sample batch does not stay alive in the kernel.
del sample_videos, sample_labels

torch.Size([16, 3, 8, 256, 256]) torch.Size([16])


In [8]:
# One loader per split, each over the full split (no subsetting) in batches of 4.
# The loaders are created in the order train, val, test.
train_dataloader, val_dataloader, test_dataloader = (
    get_dataloader(split, subset_ratio=None, batch_size=4)
    for split in (train_dataset, val_dataset, test_dataset)
)

In [9]:
# Train MODEL for 100 epochs at a learning rate of 2e-4 and keep the metrics dict.
# NOTE(review): the saved output below contains "Epoch [n/100], Loss: ..." lines that
# the current training_loop does not print, and the run ended in a KeyboardInterrupt —
# the recorded output looks stale; re-run this cell on a fresh kernel to confirm.
results = training_loop(MODEL, train_dataloader, val_dataloader, epochs=100, learning_rate=2e-4)

Training Epochs:   1%|          | 1/100 [00:52<1:26:46, 52.59s/it]

Epoch [1/100], Loss: 2.0465


Training Epochs:   2%|▏         | 2/100 [01:40<1:21:47, 50.07s/it]

Epoch [2/100], Loss: 1.8105


Training Epochs:   3%|▎         | 3/100 [02:30<1:20:21, 49.71s/it]

Epoch [3/100], Loss: 1.6554


Training Epochs:   4%|▍         | 4/100 [03:15<1:17:04, 48.17s/it]

Epoch [4/100], Loss: 1.5096


Training Epochs:   5%|▌         | 5/100 [03:58<1:13:01, 46.12s/it]

Epoch [5/100], Loss: 1.3938


Training Epochs:   6%|▌         | 6/100 [04:41<1:10:39, 45.10s/it]

Epoch [6/100], Loss: 1.2755


Training Epochs:   7%|▋         | 7/100 [05:26<1:10:01, 45.18s/it]

Epoch [7/100], Loss: 1.1657


Training Epochs:   8%|▊         | 8/100 [06:12<1:09:27, 45.30s/it]

Epoch [8/100], Loss: 1.0716


Training Epochs:   9%|▉         | 9/100 [06:55<1:07:38, 44.60s/it]

Epoch [9/100], Loss: 1.0109


Training Epochs:  10%|█         | 10/100 [07:41<1:07:22, 44.91s/it]

Epoch [10/100], Loss: 0.9661


Training Epochs:  11%|█         | 11/100 [08:20<1:04:08, 43.24s/it]

Epoch [11/100], Loss: 0.9019


Training Epochs:  12%|█▏        | 12/100 [09:04<1:03:39, 43.41s/it]

Epoch [12/100], Loss: 0.8441


Training Epochs:  13%|█▎        | 13/100 [09:54<1:05:56, 45.48s/it]

Epoch [13/100], Loss: 0.8164


Training Epochs:  14%|█▍        | 14/100 [10:41<1:05:52, 45.96s/it]

Epoch [14/100], Loss: 0.7522


Training Epochs:  15%|█▌        | 15/100 [11:21<1:02:35, 44.18s/it]

Epoch [15/100], Loss: 0.7469


Training Epochs:  16%|█▌        | 16/100 [12:01<1:00:02, 42.89s/it]

Epoch [16/100], Loss: 0.7062


Training Epochs:  17%|█▋        | 17/100 [12:46<1:00:09, 43.48s/it]

Epoch [17/100], Loss: 0.6735


Training Epochs:  18%|█▊        | 18/100 [13:35<1:01:43, 45.16s/it]

Epoch [18/100], Loss: 0.6469


Training Epochs:  19%|█▉        | 19/100 [14:23<1:02:05, 45.99s/it]

Epoch [19/100], Loss: 0.6354


Training Epochs:  20%|██        | 20/100 [15:03<58:45, 44.07s/it]  

Epoch [20/100], Loss: 0.5593


Training Epochs:  21%|██        | 21/100 [15:49<58:53, 44.72s/it]

Epoch [21/100], Loss: 0.5725


Training Epochs:  22%|██▏       | 22/100 [16:34<58:07, 44.71s/it]

Epoch [22/100], Loss: 0.5674


Training Epochs:  23%|██▎       | 23/100 [17:16<56:35, 44.10s/it]

Epoch [23/100], Loss: 0.5271


Training Epochs:  24%|██▍       | 24/100 [18:03<56:46, 44.82s/it]

Epoch [24/100], Loss: 0.5189


Training Epochs:  25%|██▌       | 25/100 [18:51<57:26, 45.95s/it]

Epoch [25/100], Loss: 0.4964


Training Epochs:  26%|██▌       | 26/100 [19:38<56:45, 46.02s/it]

Epoch [26/100], Loss: 0.5085


Training Epochs:  27%|██▋       | 27/100 [20:27<57:17, 47.09s/it]

Epoch [27/100], Loss: 0.4842


Training Epochs:  28%|██▊       | 28/100 [21:12<55:49, 46.52s/it]

Epoch [28/100], Loss: 0.4478


Training Epochs:  29%|██▉       | 29/100 [21:52<52:35, 44.44s/it]

Epoch [29/100], Loss: 0.4656


Training Epochs:  30%|███       | 30/100 [22:31<49:56, 42.80s/it]

Epoch [30/100], Loss: 0.4339


Training Epochs:  31%|███       | 31/100 [23:20<51:26, 44.73s/it]

Epoch [31/100], Loss: 0.4118


Training Epochs:  32%|███▏      | 32/100 [24:06<51:00, 45.01s/it]

Epoch [32/100], Loss: 0.4193


Training Epochs:  33%|███▎      | 33/100 [24:47<49:07, 44.00s/it]

Epoch [33/100], Loss: 0.4252


Training Epochs:  34%|███▍      | 34/100 [25:30<47:49, 43.47s/it]

Epoch [34/100], Loss: 0.4070


Training Epochs:  35%|███▌      | 35/100 [26:10<46:00, 42.46s/it]

Epoch [35/100], Loss: 0.3741


Training Epochs:  36%|███▌      | 36/100 [26:53<45:34, 42.73s/it]

Epoch [36/100], Loss: 0.3951


Training Epochs:  37%|███▋      | 37/100 [27:42<46:53, 44.66s/it]

Epoch [37/100], Loss: 0.3758


Training Epochs:  38%|███▊      | 38/100 [28:27<46:04, 44.60s/it]

Epoch [38/100], Loss: 0.3449


Training Epochs:  39%|███▉      | 39/100 [29:09<44:32, 43.81s/it]

Epoch [39/100], Loss: 0.3576


Training Epochs:  40%|████      | 40/100 [29:52<43:45, 43.76s/it]

Epoch [40/100], Loss: 0.3088


Training Epochs:  40%|████      | 40/100 [30:23<45:35, 45.60s/it]


KeyboardInterrupt: 

In [None]:
# Display the metrics dictionary returned by training_loop.
# NOTE(review): the training cell above was interrupted before it could assign
# `results`, so the value shown here presumably comes from an earlier run — verify
# with Restart & Run All. The full dump is also very long; consider showing a summary.
results

{'time_per_batch': [0.26043272018432617,
  0.04736924171447754,
  0.051451921463012695,
  0.0431818962097168,
  0.04414844512939453,
  0.05073857307434082,
  0.04169797897338867,
  0.03714585304260254,
  0.06663298606872559,
  0.0694270133972168,
  0.0866551399230957,
  0.0837092399597168,
  0.08386969566345215,
  0.08619928359985352,
  0.07990288734436035,
  0.07819128036499023,
  0.0991065502166748,
  0.12497091293334961,
  0.09679746627807617,
  0.2366929054260254,
  0.051824331283569336,
  0.05388641357421875,
  0.03834795951843262,
  0.08256721496582031,
  0.07532191276550293,
  0.09441947937011719,
  0.08146357536315918,
  0.07831835746765137,
  0.08380961418151855,
  0.06888031959533691,
  0.08284783363342285,
  0.07780265808105469,
  0.08165574073791504,
  0.09861326217651367,
  0.1150963306427002,
  0.09218430519104004,
  0.08516550064086914,
  0.08716821670532227,
  0.08283019065856934,
  0.1100468635559082,
  0.0975348949432373,
  0.08039689064025879,
  0.16640973091125488,


In [None]:
# The metric lists have different lengths; building the frame with one metric per
# row pads the shorter ones with NaN, and the transpose then yields one column per metric.
results_df = pd.DataFrame.from_dict(results, orient="index").transpose()
results_df.to_csv(path_or_buf="3D_ResNet_Results.csv")