In [1]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
device

'cuda'

In [3]:
torch.__version__

'2.2.1+cu118'

In [4]:
import pytorch_lightning
from pytorchvideo.data import LabeledVideoDataset, UniformClipSampler, RandomClipSampler
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Lambda, RandomCrop, RandomHorizontalFlip
from transforms import (
    ApplyTransformToKey,
    UniformTemporalSubsample,
    Normalize,
    RandomShortSideScale,
)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import csv 
def read_csv_to_list_of_tuples(filename: str):
    data = []

    with open(filename, 'r') as file:
        reader = csv.reader(file)
        for row in reader:
            # Assuming the first column contains file paths and the second column contains labels
            filepath = row[0]
            label = row[1]
            data.append((filepath, {"label": int(label)}))

    return data

In [6]:
class DataModule(pytorch_lightning.LightningDataModule):
    def __init__(self, clip_duration=60, batch_size=8, num_workers=8) -> None:
        super().__init__()
        self.CLIP_DURATION = clip_duration
        self.BATCH_SIZE = batch_size
        self.NUM_WORKERS = num_workers
        self.transform = Compose(
            [
                ApplyTransformToKey(
                    key="video",
                    transform=Compose(
                        [
                            UniformTemporalSubsample(8),
                            Lambda(lambda x: x / 255.0),
                            Normalize((0.45, 0.45, 0.45),
                                      (0.225, 0.225, 0.225)),
                            RandomShortSideScale(min_size=256, max_size=320),
                            RandomCrop(244),
                            RandomHorizontalFlip(p=0.5),
                        ]
                    ),
                ),
            ]
        )

    def train_dataloader(self):
        train_dataset = LabeledVideoDataset(
            labeled_video_paths=read_csv_to_list_of_tuples("train.csv"),
            clip_sampler=UniformClipSampler(self.CLIP_DURATION),
            decode_audio=False,
            transform=self.transform
        )

        return DataLoader(dataset=train_dataset, batch_size=self.BATCH_SIZE)

    def val_dataloader(self):
        val_dataset = LabeledVideoDataset(
            labeled_video_paths=read_csv_to_list_of_tuples("val.csv"),
            clip_sampler=UniformClipSampler(self.CLIP_DURATION),
            decode_audio=False,
            transform=self.transform
        )

        return DataLoader(dataset=val_dataset, batch_size=self.BATCH_SIZE)

In [7]:
import timm
model_name = 'inception_v4'
model = timm.create_model(model_name, pretrained=False)

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [9]:
import pytorchvideo.models.resnet

def make_kinetics_resnet():
  return pytorchvideo.models.resnet.create_resnet(
      input_channel=3,
      model_depth=50,
      model_num_class=400,
      norm=nn.BatchNorm3d,
      activation=nn.ReLU,
  )

In [10]:
class VideoClassificationLightningModule(pytorch_lightning.LightningModule):
  def __init__(self):
    super().__init__()
    self.model = make_kinetics_resnet().to(device=device)

  def forward(self, x):
    return self.model(x)

  def training_step(self, batch, batch_idx):
    y_hat = self.model(batch["video"])
    loss = F.cross_entropy(y_hat, torch.Tensor(batch["label"]))
    self.log("train_loss", loss.item())

    return loss
  
  def validation_step(self, batch, batch_idx):
      y_hat = self.model(batch["video"])
      loss = F.cross_entropy(y_hat, torch.tensor(batch["label"]))
      self.log("val_loss", loss)
      return loss

  def configure_optimizers(self):
      return torch.optim.Adam(self.parameters(), lr=1e-1)

In [11]:
classification_module = VideoClassificationLightningModule()
data_module = DataModule()
trainer = pytorch_lightning.Trainer(accelerator="auto", devices="auto", strategy="auto", max_epochs=10)
trainer.fit(classification_module, data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
c:\Users\Andreas\anaconda3\envs\SSBD\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\Andreas\anaconda3\envs\SSBD\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00,  1.02it/s]

  loss = F.cross_entropy(y_hat, torch.tensor(batch["label"]))
c:\Users\Andreas\anaconda3\envs\SSBD\lib\site-packages\pytorch_lightning\utilities\data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 8. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


                                                                           

c:\Users\Andreas\anaconda3\envs\SSBD\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Epoch 0: |          | 4/? [05:59<00:00,  0.01it/s, v_num=10]

RuntimeError: [enforce fail at alloc_cpu.cpp:114] data. DefaultCPUAllocator: not enough memory: you tried to allocate 19906560000 bytes.