<a href="https://colab.research.google.com/github/KuiMian/ForTest/blob/master/DualStream.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Pinned to the versions this notebook was last executed with, so a fresh
# runtime reproduces the same environment. `%pip` targets the running kernel.
%pip install -q torchmetrics==1.8.2 pytorch_lightning==2.5.5

Collecting torchmetrics
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting pytorch_lightning
  Downloading pytorch_lightning-2.5.5-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytorch_lightning-2.5.5-py3-none-any.whl (832 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m832.4/832.4 kB[0m [31m71.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Installing collected packages: lightning-utilities, torchmetrics, pytorch_lightning
Successfully installed lightning-utilities-0.15.2 pytorch_lightning-2.5.5 torchmetrics-1.8.2


In [2]:
# NOTE(review): assumes Google Drive is already mounted at ./drive — no mount
# cell is visible in this notebook; confirm before Restart & Run All.
# -o overwrites without prompting so re-running this cell does not block on
# unzip's interactive "replace?" question.
!unzip -q -o drive/MyDrive/ucf101_noleakage.zip -d .

In [7]:
from glob import glob
import os
import pandas as pd
from PIL import Image

import torch
from torchvision import transforms as T
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch import nn
from torchvision.models import resnet18
from torchvision.models.video import r3d_18
from torchmetrics import Accuracy
from pytorch_lightning import LightningModule


class FrameVideoDualDataset(torch.utils.data.Dataset):
    """Per-video dataset of pre-extracted RGB frames and optical-flow images.

    Directory layout read under ``root_dir`` (as built by this class):

        videos/<split>/<class>/<name>.avi
        frames/<split>/<class>/<name>/frame_<i>.jpg           i = 1..n_sampled_frames
        flows_png/<split>/<class>/<name>/flow_<i>_<i+1>.png   i = 1..n_sampled_frames-1
        metadata/<split>.csv      (columns used: ``video_name``, ``label``)

    Args:
        root_dir: Dataset root directory.
        split: Sub-directory / CSV stem to load (e.g. ``'train'``).
        transform: Callable applied to every PIL image; when falsy,
            ``T.ToTensor()`` is used instead.
        stack_frames: If True, each stream is stacked into one tensor and the
            first two axes are swapped; otherwise lists of per-frame tensors
            are returned.
        n_sampled_frames: Number of RGB frames stored per video.

    Each item is ``(video_frames, flow_frames, label)``.
    """

    def __init__(self, root_dir = 'ucf10', split = 'train', transform = None, stack_frames = True,
                 n_sampled_frames = 10):
        if n_sampled_frames < 1:
            raise ValueError(f"n_sampled_frames must be >= 1, got {n_sampled_frames}")

        self.root_dir = root_dir
        self.video_paths = sorted(glob(os.path.join(root_dir, 'videos', split, '*', '*.avi')))
        self.df = pd.read_csv(os.path.join(root_dir, 'metadata', f'{split}.csv'))
        self.split = split
        self.transform = transform
        self.stack_frames = stack_frames
        self.n_sampled_frames = n_sampled_frames

    def __len__(self):
        return len(self.video_paths)

    def _get_meta(self, attr, value):
        """Return the metadata rows whose column ``attr`` equals ``value``."""
        return self.df.loc[self.df[attr] == value]

    def _modality_dirs(self, video_path):
        """Return ``(frames_dir, flows_dir)`` for a video path.

        The path relative to ``<root_dir>/videos`` is re-rooted under
        ``frames`` and ``flows_png``. Only that leading component changes, so
        a root, class or video name that happens to contain "videos" or
        "frames" is left intact.
        """
        stem = os.path.splitext(video_path)[0]
        relative = os.path.relpath(stem, os.path.join(self.root_dir, 'videos'))
        return (
            os.path.join(self.root_dir, 'frames', relative),
            os.path.join(self.root_dir, 'flows_png', relative),
        )

    @staticmethod
    def _load_rgb(image_file):
        """Open an image file and return an RGB copy, closing the file handle."""
        with Image.open(image_file) as image:
            return image.convert("RGB")

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        video_meta = self._get_meta('video_name', video_name)
        if len(video_meta) != 1:
            # Same exception type `.item()` would raise, but with a usable message.
            raise ValueError(
                f"expected exactly one metadata row for video_name={video_name!r}, "
                f"found {len(video_meta)}"
            )
        label = video_meta['label'].item()

        video_frames_dir, optic_flow_dir = self._modality_dirs(video_path)
        video_frames = self.load_video_frames(video_frames_dir)
        flow_frames = self.load_optic_flow_frames(optic_flow_dir)

        # There is one flow image fewer than RGB frames; prepend an all-black
        # image so both streams have the same number of time steps.
        placeholder_size = flow_frames[0].size if flow_frames else video_frames[0].size
        flow_frames = [Image.new("RGB", placeholder_size, (0, 0, 0))] + flow_frames

        # Build the fallback converter once per item rather than once per frame.
        to_tensor = self.transform if self.transform else T.ToTensor()
        video_frames = [to_tensor(frame) for frame in video_frames]
        flow_frames = [to_tensor(frame) for frame in flow_frames]

        if self.stack_frames:
            # Stack along a new leading (time) axis, then swap it with the
            # next axis — (T, C, H, W) -> (C, T, H, W) assuming each
            # transformed frame is (C, H, W).
            video_frames = torch.stack(video_frames).permute(1, 0, 2, 3)
            flow_frames = torch.stack(flow_frames).permute(1, 0, 2, 3)

        return video_frames, flow_frames, label

    def load_video_frames(self, frames_dir):
        """Load ``frame_1.jpg`` .. ``frame_<n_sampled_frames>.jpg`` as RGB images."""
        return [
            self._load_rgb(os.path.join(frames_dir, f"frame_{i}.jpg"))
            for i in range(1, self.n_sampled_frames + 1)
        ]

    def load_optic_flow_frames(self, frames_dir):
        """Load ``flow_<i>_<i+1>.png`` for consecutive frame pairs as RGB images."""
        return [
            self._load_rgb(os.path.join(frames_dir, f"flow_{i}_{i+1}.png"))
            for i in range(1, self.n_sampled_frames)
        ]

In [8]:
class DualStreamModel(LightningModule):
    """
    Dual-stream model using video frames and optic flow frames as input.

    Each stream is a ResNet-18 whose final ``fc`` layer is replaced by
    ``nn.Identity``. Per-frame features are averaged over the time axis, the
    two streams are concatenated, and one linear layer produces class logits.

    Args:
        num_classes: Number of output classes.
        pretrained: If True, both backbones load the ``IMAGENET1K_V1`` weights.
    """
    def __init__(self, num_classes=10, pretrained=True):
        super().__init__()
        self.num_classes = num_classes

        self.spatial_cnn = resnet18(weights="IMAGENET1K_V1" if pretrained else None)
        self.spatial_cnn.fc = nn.Identity()

        self.temporal_cnn = resnet18(weights="IMAGENET1K_V1" if pretrained else None)
        self.temporal_cnn.fc = nn.Identity()

        # Two concatenated 512-wide stream features feed the classifier.
        self.classifier = nn.Linear(512*2, num_classes)

        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task="multiclass", num_classes=num_classes)

    @staticmethod
    def _encode_clip(backbone, clip):
        """Run a 2D backbone on every frame of ``clip`` and average over time.

        ``clip`` is unpacked as ``(B, C, T, H, W)``. The shape is read from the
        clip itself, so the two streams may differ in frame count or resolution.
        """
        batch, channels, n_frames, height, width = clip.shape
        frames = clip.permute(0, 2, 1, 3, 4).reshape(batch * n_frames, channels, height, width)
        return backbone(frames).view(batch, n_frames, -1).mean(dim=1)

    def forward(self, video_frames, flow_frames):
        """Return class logits for a batch of RGB clips and flow clips."""
        spatial_feat = self._encode_clip(self.spatial_cnn, video_frames)
        temporal_feat = self._encode_clip(self.temporal_cnn, flow_frames)

        fused = torch.cat([spatial_feat, temporal_feat], dim=1)
        return self.classifier(fused)

    def _shared_step(self, batch, stage):
        """Compute loss and accuracy for one batch and log them as ``<stage>_*``."""
        video_frames, flow_frames, labels = batch
        logits = self(video_frames, flow_frames)
        loss = self.criterion(logits, labels)
        # The metric takes the arg-max over the class axis, so raw logits give
        # the same result as softmax probabilities.
        acc = self.accuracy(logits, labels)
        self.log(f"{stage}_loss", loss, prog_bar=True)
        self.log(f"{stage}_acc", acc, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=3e-4, weight_decay=1e-4)

In [9]:
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers import TensorBoardLogger

# Tunable run configuration.
SEED = 42
BATCH_SIZE = 8
NUM_WORKERS = 4

# Seed Python, NumPy and torch (and DataLoader workers) so that weight
# initialisation and shuffling are repeatable across runs.
seed_everything(SEED, workers=True)

logger = TensorBoardLogger("tb_logs", name="DualStream")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 30

root_dir = 'ucf101_noleakage'

transform = T.Compose([T.Resize((64, 64)),T.ToTensor()])


def _make_dataset(split):
    """Build the stacked dual-stream dataset for one split."""
    return FrameVideoDualDataset(
        root_dir=root_dir, split=split, transform=transform, stack_frames=True
    )


def _make_loader(dataset, shuffle):
    """Wrap a dataset in a DataLoader using the shared batch/worker settings."""
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=shuffle, num_workers=NUM_WORKERS)


framevideostack_dataset_train = _make_dataset("train")
framevideostack_dataset_val = _make_dataset("val")
framevideostack_dataset_test = _make_dataset("test")

# Only the training split is shuffled.
train_loader = _make_loader(framevideostack_dataset_train, shuffle=True)
val_loader = _make_loader(framevideostack_dataset_val, shuffle=False)
test_loader = _make_loader(framevideostack_dataset_test, shuffle=False)

model = DualStreamModel()
trainer = Trainer(max_epochs=epochs, accelerator=device.type, logger=logger)
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
# Evaluates the weights as they are after the final epoch.
trainer.test(model, dataloaders=test_loader)

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | spatial_cnn  | ResNet             | 11.2 M | train
1 | temporal_cnn | ResNet             | 11.2 M | train
2 | classifier   | Linear             | 10.2 K | train
3 | criterion    | CrossEntropyLoss   | 0      | train
4 | accuracy     | MulticlassAccuracy | 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=30` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 1.1391704082489014, 'test_acc': 0.7333333492279053}]