### Prepare DataLoader for Video Sequence

### Import

In [532]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim 
import datasets 
import numpy as np 
import matplotlib.pyplot as plt

In [533]:
import torchvision 
from torchvision import datasets, transforms 
from torch.utils.data import DataLoader

In [534]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os

### Create a Custom Dataset Class

In [535]:
import re

class VideoSequenceDataset(Dataset):
    """Dataset of frame sequences stored as one sub-directory per class.

    Expected layout: ``root_dir/<class_name>/<frame files>``. Each class
    directory yields exactly one sample: the list of its frame files in
    natural (numeric-aware) order, paired with the index of that class in
    ``self.classes``.

    Args:
        root_dir: Directory that contains one sub-directory per class.
        transform: Optional callable applied to every PIL frame. It must
            return tensors of identical shape, because ``__getitem__``
            stacks the transformed frames with ``torch.stack``.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order, which would make
        # the class -> label mapping differ between runs and machines, so the
        # names are sorted. Stray files next to the class folders are skipped
        # because they cannot be listed as a directory of frames.
        self.classes = sorted(
            (
                entry
                for entry in os.listdir(root_dir)
                if os.path.isdir(os.path.join(root_dir, entry))
            ),
            key=self.natural_sort_key,
        )
        self.data = self.load_sequences()

    def load_sequences(self):
        """Return one ``(frame_paths, class_idx)`` pair per class directory."""
        data = []
        for class_idx, class_folder in enumerate(self.classes):
            class_path = os.path.join(self.root_dir, class_folder)

            # Sort numerically so that e.g. frame2 comes before frame10.
            frame_names = sorted(os.listdir(class_path), key=self.natural_sort_key)
            frames = [
                os.path.join(class_path, name)
                for name in frame_names
                # Nested directories are not frames and cannot be opened.
                if os.path.isfile(os.path.join(class_path, name))
            ]

            data.append((frames, class_idx))
        return data

    def natural_sort_key(self, string):
        """Split ``string`` into text and integer chunks for numeric sorting.

        ``re.split`` with a capturing group alternates text and digit chunks
        and always starts with a (possibly empty) text chunk, so keys of two
        names compare text with text and int with int.
        """
        return [int(text) if text.isdigit() else text for text in re.split(r'(\d+)', string)]

    def __len__(self):
        """Return the number of sequences (one per class directory)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sequence at ``idx``.

        Returns:
            ``(images, label)`` where ``images`` is the stack of all frames
            along a new leading (time) dimension and ``label`` is the integer
            class index.
        """
        image_paths, label = self.data[idx]

        images = []
        for img_path in image_paths:
            # The context manager closes the underlying file right away;
            # convert() returns an independent in-memory image, so a long
            # sequence does not accumulate open file handles.
            with Image.open(img_path) as img:
                frame = img.convert('RGB')
            if self.transform:
                frame = self.transform(frame)
            images.append(frame)

        images = torch.stack(images, dim=0)  # Stack into a tensor

        return images, label


### Define Transformations

In [536]:
# Per-channel ImageNet statistics used to normalize the RGB frames.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Every frame is resized to one common size before it becomes a tensor.
frame_size = (128, 128)

preprocessing_steps = [
    transforms.Resize(frame_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
transform = transforms.Compose(preprocessing_steps)

### Initialize Dataset and DataLoader

In [537]:
from torch.nn.utils.rnn import pad_sequence

# Collate function for padding
def collate_fn(batch):
    """Collate variable-length frame sequences into one zero-padded batch.

    Args:
        batch: Non-empty sequence of ``(frames, label)`` pairs. ``frames`` is
            a tensor whose first dimension is time; all remaining dimensions
            must match across the samples of the batch.

    Returns:
        ``(padded_sequences, labels)``: ``padded_sequences`` has shape
        ``(batch, max_len, *frame_shape)`` with zeros appended after the end
        of shorter sequences, and ``labels`` is a 1-D tensor of the labels.

    Raises:
        ValueError: If ``batch`` is empty.
    """
    if not batch:
        raise ValueError("collate_fn received an empty batch")

    sequences, labels = zip(*batch)

    # pad_sequence pads along the leading (time) dimension. The previous
    # F.pad call used a 6-value tuple, which on a 4-D (T, C, H, W) tensor
    # pads the channel dimension instead, so sequences of different length
    # could never be stacked.
    padded_sequences = pad_sequence(list(sequences), batch_first=True, padding_value=0.0)
    labels = torch.tensor(labels)

    return padded_sequences, labels

In [538]:
train_dir = 'C:/Users/araya/Desktop/frame/train'  # Your directory path

dataset = VideoSequenceDataset(root_dir=train_dir, transform=transform)

# Decode every sequence once for interactive inspection. The result is kept
# as a one-element outer list because the exploration cells index it as
# list_dataset[0][k]. NOTE: this holds all decoded frames in memory.
list_dataset = [[dataset[i][0] for i in range(len(dataset))]]

# The loader must exist for the cells that read `dataloader`; collate_fn
# combines sequences of different length into a single batch tensor.
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)

In [None]:
# Display the stacked frame tensor (first tuple element) of the sample at index 9.
dataset[9][0]

In [None]:
# Number of frames in the first decoded sequence (size of its leading dimension).
len(list_dataset[0][0])

In [None]:

# Pad every decoded sequence with zeros up to the longest one. The result is
# stored in a new tensor: items returned by the dataset are tuples and cannot
# be assigned to, which is why writing into dataset[j][0] failed.
padded_videos = pad_sequence(list_dataset[0], batch_first=True)
print(padded_videos.size())

In [None]:
# Number of frames in the second decoded sequence (size of its leading dimension).
len(list_dataset[0][1])

In [None]:
# Shape of the first sample's frame tensor, read through the loader's
# underlying dataset. Requires `dataloader` to have been created earlier.
dataloader.dataset[0][0].shape

In [None]:
# Size of all sequences padded to a common length. pad_sequence is documented
# to take a list of tensors, so the frames are collected into a list first.
# With the default batch_first=False the padded time dimension comes first.
pad_sequence([dataset[i][0] for i in range(len(dataset))]).size()

In [None]:
# Display the whole first sample as returned by the dataset: (frames, label).
# Requires `dataloader` to have been created earlier.
dataloader.dataset[0]

In [None]:
# Display the index built by load_sequences(): one (frame_paths, class_idx)
# pair per class directory.
dataset.data

In [None]:
# Display the frame tensor of the first sample.
# Requires `dataloader` to have been created earlier.
dataloader.dataset[0][0]

In [None]:
# Shape of the second sample's frame tensor.
# Requires `dataloader` to have been created earlier.
dataloader.dataset[1][0].shape

In [None]:
# Look up the first sample's frames once, then report the count both as a
# bare number and as a sentence.
first_video_frames = dataloader.dataset[0][0]
frame_count = len(first_video_frames)
print(frame_count)
print(f"This video has {frame_count} frames")

### Iterate Through DataLoader

In [28]:
# Define device (use GPU if available)
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [29]:
class CNNModel(nn.Module):
    """Small two-convolution image classifier.

    Args:
        in_channel: Number of channels of the input images.
        num_classes: Number of output logits.
        input_size: ``(height, width)`` of the input images. It determines
            the width of the final linear layer; the default ``(128, 128)``
            reproduces the previously hard-coded ``64 * 15376`` features.

    Raises:
        ValueError: If ``input_size`` is too small to survive both poolings.
    """

    def __init__(self, in_channel=3, num_classes=9, input_size=(128, 128)):
        super().__init__()
        height, width = input_size
        # Both convolutions use padding 1 and keep the spatial size. The 3x3
        # pooling has stride 1, so it does NOT halve the image: each of its
        # two applications only trims 2 pixels per dimension
        # (128 -> 126 -> 124, and 124 * 124 = 15376).
        out_height, out_width = height - 4, width - 4
        if out_height <= 0 or out_width <= 0:
            raise ValueError(
                f"input_size must be at least 5x5, got {tuple(input_size)}"
            )

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=32, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.pool = nn.MaxPool2d(kernel_size=(3,3), stride=(1,1))
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.fc1 = nn.Linear(64 * out_height * out_width, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, in_channel, height, width)`` whose
                spatial size equals the ``input_size`` given at construction.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x

In [30]:
# Build the classifier with its default arguments, then move its parameters
# to the selected device.
model = CNNModel()
model = model.to(device)

In [31]:
# spatial = model(dataloader.dataset[0][0])
# spatial = spatial.detach().numpy() 
# spatial.shape

In [None]:
# Plot channel 0 of the frame at index 50 of the first sample as a 2-D image.
# Indexing fails if that sequence has fewer than 51 frames.
# Requires `dataloader` to have been created earlier.
plt.imshow(dataloader.dataset[0][0][50][0])

### ----------------------------------------------------------------------------------------------------------------------------

In [33]:
# Loss function (CrossEntropyLoss for classification tasks)
# Training hyper-parameters.
num_epochs = 10
learning_rate = 0.001

# Cross-entropy is the classification loss; Adam updates all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
for epoch in range(num_epochs):
    print("Epoch", epoch)
    # Iterate the DataLoader itself so batching, shuffling and collate_fn are
    # applied. Looping over `dataloader.dataset` yields single, unbatched
    # samples and bypasses all three.
    for batch_idx, (data, targets) in enumerate(dataloader):
        # Get data to CUDA if possible
        data = data.to(device)
        # CrossEntropyLoss expects int64 class indices; int8 is rejected and
        # would also overflow for more than 127 classes. as_tensor accepts
        # both a plain int and an existing tensor without an extra copy.
        targets = torch.as_tensor(targets, dtype=torch.long).to(device)

        print(data.shape)
        print(targets)

        # TODO: the training step below stays disabled. NOTE(review): the
        # model's Conv2d layers take 4-D (N, C, H, W) input, while batches of
        # frame sequences presumably carry an extra time dimension - confirm
        # and reshape before enabling.
        # # Score
        # scores = model(data)
        # loss = criterion(scores, targets)

        # # Backward
        # optimizer.zero_grad()
        # loss.backward()

        # # Gradient Descent or Adam Step
        # optimizer.step() # Update Weight

        # print(f"Loss : {loss}")

In [36]:
# Ask PyTorch to release the unused GPU memory held by its caching allocator.
import torch
torch.cuda.empty_cache()