<a href="https://colab.research.google.com/github/Shoh0428/3DCNN_with_PyTorch/blob/main/3DCNN_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import os
from sklearn.model_selection import train_test_split
from google.colab import drive
drive.mount('/content/drive')
content_folder = '/content/drive/My Drive/content'
advertisement_folder = '/content/drive/My Drive/Advertisement'

class VideoDataset(Dataset):
    def __init__(self, video_folder, label, max_frames, transform=None):
        self.video_folder = video_folder
        self.label = label
        self.max_frames = max_frames
        self.transform = transform

    def __len__(self):
        return len(os.listdir(self.video_folder))

    def __getitem__(self, idx):
        video_path = os.path.join(self.video_folder, sorted(os.listdir(self.video_folder))[idx])
        frames = self.process_video(video_path)
        return torch.stack(frames), torch.tensor(self.label)

    def process_video(self, video_path):
        frames = []
        for frame_file in sorted(os.listdir(video_path)):
            if frame_file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
                frame_path = os.path.join(video_path, frame_file)
                frame = Image.open(frame_path)
                if self.transform:
                    frame = self.transform(frame)
                frames.append(frame)

        while len(frames) < self.max_frames:
            frames.append(torch.zeros(3, 224, 224))

        return frames[:self.max_frames]

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

def count_frames_in_folder(folder):
    max_frames = 0
    for video in os.listdir(folder):
        video_path = os.path.join(folder, video)
        if os.path.isdir(video_path):
            frame_count = sum(1 for frame in os.listdir(video_path) if frame.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')))
            max_frames = max(max_frames, frame_count)
    return max_frames

max_frames_content = count_frames_in_folder(content_folder)
max_frames_advertisement = count_frames_in_folder(advertisement_folder)
max_frames = max(max_frames_content, max_frames_advertisement)


content_dataset = VideoDataset(content_folder, 0, max_frames, transform=transform)
advertisement_dataset = VideoDataset(advertisement_folder, 1, max_frames, transform=transform)

combined_dataset = torch.utils.data.ConcatDataset([content_dataset, advertisement_dataset])

# Splitting datasets
train_size = int(0.8 * len(combined_dataset))
test_size = len(combined_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(combined_dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define a 3D CNN model
class VideoClassifier(nn.Module):
    def __init__(self, num_frames):
        super(VideoClassifier, self).__init__()
        self.conv1 = nn.Conv3d(3, 32, kernel_size=(3, 3, 3), padding=1)
        self.pool = nn.MaxPool3d(kernel_size=(2, 2, 2))
        self.conv2 = nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=1)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * num_frames * 28 * 28, 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()
def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.flatten(x)
    x = F.relu(self.fc1(x))
    x = self.sigmoid(self.fc2(x))
    return x
num_frames = max_frames
model = VideoClassifier(num_frames)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, epochs=5):
  model.train()
  for epoch in range(epochs):
    for i, (videos, labels) in enumerate(train_loader):
      outputs = model(videos)
      labels = labels.unsqueeze(1).float()
      loss = criterion(outputs, labels)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      if (i + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')


Mounted at /content/drive




In [7]:
train_model(model, train_loader, criterion, optimizer, epochs=5)

RuntimeError: stack expects a non-empty TensorList

In [6]:
def __getitem__(self, idx):
    video_path = os.path.join(self.video_folder, sorted(os.listdir(self.video_folder))[idx])
    frames = self.process_video(video_path)

    if not frames:
        raise RuntimeError(f"No frames found in video: {video_path}")

    return torch.stack(frames), torch.tensor(self.label)


In [None]:
def evaluate_model(model, test_loader):
  model.eval()
  with torch.no_grad():
    correct = 0
    total = 0
    for videos, labels in test_loader:
      outputs = model(videos)
      predicted = (outputs > 0.5).float()
      total += labels.size(0)
      correct += (predicted.squeeze(1) == labels).sum().item()

In [None]:
    print(f'Accuracy of the model on the test videos: {100 * correct / total}%')


In [8]:
evaluate_model(model, test_loader)

NameError: name 'evaluate_model' is not defined