In [1]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Run on the GPU when CUDA is usable, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda


In [2]:
class SarcasmVideoDataset(Dataset):
    """Videos from a single directory that all share one class label.

    Each item is a ``(clip, label)`` pair where ``clip`` stacks exactly
    ``num_frames`` frames taken from the start of the video.

    Args:
        root_dir: Directory scanned (non-recursively) for ``.mp4`` files.
        label: Label returned for every video found in ``root_dir``.
        transform: Optional callable applied to each RGB frame; it must return
            a tensor. When omitted, the raw frames are stacked as tensors
            without any layout or dtype conversion.
        num_frames: Number of frames per clip.
        frame_size: Target size handed to ``cv2.resize``.
    """

    def __init__(self, root_dir, label, transform=None, num_frames=10, frame_size=(224, 224)):
        self.root_dir = root_dir
        self.label = label
        self.transform = transform
        self.num_frames = num_frames
        self.frame_size = frame_size
        # os.listdir returns entries in arbitrary order; sorting keeps dataset
        # indices stable between runs so that seeded splits are reproducible.
        self.videos = sorted(
            os.path.join(root_dir, vid) for vid in os.listdir(root_dir) if vid.endswith('.mp4')
        )

    def __len__(self):
        """Number of videos found in ``root_dir``."""
        return len(self.videos)

    def __getitem__(self, idx):
        """Return ``(clip, label)`` for the video at position ``idx``.

        Raises:
            RuntimeError: If no frame at all could be decoded from the video.
        """
        video_path = self.videos[idx]
        frames = self.extract_frames(video_path)
        if not frames:
            raise RuntimeError(f"Could not read any frames from video: {video_path}")
        # Short videos are padded by repeating their last frame so every clip
        # has the same length and batches can be collated.
        missing = self.num_frames - len(frames)
        if missing > 0:
            frames = frames + [frames[-1]] * missing
        if self.transform:
            frames = [self.transform(frame) for frame in frames]
        else:
            # torch.stack only accepts tensors, not numpy arrays
            frames = [torch.as_tensor(frame) for frame in frames]
        return torch.stack(frames), self.label

    def extract_frames(self, video_path):
        """Decode up to ``num_frames`` leading frames of ``video_path`` as RGB arrays.

        Returns a (possibly shorter, possibly empty) list; padding is done by
        ``__getitem__``.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while len(frames) < self.num_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.resize(frame, self.frame_size)
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        finally:
            # Release the capture handle even if resizing/conversion fails
            cap.release()
        return frames

# Dataset root and split seed. The root can be overridden with the
# SARCASM_DATA_ROOT environment variable instead of editing the notebook.
DATA_ROOT = os.environ.get("SARCASM_DATA_ROOT", "C:/Users/Rifat/Music/dsv1")
SPLIT_SEED = 42

# Define transforms for frames: convert to a float tensor, then normalise with
# the mean/std that torchvision documents for its ImageNet-pretrained models.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load datasets (one directory per class)
sarcastic_dataset = SarcasmVideoDataset(f"{DATA_ROOT}/sarcastic", label=1, transform=transform)
nonsarcastic_dataset = SarcasmVideoDataset(f"{DATA_ROOT}/nonsarcastic", label=0, transform=transform)

# Combine and split datasets (70% / 15% / remainder)
full_dataset = sarcastic_dataset + nonsarcastic_dataset
train_size = int(0.7 * len(full_dataset))
val_size = int(0.15 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size
# A seeded generator keeps the train/val/test membership identical across
# kernel restarts, so a saved checkpoint is never evaluated on videos it was
# trained on in an earlier session.
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Data loaders (only the training set is shuffled)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)


In [3]:
class VideoClassifier(nn.Module):
    """Per-frame ResNet-18 encoder followed by an LSTM over the frame sequence.

    Args:
        num_classes: Number of output logits.
        hidden_size: Hidden size of the LSTM.

    ``forward`` expects a 5-D tensor unpacked as
    ``(batch, frames, channels, height, width)`` and returns logits of shape
    ``(batch, num_classes)``.
    """

    def __init__(self, num_classes=2, hidden_size=128):
        super().__init__()
        # `pretrained=True` is deprecated in newer torchvision releases in
        # favour of the `weights=` enum; fall back to the old keyword when the
        # enum is not available in the installed version.
        if hasattr(models, "ResNet18_Weights"):
            backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        else:
            backbone = models.resnet18(pretrained=True)
        # Read the feature width from the backbone instead of hard-coding it
        feature_dim = backbone.fc.in_features
        backbone.fc = nn.Identity()  # Remove the final classification layer
        self.feature_extractor = backbone
        self.lstm = nn.LSTM(feature_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        batch_size, frames, c, h, w = x.shape
        # Fold the frame axis into the batch axis so the CNN sees single images.
        # reshape (unlike view) also accepts non-contiguous inputs.
        x = x.reshape(batch_size * frames, c, h, w)
        features = self.feature_extractor(x)
        features = features.reshape(batch_size, frames, -1)
        # Classify from the final hidden state of the last LSTM layer
        _, (hn, _) = self.lstm(features)
        out = self.fc(hn[-1])
        return out

# Instantiate the network on the selected device, then the loss and optimiser
model = VideoClassifier()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)




In [4]:
# Directory that receives the checkpoints; override with the SARCASM_MODEL_DIR
# environment variable instead of editing the notebook.
save_path = os.environ.get("SARCASM_MODEL_DIR", "C:/Users/Rifat/Music/Models/ver0o1")

def train_model(model, train_loader, val_loader, num_epochs=5):
    """Train ``model`` and checkpoint it whenever validation accuracy improves.

    Uses the notebook-level ``optimizer``, ``criterion``, ``device`` and
    ``save_path``.

    Args:
        model: Network to train (updated in place).
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` batches passed to
            ``evaluate_model`` after every epoch.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        list[dict]: One entry per epoch with the keys ``epoch``,
        ``train_loss`` (mean per-sample loss), ``train_acc`` and ``val_acc``
        (both in percent).
    """
    # Create the checkpoint directory up front; otherwise torch.save only
    # fails after a full epoch has already been trained.
    os.makedirs(save_path, exist_ok=True)

    history = []
    best_acc = 0.0
    for epoch in range(num_epochs):
        model.train()
        train_loss, correct, total = 0.0, 0, 0

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            batch_count = labels.size(0)
            # Weight by batch size so the epoch figure is a per-sample mean
            train_loss += batch_loss * batch_count
            _, predicted = outputs.max(1)
            total += batch_count
            correct += predicted.eq(labels).sum().item()

            print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(train_loader)}], Loss: {batch_loss:.4f}")

        # Guard against an empty training loader (total == 0)
        train_acc = 100. * correct / total if total else 0.0
        mean_loss = train_loss / total if total else 0.0
        val_acc = evaluate_model(model, val_loader)
        print(f"Epoch {epoch+1}, Train Accuracy: {train_acc:.2f}%, Validation Accuracy: {val_acc:.2f}%")
        history.append({
            "epoch": epoch + 1,
            "train_loss": mean_loss,
            "train_acc": train_acc,
            "val_acc": val_acc,
        })

        # Save model if validation accuracy improves
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), f"{save_path}/video_resnet_epoch{epoch+1}_val{val_acc:.2f}.pt")

    return history

def evaluate_model(model, val_loader):
    """Return the top-1 accuracy of ``model`` over ``val_loader`` in percent.

    The model is switched to eval mode and gradients are disabled. Batches are
    moved to the device that holds the model's parameters; a model without
    parameters receives the batches unchanged.

    Args:
        model: Network producing ``(batch, num_classes)`` scores.
        val_loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        float: Accuracy in the range 0-100, or ``0.0`` when ``val_loader``
        yields no samples.
    """
    model.eval()
    # Follow the model's own device rather than relying on a notebook global
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            if target_device is not None:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    # An empty split (e.g. int(0.15 * n) == 0 on a tiny dataset) must not
    # abort training with a ZeroDivisionError.
    if total == 0:
        return 0.0
    return 100. * correct / total

# Kick off training; the trailing semicolon keeps any return value out of the cell output
train_model(model=model, train_loader=train_loader, val_loader=val_loader);


Epoch [1/5], Batch [1/97], Loss: 0.6942
Epoch [1/5], Batch [2/97], Loss: 0.7543
Epoch [1/5], Batch [3/97], Loss: 0.9721
Epoch [1/5], Batch [4/97], Loss: 0.7537
Epoch [1/5], Batch [5/97], Loss: 0.7813
Epoch [1/5], Batch [6/97], Loss: 0.5587
Epoch [1/5], Batch [7/97], Loss: 0.7063
Epoch [1/5], Batch [8/97], Loss: 0.7231
Epoch [1/5], Batch [9/97], Loss: 0.7363
Epoch [1/5], Batch [10/97], Loss: 0.6373
Epoch [1/5], Batch [11/97], Loss: 0.7285
Epoch [1/5], Batch [12/97], Loss: 0.5205
Epoch [1/5], Batch [13/97], Loss: 0.6149
Epoch [1/5], Batch [14/97], Loss: 0.6158
Epoch [1/5], Batch [15/97], Loss: 0.6654
Epoch [1/5], Batch [16/97], Loss: 0.5538
Epoch [1/5], Batch [17/97], Loss: 0.7420
Epoch [1/5], Batch [18/97], Loss: 0.8889
Epoch [1/5], Batch [19/97], Loss: 0.6935
Epoch [1/5], Batch [20/97], Loss: 0.6600
Epoch [1/5], Batch [21/97], Loss: 0.9008
Epoch [1/5], Batch [22/97], Loss: 0.8512
Epoch [1/5], Batch [23/97], Loss: 0.6137
Epoch [1/5], Batch [24/97], Loss: 0.6708
Epoch [1/5], Batch [25/97

In [13]:
def test_model(model, test_loader):
    """Print the accuracy of ``model`` on the held-out test split.

    Delegates to ``evaluate_model`` so that validation and test accuracy are
    computed by one shared routine rather than two copies of the same loop.

    Args:
        model: Trained network to score.
        test_loader: Iterable of ``(inputs, labels)`` test batches.
    """
    test_acc = evaluate_model(model, test_loader)
    print(f"Test Accuracy: {test_acc:.2f}%")

# Report accuracy on the held-out test split
test_model(model=model, test_loader=test_loader)


Test Accuracy: 59.28%
