In [1]:
import os, random, numpy as np, pandas as pd
from tqdm import tqdm
from sklearn.metrics import f1_score, classification_report, confusion_matrix
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import swin_t, Swin_T_Weights
from torchvision.transforms import Resize
from torch.amp import autocast, GradScaler

# === REPRODUCIBILITY ===
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# === PATHS ===
train_csv_path = "/content/drive/MyDrive/Colab Notebooks/Projects/CSVs/video_source/train.csv"
test_csv_path  = "/content/drive/MyDrive/Colab Notebooks/Projects/CSVs/video_source/test.csv"
NPY_DIR        = "/content/drive/MyDrive/Colab Notebooks/npy_segments_videosource"
save_path      = "/content/drive/MyDrive/Colab Notebooks/Results/Unfrozen_randomseed/Video-source/Swin+GRU"
os.makedirs(save_path, exist_ok=True)

# === CONFIG ===
BATCH_SIZE = 4
MAX_FRAMES = 80
EPOCHS = 10
USE_WEIGHTED_LOSS = True
NUM_SRC_CLASSES = 7  # CCTV, News, Self-filmed, Dashcam, Combinations, Others, Bodycam

# === MODEL ===
class SwinGRUClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.swin = swin_t(weights=Swin_T_Weights.DEFAULT)
        self.swin.head = nn.Identity()  # remove classification head
        self.gru = nn.GRU(input_size=768 + NUM_SRC_CLASSES, hidden_size=256,
                          num_layers=1, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(256 * 2, 1)

    def forward(self, x, src_onehot):
        B, T, C, H, W = x.shape
        x = x.view(B * T, C, H, W)
        features = self.swin(x).view(B, T, -1)
        src_feat = src_onehot.unsqueeze(1).repeat(1, T, 1)
        fused = torch.cat([features, src_feat], dim=2)
        out, _ = self.gru(fused)
        pooled = out.mean(dim=1)
        return self.fc(pooled).squeeze(1)

# === DATASET ===
class ViolenceDataset(Dataset):
    def __init__(self, csv_path, npy_dir):
        self.df = pd.read_csv(csv_path)
        self.npy_dir = npy_dir
        self.resize = Resize((224, 224))

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        frames = np.load(os.path.join(self.npy_dir, f"{row['Segment ID']}.npy"))
        frames = [torch.from_numpy(f).permute(2,0,1).float()/255.0 for f in frames]
        frames = [self.resize(f) for f in frames]

        # pad/truncate to MAX_FRAMES
        if len(frames) < MAX_FRAMES:
            pad_frame = torch.zeros_like(frames[0])
            frames += [pad_frame] * (MAX_FRAMES - len(frames))
        frames = torch.stack(frames[:MAX_FRAMES])

        src_label = torch.tensor(row['Video Source Label'], dtype=torch.long)
        src_onehot = torch.nn.functional.one_hot(src_label, num_classes=NUM_SRC_CLASSES).float()

        return frames, torch.tensor(row['Violence label(video)'], dtype=torch.float32), src_onehot

# === INIT ===
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_dataset = ViolenceDataset(train_csv_path, NPY_DIR)
test_dataset  = ViolenceDataset(test_csv_path,  NPY_DIR)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False)

pos = train_dataset.df['Violence label(video)'].sum()
neg = len(train_dataset) - pos
ratio = neg / max(pos, 1)
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([ratio]).to(device)) if USE_WEIGHTED_LOSS else nn.BCEWithLogitsLoss()

model = SwinGRUClassifier().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scaler = GradScaler()

# === TRAIN ===
best_f1 = 0
for epoch in range(EPOCHS):
    model.train()
    y_true, y_pred, total_loss = [], [], 0.0
    for frames, labels, src_onehot in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        frames, labels, src_onehot = frames.to(device), labels.to(device), src_onehot.to(device)
        with autocast(device_type='cuda'):
            outputs = model(frames, src_onehot)
            loss = criterion(outputs, labels)
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()
        preds = (torch.sigmoid(outputs) > 0.5).int()
        y_true.extend(labels.cpu().numpy()); y_pred.extend(preds.cpu().numpy())
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    print(f"Epoch {epoch+1} | Loss: {total_loss/len(train_loader):.4f} | Macro F1: {macro_f1:.4f}")
    if macro_f1 > best_f1:
        best_f1 = macro_f1
        torch.save(model.state_dict(), os.path.join(save_path, "swin_gru_best.pt"))

# === TEST ===
model.load_state_dict(torch.load(os.path.join(save_path, "swin_gru_best.pt")))
model.eval()
y_true, y_pred, test_losses = [], [], []
segment_ids = test_dataset.df['Segment ID'].tolist()
with torch.no_grad():
    for frames, labels, src_onehot in test_loader:
        frames, labels, src_onehot = frames.to(device), labels.to(device), src_onehot.to(device)
        outputs = model(frames, src_onehot)
        loss = criterion(outputs, labels)
        test_losses.append(loss.item())
        preds = (torch.sigmoid(outputs) > 0.5).int()
        y_true.extend(labels.cpu().numpy()); y_pred.extend(preds.cpu().numpy())

avg_test_loss = np.mean(test_losses)
report = classification_report(y_true, y_pred, target_names=["Non-violent","Violent"], output_dict=True, zero_division=0)
conf_matrix = confusion_matrix(y_true, y_pred)

print(f"\n[TEST] BCE Loss: {avg_test_loss:.4f}")
print(f"[TEST] Macro F1: {report['macro avg']['f1-score']:.4f}")
print(f"[TEST] Micro F1: {f1_score(y_true,y_pred,average='micro'):.4f}")
print("[TEST] Per-Class F1 Scores:")
print(f" - Non-violent F1: {report['Non-violent']['f1-score']:.4f}")
print(f" - Violent F1: {report['Violent']['f1-score']:.4f}")
print("Confusion Matrix:\n", conf_matrix)

pd.DataFrame({"Segment ID": segment_ids, "True": y_true, "Pred": y_pred}).to_csv(
    os.path.join(save_path, "swin_gru_predictions.csv"), index=False)
pd.DataFrame(report).to_csv(os.path.join(save_path, "swin_gru_test_metrics.csv"))


Downloading: "https://download.pytorch.org/models/swin_t-704ceda3.pth" to /root/.cache/torch/hub/checkpoints/swin_t-704ceda3.pth
100%|██████████| 108M/108M [00:00<00:00, 199MB/s] 
Epoch 1/10: 100%|██████████| 168/168 [23:41<00:00,  8.46s/it]


Epoch 1 | Loss: 0.8233 | Macro F1: 0.4955


Epoch 2/10: 100%|██████████| 168/168 [13:00<00:00,  4.64s/it]


Epoch 2 | Loss: 0.7811 | Macro F1: 0.5698


Epoch 3/10: 100%|██████████| 168/168 [11:33<00:00,  4.13s/it]


Epoch 3 | Loss: 0.7695 | Macro F1: 0.5648


Epoch 4/10: 100%|██████████| 168/168 [11:00<00:00,  3.93s/it]


Epoch 4 | Loss: 0.7669 | Macro F1: 0.5686


Epoch 5/10: 100%|██████████| 168/168 [12:26<00:00,  4.45s/it]


Epoch 5 | Loss: 0.7500 | Macro F1: 0.6018


Epoch 6/10: 100%|██████████| 168/168 [10:59<00:00,  3.93s/it]


Epoch 6 | Loss: 0.7600 | Macro F1: 0.6063


Epoch 7/10: 100%|██████████| 168/168 [12:51<00:00,  4.60s/it]


Epoch 7 | Loss: 0.7396 | Macro F1: 0.6082


Epoch 8/10: 100%|██████████| 168/168 [11:42<00:00,  4.18s/it]


Epoch 8 | Loss: 0.7109 | Macro F1: 0.6218


Epoch 9/10: 100%|██████████| 168/168 [11:25<00:00,  4.08s/it]


Epoch 9 | Loss: 0.7227 | Macro F1: 0.6286


Epoch 10/10: 100%|██████████| 168/168 [11:19<00:00,  4.04s/it]


Epoch 10 | Loss: 0.6781 | Macro F1: 0.6730

[TEST] BCE Loss: 0.8391
[TEST] Macro F1: 0.4706
[TEST] Micro F1: 0.4785
[TEST] Per-Class F1 Scores:
 - Non-violent F1: 0.4056
 - Violent F1: 0.5355
Confusion Matrix:
 [[ 58 125]
 [ 45  98]]
