In [2]:
import os, random, numpy as np, pandas as pd
from tqdm import tqdm
from sklearn.metrics import f1_score, classification_report, confusion_matrix
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50, ResNet50_Weights
from torchvision import transforms
from torch.amp import autocast, GradScaler

# === REPRODUCIBILITY ===
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# === PATHS ===
train_csv_path = "/content/drive/MyDrive/Colab Notebooks/Projects/CSVs/train.csv"
test_csv_path  = "/content/drive/MyDrive/Colab Notebooks/Projects/CSVs/test.csv"
NPY_DIR  = "/content/drive/MyDrive/Colab Notebooks/Projects/npy_segments_unimodal"
save_path = "/content/drive/MyDrive/Colab Notebooks/Results/Unfrozen_randomseed/ResNet50+GRU"
os.makedirs(save_path, exist_ok=True)

# === CONFIG ===
BATCH_SIZE = 4
GRAD_ACCUM_STEPS = 4
EPOCHS = 10
MAX_FRAMES = 80
EARLY_STOPPING_PATIENCE = 4

# === MODEL ===
class ResNet50GRU(nn.Module):
    def __init__(self):
        super().__init__()
        self.resnet = resnet50(weights=ResNet50_Weights.DEFAULT)
        self.resnet.fc = nn.Identity()  # fully unfrozen
        self.gru = nn.GRU(2048, 256, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(0.3)
        self.attn = nn.Linear(512, 1)
        self.fc = nn.Linear(512, 1)

    def forward(self, x):
        B, T, C, H, W = x.size()
        x = x.view(B*T, C, H, W)
        feats = self.resnet(x).view(B, T, -1)
        out, _ = self.gru(feats)
        weights = torch.softmax(self.attn(out), dim=1)
        out = torch.sum(weights * out, dim=1)
        out = self.dropout(out)
        return self.fc(out).squeeze(1)

# === DATASET ===
class ViolenceDataset(Dataset):
    def __init__(self, csv_path, npy_dir):
        self.df = pd.read_csv(csv_path)
        self.npy_dir = npy_dir
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(0.5),
            transforms.ColorJitter(0.2, 0.2),
            transforms.ToTensor(),
            transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
        ])
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        frames = np.load(os.path.join(self.npy_dir, f"{row['Segment ID']}.npy"))[:MAX_FRAMES]
        frames = torch.stack([self.transform(torch.from_numpy(f).permute(2,0,1).float()/255.0) for f in frames])
        return frames, torch.tensor(row['Violence label(video)'], dtype=torch.float32)

# === INIT ===
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_dataset = ViolenceDataset(train_csv_path, NPY_DIR)
test_dataset  = ViolenceDataset(test_csv_path,  NPY_DIR)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False)

model = ResNet50GRU().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1)
scaler = GradScaler()

best_loss, early_stop_counter = float('inf'), 0

# === TRAIN ===
for epoch in range(EPOCHS):
    model.train()
    y_true, y_pred, total_loss = [], [], 0.0
    optimizer.zero_grad()
    for i, (frames, labels) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")):
        frames, labels = frames.to(device), labels.to(device)
        with autocast(device_type='cuda'):
            outputs = model(frames)
            loss = criterion(outputs, labels) / GRAD_ACCUM_STEPS
        scaler.scale(loss).backward()
        if (i+1) % GRAD_ACCUM_STEPS == 0 or (i+1) == len(train_loader):
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
        total_loss += loss.item() * GRAD_ACCUM_STEPS
        preds = (torch.sigmoid(outputs) > 0.5).int()
        y_true.extend(labels.cpu().numpy()); y_pred.extend(preds.cpu().numpy())
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    micro_f1 = f1_score(y_true, y_pred, average='micro')
    print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Macro F1: {macro_f1:.4f} | Micro F1: {micro_f1:.4f}")
    scheduler.step(total_loss)
    if total_loss < best_loss:
        best_loss = total_loss
        torch.save(model.state_dict(), os.path.join(save_path, "resnet50_gru_best.pt"))
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= EARLY_STOPPING_PATIENCE: break

# === TEST ===
model.load_state_dict(torch.load(os.path.join(save_path, "resnet50_gru_best.pt")))
model.eval()
y_true, y_pred, test_losses = [], [], []
segment_ids = test_dataset.df['Segment ID'].tolist()
with torch.no_grad():
    for frames, labels in test_loader:
        frames, labels = frames.to(device), labels.to(device)
        outputs = model(frames)
        loss = criterion(outputs, labels)
        test_losses.append(loss.item())
        preds = (torch.sigmoid(outputs) > 0.5).int()
        y_true.extend(labels.cpu().numpy()); y_pred.extend(preds.cpu().numpy())

avg_test_loss = np.mean(test_losses)
report = classification_report(y_true, y_pred, target_names=["Non-violent","Violent"], output_dict=True, zero_division=0)
conf_matrix = confusion_matrix(y_true, y_pred)
print(f"\n[TEST] BCE Loss: {avg_test_loss:.4f}")
print(f"[TEST] Macro F1: {report['macro avg']['f1-score']:.4f}")
print(f"[TEST] Micro F1: {f1_score(y_true,y_pred,average='micro'):.4f}")
print("[TEST] Per-Class F1 Scores:")
print(f" - Non-violent F1: {report['Non-violent']['f1-score']:.4f}")
print(f" - Violent F1: {report['Violent']['f1-score']:.4f}")
print("Confusion Matrix:\n", conf_matrix)

pd.DataFrame({"Segment ID": segment_ids, "True": y_true, "Pred": y_pred}).to_csv(
    os.path.join(save_path, "resnet50_gru_predictions.csv"), index=False)
pd.DataFrame(report).to_csv(os.path.join(save_path, "resnet50_gru_test_metrics.csv"))


Epoch 1/10: 100%|██████████| 168/168 [16:44<00:00,  5.98s/it]


Epoch 1 | Loss: 110.0561 | Macro F1: 0.4827 | Micro F1: 0.5874


Epoch 2/10: 100%|██████████| 168/168 [06:25<00:00,  2.30s/it]


Epoch 2 | Loss: 97.7987 | Macro F1: 0.6766 | Micro F1: 0.6831


Epoch 3/10: 100%|██████████| 168/168 [06:11<00:00,  2.21s/it]


Epoch 3 | Loss: 88.2912 | Macro F1: 0.7074 | Micro F1: 0.7145


Epoch 4/10: 100%|██████████| 168/168 [06:07<00:00,  2.19s/it]


Epoch 4 | Loss: 81.0329 | Macro F1: 0.7571 | Micro F1: 0.7608


Epoch 5/10: 100%|██████████| 168/168 [06:05<00:00,  2.17s/it]


Epoch 5 | Loss: 74.7712 | Macro F1: 0.7697 | Micro F1: 0.7728


Epoch 6/10: 100%|██████████| 168/168 [06:04<00:00,  2.17s/it]


Epoch 6 | Loss: 72.3450 | Macro F1: 0.7754 | Micro F1: 0.7773


Epoch 7/10: 100%|██████████| 168/168 [06:06<00:00,  2.18s/it]


Epoch 7 | Loss: 63.7738 | Macro F1: 0.8230 | Micro F1: 0.8266


Epoch 8/10: 100%|██████████| 168/168 [06:02<00:00,  2.16s/it]


Epoch 8 | Loss: 59.0119 | Macro F1: 0.8380 | Micro F1: 0.8401


Epoch 9/10: 100%|██████████| 168/168 [06:02<00:00,  2.16s/it]


Epoch 9 | Loss: 59.5529 | Macro F1: 0.8487 | Micro F1: 0.8505


Epoch 10/10: 100%|██████████| 168/168 [06:00<00:00,  2.15s/it]


Epoch 10 | Loss: 47.1004 | Macro F1: 0.8652 | Micro F1: 0.8670

[TEST] BCE Loss: 0.6741
[TEST] Macro F1: 0.6921
[TEST] Micro F1: 0.7055
[TEST] Per-Class F1 Scores:
 - Non-violent F1: 0.7563
 - Violent F1: 0.6279
Confusion Matrix:
 [[149  34]
 [ 62  81]]
