In [1]:
import torch
import torch.nn.functional as F
from model import build_model_r3d_18
from dataset import ClipDataset
from torch.utils.data import DataLoader
import pandas as pd
import json
from tqdm import tqdm
import os




In [2]:
# ----- File paths -----
# Checkpoint location, relative to the notebook's working directory.
model_path = "checkpoints/r3d_18_best.pth"

# Directory that holds the test-split CSV files. The default is the location
# the notebook was originally run from; set the COLLISION_DATA_DIR environment
# variable to run on another machine without editing any path below.
DATA_DIR = os.environ.get(
    "COLLISION_DATA_DIR",
    "/home/atupulazi/personal_projects/collision-detection/frames/test",
)

inference_csv = os.path.join(DATA_DIR, "test_clip_labels.csv")   # only clip_name + label
metadata_csv = os.path.join(DATA_DIR, "demo_csv_copy.csv")       # has clip_name + time_of_event, id, etc.
output_csv = os.path.join(DATA_DIR, "demo_predictions.csv")      # merged predictions are written here


In [3]:
# ----- Setup device -----
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load model
# The checkpoint file name is derived from the architecture name.
model_name = "r3d_18"
model_path = f"checkpoints/{model_name}_best.pth"

# NOTE(review): depending on the torch version / `weights_only` setting,
# torch.load may unpickle arbitrary objects - only load trusted checkpoints.
checkpoint = torch.load(model_path, map_location=device)

# Build the network and restore the weights stored under 'model_state_dict'.
model = build_model_r3d_18()
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)

# Switch to evaluation mode. As the last expression of the cell, the returned
# module is also what the notebook displays.
model.eval()

VideoResNet(
  (stem): BasicStem(
    (0): Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), padding=(1, 3, 3), bias=False)
    (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Sequential(
        (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
      (conv2): Sequential(
        (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (relu): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Sequential(
        (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        (1):

In [4]:
# ----- Load inference dataset -----
# Build the dataset from `metadata_csv` - the same file the predictions are
# merged with afterwards - instead of repeating its path as a second literal
# that could drift out of sync.
dataset = ClipDataset(metadata_csv, split="test")

# NOTE(review): the inference loop indexes `dataset` directly; `loader` is not
# used by it. Kept so that any other cell relying on the name still works.
loader = DataLoader(dataset, batch_size=1, shuffle=False)

# ----- Run inference -----
# Accumulates one dict per clip (name, ground truth, prediction, confidence).
results = []


In [5]:

# Start from an empty list every time this cell runs, so that re-running it
# does not append a second copy of every prediction to `results`.
results = []

with torch.no_grad():  # inference only - no gradients are needed
    for i in tqdm(range(len(dataset))):
        clip, label = dataset[i]
        # CSV row for this sample; only its clip_name is used here.
        # NOTE(review): assumes dataset.data is a DataFrame whose row order
        # matches dataset[i] - confirm against ClipDataset.
        row = dataset.data.iloc[i]

        # Add a leading batch dimension of size 1 and move the clip to the
        # model's device.
        clip = clip.unsqueeze(0).to(device)

        output = model(clip)
        probs = F.softmax(output, dim=1)
        # Highest class probability and the index of that class.
        confidence, predicted = torch.max(probs, 1)

        results.append({
            "clip_name": row["clip_name"],
            # The label is only recorded, never fed to the model, so it is
            # read as-is without being batched or moved to `device`.
            "ground_truth": int(label.item()),
            "prediction": int(predicted.item()),
            "confidence": float(confidence.item())
        })

100%|██████████| 160/160 [00:02<00:00, 75.82it/s]


In [6]:
# ----- Create DataFrame -----
# Fixing the columns keeps the frame well-formed (and mergeable on clip_name)
# even when `results` is empty.
pred_df = pd.DataFrame(
    results,
    columns=["clip_name", "ground_truth", "prediction", "confidence"],
)

# ----- Load metadata and merge -----
meta_df = pd.read_csv(metadata_csv)

# The merge key must be present in the metadata file.
if "clip_name" not in meta_df.columns:
    raise ValueError(f"{metadata_csv} must have a 'clip_name' column")

# Report predictions that have no metadata row instead of letting them pass
# silently: the left join below keeps them with empty metadata columns.
missing_meta = ~pred_df["clip_name"].isin(meta_df["clip_name"])
if missing_meta.any():
    print(
        f"Warning: {int(missing_meta.sum())} prediction(s) have no matching "
        f"row in {metadata_csv}; their metadata columns will be empty."
    )

# Left join: every prediction row is kept, matched or not.
merged_df = pred_df.merge(meta_df, on="clip_name", how="left")

# ----- Save final output -----
merged_df.to_csv(output_csv, index=False)
print(f"Saved merged predictions to: {output_csv}")

Saved merged predictions to: /home/atupulazi/personal_projects/collision-detection/frames/test/demo_predictions.csv
