In [1]:
import sys
import os

# Register the notebook's working directory on the import path (once), so
# modules located there can be imported by later cells.
project_root = os.path.abspath(os.getcwd())
sys.path.extend([] if project_root in sys.path else [project_root])
print(sys.path)

['/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/home/atupulazi/personal_projects/collision-forecast/.venv/lib/python3.10/site-packages', '/home/atupulazi/personal_projects/collision-forecast/src']


In [3]:
%pip install torch
%pip install torchvision
%pip install torchaudio
%pip install pandas
%pip install numpy
%pip install matplotlib
%pip install seaborn
%pip install scikit-learn
%pip install scikit-image
%pip install scipy
%pip install tqdm

Collecting torch
  Using cached torch-2.7.1-cp310-cp310-manylinux_2_28_x86_64.whl (821.2 MB)
Collecting fsspec
  Using cached fsspec-2025.5.1-py3-none-any.whl (199 kB)
Collecting nvidia-cusparselt-cu12==0.6.3
  Using cached nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl (156.8 MB)
Collecting sympy>=1.13.3
  Using cached sympy-1.14.0-py3-none-any.whl (6.3 MB)
Collecting nvidia-cusparse-cu12==12.5.4.2
  Using cached nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (216.6 MB)
Collecting nvidia-nvjitlink-cu12==12.6.85
  Using cached nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (19.7 MB)
Collecting nvidia-nccl-cu12==2.26.2
  Using cached nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (201.3 MB)
Collecting filelock
  Using cached filelock-3.18.0-py3-none-any.whl (16 kB)
Collecting nvidia-nvtx-cu12==12.6.77
  Using cached nvidia_nvtx_cu12-12.6.77-py3-none-manylinux201

In [5]:
%pip install transformers

Collecting transformers
  Downloading transformers-4.53.1-py3-none-any.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m55.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hCollecting tokenizers<0.22,>=0.21
  Downloading tokenizers-0.21.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mmm
[?25hCollecting huggingface-hub<1.0,>=0.30.0
  Downloading huggingface_hub-0.33.2-py3-none-any.whl (515 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m515.4/515.4 KB[0m [31m62.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyyaml>=5.1
  Using cached PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (751 kB)
Collecting safetensors>=0.4.3
  Downloading safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (471 kB)
[2K     [90m━━━━

In [1]:
from model import build_videomae
import torch.nn as nn
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from dataloader import CollisionForecastDataset

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def train_model(model, lr, device, train_loader, val_loader, FORCE_RETRAIN, epochs=10):
    """Train ``model`` with Adam and MSE loss, keeping the best-validation checkpoint.

    Two checkpoint files are written under ``checkpoints/``:

    * ``model_best.pth``   - state dict of the epoch with the lowest mean
      validation loss seen so far (only ever overwritten by a better epoch).
    * ``model_latest.pth`` - periodic snapshot taken every 5th epoch
      (epochs 0, 5, 10, ...), independent of validation performance.

    Args:
        model: Module to train. It is moved to ``device`` and updated in place.
        lr: Learning rate handed to ``optim.Adam``.
        device: Device that the model and every batch are moved to.
        train_loader: Iterable yielding ``(input, target)`` tensor pairs.
        val_loader: Iterable yielding ``(input, target)`` tensor pairs. It must
            support ``len()``; the summed batch losses are divided by it.
        FORCE_RETRAIN: When falsy and ``model_best.pth`` already exists,
            training is skipped.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The trained ``model``, or ``None`` when training was skipped because a
        best checkpoint already exists.
    """
    model_name = "model"
    checkpoint_dir = "checkpoints"
    os.makedirs(checkpoint_dir, exist_ok=True)

    best_path = os.path.join(checkpoint_dir, f"{model_name}_best.pth")
    # Periodic snapshots get their own file so they can never clobber the
    # best-validation weights stored at ``best_path``.
    latest_path = os.path.join(checkpoint_dir, f"{model_name}_latest.pth")

    # Check if model already exists
    if not FORCE_RETRAIN and os.path.exists(best_path):
        print(f"Model {model_name} already exists. Skipping training.")
        return

    # Define loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.to(device)
    best_val_loss = float('inf')

    for e in range(epochs):
        model.train()
        for batch_idx, (batch_input, batch_target) in enumerate(train_loader):
            print(f"Train Epoch {e}, Batch {batch_idx}, inputs shape {batch_input.shape}, target shape {batch_target.shape}")
            batch_input = batch_input.to(device)
            batch_target = batch_target.to(device)

            optimizer.zero_grad()  # drop gradients left over from the previous batch
            outputs = model(batch_input)  # forward pass
            loss = criterion(outputs, batch_target)
            loss.backward()  # compute gradients
            optimizer.step()  # update the weights

            if batch_idx % 10 == 0:
                print(f"Train Epoch {e}, Batch {batch_idx}, Loss {loss.item()}")

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for val_input, val_target in val_loader:
                val_input = val_input.to(device)
                val_target = val_target.to(device)

                val_outputs = model(val_input)
                batch_loss = criterion(val_outputs, val_target)
                val_loss += batch_loss.item()

        # Mean of the per-batch losses.
        val_loss /= len(val_loader)
        print(f"Validation Loss: {val_loss}")

        # Save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), best_path)
            print(f"Model saved to {best_path}")

        # Save a snapshot every 5 epochs (separate file: see ``latest_path``).
        if e % 5 == 0:
            torch.save(model.state_dict(), latest_path)
            print(f"Model saved to {latest_path}")

    return model


In [6]:
if __name__ == "__main__":
    # Use the GPU when one is available, otherwise fall back to the CPU.
    # Do not re-check this with `device == "cuda"`: a torch.device never
    # compares equal to a str, so such a check always selects the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Directory holding the per-split label CSVs.
    # TODO: replace this machine-specific absolute path with a configurable one.
    labels_dir = "/home/atupulazi/personal_projects/collision-forecast/labels"

    train_dataset = CollisionForecastDataset(csv_path=f"{labels_dir}/train_clip_labels.csv", split="train")
    val_dataset = CollisionForecastDataset(csv_path=f"{labels_dir}/val_clip_labels.csv", split="val")

    # Shuffle only the training split; validation order stays fixed.
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)



In [7]:
# Pull a single batch from the training loader and report its tensor shapes;
# nothing is printed when the loader yields no batches.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    inputs, targets = first_batch
    print("inputs shape:", inputs.shape)
    print("targets shape:", targets.shape)

inputs shape: torch.Size([8, 3, 16, 224, 224])
targets shape: torch.Size([8])


In [None]:
# # 🎯 Project: Crash Forecast Attention Visualization (Streamlit)

# ## 🧠 Goal:
# Help users understand **which video frames your model focused on** when predicting a crash.

# ---

# ## ✅ Step-by-Step Pipeline (No Code)

# ### Step 1: Load Preprocessed Video Clip
# - Load the `.npy` file (each one is a 16-frame video clip).
# - This is the same clip used during model inference.

# ### Step 2: Run Model to Predict Crash
# - Send the clip into your trained VideoMAE model.
# - Get the prediction (e.g., "Crash likely").

# ### Step 3: Capture Attention Map from Inside the Model
# - Modify your model to save its **self-attention weights** during inference.
# - Save this as `model.saved_attention`.
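# - The attention map shape will be `[batch, heads, tokens, tokens]`, e.g. `[1, 12, 3136, 3136]`
#   if every one of the 16 frames contributes its own 196 patches (16 × 196 = 3136).
#   NOTE: with VideoMAE's default `tubelet_size=2`, each token spans 2 frames, giving
#   8 × 196 = 1568 tokens instead — verify against the actual model config.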

# ### Step 4: Convert Attention → Frame Importance
# - Each frame has 196 patches (14x14 grid).
# - Group those patches by frame.
# - Measure how much attention the model gave to each frame's patches.
# - Output: a list of 16 scores (1 per frame), showing importance.

# ### Step 5: Display in Streamlit
# - For each frame:
#   - Show the frame as an image
#   - Show the matching attention score
# - Optional: Sort by attention score to show most-important frames first.

# ---

# ## 🚀 Result
# An interactive Streamlit dashboard that shows:
# - What your model predicted
# - What frames it paid attention to (and how much)
# - Why it made that decision (visually)

# ---

# ## 🎁 Bonus
# - Helps explain the model's reasoning
# - Makes your AV crash forecast project interview-ready
# - Shows you're not just running models — you're understanding them

# Next step: plug this visualization into the real inference pipeline.
