In [3]:
# Step 1: Install Essential Libraries
!pip install pytorchvideo timm scikit-learn -q

# Step 2: Imports
import torch
import torch.nn as nn
import pytorchvideo.models.resnet as video_resnet
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import cv2
import numpy as np
import albumentations as A
from sklearn.model_selection import train_test_split

# Step 3: Dataset Class
class CollisionDataset(Dataset):
    """Clip dataset built from the leading frames of each video.

    Every row of ``df`` must provide an ``id`` (used as the ``.mp4`` file
    stem inside ``video_dir``) and a ``target`` label.

    Items are ``(clip, label)`` pairs:
      * ``clip``  - float32 tensor laid out as (3, num_frames, 224, 224)
      * ``label`` - float32 scalar tensor (float is required by ``nn.BCELoss``)
    """

    def __init__(self, df, video_dir, num_frames=16):
        self.df = df
        self.video_dir = video_dir
        self.num_frames = num_frames
        # ReplayCompose records the random parameters it sampled so the very
        # same crop/flip can be re-applied to the remaining frames of a clip.
        self.transform = A.ReplayCompose([
            A.Resize(256, 256),
            A.RandomCrop(224, 224),
            A.HorizontalFlip(p=0.5),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        video_path = f"{self.video_dir}/{row['id']}.mp4"

        frames = []
        replay = None  # augmentation parameters sampled for the first frame
        cap = cv2.VideoCapture(video_path)
        try:
            for _ in range(self.num_frames):
                ret, frame = cap.read()
                if not ret:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                if replay is None:
                    augmented = self.transform(image=frame)
                    replay = augmented['replay']
                else:
                    # Re-use the first frame's crop/flip so the clip stays
                    # spatially consistent over time.
                    augmented = A.ReplayCompose.replay(replay, image=frame)
                frames.append(augmented['image'])
        finally:
            # Release the decoder even if reading/augmenting a frame raises.
            cap.release()

        # Short or unreadable videos are padded with black frames.
        while len(frames) < self.num_frames:
            frames.append(np.zeros((224, 224, 3), dtype=np.uint8))

        # (T, H, W, C) -> (C, T, H, W)
        clip = torch.tensor(np.array(frames), dtype=torch.float32).permute(3, 0, 1, 2)
        label = torch.tensor(row['target'], dtype=torch.float32)
        return clip, label

# Step 4: Simple Model (ResNet50)
class CollisionPredictor(nn.Module):
    """Single-output video ResNet-50 whose raw score is squashed by a sigmoid."""

    def __init__(self):
        super().__init__()
        # Backbone settings: 3 input channels, depth 50, one output unit.
        backbone_config = {
            "input_channel": 3,
            "model_depth": 50,
            "model_num_class": 1,
        }
        self.model = video_resnet.create_resnet(**backbone_config)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the backbone on ``x`` and map its output through the sigmoid."""
        raw_scores = self.model(x)
        return self.sigmoid(raw_scores)

# The model/optimizer setup, data loading, training loop, inference, and
# submission steps are implemented in the cells below.

In [4]:
# Step 5: Device, model, loss, and optimizer
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = CollisionPredictor()
model = model.to(device)
# CollisionPredictor already applies a sigmoid, so the loss takes probabilities.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# Step 6: Data Loading

# Step 7: Training Loop

# Step 8: Inference

# Step 9: Generate Submission


In [5]:
# Hold out 10% of the labelled rows for validation; the fixed random_state
# keeps the split repeatable across runs.
# NOTE(review): video paths are built from the CSV `id` column as-is; if the
# files on disk use zero-padded names, confirm the ids survive read_csv intact.
train_df, val_df = train_test_split(
    pd.read_csv("/kaggle/input/nexar-collision-prediction/train.csv"),
    test_size=0.1,
    random_state=42,
)

# Both splits read their clips from the same directory; only the training
# loader shuffles.
train_loader, val_loader = (
    DataLoader(
        CollisionDataset(split_df, "/kaggle/input/nexar-collision-prediction/train"),
        batch_size=8,
        shuffle=reshuffle,
        num_workers=2,
    )
    for split_df, reshuffle in ((train_df, True), (val_df, False))
)


In [None]:
# Modify the dataset class to ensure float32 dtype
class CollisionDataset(Dataset):
    """Clip dataset that returns float32 clips and float32 labels.

    Every row of ``df`` must provide an ``id`` (used as the ``.mp4`` file
    stem inside ``video_dir``) and a ``target`` label. Each item is a
    ``(frames_tensor, label_tensor)`` pair where ``frames_tensor`` is laid
    out as (3, num_frames, 224, 224).
    """

    def __init__(self, df, video_dir, num_frames=16):
        self.df = df
        self.video_dir = video_dir
        self.num_frames = num_frames
        # ReplayCompose remembers the random crop/flip it sampled, which lets
        # __getitem__ apply one identical augmentation to a whole clip.
        self.transform = A.ReplayCompose([
            A.Resize(256, 256),
            A.RandomCrop(224, 224),
            A.HorizontalFlip(p=0.5),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        video_path = f"{self.video_dir}/{row['id']}.mp4"

        # Capture frames
        frames = []
        saved_params = None  # set once the first frame has been augmented
        cap = cv2.VideoCapture(video_path)
        try:
            for _ in range(self.num_frames):
                ret, frame = cap.read()
                if not ret:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                if saved_params is None:
                    result = self.transform(image=frame)
                    saved_params = result['replay']
                else:
                    # Same crop window and flip decision as the first frame;
                    # sampling per frame would jitter the clip over time.
                    result = A.ReplayCompose.replay(saved_params, image=frame)
                frames.append(result['image'])
        finally:
            # Always free the decoder, even when a frame fails to process.
            cap.release()

        # Pad with black frames when the video is shorter than num_frames
        while len(frames) < self.num_frames:
            frames.append(np.zeros((224, 224, 3), dtype=np.uint8))

        # Convert to float32 explicitly; (T, H, W, C) -> (C, T, H, W)
        frames_tensor = torch.tensor(np.array(frames), dtype=torch.float32).permute(3, 0, 1, 2)
        label_tensor = torch.tensor(row['target'], dtype=torch.float32)
        return frames_tensor, label_tensor

# Training loop with explicit dtype handling.
# NOTE: train_loader / val_loader wrap dataset objects that were constructed in
# an earlier cell, so they keep the CollisionDataset definition that existed
# when they were built. The float32 casts below therefore stay in place rather
# than relying on the dataset to emit float32 labels.
for epoch in range(2):
    model.train()
    for frames, labels in train_loader:
        frames = frames.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.float32)

        optimizer.zero_grad()
        # reshape(-1) always yields a 1-D (batch,) tensor. A bare squeeze()
        # collapses a final batch of size 1 to a 0-d tensor, which BCELoss
        # rejects because it no longer matches the (1,) label tensor.
        outputs = model(frames).reshape(-1)
        loss = criterion(outputs, labels.reshape(-1))
        loss.backward()
        optimizer.step()

    # Validation (same dtype conversion and shape handling, no gradients)
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for frames, labels in val_loader:
            frames = frames.to(device, dtype=torch.float32)
            labels = labels.to(device, dtype=torch.float32)
            outputs = model(frames).reshape(-1)
            val_loss += criterion(outputs, labels.reshape(-1)).item()
    # Mean of the per-batch losses over the validation loader.
    print(f"Epoch {epoch+1} | Val Loss: {val_loss/len(val_loader):.4f}")

In [None]:
@torch.no_grad()
def predict_video(video_path):
    """Score one video with the global ``model`` using sliding 16-frame windows.

    Frames are read sequentially, converted from BGR to RGB (the same
    conversion ``CollisionDataset`` applies during training) and resized to
    224x224. Each time 16 frames are buffered they are scored as one clip;
    the buffer then drops its 8 oldest frames so consecutive windows overlap
    by 50%.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.

    Returns:
        float: The first window score above 0.5 (early exit); otherwise the
        score of the last complete window; ``0.0`` when fewer than 16 frames
        could be read, i.e. no window was ever scored.
    """
    model.eval()
    cap = cv2.VideoCapture(video_path)
    frames = []
    # Default result for videos too short to fill a single window. Without
    # this, a video with 8-15 frames reached the final return with `pred`
    # never assigned and raised UnboundLocalError.
    pred = 0.0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Training clips are RGB; feeding raw BGR frames here would swap
            # the red and blue channels relative to what the model was fit on.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(cv2.resize(frame, (224, 224)))
            if len(frames) == 16:
                # Stack first: building a tensor from a Python list of
                # ndarrays is very slow.
                clip = torch.tensor(np.stack(frames))
                # (T, H, W, C) -> (1, C, T, H, W)
                inputs = clip.permute(3, 0, 1, 2).unsqueeze(0).float().to(device)
                pred = model(inputs).item()
                if pred > 0.5:  # Early exit
                    return pred
                frames = frames[8:]  # 50% overlap
    finally:
        cap.release()
    return pred


In [None]:
# Score every test video and write the id/score pairs to submission.csv.
test_df = pd.read_csv("/kaggle/input/nexar-collision-prediction/test.csv")
scores = list(map(
    lambda vid: predict_video(f"/kaggle/input/nexar-collision-prediction/test/{vid}.mp4"),
    test_df['id'],
))
pd.DataFrame({'id': test_df['id'], 'score': scores}).to_csv("submission.csv", index=False)