In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# /kaggle/input/car-crash-dataset-ccd/CrashBest
# /kaggle/input/car-crash-dataset-ccd/CrashBest/C_000001_01.jpg
# /kaggle/input/crash-1500/Crash-1500/000001.mp4
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
!sudo apt-get update
!sudo apt-get install ffmpeg

Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease [1581 B]
Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease                         
Get:3 http://security.ubuntu.com/ubuntu focal-security InRelease [128 kB]
Get:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease [128 kB]        
Get:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  Packages [2502 kB]
Get:6 https://packages.cloud.google.com/apt gcsfuse-focal InRelease [1227 B]   
Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [128 kB]      
Get:8 https://packages.cloud.google.com/apt cloud-sdk InRelease [1618 B]      
Get:9 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [33.2 kB]
Get:10 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [4548 kB]
Get:11 https://packages.cloud.google.com/apt google-fast-socket InRelease [1071 B]
Get:12 http://archive.ubuntu.com/ubuntu focal-updates/restr

In [1]:
import pandas as pd

csv_file_path = '/kaggle/input/preprocessed-df/preprocessed_df (1).csv'
annotations = pd.read_csv(csv_file_path)

class_counts = annotations['Severity of the Crash'].value_counts()
print(class_counts)

Severity of the Crash
Moderate    622
Minor       348
Severe      244
moderate     70
severe       57
minor        31
fatal        25
Fatal        18
Name: count, dtype: int64


In [2]:
annotations['Severity of the Crash'] = annotations['Severity of the Crash'].str.lower()
class_counts = annotations['Severity of the Crash'].value_counts()
print(class_counts)

Severity of the Crash
moderate    692
minor       379
severe      301
fatal        43
Name: count, dtype: int64


In [None]:
import os
import pandas as pd

video_dir = '/kaggle/input/crash-1500/Crash-1500'
frames_dir = '/kaggle/working/frames' 

for idx, row in annotations.iterrows():
    video_number = row['Video Number']
    video_path = os.path.join(video_dir, f'{video_number:06}.mp4')
    output_dir = os.path.join(frames_dir, f'{video_number:06}')
    os.makedirs(output_dir, exist_ok=True)

    ffmpeg_command = f'ffmpeg -ss 00:00:01 -i "{video_path}" -vf fps=5 "{output_dir}/frame%04d.png"'
#     print(f"Running FFmpeg command: {ffmpeg_command}")
    os.system(ffmpeg_command)
    
    extracted_frames = os.listdir(output_dir)
    print(f"Extracted {len(extracted_frames)} frames for video {video_number}")

In [3]:
import os
frames_dir = '/kaggle/working/frames'
video_dirs = next(os.walk(frames_dir))[1]
num_videos = len(video_dirs)

print(f"Number of videos with extracted frames: {num_videos}")

Number of videos with extracted frames: 1415


In [4]:
import os
import glob
import pandas as pd
import torch
from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision import transforms
from PIL import Image

class VideoDataset(Dataset):
    def __init__(self, annotations_df, root_dir, transform=None, num_frames=20):
        self.annotations = annotations_df
        self.annotations['Severity of the Crash'] = self.annotations['Severity of the Crash'].str.lower()
        self.root_dir = root_dir
        self.transform = transform
        self.label_map = {'minor': 0, 'moderate': 1, 'severe': 2,'fatal': 3}
        self.num_frames = num_frames

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        video_number = self.annotations.iloc[idx]['Video Number']
        label = self.label_map[self.annotations.iloc[idx]['Severity of the Crash']]
        frame_dir = os.path.join(self.root_dir, f'{video_number:06}')
        frames = []
        # print(f"Looking for frames in: {frame_dir}")
        for frame_path in sorted(glob.glob(os.path.join(frame_dir, '*.png'))):
            frame = read_image(frame_path)
            frame = transforms.ToPILImage()(frame)

            if self.transform:
                frame = self.transform(frame)
            frames.append(frame)
        # print(f"Found {len(frames)} frames.")

        if not frames: 
            raise ValueError(f"No frames found for video {video_number} in directory {frame_dir}")
        frames = torch.stack(frames)
        return frames, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.RandomHorizontalFlip(),  
    transforms.RandomRotation(10), 
    transforms.ToTensor(),  
])

dataset = VideoDataset(annotations_df=annotations, root_dir='/kaggle/working/frames', transform=transform)

In [5]:
import numpy as np
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from collections import Counter

np.random.seed(21)
dataset_size = len(dataset)
indices = list(range(dataset_size))
np.random.shuffle(indices)

train_split = int(np.floor(0.7 * dataset_size))
val_split = int(np.floor(0.2 * dataset_size))
test_split = dataset_size - train_split - val_split

train_indices = indices[:train_split]
val_indices = indices[train_split:train_split + val_split]
test_indices = indices[train_split + val_split:]

print(f"Total dataset size: {dataset_size}")
print(f"Training set size: {len(train_indices)}")
print(f"Validation set size: {len(val_indices)}")
print(f"Test set size: {len(test_indices)}")

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

train_loader = DataLoader(dataset, batch_size=2, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=2, sampler=val_sampler)
test_loader = DataLoader(dataset, batch_size=2, sampler=test_sampler)

print("Total effective samples in training set:", len(train_loader) * train_loader.batch_size)

Total dataset size: 1415
Training set size: 990
Validation set size: 283
Test set size: 142
Total effective samples in training set: 990


In [6]:
import torch
import torch.nn as nn
from torchvision.models import mobilenet_v3_small
from transformers import TimesformerModel, TimesformerConfig
from torch.utils.data import DataLoader
import torch.optim as optim
import os

class VideoClassifier(nn.Module):
    def __init__(self, num_classes, num_frames, hidden_size=512, num_layers=1, bidirectional=True, num_heads=4):
        super().__init__()
        self.config = TimesformerConfig(num_frames=num_frames, num_classes=num_classes)
        self.timesformer = TimesformerModel(self.config)
        self.lstm = nn.LSTM(input_size=self.config.hidden_size, hidden_size=hidden_size,
                            num_layers=num_layers, bidirectional=bidirectional, batch_first=True)
        lstm_output_size = hidden_size * 2 if bidirectional else hidden_size
        self.multihead_attention = nn.MultiheadAttention(embed_dim=lstm_output_size, num_heads=num_heads, batch_first=True)
        self.classifier = nn.Linear(lstm_output_size, num_classes)

    def forward(self, x):
        transformer_output = self.timesformer(x).last_hidden_state
        lstm_output, _ = self.lstm(transformer_output)
        attn_output, _ = self.multihead_attention(lstm_output, lstm_output, lstm_output)
        context_vector = torch.sum(attn_output, dim=1)
        logits = self.classifier(context_vector)
        return logits

class DistilledCrashNet(nn.Module):
    def __init__(self, num_classes=4, num_frames=20):
        super().__init__()
        self.mobilenet = mobilenet_v3_small(pretrained=True)
        self.mobilenet.classifier = nn.Identity()
        self.temporal_pool = nn.AdaptiveAvgPool1d(32)
        self.lstm = nn.LSTM(input_size=576, hidden_size=128, num_layers=1, bidirectional=True)
        self.attention = nn.MultiheadAttention(embed_dim=256, num_heads=4)
        self.classifier = nn.Sequential(nn.Linear(256, 128), nn.ReLU(), nn.Linear(128, num_classes))

    def forward(self, x):
        batch_size, timesteps = x.shape[0], x.shape[1]
        spatial_features = []
        for t in range(timesteps):
            frame_features = self.mobilenet(x[:, t])
            spatial_features.append(frame_features)
        temporal_features = torch.stack(spatial_features, dim=1)
        temporal_features = self.temporal_pool(temporal_features.permute(0, 2, 1)).permute(0, 2, 1)
        lstm_out, _ = self.lstm(temporal_features)
        attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out)
        context = torch.mean(attn_out, dim=1)
        return self.classifier(context)

# ------------------------------
# 3. Distillation Trainer
# ------------------------------
class DistillationTrainer:
    def __init__(self, teacher, student, temp=3.0, alpha=0.7):
        self.teacher = teacher.eval()
        self.student = student.train()
        self.temp = temp
        self.alpha = alpha
        self.kl_loss = nn.KLDivLoss(reduction="batchmean")
        self.ce_loss = nn.CrossEntropyLoss()
        
    def compute_loss(self, student_logits, teacher_logits, labels):
        soft_loss = self.kl_loss(torch.log_softmax(student_logits / self.temp, dim=1),
                                 torch.softmax(teacher_logits / self.temp, dim=1)) * (self.temp ** 2)
        hard_loss = self.ce_loss(student_logits, labels)
        return self.alpha * soft_loss + (1 - self.alpha) * hard_loss

    def train_step(self, inputs, labels):
        with torch.no_grad():
            teacher_logits = self.teacher(inputs)
        student_logits = self.student(inputs)
        loss = self.compute_loss(student_logits, teacher_logits, labels)
        return loss

# ------------------------------
# 4. Load Teacher Model
# ------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
teacher = VideoClassifier(num_classes=4, num_frames=20).to(device)
teacher.load_state_dict(torch.load("/kaggle/input/timesformer_mha_bilstm/pytorch/default/1/best_model.pth", map_location=device))

# ------------------------------
# 5. Initialize Student & Distiller
# ------------------------------
student = DistilledCrashNet(num_classes=4, num_frames=20).to(device)
distiller = DistillationTrainer(teacher, student)
optimizer = torch.optim.AdamW(student.parameters(), lr=3e-4, weight_decay=1e-5)

# ------------------------------
# 6. Distillation Training Loop
# ------------------------------

Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth
100%|██████████| 9.83M/9.83M [00:00<00:00, 90.3MB/s]


In [7]:
# ------------------------------
# 6. Distillation Training Loop with Validation
# ------------------------------

best_val_loss = float('inf')
patience = 3
patience_counter = 0
num_epochs = 10

for epoch in range(num_epochs):
    # --- Training Phase ---
    student.train()
    total_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        loss = distiller.train_step(inputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        if batch_idx % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx}], Loss: {loss.item():.4f}")

    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] completed. Avg Train Loss: {avg_train_loss:.4f}")

    # --- Validation Phase ---
    student.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            teacher_logits = teacher(inputs)
            student_logits = student(inputs)
            loss = distiller.compute_loss(student_logits, teacher_logits, labels)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)
    print(f"Validation Loss after Epoch {epoch+1}: {avg_val_loss:.4f}")

    # --- Check for improvement ---
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(student.state_dict(), "/kaggle/working/best_student_model.pth")
        print("Best student model saved!")
    else:
        patience_counter += 1
        print(f"No improvement. Patience Counter: {patience_counter}/{patience}")
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

Epoch [1/10], Step [0], Loss: 3.8233
Epoch [1/10], Step [100], Loss: 1.8438
Epoch [1/10], Step [200], Loss: 0.1233
Epoch [1/10], Step [300], Loss: 0.2688
Epoch [1/10], Step [400], Loss: 0.2184
Epoch [1/10] completed. Avg Train Loss: 0.8343
Validation Loss after Epoch 1: 0.7920
Best student model saved!
Epoch [2/10], Step [0], Loss: 0.2707
Epoch [2/10], Step [100], Loss: 1.0479
Epoch [2/10], Step [200], Loss: 1.0045
Epoch [2/10], Step [300], Loss: 0.3561
Epoch [2/10], Step [400], Loss: 0.3118
Epoch [2/10] completed. Avg Train Loss: 0.7633
Validation Loss after Epoch 2: 0.7941
No improvement. Patience Counter: 1/3
Epoch [3/10], Step [0], Loss: 1.0825
Epoch [3/10], Step [100], Loss: 1.0703
Epoch [3/10], Step [200], Loss: 1.0207
Epoch [3/10], Step [300], Loss: 1.0453
Epoch [3/10], Step [400], Loss: 0.4552
Epoch [3/10] completed. Avg Train Loss: 0.7553
Validation Loss after Epoch 3: 0.8092
No improvement. Patience Counter: 2/3
Epoch [4/10], Step [0], Loss: 1.1088
Epoch [4/10], Step [100], L

In [9]:
# ------------------------------
# 7. Evaluate Student on Test Set
# ------------------------------
student.load_state_dict(torch.load("/kaggle/working/best_student_model.pth"))
student.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = student(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

student_acc = 100 * correct / total
print(f"Student Test Accuracy: {student_acc:.3f}%")

Student Test Accuracy: 57.042%
