In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# /kaggle/input/car-crash-dataset-ccd/CrashBest
# /kaggle/input/car-crash-dataset-ccd/CrashBest/C_000001_01.jpg
# /kaggle/input/crash-1500/Crash-1500/000001.mp4
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!sudo apt-get update
!sudo apt-get install ffmpeg

In [3]:
import pandas as pd

csv_file_path = '/kaggle/input/preprocessed-df/preprocessed_df (1).csv'
annotations = pd.read_csv(csv_file_path)

class_counts = annotations['Severity of the Crash'].value_counts()
print(class_counts)

Severity of the Crash
Moderate    622
Minor       348
Severe      244
moderate     70
severe       57
minor        31
fatal        25
Fatal        18
Name: count, dtype: int64


In [4]:
annotations['Severity of the Crash'] = annotations['Severity of the Crash'].str.lower()
class_counts = annotations['Severity of the Crash'].value_counts()
print(class_counts)

Severity of the Crash
moderate    692
minor       379
severe      301
fatal        43
Name: count, dtype: int64


In [None]:
import os
import pandas as pd

video_dir = '/kaggle/input/crash-1500/Crash-1500'
frames_dir = '/kaggle/working/frames' 

for idx, row in annotations.iterrows():
    video_number = row['Video Number']
    video_path = os.path.join(video_dir, f'{video_number:06}.mp4')
    output_dir = os.path.join(frames_dir, f'{video_number:06}')
    os.makedirs(output_dir, exist_ok=True)

    ffmpeg_command = f'ffmpeg -ss 00:00:01 -i "{video_path}" -vf fps=5 "{output_dir}/frame%04d.png"'
#     print(f"Running FFmpeg command: {ffmpeg_command}")
    os.system(ffmpeg_command)
    
    extracted_frames = os.listdir(output_dir)
    print(f"Extracted {len(extracted_frames)} frames for video {video_number}")

In [5]:
import os
frames_dir = '/kaggle/working/frames'
video_dirs = next(os.walk(frames_dir))[1]
num_videos = len(video_dirs)

print(f"Number of videos with extracted frames: {num_videos}")

Number of videos with extracted frames: 1415


In [6]:
import os
import glob
import pandas as pd
import torch
from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision import transforms
from PIL import Image

class VideoDataset(Dataset):
    def __init__(self, annotations_df, root_dir, transform=None, num_frames=20):
        self.annotations = annotations_df
        self.annotations['Severity of the Crash'] = self.annotations['Severity of the Crash'].str.lower()
        self.root_dir = root_dir
        self.transform = transform
        self.label_map = {'minor': 0, 'moderate': 1, 'severe': 2,'fatal': 3}
        self.num_frames = num_frames

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        video_number = self.annotations.iloc[idx]['Video Number']
        label = self.label_map[self.annotations.iloc[idx]['Severity of the Crash']]
        frame_dir = os.path.join(self.root_dir, f'{video_number:06}')
        frames = []
        # print(f"Looking for frames in: {frame_dir}")
        for frame_path in sorted(glob.glob(os.path.join(frame_dir, '*.png'))):
            frame = read_image(frame_path)
            frame = transforms.ToPILImage()(frame)

            if self.transform:
                frame = self.transform(frame)
            frames.append(frame)
        # print(f"Found {len(frames)} frames.")

        if not frames: 
            raise ValueError(f"No frames found for video {video_number} in directory {frame_dir}")
        frames = torch.stack(frames)
        return frames, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.RandomHorizontalFlip(),  
    transforms.RandomRotation(10), 
    transforms.ToTensor(),  
])

dataset = VideoDataset(annotations_df=annotations, root_dir='/kaggle/working/frames', transform=transform)

In [7]:
import torch
from transformers import VideoMAEConfig, VideoMAEModel
import torch.nn as nn
import torch.optim as optim

In [8]:
class VideoClassifier(nn.Module):
    def __init__(self, num_classes, num_frames):
        super().__init__()
        self.config = VideoMAEConfig(
            num_frames=num_frames,
            num_classes=num_classes
        )
        self.videomae = VideoMAEModel(self.config)
        self.classifier = nn.Linear(self.config.hidden_size, num_classes)

    def forward(self, x):
        outputs = self.videomae(x)
        logits = self.classifier(outputs.last_hidden_state[:, 0, :])
        return logits

model = VideoClassifier(num_classes=4, num_frames=20)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

VideoClassifier(
  (videomae): VideoMAEModel(
    (embeddings): VideoMAEEmbeddings(
      (patch_embeddings): VideoMAEPatchEmbeddings(
        (projection): Conv3d(3, 768, kernel_size=(2, 16, 16), stride=(2, 16, 16))
      )
    )
    (encoder): VideoMAEEncoder(
      (layer): ModuleList(
        (0-11): 12 x VideoMAELayer(
          (attention): VideoMAESdpaAttention(
            (attention): VideoMAESdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=False)
              (key): Linear(in_features=768, out_features=768, bias=False)
              (value): Linear(in_features=768, out_features=768, bias=False)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): VideoMAESelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): VideoMAEIntermediate(
            (dense): Linear(in_

In [9]:
class_counts = annotations['label'].value_counts()
print(class_counts)

label
1    692
0    379
2    301
3     43
Name: count, dtype: int64


In [10]:
import numpy as np
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from collections import Counter

np.random.seed(42)
dataset_size = len(dataset)
indices = list(range(dataset_size))
np.random.shuffle(indices)

train_split = int(np.floor(0.7 * dataset_size))
val_split = int(np.floor(0.2 * dataset_size))
test_split = dataset_size - train_split - val_split

train_indices = indices[:train_split]
val_indices = indices[train_split:train_split + val_split]
test_indices = indices[train_split + val_split:]

print(f"Total dataset size: {dataset_size}")
print(f"Training set size: {len(train_indices)}")
print(f"Validation set size: {len(val_indices)}")
print(f"Test set size: {len(test_indices)}")

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

train_loader = DataLoader(dataset, batch_size=4, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=4, sampler=val_sampler)
test_loader = DataLoader(dataset, batch_size=4, sampler=test_sampler)

print("Total effective samples in training set:", len(train_loader) * train_loader.batch_size)

Total dataset size: 1415
Training set size: 990
Validation set size: 283
Test set size: 142
Total effective samples in training set: 992


In [11]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 4
best_val_acc = 0.0  
save_path = '/kaggle/working/best_model.pth'  

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for data in train_loader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Here the model is evaluated on the validation set
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = 100 * correct / total

    print('[%d] loss: %.3f, val_acc: %.3f' % (epoch + 1, running_loss / len(train_loader), val_acc))
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), save_path)
        print(f"New best model saved with val_acc: {best_val_acc:.3f}")

[1] loss: 5.726, val_acc: 29.682
New best model saved with val_acc: 29.682
[2] loss: 1.477, val_acc: 50.177
New best model saved with val_acc: 50.177
[3] loss: 1.333, val_acc: 38.869
[4] loss: 1.336, val_acc: 33.569


In [12]:
model.load_state_dict(torch.load('/kaggle/working/best_model.pth'))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
test_acc = 100 * correct / total
print('Test Accuracy: %.3f' % test_acc)

Test Accuracy: 52.113


In [13]:
import torch
from sklearn.metrics import precision_score, recall_score, f1_score

def calculate_metrics(model, data_loader, device):
    model.eval()
    correct_predictions = 0
    custom_correct = 0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for data in data_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # The probabilities from the outputs
            probabilities = torch.softmax(outputs, dim=1)
            top_probs, top_classes = torch.topk(probabilities, 2, dim=1)

            p1 = top_classes[:, 0]  
            p2 = top_classes[:, 1]  

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(p1.cpu().numpy())

            for i in range(len(labels)):
                true_label = labels[i].item()
                predicted1 = p1[i].item()
                predicted2 = p2[i].item()
                
                if abs(predicted1-predicted2) == 1:
                    if(predicted1==true_label or predicted2==true_label):  
                        custom_correct += 1 
                else:
                    if predicted1 == true_label:
                        custom_correct += 1

    total_samples = len(all_labels)
    custom_accuracy = custom_correct / total_samples
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    print(f"Custom Accuracy: {custom_accuracy * 100:.2f}%")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")

In [14]:
model.load_state_dict(torch.load(save_path))
calculate_metrics(model, test_loader, device)

Custom Accuracy: 74.65%
Precision: 0.27
Recall: 0.52
F1 Score: 0.36
