<a href="https://colab.research.google.com/github/HMNS19/Mini-Project-Crash-Prediction/blob/main/Crash%20Pred3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Authenticate this session with Kaggle so the competition files can be
# downloaded by the next cell.
import kagglehub
kagglehub.login()


In [None]:
# Download the competition files and keep the path that kagglehub returns.
# NOTE(review): this path is not used afterwards -- the loading cell hard-codes
# DATA_DIR to "/kaggle/input/nexar-collision-prediction" instead.
nexar_collision_prediction_path = kagglehub.competition_download('nexar-collision-prediction')
print('Data source import complete.')


In [None]:

import numpy as np
import pandas as pd
import os
import cv2
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping


# Root folder of the competition data; the metadata CSVs are read from here.
DATA_DIR = "/kaggle/input/nexar-collision-prediction"

# Load both metadata tables in one pass (train first, then test).
train_df, test_df = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in ("train.csv", "test.csv")
)

# Quick sanity check: table sizes and the first few training rows.
print(f"Training videos: {len(train_df)}, Test videos: {len(test_df)}")
print(train_df.head(3))

2025-05-11 17:17:50.625322: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746983870.827813      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746983870.882368      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Training videos: 1500, Test videos: 1344
     id  time_of_event  time_of_alert  target
0  1924            NaN            NaN       0
1   822           19.5         18.633       1
2  1429            NaN            NaN       0


  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


In [None]:

# Build the video file name for every row from its numeric id.
#
# The names are zero-padded to 5 digits (id 822 -> "00822.mp4"). The earlier
# 4-digit padding ("0822.mp4") produced paths that could not be opened, so
# feature extraction silently fell back to all-zero features for every video.
# NOTE(review): 5 digits is the naming used by the competition's video folders
# as far as I can tell -- the check below reports any file that is still missing.
train_df['filename'] = train_df['id'].apply(lambda x: f"{int(x):05d}.mp4")
test_df['filename']  = test_df['id'].apply(lambda x: f"{int(x):05d}.mp4")

# Surface path problems here instead of letting them turn into zero features later.
for split_name, split_df in (("train", train_df), ("test", test_df)):
    n_missing = sum(
        not os.path.exists(os.path.join(DATA_DIR, split_name, fname))
        for fname in split_df['filename']
    )
    if n_missing:
        print(f"WARNING: {n_missing}/{len(split_df)} {split_name} videos not found under "
              f"{os.path.join(DATA_DIR, split_name)} -- check the file name pattern.")

print(train_df['target'].value_counts())
train_df.head(3)

target
0    750
1    750
Name: count, dtype: int64


  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,id,time_of_event,time_of_alert,target,filename
0,1924,,,0,1924.mp4
1,822,19.5,18.633,1,0822.mp4
2,1429,,,0,1429.mp4


In [None]:
from sklearn.model_selection import train_test_split


# Hold out 20% of the labelled videos for validation, stratified on the target
# so both splits keep the same class ratio.
# NOTE: `train_df` is overwritten with the training part, so re-running this
# cell splits the already-reduced frame again.
train_df, val_df = train_test_split(
    train_df,
    stratify=train_df['target'],
    random_state=42,
    test_size=0.2,
)

print(f"Train split: {len(train_df)} videos, Validation split: {len(val_df)} videos")
print("Class balance in train ->", train_df['target'].mean())
print("Class balance in val   ->", val_df['target'].mean())

Train split: 1200 videos, Validation split: 300 videos
Class balance in train -> 0.5
Class balance in val   -> 0.5


In [None]:

# Frozen ResNet50 backbone used purely as a per-frame feature extractor
# (global-average-pooled output, no classification head).
#
# The backbone must start from pretrained ImageNet weights: with `weights=None`
# it is randomly initialised, and because it is frozen right below it would
# never learn, so the extracted features would carry no usable signal.
# NOTE: the first run downloads the weights, so the environment needs internet access.
base_cnn = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Inference only -- the backbone is never fine-tuned in this notebook.
base_cnn.trainable = False


def sample_uniform_frames(video_path, num_frames=15, size=(224, 224)):
    """Sample evenly spaced RGB frames from a video file.

    Args:
        video_path: Path of the video to read.
        num_frames: Number of frame positions to sample, spread uniformly
            between the first and the last frame.
        size: Target ``(width, height)`` every frame is resized to.

    Returns:
        A ``uint8`` array of shape ``(n, height, width, 3)`` in RGB order.
        ``n`` equals ``num_frames`` when every read succeeds, is smaller when
        a read fails part-way (sampling stops at the first failed read), and
        is 0 when the video cannot be opened or reports no frames. The result
        is always an ndarray, so callers can rely on ``len(result)``.
    """
    # One consistent "nothing could be read" value for every failure path.
    empty = np.empty((0, size[1], size[0], 3), dtype=np.uint8)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return empty

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0 or num_frames <= 0:
            return empty

        # Built-in `int` instead of `np.int`: the alias was removed from NumPy
        # and raises AttributeError on current releases.
        indices = np.linspace(0, frame_count - 1, num=num_frames, dtype=int)

        frames = []
        for idx in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if not ret:
                # Stop at the first unreadable position; the caller sees fewer frames.
                break

            frame = cv2.resize(frame, size)
            # OpenCV decodes to BGR; convert to RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Release the capture handle even if decoding raised.
        cap.release()

    if not frames:
        return empty
    return np.array(frames)


# Smoke test: try the sampler on the first training video.
sample_vid = train_df.iloc[0]
sample_path = os.path.join(DATA_DIR, "train", sample_vid['filename'])
frames = sample_uniform_frames(sample_path, num_frames=5)

if len(frames) == 0:
    print(f"No frames extracted for video {sample_vid['id']} (might be a corrupted video).")
else:
    print(f"Sampled {len(frames)} frames of shape {frames[0].shape} from video {sample_vid['id']}")

I0000 00:00:1746983883.579833      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


No frames extracted for video 780 (might be a corrupted video).


In [None]:

def extract_features_dataframe(df, subdir="train", num_frames=15):
    """Turn every video listed in ``df`` into a fixed-length sequence of CNN features.

    Args:
        df: DataFrame with a ``filename`` column naming one video per row.
        subdir: Folder below ``DATA_DIR`` that holds the videos.
        num_frames: Number of frames sampled per video, which is also the
            sequence length of the result.

    Returns:
        ``float32`` array of shape ``(len(df), num_frames, feature_dim)`` where
        ``feature_dim`` is the output width of ``base_cnn``. Videos that cannot
        be read yield all-zero rows; videos that deliver fewer than
        ``num_frames`` frames are zero-padded at the end, so the result is
        always rectangular (a short clip used to make the final ``np.array``
        call fail on a ragged list).
    """
    feature_dim = base_cnn.output_shape[-1]
    # Pre-filled with zeros: unreadable or short videos keep zeros where no frame exists.
    X = np.zeros((len(df), num_frames, feature_dim), dtype=np.float32)

    for i, fname in enumerate(df['filename']):
        video_path = os.path.join(DATA_DIR, subdir, fname)
        frames = sample_uniform_frames(video_path, num_frames=num_frames)
        if len(frames) == 0:
            continue

        frames = preprocess_input(np.asarray(frames, dtype=np.float32))
        features = base_cnn.predict(frames, batch_size=num_frames, verbose=0)
        features = features[:num_frames]
        X[i, :len(features)] = features

    return X

# Extract features for the training split first, then the validation split.
X_train, X_val = (extract_features_dataframe(split_df) for split_df in (train_df, val_df))

# Binary labels aligned row-for-row with the feature arrays above.
y_train, y_val = train_df['target'].values, val_df['target'].values

print("Feature extraction complete:")
print(f"X_train shape: {X_train.shape}, X_val shape: {X_val.shape}")

Feature extraction complete:
X_train shape: (1200, 15, 2048), X_val shape: (300, 15, 2048)


In [None]:

# Sequence classifier over the per-frame features: a bidirectional LSTM that
# summarises the 15 x 2048 sequence, followed by a single sigmoid output unit.
model = Sequential()
model.add(
    Bidirectional(
        LSTM(128, dropout=0.5, return_sequences=False),
        input_shape=(15, 2048),
    )
)
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

  super().__init__(**kwargs)


In [None]:

# Stop once validation loss has not improved for 3 epochs and restore the
# weights of the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# Train on the extracted features, validating on the held-out split each epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
    verbose=2,
)

Epoch 1/20


I0000 00:00:1746983891.795440      90 cuda_dnn.cc:529] Loaded cuDNN version 90300


38/38 - 6s - 160ms/step - accuracy: 0.4900 - loss: 0.6943 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 2/20
38/38 - 0s - 10ms/step - accuracy: 0.5000 - loss: 0.6934 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 3/20
38/38 - 0s - 10ms/step - accuracy: 0.5017 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 4/20
38/38 - 0s - 10ms/step - accuracy: 0.5000 - loss: 0.6933 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 5/20
38/38 - 0s - 10ms/step - accuracy: 0.4850 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 6/20
38/38 - 0s - 10ms/step - accuracy: 0.4767 - loss: 0.6933 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 7/20
38/38 - 0s - 10ms/step - accuracy: 0.5000 - loss: 0.6935 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 8/20
38/38 - 0s - 10ms/step - accuracy: 0.4783 - loss: 0.6933 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 9/20
38/38 - 0s - 10ms/step - accuracy: 0.4983 - loss: 0.6933 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 10/20
38/38 - 

In [None]:

from sklearn.metrics import classification_report

# Overall validation accuracy as reported by Keras.
val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation accuracy: {val_acc:.4f}")

# Per-class precision/recall: take the single sigmoid column and threshold at 0.5.
val_preds = model.predict(X_val)[:, 0]
val_preds_binary = (val_preds >= 0.5).astype(int)
report = classification_report(y_val, val_preds_binary, digits=4)
print(report)

Validation accuracy: 0.5000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step
              precision    recall  f1-score   support

           0     0.5000    1.0000    0.6667       150
           1     0.0000    0.0000    0.0000       150

    accuracy                         0.5000       300
   macro avg     0.2500    0.5000    0.3333       300
weighted avg     0.2500    0.5000    0.3333       300



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:

def extract_features_test(df, num_frames=15):
    """Extract fixed-length CNN feature sequences for the videos in the ``test`` folder.

    Mirrors ``extract_features_dataframe`` but reads from ``DATA_DIR/test``.

    Args:
        df: DataFrame with a ``filename`` column naming one test video per row.
        num_frames: Number of frames sampled per video, which is also the
            sequence length of the result.

    Returns:
        ``float32`` array of shape ``(len(df), num_frames, feature_dim)`` where
        ``feature_dim`` is the output width of ``base_cnn``. Unreadable videos
        yield all-zero rows; videos that deliver fewer than ``num_frames``
        frames are zero-padded at the end so the result is always rectangular
        (a short clip used to make the final ``np.array`` call fail).
    """
    feature_dim = base_cnn.output_shape[-1]
    # Pre-filled with zeros: unreadable or short videos keep zeros where no frame exists.
    X = np.zeros((len(df), num_frames, feature_dim), dtype=np.float32)

    for i, fname in enumerate(df['filename']):
        video_path = os.path.join(DATA_DIR, "test", fname)
        frames = sample_uniform_frames(video_path, num_frames=num_frames)
        if len(frames) == 0:
            continue

        frames = preprocess_input(np.asarray(frames, dtype=np.float32))
        features = base_cnn.predict(frames, batch_size=num_frames, verbose=0)
        features = features[:num_frames]
        X[i, :len(features)] = features

    return X

# Extract features for every test video.
X_test = extract_features_test(test_df)
print(f"X_test shape: {X_test.shape}")

# The model has one sigmoid output; keep that single column as a flat
# vector of collision probabilities.
test_preds = model.predict(X_test)[:, 0]

X_test shape: (1344, 15, 2048)
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [None]:

# Assemble the submission table: one row per test video with its id and the
# predicted collision probability.
submission = test_df[['id']].copy()
submission['target'] = test_preds

submission.to_csv('submission.csv', index=False)
submission.head(10)

Unnamed: 0,id,target
0,204,0.499743
1,30,0.499743
2,146,0.499743
3,20,0.499743
4,511,0.499743
5,261,0.499743
6,346,0.499743
7,545,0.499743
8,492,0.499743
9,299,0.499743
