# Import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Define dataset path
dataset_path = '/content/drive/My Drive/car_crash_dataset_600/train'

# List the contents of the dataset directory
label_types = os.listdir(dataset_path)
print(label_types)

['normal', 'crash']


# Preparing Training Data

In [None]:
rooms = []

# Iterate over items (labels) in dataset_path
for item in os.listdir(dataset_path):
    # Get all the file names in the current label directory
    all_rooms = os.listdir(os.path.join(dataset_path, item))

    # Add file names to the rooms list along with their respective labels
    for room in all_rooms:
        rooms.append((item, os.path.join(dataset_path, item, room)))

# Build a DataFrame from the list
train_df = pd.DataFrame(data=rooms, columns=['tag', 'video_name'])
print(train_df.head())
print(train_df.tail())


      tag                                         video_name
0  normal  /content/drive/My Drive/car_crash_dataset_600/...
1  normal  /content/drive/My Drive/car_crash_dataset_600/...
2  normal  /content/drive/My Drive/car_crash_dataset_600/...
3  normal  /content/drive/My Drive/car_crash_dataset_600/...
4  normal  /content/drive/My Drive/car_crash_dataset_600/...
       tag                                         video_name
475  crash  /content/drive/My Drive/car_crash_dataset_600/...
476  crash  /content/drive/My Drive/car_crash_dataset_600/...
477  crash  /content/drive/My Drive/car_crash_dataset_600/...
478  crash  /content/drive/My Drive/car_crash_dataset_600/...
479  crash  /content/drive/My Drive/car_crash_dataset_600/...


In [None]:
df = train_df.loc[:,['video_name','tag']]
df
df.to_csv('train.csv')

# Preparing Test Data

In [None]:
import os
import pandas as pd

# Define dataset path for testing data
test_dataset_path = '/content/drive/MyDrive/car_crash_dataset_600/test'

# List the contents of the 'test' directory
dataset_path = os.listdir(test_dataset_path)
print(dataset_path)

# Get types of activities found
room_types = os.listdir(test_dataset_path)
print("Types of classes found:", len(dataset_path))

rooms = []

# Iterate over items (labels) in the test dataset path
for item in dataset_path:
    # Get all the file names in the current label directory
    all_rooms = os.listdir(os.path.join(test_dataset_path, item))

    # Add file names to the rooms list along with their respective labels
    for room in all_rooms:
        rooms.append((item, os.path.join(test_dataset_path, item, room)))

# Build a DataFrame from the list
test_df = pd.DataFrame(data=rooms, columns=['tag', 'video_name'])
print(test_df.head())
print(test_df.tail())

# Save the DataFrame to a CSV file
test_df.to_csv('test.csv', index=False)


['normal', 'crash']
Types of classes found: 2
      tag                                         video_name
0  normal  /content/drive/MyDrive/car_crash_dataset_600/t...
1  normal  /content/drive/MyDrive/car_crash_dataset_600/t...
2  normal  /content/drive/MyDrive/car_crash_dataset_600/t...
3  normal  /content/drive/MyDrive/car_crash_dataset_600/t...
4  normal  /content/drive/MyDrive/car_crash_dataset_600/t...
       tag                                         video_name
115  crash  /content/drive/MyDrive/car_crash_dataset_600/t...
116  crash  /content/drive/MyDrive/car_crash_dataset_600/t...
117  crash  /content/drive/MyDrive/car_crash_dataset_600/t...
118  crash  /content/drive/MyDrive/car_crash_dataset_600/t...
119  crash  /content/drive/MyDrive/car_crash_dataset_600/t...


In [None]:
!pip install tensorflow-docs

Collecting tensorflow-docs
  Downloading tensorflow_docs-2024.2.5.73858-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.5/182.5 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting astor (from tensorflow-docs)
  Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Installing collected packages: astor, tensorflow-docs
Successfully installed astor-0.8.1 tensorflow-docs-2024.2.5.73858


In [None]:
from tensorflow_docs.vis import embed
from tensorflow import keras
from imutils import paths

import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import imageio
import cv2
import os

# Data preparation

In [None]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

print(f"Total videos for training: {len(train_df)}")
print(f"Total videos for testing: {len(test_df)}")


train_df.sample(10)

Total videos for training: 480
Total videos for testing: 120


Unnamed: 0.1,Unnamed: 0,video_name,tag
334,334,/content/drive/My Drive/car_crash_dataset_600/...,crash
123,123,/content/drive/My Drive/car_crash_dataset_600/...,normal
317,317,/content/drive/My Drive/car_crash_dataset_600/...,crash
156,156,/content/drive/My Drive/car_crash_dataset_600/...,normal
458,458,/content/drive/My Drive/car_crash_dataset_600/...,crash
40,40,/content/drive/My Drive/car_crash_dataset_600/...,normal
271,271,/content/drive/My Drive/car_crash_dataset_600/...,crash
418,418,/content/drive/My Drive/car_crash_dataset_600/...,crash
407,407,/content/drive/My Drive/car_crash_dataset_600/...,crash
280,280,/content/drive/My Drive/car_crash_dataset_600/...,crash


# Feed the videos to a network:


In [None]:
# The following two methods are taken from this tutorial:
# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub
IMG_SIZE = 224


def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]


def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames)

   ### Feature Extraction

In [None]:
def build_feature_extractor():
    feature_extractor = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = keras.applications.inception_v3.preprocess_input

    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)

    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")


feature_extractor = build_feature_extractor()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


### Label Encoding
StringLookup layer encode the class labels as integers.

In [None]:
label_processor = keras.layers.StringLookup(num_oov_indices=0, vocabulary=np.unique(train_df["tag"]))
print(label_processor.get_vocabulary())

labels = train_df["tag"].values
labels = label_processor(labels[..., None]).numpy()
labels

['crash', 'normal']


array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
    

Finally, we can put all the pieces together to create our data processing utility.

In [None]:
#Define hyperparameters

IMG_SIZE = 224
BATCH_SIZE = 64
EPOCHS = 10

MAX_SEQ_LENGTH = 50
NUM_FEATURES = 2048

In [15]:
def prepare_all_videos(df, root_dir):
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()

    ##take all classlabels from train_df column named 'tag' and store in labels
    labels = df["tag"].values

    #convert classlabels to label encoding
    labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` and `frame_features` are what we will feed to our sequence model.
    # `frame_masks` will contain a bunch of booleans denoting if a timestep is
    # masked with padding or not.
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool") # 145,20
    frame_features = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32") #145,20,2048

    # For each video.
    for idx, path in enumerate(video_paths):
        # Gather all its frames and add a batch dimension.
        frames = load_video(os.path.join(root_dir, path))
        frames = frames[None, ...]

        # Initialize placeholders to store the masks and features of the current video.
        temp_frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
        temp_frame_features = np.zeros(
            shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            video_length = batch.shape[0]
            length = min(MAX_SEQ_LENGTH, video_length)
            for j in range(length):
                temp_frame_features[i, j, :] = feature_extractor.predict(
                    batch[None, j, :]
                )
            temp_frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

        frame_features[idx,] = temp_frame_features.squeeze()
        frame_masks[idx,] = temp_frame_mask.squeeze()

    return (frame_features, frame_masks), labels


train_data, train_labels = prepare_all_videos(train_df, "train")
test_data, test_labels = prepare_all_videos(test_df, "test")

print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")



print(f"train_labels in train set: {train_labels.shape}")

print(f"test_labels in train set: {test_labels.shape}")

# MAX_SEQ_LENGTH = 50, NUM_FEATURES = 2048. We have defined this above under hyper parameters

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Frame features in train set: (480, 50, 2048)
Frame masks in train set: (480, 50)
train_labels in train set: (480, 1)
test_labels in train set: (120, 1)


# The sequence model
Now, we can feed this data to a sequence model consisting of recurrent layers like GRU.

In [16]:
# Utility for our sequence model.
def get_sequence_model():
    class_vocab = label_processor.get_vocabulary()

    frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # Refer to the following tutorial to understand the significance of using `mask`:
    # https://keras.io/api/layers/recurrent_layers/gru/
    x = keras.layers.GRU(16, return_sequences=True)(frame_features_input, mask=mask_input)
    x = keras.layers.GRU(8)(x)
    x = keras.layers.Dropout(0.4)(x)
    x = keras.layers.Dense(8, activation="relu")(x)
    output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

    rnn_model = keras.Model([frame_features_input, mask_input], output)

    rnn_model.compile(
        loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return rnn_model

EPOCHS = 30
# Utility for running experiments.
def run_experiment():
    filepath = "./tmp/video_classifier"
    checkpoint = keras.callbacks.ModelCheckpoint(
        filepath, save_weights_only=True, save_best_only=True, verbose=1
    )

    seq_model = get_sequence_model()
    history = seq_model.fit(
        [train_data[0], train_data[1]],
        train_labels,
        validation_split=0.3,
        epochs=EPOCHS,
        callbacks=[checkpoint],
    )

    seq_model.load_weights(filepath)
    _, accuracy = seq_model.evaluate([test_data[0], test_data[1]], test_labels)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, seq_model


_, sequence_model = run_experiment()

Epoch 1/30
Epoch 1: val_loss improved from inf to 0.91206, saving model to ./tmp/video_classifier
Epoch 2/30
Epoch 2: val_loss improved from 0.91206 to 0.81296, saving model to ./tmp/video_classifier
Epoch 3/30
Epoch 3: val_loss improved from 0.81296 to 0.65906, saving model to ./tmp/video_classifier
Epoch 4/30
Epoch 4: val_loss did not improve from 0.65906
Epoch 5/30
Epoch 5: val_loss improved from 0.65906 to 0.58736, saving model to ./tmp/video_classifier
Epoch 6/30
Epoch 6: val_loss improved from 0.58736 to 0.57672, saving model to ./tmp/video_classifier
Epoch 7/30
Epoch 7: val_loss improved from 0.57672 to 0.49544, saving model to ./tmp/video_classifier
Epoch 8/30
Epoch 8: val_loss improved from 0.49544 to 0.46726, saving model to ./tmp/video_classifier
Epoch 9/30
Epoch 9: val_loss did not improve from 0.46726
Epoch 10/30
Epoch 10: val_loss did not improve from 0.46726
Epoch 11/30
Epoch 11: val_loss did not improve from 0.46726
Epoch 12/30
Epoch 12: val_loss did not improve from 0.

# Inference

In [23]:
def prepare_single_video(frames):
    frames = frames[None, ...]
    frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
    frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")

    for i, batch in enumerate(frames):
        video_length = batch.shape[0]
        length = min(MAX_SEQ_LENGTH, video_length)
        for j in range(length):
            frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
        frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

    return frame_features, frame_mask


def sequence_prediction(path):
    class_vocab = label_processor.get_vocabulary()

    frames = load_video(os.path.join("test", path))
    frame_features, frame_mask = prepare_single_video(frames)
    probabilities = sequence_model.predict([frame_features, frame_mask])[0]

    for i in np.argsort(probabilities)[::-1]:
        print(f"  {class_vocab[i]}: {probabilities[i] * 100:5.2f}%")
    return frames

test_video = np.random.choice(test_df["video_name"].values.tolist())
print(f"Test video path: {test_video}")

test_frames = sequence_prediction(test_video)


Test video path: /content/drive/MyDrive/car_crash_dataset_600/test/crash/crash_67.mp4
  crash: 93.47%
  normal:  6.53%


In [24]:
from IPython.display import HTML

HTML("""
    <video alt="test" width="520" height="440" controls>
        <source src="/content/drive/MyDrive/car_dataset_small/test/crash/crash_45.mp4" type="video/mp4" style="height:300px;width:300px">
    </video>
""")



In [25]:
# Save the model's weights to a file on Google Drive
sequence_model.save_weights("model_weights.h5")

# Save the model's architecture as JSON
model_json = sequence_model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

