# Importar dependencias

In [1]:
from tensorflow_docs.vis import embed
from tensorflow import keras
from imutils import paths
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import imageio
import cv2
import os

2022-06-09 01:11:26.484564: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-06-09 01:11:26.484598: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Hiperparámetros

In [2]:
# Side length, in pixels, of the square that every video frame is resized to;
# also used as the height/width of the feature extractor's input.
IMG_SIZE = 224
# NOTE(review): not referenced by any other cell visible in this notebook.
BATCH_SIZE = 64
# Number of epochs passed to `fit` when training the sequence model.
EPOCHS = 20

# Number of frame slots reserved per video in the feature and mask arrays;
# longer videos are truncated to this many frames, shorter ones leave slots empty.
MAX_SEQ_LENGTH = 900
# Length of the feature vector stored for each frame slot.
NUM_FEATURES = 2048

# Cargando datos de train y test

In [3]:
# Load the train/test split definitions from CSV files in the working directory.
# Later cells read the "video_name" and "tag" columns from these frames.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

print(f"Total videos for training: {len(train_df)}")
print(f"Total videos for testing: {len(test_df)}")
# Bare last expression: the notebook displays the type of `test_df`.
type(test_df)

Total videos for training: 48
Total videos for testing: 4


pandas.core.frame.DataFrame

# Funciones para preprocesamiento del video

In [4]:
def crop_center_square(frame):
    """Return the largest centered square region of `frame`.

    The first two axes of `frame` are treated as (rows, columns); any further
    axes are left untouched. The result is a slice of the input, not a copy.
    """
    height, width = frame.shape[:2]
    side = min(height, width)
    # Offsets are computed with floor division on each half separately, so an
    # odd leftover pixel ends up on the bottom/right side of the crop.
    top = height // 2 - side // 2
    left = width // 2 - side // 2
    return frame[top : top + side, left : left + side]


def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    """Read a video file into an array of center-cropped, resized frames.

    Args:
        path: Location of the video file handed to `cv2.VideoCapture`.
        max_frames: Stop after this many frames have been collected. With the
            default of 0 the limit never triggers, so every frame is read.
        resize: (width, height) passed to `cv2.resize` for each cropped frame.

    Returns:
        `np.array` built from the list of processed frames. If no frame could
        be read, the list is empty and so is the resulting array.
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        ok, image = capture.read()
        while ok:
            square = crop_center_square(image)
            resized = cv2.resize(square, resize)
            # Reverse the channel axis (OpenCV decodes frames as BGR, so this
            # yields RGB ordering).
            collected.append(resized[:, :, [2, 1, 0]])

            if len(collected) == max_frames:
                break
            ok, image = capture.read()
    finally:
        # Always free the capture handle, even if frame processing raised.
        capture.release()
    return np.array(collected)

# Función para extracción de características con Inception

In [5]:
def build_feature_extractor():
    """Build a Keras model that turns an image batch into InceptionV3 features.

    The model takes inputs of shape (IMG_SIZE, IMG_SIZE, 3), applies the
    InceptionV3 `preprocess_input` transform, and runs the result through an
    ImageNet-weighted InceptionV3 with the classification head removed and
    average pooling enabled.

    Returns:
        A `keras.Model` named "feature_extractor".
    """
    image_shape = (IMG_SIZE, IMG_SIZE, 3)
    backbone = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=image_shape,
    )

    raw_frames = keras.Input(image_shape)
    # Preprocessing is part of the model, so callers feed frames as loaded.
    scaled_frames = keras.applications.inception_v3.preprocess_input(raw_frames)
    return keras.Model(raw_frames, backbone(scaled_frames), name="feature_extractor")


# Build the extractor once at notebook level; the video-preparation functions
# in later cells call `feature_extractor.predict` on the loaded frames.
feature_extractor = build_feature_extractor()

2022-06-09 01:12:21.476081: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-09 01:12:21.477340: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/stiven/python/python/lib/python3.8/site-packages/cv2/../../lib64:
2022-06-09 01:12:21.477517: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/stiven/python/python/lib/python3.8/site-packages/cv2/../../lib64:
2022-06-09 01:12:21.477689: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; 

In [6]:
# Map the string class tags to integer indices. The vocabulary is the sorted
# set of unique tags in the training CSV; num_oov_indices=0 sets aside no
# index for tags outside that vocabulary.
label_processor = keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(train_df["tag"])
)
print(label_processor.get_vocabulary())

['fight', 'normal']


# Función para preparar los videos
En esta función se realiza el preprocesamiento de los videos usando la función de extracción de características anterior.

In [7]:
def prepare_all_videos(df, root_dir):
    """Extract per-frame features and validity masks for every video in `df`.

    Args:
        df: DataFrame with a "video_name" column (file name of each video) and
            a "tag" column (string class label).
        root_dir: Currently unused. Videos are always read from the hard-coded
            "all_data" directory; the parameter is kept so existing calls keep
            working.

    Returns:
        A pair `((frame_features, frame_masks), labels)` where
        - `frame_features` is float32 with shape
          (len(df), MAX_SEQ_LENGTH, NUM_FEATURES),
        - `frame_masks` is bool with shape (len(df), MAX_SEQ_LENGTH) and is
          True for every slot that holds features of a real frame,
        - `labels` is `label_processor` applied to the tags as a column
          vector, converted to a NumPy array.
        Videos longer than MAX_SEQ_LENGTH are truncated; slots past the end of
        a shorter video stay zero / False.
    """
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()
    labels = df["tag"].values
    labels = label_processor(labels[..., None]).numpy()

    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    for idx, path in enumerate(video_paths):
        frames = load_video(os.path.join("all_data", path))
        length = min(MAX_SEQ_LENGTH, frames.shape[0])
        if length == 0:
            # No readable frames: leave this row zero-filled and fully masked.
            continue

        # One batched predict per video instead of one call per frame; the
        # result has one feature row per frame and fills the leading slots.
        frame_features[idx, :length] = feature_extractor.predict(
            frames[:length], verbose=0
        )
        frame_masks[idx, :length] = True

    return (frame_features, frame_masks), labels


# Extract features for both splits. This is the expensive step of the notebook:
# every frame of every listed video goes through the feature extractor.
# NOTE: prepare_all_videos does not read its second argument; videos for both
# splits are loaded from the "all_data" directory.
train_data, train_labels = prepare_all_videos(train_df, "train")
test_data, test_labels = prepare_all_videos(test_df, "test")

# Each *_data value is a (frame_features, frame_masks) pair.
print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")

Frame features in train set: (48, 900, 2048)
Frame masks in train set: (48, 900)


# Modelo y ejecución de un experimento

In [8]:
def get_sequence_model():
    """Build and compile the GRU classifier that consumes per-frame features.

    The model has two inputs: a (MAX_SEQ_LENGTH, NUM_FEATURES) feature
    sequence and a boolean (MAX_SEQ_LENGTH,) mask. It outputs a softmax over
    the classes in `label_processor`'s vocabulary.

    Returns:
        A compiled `keras.Model` using sparse categorical cross-entropy, the
        "adam" optimizer, and accuracy as its metric.
    """
    class_vocab = label_processor.get_vocabulary()

    frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # The mask is handed to the first GRU explicitly so that it can tell real
    # frame slots from empty ones.
    x = keras.layers.GRU(16, return_sequences=True)(
        frame_features_input, mask=mask_input
    )
    x = keras.layers.GRU(8)(x)
    x = keras.layers.Dropout(0.5)(x)
    x = keras.layers.Dense(8, activation="relu")(x)
    output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

    rnn_model = keras.Model([frame_features_input, mask_input], output)

    # The optimizer is selected by name, so no optimizer class import is needed.
    rnn_model.compile(
        loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return rnn_model


def run_experiment():
    """Train the sequence model, restore its best checkpoint, and evaluate it.

    Reads the notebook-level `train_data`, `train_labels`, `test_data`,
    `test_labels` and `EPOCHS`. The last 30% of the training samples are held
    out for validation, and weights are checkpointed to
    "/tmp/video_classifier" only when the monitored validation metric improves.
    After training, those checkpointed weights are loaded back before the
    test-set evaluation, whose accuracy is printed.

    Returns:
        `(history, model)`: the object returned by `fit` and the trained model
        carrying the restored checkpoint weights.
    """
    weights_path = "/tmp/video_classifier"
    best_weights_saver = keras.callbacks.ModelCheckpoint(
        weights_path, save_weights_only=True, save_best_only=True, verbose=1
    )

    model = get_sequence_model()
    train_features, train_masks = train_data[0], train_data[1]
    fit_history = model.fit(
        [train_features, train_masks],
        train_labels,
        validation_split=0.3,
        epochs=EPOCHS,
        callbacks=[best_weights_saver],
    )

    # Evaluate with the best checkpoint rather than the last-epoch weights.
    model.load_weights(weights_path)
    test_inputs = [test_data[0], test_data[1]]
    _, test_accuracy = model.evaluate(test_inputs, test_labels)
    print(f"Test accuracy: {round(test_accuracy * 100, 2)}%")

    return fit_history, model


# Train and evaluate; the training history is discarded and only the trained
# model is kept, because `sequence_prediction` reads `sequence_model`.
_, sequence_model = run_experiment()

Epoch 1/20


2022-06-09 02:07:07.393449: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 243302400 exceeds 10% of free system memory.
2022-06-09 02:07:14.314036: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 235929600 exceeds 10% of free system memory.
2022-06-09 02:07:14.314090: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 235929600 exceeds 10% of free system memory.
2022-06-09 02:07:14.651041: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 235929600 exceeds 10% of free system memory.
2022-06-09 02:07:14.652125: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 235929600 exceeds 10% of free system memory.


Epoch 1: val_loss improved from inf to 0.61558, saving model to /tmp/video_classifier
Epoch 2/20
Epoch 2: val_loss did not improve from 0.61558
Epoch 3/20
Epoch 3: val_loss did not improve from 0.61558
Epoch 4/20
Epoch 4: val_loss did not improve from 0.61558
Epoch 5/20
Epoch 5: val_loss did not improve from 0.61558
Epoch 6/20
Epoch 6: val_loss did not improve from 0.61558
Epoch 7/20
Epoch 7: val_loss did not improve from 0.61558
Epoch 8/20
Epoch 8: val_loss did not improve from 0.61558
Epoch 9/20
Epoch 9: val_loss did not improve from 0.61558
Epoch 10/20
Epoch 10: val_loss did not improve from 0.61558
Epoch 11/20
Epoch 11: val_loss did not improve from 0.61558
Epoch 12/20
Epoch 12: val_loss improved from 0.61558 to 0.61095, saving model to /tmp/video_classifier
Epoch 13/20
Epoch 13: val_loss did not improve from 0.61095
Epoch 14/20
Epoch 14: val_loss did not improve from 0.61095
Epoch 15/20
Epoch 15: val_loss did not improve from 0.61095
Epoch 16/20
Epoch 16: val_loss did not improve 

# Test del modelo

In [9]:
def prepare_single_video(frames):
    """Turn the frames of one video into model-ready features and mask.

    Args:
        frames: Array whose first axis indexes frames, as returned by
            `load_video`. Only the first MAX_SEQ_LENGTH frames are used.

    Returns:
        `(frame_features, frame_mask)` where `frame_features` is float32 with
        shape (1, MAX_SEQ_LENGTH, NUM_FEATURES) and `frame_mask` is bool with
        shape (1, MAX_SEQ_LENGTH), True for each slot that holds a real frame.
        With zero input frames both arrays are returned zero / False.
    """
    frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")

    length = min(MAX_SEQ_LENGTH, frames.shape[0])
    if length > 0:
        # One batched predict for the whole clip instead of one call per frame.
        frame_features[0, :length] = feature_extractor.predict(
            frames[:length], verbose=0
        )
        frame_mask[0, :length] = True

    return frame_features, frame_mask


def sequence_prediction(path):
    """Classify one video and print class probabilities, highest first.

    Args:
        path: Video location relative to the "test" directory; the file that
            is actually opened is `os.path.join("test", path)`.

    Returns:
        The frames loaded from the video, as returned by `load_video`.
    """
    frames = load_video(os.path.join("test", path))
    features, mask = prepare_single_video(frames)
    # The model is given a batch of one video; take that single output row.
    scores = sequence_model.predict([features, mask], verbose=0)[0]

    labels = label_processor.get_vocabulary()
    ranking = np.argsort(scores)[::-1]
    for class_idx in ranking:
        print(f"  {labels[class_idx]}: {scores[class_idx] * 100:5.2f}%")
    return frames



In [10]:
# Classify a single clip. NOTE: sequence_prediction joins this path under the
# "test" directory before opening the file.
path = "all_data/F_32_1_0_0_0_test.mp4"
print(f"Test video path: {path}")
test_frames = sequence_prediction(path)

Test video path: all_data/F_32_1_0_0_0_test.mp4
  fight: 50.14%
  normal: 49.86%


In [11]:
# Classify a second clip; `path` and `test_frames` from the previous cell are
# overwritten.
path = "all_data/normal.mp4"
print(f"Test video path: {path}")
test_frames = sequence_prediction(path)

Test video path: all_data/normal.mp4
  fight: 50.14%
  normal: 49.86%


In [12]:
# Classify a third clip.
# NOTE(review): the recorded output of this cell and of the two preceding
# prediction cells is identical (fight 50.14% / normal 49.86%) — worth checking
# whether the videos are actually being read and whether the model's output
# depends on its input at all.
path = "all_data/normal_2.mp4"
print(f"Test video path: {path}")
test_frames = sequence_prediction(path)

Test video path: all_data/normal_2.mp4
  fight: 50.14%
  normal: 49.86%
