In [None]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub
import cv2
import os
import numpy as np
import pathlib
from matplotlib import pyplot as plt
from tensorflow.keras.utils import plot_model
import datetime

In [None]:
# --- Frame geometry and sequence settings ---
IMG_HEIGHT, IMG_WIDTH = 64, 64   # every decoded frame is resized to this height / width
SEQUENCE_LENGTH = 20             # number of frames grouped into one training sample

# --- Locations on disk ---
VIDEO_FOLDER_PATH = 'videos'
PATH_FOR_IMAGES = pathlib.Path('images')

# --- Dataset split fractions; training receives whatever is left over ---
VALIDATION_PERCENTAGE = 0.20
TEST_PERCENTAGE = 0.10
TRAIN_PERCENTAGE = 1 - (VALIDATION_PERCENTAGE + TEST_PERCENTAGE)

# Class names are the names of the sub-directories of PATH_FOR_IMAGES, in sorted order.
# NOTE(review): this cell runs before the frame-extraction cell that creates those
# sub-directories, so on a fresh checkout CLASSES may come out empty -- confirm it is
# populated (re-run this cell after extraction) before labelling or training.
class_dir_names = [entry.name for entry in PATH_FOR_IMAGES.glob('*') if entry.is_dir()]
class_dir_names.sort()
CLASSES = np.array(class_dir_names)


1. (Data Preparation) Extract frames from the videos and store them in a new folder in the project directory

In [None]:
def extract_frames(video_path, output_dir, name_prefix):
    """Write every frame of one video into ``output_dir`` as JPEG files.

    Files are named ``<name_prefix>_frame<NNNNN>.jpg``. The prefix keeps frames of
    different videos that share a class folder from overwriting each other (the
    counter restarts at 0 for every video), and the zero-padded counter makes the
    file names sort in playback order.

    Args:
        video_path: path of the video file to read.
        output_dir: existing directory that receives the frame images.
        name_prefix: string placed in front of every frame file name.

    Returns:
        The number of frames written.
    """
    video = cv2.VideoCapture(video_path)
    count = 0
    try:
        success, image = video.read()
        while success:
            image_path = os.path.join(output_dir, "%s_frame%05d.jpg" % (name_prefix, count))
            cv2.imwrite(image_path, image)
            count += 1
            success, image = video.read()
    finally:
        # Always give the decoder handle back, even if writing a frame fails.
        video.release()
    return count


# Create the images root once, up front, instead of re-checking it for every class folder.
os.makedirs(PATH_FOR_IMAGES, exist_ok=True)

for folder in os.listdir(VIDEO_FOLDER_PATH):
    video_folder = os.path.join(VIDEO_FOLDER_PATH, folder)
    if not os.path.isdir(video_folder):
        # A stray file next to the class folders is not a class; skip it.
        continue

    image_folder = os.path.join(PATH_FOR_IMAGES, folder)
    os.makedirs(image_folder, exist_ok=True)

    for video_file in sorted(os.listdir(video_folder)):
        full_video_path = os.path.join(video_folder, video_file)
        # Use the whole file name (dots replaced) so "1.mp4" and "1.avi" stay distinct.
        frame_prefix = video_file.replace('.', '_')
        extract_frames(full_video_path, image_folder, frame_prefix)

2. (Data Partitioning) Create TensorFlow Dataset objects consisting of images

In [None]:
def _natural_sort_key(file_path):
    """Sort key that orders embedded numbers numerically (frame2 before frame10)."""
    import re  # local import: `re` is not part of the notebook's import cell

    tokens = re.split(r'(\d+)', os.path.basename(file_path))
    # re.split with a capturing group alternates text, digits, text, ...; odd slots are digits.
    return [int(token) if position % 2 else token for position, token in enumerate(tokens)]


# Create a dataset dictionary with train, test and validation partitions,
# where the image paths of each partition are stored in a list.
def create_dataset(path, train_percentage=None, validation_percentage=None):
    """Split the images of every class folder under ``path`` into three partitions.

    Within each class folder the image paths are put in natural (numeric-aware)
    file-name order so frames keep their temporal order, then cut into contiguous
    train / validation / test ranges. Both fractions are measured against the
    folder's total image count; the test partition receives the remainder.

    Args:
        path: directory whose sub-directories each hold the images of one class.
        train_percentage: fraction of each folder used for training; defaults to
            the module-level TRAIN_PERCENTAGE.
        validation_percentage: fraction of each folder used for validation;
            defaults to the module-level VALIDATION_PERCENTAGE.

    Returns:
        dict with keys 'train', 'test' and 'validation', each a list of image
        path strings.
    """
    if train_percentage is None:
        train_percentage = TRAIN_PERCENTAGE
    if validation_percentage is None:
        validation_percentage = VALIDATION_PERCENTAGE

    dataset = {'train': [], 'test': [], 'validation': []}

    for folder in sorted(os.listdir(path)):
        folder_path = os.path.join(path, folder)
        if not os.path.isdir(folder_path):
            continue  # only class directories contribute images

        images = sorted(
            (os.path.join(folder_path, item.name)
             for item in pathlib.Path(folder_path).glob('*') if item.is_file()),
            key=_natural_sort_key,
        )

        # Both cut points are relative to the full folder, so e.g. 70/20/10 really
        # yields 20% validation (not 20% of the 30% left after training).
        train_end = int(len(images) * train_percentage)
        validation_end = train_end + int(len(images) * validation_percentage)

        train_images = images[:train_end]
        validation_images = images[train_end:validation_end]
        test_images = images[validation_end:]

        dataset['train'].extend(train_images)
        dataset['test'].extend(test_images)
        dataset['validation'].extend(validation_images)

        print("Folder: %s, train: %d, test: %d, validation: %d" % (folder, len(train_images), len(test_images), len(validation_images)))
    return dataset
            
# Partition every class folder under PATH_FOR_IMAGES, then report how many
# image paths ended up in each partition.
dataset = create_dataset(PATH_FOR_IMAGES)

train_total = len(dataset['train'])
test_total = len(dataset['test'])
validation_total = len(dataset['validation'])

print("Total training images: %d" % train_total)
print("Total test images: %d" % test_total)
print("Total validation images: %d" % validation_total)


In [None]:
# total_image_count = len(list(PATH_FOR_IMAGES.glob('**/*.jpg')))
# # NOTE: Below does not need to be a tensorflow dataset
# list_ds = tf.data.Dataset.list_files(str(PATH_FOR_IMAGES/'*/*.jpg'), shuffle=False)
# # list_ds = list_ds.shuffle(total_image_count, reshuffle_each_iteration=False)


# validation_set_size = int(total_image_count * VALIDATION_PERCENTAGE)
# training_set_size = int(total_image_count * TRAIN_PERCENTAGE)
# test_set_size = int(total_image_count * TEST_PERCENTAGE)

# training_set = list_ds.take(training_set_size)
# validation_set = list_ds.skip(training_set_size).take(validation_set_size)
# test_set = list_ds.skip(validation_set_size + training_set_size).take(test_set_size)

# print("Amount of Training data: " + str(tf.data.experimental.cardinality(training_set).numpy()))
# print("Amount of Validation data: " + str(tf.data.experimental.cardinality(validation_set).numpy()))
# print("Amount of Test data: " + str(tf.data.experimental.cardinality(test_set).numpy()))



3. (Data Pre-processing) Detect person, create (image, label) pair data points, augment dataset

In [None]:
def extract_label_from_path(image_path):
    """Return the position in CLASSES of the directory that directly contains the image.

    The path is split on ``os.path.sep``; the second-to-last component (the
    parent directory name) is compared against every entry of CLASSES and the
    index of the match is returned as a NumPy integer.

    NOTE(review): if the parent directory is not listed in CLASSES the comparison
    mask is all zeros and argmax presumably yields 0 -- confirm that cannot happen.
    """
    path_components = tf.strings.split(image_path, os.path.sep)
    parent_dir = path_components[-2]
    class_mask = tf.cast(parent_dir == CLASSES, tf.int16)
    class_index = tf.argmax(class_mask)
    # Hand back a plain NumPy value rather than a tensor (the original note says
    # the tensor itself is not serializable).
    return class_index.numpy()


def decode_image(img):
    """Decode JPEG bytes into a 3-channel float32 image resized to IMG_HEIGHT x IMG_WIDTH."""
    rgb_uint8 = tf.image.decode_jpeg(img, channels=3)
    rgb_float = tf.image.convert_image_dtype(rgb_uint8, tf.float32)
    target_size = [IMG_HEIGHT, IMG_WIDTH]
    return tf.image.resize(rgb_float, target_size)

def process_path(image_path):
    """Read the file at ``image_path`` and return the image produced by decode_image."""
    raw_bytes = tf.io.read_file(image_path)
    return decode_image(raw_bytes)

In [None]:
# Walk the image path list and group consecutive images into sequences of
# SEQUENCE_LENGTH frames. Each sequence is labelled with its predominant class.
def load_dataset(list_of_images):
    """Load images and group them into fixed-length, labelled frame sequences.

    Every path is decoded with process_path and labelled with
    extract_label_from_path. As soon as SEQUENCE_LENGTH frames have been
    collected they are stored as one sequence whose label is the most frequent
    frame label in it. A trailing group shorter than SEQUENCE_LENGTH is
    discarded, because every sequence must have the same length to be stacked.

    Args:
        list_of_images: iterable of image paths, in the order the frames should
            appear inside the sequences.

    Returns:
        Tuple ``(sequences, labels)``: ``sequences`` is the NumPy array built
        from the list of frame sequences, ``labels`` the one-hot encoded
        sequence labels with one column per entry of CLASSES.
    """
    full_dataset_images = []
    full_dataset_labels = []
    frames = []
    frame_labels = []

    for image_path in list_of_images:
        # Append first, then test for a full buffer: testing first (as before)
        # skipped the image that arrived while the buffer was full and never
        # stored a sequence completed by the very last image.
        frames.append(process_path(image_path))
        frame_labels.append(extract_label_from_path(image_path))

        if len(frames) == SEQUENCE_LENGTH:
            main_label = max(set(frame_labels), key=frame_labels.count)
            full_dataset_images.append(frames)
            full_dataset_labels.append(main_label)
            frames = []
            frame_labels = []

    # Fix the one-hot width to the number of classes so train / validation / test
    # labels have the same shape even when a partition lacks the highest class.
    one_hot_labels = tf.keras.utils.to_categorical(
        np.asarray(full_dataset_labels), num_classes=len(CLASSES)
    )
    return np.asarray(full_dataset_images), one_hot_labels

In [None]:
# Load every partition of the dataset dictionary into memory.
# NOTE: load_dataset returns a tuple (image_sequences, one_hot_labels) of arrays
# (np.asarray(...) and to_categorical(...)), not a tf.data.Dataset object.

train_ds = load_dataset(dataset['train'])
validation_ds = load_dataset(dataset['validation'])
test_ds = load_dataset(dataset['test'])

# Earlier tf.data-based alternative, kept for reference:
# training_set = tf.data.Dataset.from_tensor_slices(dataset['train'])
# validation_set = tf.data.Dataset.from_tensor_slices(dataset['validation'])
# test_set = tf.data.Dataset.from_tensor_slices(dataset['test'])



In [None]:
# Summarise the training partition (shape and dtype of each array) instead of
# printing the arrays themselves, which floods the notebook with pixel values.
for i in train_ds:
    part = np.asarray(i)
    print(part.shape, part.dtype)

In [None]:
def configure_for_performance(ds):
    """Return ``ds`` cached, shuffled (buffer 1000), batched by 32 and prefetched.

    ``ds`` must provide the ``cache`` / ``shuffle`` / ``batch`` / ``prefetch``
    methods; the stages are applied in exactly that order.
    """
    return (
        ds.cache()
        .shuffle(buffer_size=1000)
        .batch(batch_size=32)
        .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    )

# load_dataset returns (image_sequences, one_hot_labels) tuples, which have no
# .cache()/.shuffle() methods, so wrap them in a tf.data.Dataset first.
# The pipelines get their own names: train_ds / validation_ds must stay tuples
# because the model.fit cell indexes train_ds[0] and train_ds[1].
train_batches = configure_for_performance(tf.data.Dataset.from_tensor_slices(train_ds))
validation_batches = configure_for_performance(tf.data.Dataset.from_tensor_slices(validation_ds))



In [None]:
# Preview the first frames of one training sequence together with its class name.
# train_ds is normally the (image_sequences, one_hot_labels) tuple returned by
# load_dataset; a batched tf.data.Dataset of such pairs is accepted as well.
if isinstance(train_ds, tuple):
    image_batch, label_batch = train_ds
else:
    image_batch, label_batch = next(iter(train_ds))

image_batch = np.asarray(image_batch)
label_batch = np.asarray(label_batch)
print(image_batch.shape)

if len(image_batch) > 0:
    preview_frames = image_batch[0]
    # Labels are one-hot vectors: the class index is the position of the maximum,
    # not the element at the frame's position.
    class_index = int(np.argmax(label_batch[0]))

    plt.figure(figsize=(10, 10))
    for i in range(min(9, len(preview_frames))):
        ax = plt.subplot(3, 3, i + 1)
        # Frames were converted to float32 by decode_image; casting them to uint8
        # would truncate values in [0, 1] to 0 and show black images, so plot the floats.
        ax.imshow(np.clip(preview_frames[i], 0.0, 1.0))
        ax.set_title(CLASSES[class_index])
        ax.axis("off")


4. (Model Implementation) Neural Network Implementation
* We might not need to actually shuffle the dataset as we want to capture temporal relationship
* Need to consider the sequence length 


In [None]:
# Four ConvLSTM2D stages with a growing number of filters. Each stage is followed
# by spatial-only max pooling; all stages except the last are also followed by a
# per-timestep dropout. A softmax classifier over the flattened features ends the net.
conv_lstm_filters = (16, 32, 64, 128)

model = keras.Sequential()

for stage_index, stage_filters in enumerate(conv_lstm_filters):
    # Only the first layer of a Sequential model declares the input shape.
    first_layer_kwargs = (
        {"input_shape": (SEQUENCE_LENGTH, IMG_HEIGHT, IMG_WIDTH, 3)} if stage_index == 0 else {}
    )
    model.add(
        keras.layers.ConvLSTM2D(
            filters=stage_filters,
            kernel_size=(3, 3),
            activation="tanh",
            padding='same',
            recurrent_dropout=0.2,
            return_sequences=True,
            **first_layer_kwargs,
        )
    )
    model.add(keras.layers.MaxPooling3D(pool_size=(1, 2, 2), padding='same'))

    is_last_stage = stage_index == len(conv_lstm_filters) - 1
    if not is_last_stage:
        model.add(keras.layers.TimeDistributed(keras.layers.Dropout(0.2)))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(2, activation="softmax"))

model.summary()

In [None]:
# Render the layer graph, including tensor shapes, to model.png.
plot_model(
    model,
    show_shapes=True,
    to_file='model.png',
)

In [None]:
# Training visualisation with TensorBoard.
# The run directory is built with os.path.join so it nests as logs/fit/<timestamp>
# on every platform; the previous hard-coded "\\" separators only nest on Windows.
log_dir = os.path.join("logs", "fit", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
path = log_dir  # the original cell bound both names; keep both available
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=path, histogram_freq=1, update_freq='epoch')

In [None]:
# Compile with Adam and categorical cross-entropy (the labels are one-hot encoded
# by load_dataset), then train on the in-memory training tuple.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.CategoricalCrossentropy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

train_images, train_labels = train_ds[0], train_ds[1]
hist = model.fit(
    x=train_images,
    y=train_labels,
    epochs=200,
    batch_size=32,
    validation_data=validation_ds,
    callbacks=[tensorboard_callback],
)

In [None]:
# Evaluate trained model on test dataset
# model.evaluate(test_batches)

In [None]:
from video2tfrecord import convert_videos_to_tfrecord

video_path = os.path.join("videos", "move", "1.mp4")
print(video_path)
# Resolve the absolute location from the working directory instead of hard-coding
# one machine's path (the old literal also contained the invalid escape "\m").
video_path_2 = os.path.abspath(video_path)
# NOTE(review): the first two arguments are presumably source and destination --
# confirm against video2tfrecord whether it expects directories rather than files.
convert_videos_to_tfrecord(video_path_2, 'videos/move/1.tfrecord', 1, "all", "*.mp4")

In [51]:
import tensorflow.compat.v1 as tf
from tensorflow import keras
import tensorflow_hub as hub
import cv2
import numpy as np
# To run TF1-style code under TF 2.x, eager execution is switched off.
# NOTE(review): the import at the top of this cell rebinds `tf` to
# tensorflow.compat.v1, and eager execution is disabled from here on; earlier
# cells that call `.numpy()` (e.g. extract_label_from_path) presumably stop
# working if they are re-run after this cell -- confirm before using Run All.
tf.disable_eager_execution()

# Load the TF Hub module.
# NOTE(review): this rebinds `model`, replacing the Keras model built earlier in
# the notebook; predict() below calls whichever `model` is current.
model_handle = 'https://tfhub.dev/google/tiny_video_net/tvn2/1'
model = hub.Module(model_handle)
# Earlier experiment, kept for reference:
# video = tf.io.read_file('videos/move/1.mp4')
# predictions = model(video)
# predictions

In [13]:
def load_video(path, max_frames=0, resize=(256, 256)):
  """Read frames from a video file into one array scaled by 1/255.

  Each frame is resized to ``resize`` and its channel axis is reversed
  (OpenCV's BGR order becomes RGB). Reading stops at the end of the video or
  once ``max_frames`` frames have been collected; with the default of 0 the
  count never matches, so the whole video is read.

  Returns:
    ``np.array(frames) / 255.0``.
  """
  capture = cv2.VideoCapture(path)
  rgb_frames = []
  try:
    grabbed, bgr_frame = capture.read()
    while grabbed:
      # bgr_frame = crop_center_square(bgr_frame)
      resized = cv2.resize(bgr_frame, resize)
      rgb_frames.append(resized[..., ::-1])

      if len(rgb_frames) == max_frames:
        break
      grabbed, bgr_frame = capture.read()
  finally:
    capture.release()
  return np.array(rgb_frames) / 255.0

In [18]:
def predict(sample_video):
    """Run the module-level ``model`` on ``sample_video`` and return its output.

    The input is converted to a float32 tensor as-is (no axis is added) and the
    tensor's shape is printed before the model is called.
    """
    video_tensor = tf.constant(sample_video, dtype=tf.float32)
    print(video_tensor.shape)
    model_output = model(video_tensor)
    return model_output

In [19]:
video_path = os.path.join("videos", "move", "1.mp4")
# Only the first 16 frames are used, so stop decoding there instead of loading
# the whole video into memory and slicing afterwards (same resulting array).
# NOTE(review): 16 is presumably the frame count the hub model expects -- confirm.
video_test = load_video(video_path, max_frames=16)
predictions = predict(video_test)
predictions

(16, 256, 256, 3)
(16, 256, 256, 3)
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


<tf.Tensor 'module_2_apply_default/dense/BiasAdd:0' shape=(8, 157) dtype=float32>

In [40]:
# Read the Charades class labels: one label per line, surrounding whitespace removed.
with open('charades_classes.txt') as labels_file:
    CLASSES = [raw_line.strip() for raw_line in labels_file]
CLASSES

['c000 Holding some clothes',
 'c001 Putting clothes somewhere',
 'c002 Taking some clothes from somewhere',
 'c003 Throwing clothes somewhere',
 'c004 Tidying some clothes',
 'c005 Washing some clothes',
 'c006 Closing a door',
 'c007 Fixing a door',
 'c008 Opening a door',
 'c009 Putting something on a table',
 'c010 Sitting on a table',
 'c011 Sitting at a table',
 'c012 Tidying up a table',
 'c013 Washing a table',
 'c014 Working at a table',
 'c015 Holding a phone/camera',
 'c016 Playing with a phone/camera',
 'c017 Putting a phone/camera somewhere',
 'c018 Taking a phone/camera from somewhere',
 'c019 Talking on a phone/camera',
 'c020 Holding a bag',
 'c021 Opening a bag',
 'c022 Putting a bag somewhere',
 'c023 Taking a bag from somewhere',
 'c024 Throwing a bag somewhere',
 'c025 Closing a book',
 'c026 Holding a book',
 'c027 Opening a book',
 'c028 Putting a book somewhere',
 'c029 Smiling at a book',
 'c030 Taking a book from somewhere',
 'c031 Throwing a book somewhere',
 

In [68]:
# Display the current value of `predictions`.
# NOTE(review): the saved output below has shape=() while the predict(...) cell's
# saved output had shape=(8, 157), so `predictions` was presumably reassigned in a
# cell that is no longer part of the notebook -- confirm with Restart & Run All.
predictions

<tf.Tensor 'strided_slice_5:0' shape=() dtype=float32>