In [1]:
# import tensorflow.compat.v1 as tf
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub
from tensorflow_docs.vis import embed

import cv2
import numpy as np
import os
#To make tf 2.0 compatible with tf1.0 code, we disable the tf2.0 functionalities
# tf.disable_eager_execution()

%load_ext tensorboard

# Some modules to display an animation using imageio.
import imageio
from IPython import display

In [2]:
tf.__version__
print("The following GPU devices are available: %s" % tf.test.gpu_device_name())

'2.7.0'

In [3]:
# Loading movinet model from tensorflow hub
movinet_model_handle = "https://tfhub.dev/tensorflow/movinet/a2/base/kinetics-600/classification/3"
encoder = hub.KerasLayer(movinet_model_handle, trainable=True)
inputs = tf.keras.layers.Input(
    shape=[None, None, None, 3],
    dtype=tf.float32,
    name='image')

# [batch_size, 600]
outputs = encoder(dict(image=inputs))

model = tf.keras.Model(inputs, outputs, name='movinet')

In [4]:
def crop_center_square(frame):
  y, x = frame.shape[0:2]
  min_dim = min(y, x)
  start_x = (x // 2) - (min_dim // 2)
  start_y = (y // 2) - (min_dim // 2)
  return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]

def load_video(path, max_frames=0, resize=(256, 256)):
  cap = cv2.VideoCapture(path)
  frames = []
  try:
    while True:
      ret, frame = cap.read()
      if not ret:
        break
      frame = crop_center_square(frame)
      frame = cv2.resize(frame, resize)
      frame = frame[:, :, [2, 1, 0]]
      frames.append(frame)
      
      if len(frames) == max_frames:
        break
  finally:
    cap.release()
  return np.array(frames) / 255.0

In [5]:
def predict(sample_video):
    # Add a batch axis to the sample video.
    model_input = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]
    return model(model_input)

In [25]:
video_path = os.path.join("videos", "no_move", "21.mp4")
video_test = load_video(video_path)
# Take the first 16 frames to match the model's expected input shape.
video_test = video_test
predictions = predict(video_test)
predictions

<tf.Tensor: shape=(1, 600), dtype=float32, numpy=
array([[ 1.26792073e+00,  1.19310093e+00,  3.39206433e+00,
        -3.00656915e-01, -5.88506460e-04,  3.64208639e-01,
         8.29331756e-01,  2.08081627e+00, -9.91187245e-02,
        -2.59654403e-01, -1.83533072e+00, -3.02726555e+00,
        -4.53914225e-01, -9.96431053e-01, -9.52925026e-01,
         1.23814255e-01, -2.41552472e+00,  1.19374907e+00,
        -1.23322415e+00,  3.69139224e-01, -5.66354871e-01,
        -2.77563047e+00, -1.74385905e-02,  3.67891908e-01,
         4.34320897e-01,  2.14967430e-01, -8.78618956e-01,
        -8.56883943e-01, -1.29588589e-01,  9.93225992e-01,
         1.95392418e+00, -2.75242597e-01,  2.75062561e-01,
         1.65512145e-01,  1.09468579e+00, -1.33026019e-01,
        -1.18426287e+00, -3.67771477e-01, -7.90078521e-01,
         4.68731880e-01,  8.96503806e-01, -2.10339952e+00,
        -8.90522659e-01,  1.12040013e-01,  3.27530742e-01,
        -1.07633483e+00,  7.46155560e-01, -6.66364551e-01,
      

In [26]:
# Load charades class labels
with open('kinetics_classes.txt', 'r') as f:
    CLASSES = [line.strip() for line in f.readlines()]
CLASSES

['0,abseiling',
 '1,acting in play',
 '2,adjusting glasses',
 '3,air drumming',
 '4,alligator wrestling',
 '5,answering questions',
 '6,applauding',
 '7,applying cream',
 '8,archaeological excavation',
 '9,archery',
 '10,arguing',
 '11,arm wrestling',
 '12,arranging flowers',
 '13,assembling bicycle',
 '14,assembling computer',
 '15,attending conference',
 '16,auctioning',
 '17,backflip (human)',
 '18,baking cookies',
 '19,bandaging',
 '20,barbequing',
 '21,bartending',
 '22,base jumping',
 '23,bathing dog',
 '24,battle rope training',
 '25,beatboxing',
 '26,bee keeping',
 '27,belly dancing',
 '28,bench pressing',
 '29,bending back',
 '30,bending metal',
 '31,biking through snow',
 '32,blasting sand',
 '33,blowdrying hair',
 '34,blowing bubble gum',
 '35,blowing glass',
 '36,blowing leaves',
 '37,blowing nose',
 '38,blowing out candles',
 '39,bobsledding',
 '40,bodysurfing',
 '41,bookbinding',
 '42,bottling',
 '43,bouncing on bouncy castle',
 '44,bouncing on trampoline',
 '45,bowling',

In [33]:
from PIL import Image, ImageFont, ImageDraw

def to_gif(images):
  converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)
  imageio.mimsave('./animation.gif', converted_images, fps=25)
  return embed.embed_file('./animation.gif')

def convert_prediction_to_classes(predictions):
    probabilities = tf.argmax(predictions[-1],-1)
    print(probabilities)
    print(CLASSES[probabilities])

In [34]:
to_gif(video_test, "staring")

AttributeError: 'numpy.ndarray' object has no attribute 'load'

In [29]:
convert_prediction_to_classes(predictions)

tf.Tensor(494, shape=(), dtype=int64)
494,staring
