In [None]:
"""Action Recognition using the pretrained inflated 3d CNN model(pretrained on the kinetics400 
dataset) on the UCF101 dataset.Kinetics400 dataset has 400 labels while UCF101 dataset has 101 
labels"""

'Action Recognition using the pretrained inflated 3d CNN model(pretrained on the kinetics400 dataset) on the UCF101 dataset.\n Kinetics400 dataset has 400 labels while UCF101 dataset has 101 labels'

In [None]:
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [None]:
from absl import logging
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
logging.set_verbosity(logging.ERROR)
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np
import imageio
from IPython import display
from urllib import request

In [None]:
"""Utilities to fetch videos from UCF101 dataset"""
# Base URL under which the UCF101 index page and the individual videos live.
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"
# Cache of video file names; stays None until list_ucf_videos() fills it.
_VIDEO_LIST = None
# Fresh temporary directory, created when this cell runs, that
# fetch_ucf_video() uses as its download cache.
_CACHE_DIR = tempfile.mkdtemp()
# NOTE(review): this context performs no TLS certificate verification and is
# built through a private `ssl` helper. Presumably a workaround for the
# server's certificate -- confirm it is still required before reusing it.
unverified_context = ssl._create_unverified_context()

def list_ucf_videos():
  """Return the sorted, de-duplicated video file names found at UCF_ROOT.

  The index page is downloaded only while the module-level `_VIDEO_LIST`
  cache is empty; later calls reuse the cached names.

  Returns:
    A new list of names matching ``v_<name>.avi``. A copy is returned so that
    callers cannot mutate the cache.
  """
  global _VIDEO_LIST
  if not _VIDEO_LIST:
    # Close the HTTP response deterministically instead of leaking it.
    with request.urlopen(UCF_ROOT, context=unverified_context) as response:
      index = response.read().decode("utf-8")
    # Raw string: `\w` and `\.` are regex escapes, not Python string escapes
    # (the non-raw form triggers invalid-escape warnings on recent Pythons).
    videos = re.findall(r"(v_[\w_]+\.avi)", index)
    _VIDEO_LIST = sorted(set(videos))
  return list(_VIDEO_LIST)

def fetch_ucf_video(video):
  """Download one video into the local cache and return the cached path.

  Args:
    video: File name of the video relative to UCF_ROOT, e.g. a name returned
      by list_ucf_videos().

  Returns:
    Path of the file inside _CACHE_DIR. The download is skipped when that
    file already exists.
  """
  # urljoin is documented in urllib.parse; `request.urljoin` only resolves
  # through an undocumented re-export inside urllib.request.
  from urllib.parse import urljoin

  cache_path = os.path.join(_CACHE_DIR, video)
  if not os.path.exists(cache_path):
    urlpath = urljoin(UCF_ROOT, video)
    print("Fetching %s => %s" % (urlpath, cache_path))
    # Close the HTTP response as soon as the payload has been read.
    with request.urlopen(urlpath, context=unverified_context) as response:
      data = response.read()
    # The context manager guarantees the file is flushed and closed.
    with open(cache_path, "wb") as cache_file:
      cache_file.write(data)
  return cache_path

def crop_center_square(frame):
  """Return the centered square region of `frame`.

  Args:
    frame: Array whose first two axes are (height, width).

  Returns:
    A slice of `frame` whose height and width both equal the smaller of the
    two input dimensions, centered on the frame.
  """
  height, width = frame.shape[:2]
  side = min(height, width)
  # Offsets of the square's top-left corner, measured from the frame center.
  top = height // 2 - side // 2
  left = width // 2 - side // 2
  return frame[top:top + side, left:left + side]

def load_video(path, max_frames=0, resize=(224, 224)):
  """Decode a video file into an array of preprocessed frames.

  Each decoded frame is center-cropped to a square, resized to `resize`, and
  has its last axis reordered with index [2, 1, 0] (OpenCV's BGR to RGB).

  Args:
    path: Path handed to cv2.VideoCapture.
    max_frames: Stop once this many frames have been collected. The default
      of 0 never equals a non-empty frame count, so the whole video is read.
    resize: Target size passed to cv2.resize.

  Returns:
    np.array of the collected frames divided by 255.0.
  """
  capture = cv2.VideoCapture(path)
  collected = []
  try:
    ok, image = capture.read()
    while ok:
      square = crop_center_square(image)
      scaled = cv2.resize(square, resize)
      collected.append(scaled[:, :, [2, 1, 0]])
      if len(collected) == max_frames:
        break
      ok, image = capture.read()
  finally:
    # Always free the capture handle, even if decoding raised.
    capture.release()
  return np.array(collected) / 255.0

def to_gif(images, fps=25):
  """Write `images` to ./animation.gif and return its notebook embed.

  Args:
    images: Array of frames; values are multiplied by 255 and clipped to
      [0, 255] before conversion to uint8, so inputs are expected in [0, 1]
      (as produced by load_video).
    fps: Playback rate of the GIF. Defaults to 25, the previously hard-coded
      value.

  Returns:
    Whatever embed.embed_file returns for the written GIF.
  """
  converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)
  gif_path = './animation.gif'
  try:
    imageio.mimsave(gif_path, converted_images, fps=fps)
  except TypeError:
    # NOTE(review): newer imageio releases reject `fps` for GIF output and
    # expect the per-frame `duration` in milliseconds instead; retry with the
    # equivalent duration. Confirm against the installed imageio version.
    imageio.mimsave(gif_path, converted_images, duration=1000 / fps)
  return embed.embed_file(gif_path)

In [None]:
"""Getting the labels of the kinetics400 dataset"""
KINETICS_URL = "https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt"
# Download the label map; each line of the response becomes one label, with
# surrounding whitespace removed.
with request.urlopen(KINETICS_URL) as obj:
  raw_lines = obj.readlines()
labels = [raw.decode("utf-8").strip() for raw in raw_lines]
print("Found in total %d labels." % len(labels))

Found in total 400 labels.


In [None]:
"""Getting the UCF101 dataset"""
ucf_videos = list_ucf_videos()
# Group the file names by category. The category is what remains after
# dropping the first 2 and the last 12 characters of a name such as
# "v_ApplyEyeMakeup_g01_c01.avi".
categories = {}
for video in ucf_videos:
  category = video[2:-12]
  categories.setdefault(category, []).append(video)
print("Found in total %d videos in overall %d categories." % (len(ucf_videos), len(categories)))

# Summary table: category, number of videos, and the first two file names.
print("\n")
head1, head2, head3 = "CATEGORY", "No. of Videos", "Details"
print(" ",head1," \t  ",head2," \t\t\t ",head3)
for category, sequences in categories.items():
  summary = ", ".join(sequences[:2])
  print("%-20s    %4d           %s, ..." % (category, len(sequences), summary))

Found in total 13320 videos in overall 101 categories.


  CATEGORY  	   No. of Videos  			  Details
ApplyEyeMakeup           145           v_ApplyEyeMakeup_g01_c01.avi, v_ApplyEyeMakeup_g01_c02.avi, ...
ApplyLipstick            114           v_ApplyLipstick_g01_c01.avi, v_ApplyLipstick_g01_c02.avi, ...
Archery                  145           v_Archery_g01_c01.avi, v_Archery_g01_c02.avi, ...
BabyCrawling             132           v_BabyCrawling_g01_c01.avi, v_BabyCrawling_g01_c02.avi, ...
BalanceBeam              108           v_BalanceBeam_g01_c01.avi, v_BalanceBeam_g01_c02.avi, ...
BandMarching             155           v_BandMarching_g01_c01.avi, v_BandMarching_g01_c02.avi, ...
BaseballPitch            150           v_BaseballPitch_g01_c01.avi, v_BaseballPitch_g01_c02.avi, ...
BasketballDunk           131           v_BasketballDunk_g01_c01.avi, v_BasketballDunk_g01_c02.avi, ...
Basketball               134           v_Basketball_g01_c01.avi, v_Basketball_g01_c02.avi, ...
BenchPress  

In [None]:
video_path = fetch_ucf_video("v_Surfing_g01_c01.avi")
# Full clip, used for prediction.
sample_video = load_video(video_path)
# First 100 frames for the GIF preview; slice the frames already decoded
# instead of decoding the whole file a second time.
sample_video1 = sample_video[:100]
sample_video.shape

(254, 224, 224, 3)

In [None]:
"""A surfing video"""
# Render the truncated surfing clip as an inline animated GIF.
to_gif(sample_video1)

In [None]:
"""Downloading the pretrained Inflated 3D CNN model(i3d)"""
# Load the module from TF Hub and keep only its 'default' signature; `i3d` is
# the callable that predict() invokes.
i3d = hub.load("https://tfhub.dev/deepmind/i3d-kinetics-400/1").signatures['default']

In [None]:
"""Function which predicts the top 5 actions in a video"""
def predict(sample_video):
  """Print the five highest-probability labels for a video.

  Args:
    sample_video: Frames of one video; converted to a float32 tensor and
      given a leading batch axis before being passed to `i3d`.

  Returns:
    None. The ranking is printed, one "label: percent" line per action.
  """
  # Add a batch axis to the sample video.
  batch = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]

  outputs = i3d(batch)
  # Entry 0 of the 'default' output holds the logits of the only batch item.
  probabilities = tf.nn.softmax(outputs['default'][0])

  # Indices sorted from most to least probable.
  ranked = np.argsort(probabilities)[::-1]

  print("Top 5 actions:")
  for i in ranked[:5]:
    print(f"  {labels[i]:22}: {probabilities[i] * 100:5.2f}%")

In [None]:
"""Predicting the top 5 actions in the video"""
# Classify the full surfing clip (not the truncated GIF preview).
predict(sample_video)

Top 5 actions:
  surfing water         : 96.96%
  kitesurfing           :  2.19%
  windsurfing           :  0.39%
  faceplanting          :  0.19%
  snowkiting            :  0.12%


In [None]:
video_path = fetch_ucf_video("v_HorseRiding_g01_c01.avi")
# Full clip, used for prediction.
sample_video_1 = load_video(video_path)
# First 100 frames for the GIF preview; slice the frames already decoded
# instead of decoding the whole file a second time.
sample_video_1_1 = sample_video_1[:100]
# Show the shape of the clip loaded in this cell; the original displayed
# `sample_video`, i.e. the surfing clip from the earlier cell.
sample_video_1.shape

(254, 224, 224, 3)

In [None]:
"""Horse Riding Video"""
# Render the truncated horse-riding clip as an inline animated GIF.
to_gif(sample_video_1_1)

In [None]:
"""Predicting the top 5 actions in the video"""
# Classify the full horse-riding clip (not the truncated GIF preview).
predict(sample_video_1)

Top 5 actions:
  riding or walking with horse: 98.97%
  riding mule           :  0.87%
  triple jump           :  0.05%
  milking cow           :  0.04%
  hurdling              :  0.03%
