In [2]:
#@title Import the necessary modules
# TensorFlow and TF-Hub modules.
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed

logging.set_verbosity(logging.ERROR)

# Some modules to help with reading the UCF101 dataset.
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np

# Some modules to display an animation using imageio.
import imageio
from IPython import display

from urllib import request  # requires python3

%matplotlib notebook
from matplotlib import pyplot as plt
from matplotlib import animation
from IPython.display import HTML, display

In [3]:
from IPython.display import Video

def show_video(video):
    fig = plt.figure()
    im = plt.imshow(video[0,:,:,:])
    plt.close() # this is required to not display the generated image

    def init_video():
        im.set_data(video[0,:,:,:])

    def animate_video(i):
        im.set_data(video[i,:,:,:])
        return im

    anim = animation.FuncAnimation(fig, animate_video, init_func=init_video, frames=video.shape[0],
                               interval=50)
    display(HTML(anim.to_html5_video()))

In [4]:
#@title Helper functions for the UCF101 dataset

# Utilities to fetch videos from UCF101 dataset
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"
_VIDEO_LIST = None
_CACHE_DIR = tempfile.mkdtemp()
# As of July 2020, crcv.ucf.edu doesn't use a certificate accepted by the
# default Colab environment anymore.
unverified_context = ssl._create_unverified_context()

def list_ucf_videos():
    """Lists videos available in UCF101 dataset."""
    global _VIDEO_LIST
    if not _VIDEO_LIST:
        index = request.urlopen(UCF_ROOT, context=unverified_context).read().decode("utf-8")
        videos = re.findall("(v_[\w_]+\.avi)", index)
        _VIDEO_LIST = sorted(set(videos))
    return list(_VIDEO_LIST)

def fetch_ucf_video(video):
    """Fetchs a video and cache into local filesystem."""
    cache_path = os.path.join(_CACHE_DIR, video)
    if not os.path.exists(cache_path):
        urlpath = request.urljoin(UCF_ROOT, video)
        print("Fetching %s => %s" % (urlpath, cache_path))
        data = request.urlopen(urlpath, context=unverified_context).read()
        open(cache_path, "wb").write(data)
    return cache_path

# Utilities to open video files using CV2
def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]

def load_video(path, max_frames=0, resize=(224, 224)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)
      
            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames) / 255.0

def to_gif(images):
    converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)
    imageio.mimsave('./animation.gif', converted_images, fps=25)
    return embed.embed_file('./animation.gif')

In [5]:
#@title Get the kinetics-400 labels
# Get the kinetics-400 action labels from the GitHub repository.
KINETICS_URL = "https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt"
with request.urlopen(KINETICS_URL) as obj:
    labels = [line.decode("utf-8").strip() for line in obj.readlines()]
print("Found %d labels." % len(labels))

Found 400 labels.


In [6]:
# Get the list of videos in the dataset.
ucf_videos = list_ucf_videos()
  
categories = {}
for video in ucf_videos:
    category = video[2:-12]
    if category not in categories:
        categories[category] = []
    categories[category].append(video)
print("Found %d videos in %d categories." % (len(ucf_videos), len(categories)))

for category, sequences in categories.items():
    summary = ", ".join(sequences[:2])
    print("%-20s %4d videos (%s, ...)" % (category, len(sequences), summary))

Found 13320 videos in 101 categories.
ApplyEyeMakeup        145 videos (v_ApplyEyeMakeup_g01_c01.avi, v_ApplyEyeMakeup_g01_c02.avi, ...)
ApplyLipstick         114 videos (v_ApplyLipstick_g01_c01.avi, v_ApplyLipstick_g01_c02.avi, ...)
Archery               145 videos (v_Archery_g01_c01.avi, v_Archery_g01_c02.avi, ...)
BabyCrawling          132 videos (v_BabyCrawling_g01_c01.avi, v_BabyCrawling_g01_c02.avi, ...)
BalanceBeam           108 videos (v_BalanceBeam_g01_c01.avi, v_BalanceBeam_g01_c02.avi, ...)
BandMarching          155 videos (v_BandMarching_g01_c01.avi, v_BandMarching_g01_c02.avi, ...)
BaseballPitch         150 videos (v_BaseballPitch_g01_c01.avi, v_BaseballPitch_g01_c02.avi, ...)
BasketballDunk        131 videos (v_BasketballDunk_g01_c01.avi, v_BasketballDunk_g01_c02.avi, ...)
Basketball            134 videos (v_Basketball_g01_c01.avi, v_Basketball_g01_c02.avi, ...)
BenchPress            160 videos (v_BenchPress_g01_c01.avi, v_BenchPress_g01_c02.avi, ...)
Biking              

In [7]:
# Get a sample cricket video.
video_path = fetch_ucf_video("v_CricketShot_g04_c02.avi")
sample_video = load_video(video_path)

Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_CricketShot_g04_c02.avi => /var/folders/q2/v8n1vs615dzc6zx7jy46tkw80000gn/T/tmpd_gzzk5_/v_CricketShot_g04_c02.avi


In [8]:
sample_video.shape

(116, 224, 224, 3)

In [9]:
i3d = hub.load("https://tfhub.dev/deepmind/i3d-kinetics-400/1").signatures['default']

2022-01-20 14:21:12.374970: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [10]:
def predict(sample_video):
    # Add a batch axis to the sample video.
    model_input = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]

    logits = i3d(model_input)['default'][0]
    probabilities = tf.nn.softmax(logits)

    print("Top 5 actions:")
    for i in np.argsort(probabilities)[::-1][:5]:
        print(f"  {labels[i]:22}: {probabilities[i] * 100:5.2f}%")

In [11]:
show_video(sample_video)
predict(sample_video)

<IPython.core.display.Javascript object>

Top 5 actions:
  playing cricket       : 97.30%
  skateboarding         :  0.87%
  robot dancing         :  0.68%
  roller skating        :  0.66%
  golf putting          :  0.17%


### Only First Frame

In [13]:
only_frame1 = np.tile(np.expand_dims(sample_video[32], 0), (116, 1, 1, 1))
show_video(only_frame1)
predict(only_frame1)

<IPython.core.display.Javascript object>

Top 5 actions:
  setting table         : 23.90%
  robot dancing         : 21.97%
  playing cricket       : 13.53%
  washing hands         :  3.61%
  headbutting           :  3.28%


### Shuffle Patches

In [14]:
import torch
import torch.nn.functional as nnf

class ShufflePatches(object):
    def patchify(self, x, ps):
        x = torch.from_numpy(x).permute(0, 3, 1, 2)
        # divide the batch of images into non-overlapping patches
        u = nnf.unfold(x, kernel_size=ps, stride=ps, padding=0)
        # permute the patches of each image in the batch
        indices = torch.randperm(u.shape[-1])
        pu = u[:, :, indices]
        # fold the permuted patches back together
        f = nnf.fold(pu, x.shape[-2:], kernel_size=ps, stride=ps, padding=0).permute(0, 2, 3, 1).numpy()
        return f

In [15]:
shuffler = ShufflePatches()
patch_112 = shuffler.patchify(sample_video, 28)
show_video(patch_112)
predict(patch_112)

<IPython.core.display.Javascript object>

Top 5 actions:
  playing cricket       : 64.90%
  marching              :  7.67%
  shooting goal (soccer):  3.04%
  playing tennis        :  2.84%
  applauding            :  2.39%


In [16]:
test = np.tile(np.expand_dims(patch_112[50], 0), (116, 1, 1, 1))
show_video(test)
predict(test)

<IPython.core.display.Javascript object>

Top 5 actions:
  presenting weather forecast: 40.97%
  marching              :  3.42%
  folding napkins       :  2.84%
  bookbinding           :  2.02%
  shooting basketball   :  1.81%


### Superpixel

In [17]:
from vidaug import augmentors as va

In [18]:
seq = va.Sequential([
    va.Superpixel(1, 2000)
])
superpixel = np.array([seq(sample_video)])[0]
show_video(superpixel)
predict(superpixel)

Converting


<IPython.core.display.Javascript object>

Top 5 actions:
  robot dancing         : 72.46%
  playing cricket       :  6.67%
  marching              :  2.98%
  headbutting           :  1.95%
  golf putting          :  1.78%


##### 

In [19]:
print(ucf_videos)

['v_ApplyEyeMakeup_g01_c01.avi', 'v_ApplyEyeMakeup_g01_c02.avi', 'v_ApplyEyeMakeup_g01_c03.avi', 'v_ApplyEyeMakeup_g01_c04.avi', 'v_ApplyEyeMakeup_g01_c05.avi', 'v_ApplyEyeMakeup_g01_c06.avi', 'v_ApplyEyeMakeup_g02_c01.avi', 'v_ApplyEyeMakeup_g02_c02.avi', 'v_ApplyEyeMakeup_g02_c03.avi', 'v_ApplyEyeMakeup_g02_c04.avi', 'v_ApplyEyeMakeup_g03_c01.avi', 'v_ApplyEyeMakeup_g03_c02.avi', 'v_ApplyEyeMakeup_g03_c03.avi', 'v_ApplyEyeMakeup_g03_c04.avi', 'v_ApplyEyeMakeup_g03_c05.avi', 'v_ApplyEyeMakeup_g03_c06.avi', 'v_ApplyEyeMakeup_g04_c01.avi', 'v_ApplyEyeMakeup_g04_c02.avi', 'v_ApplyEyeMakeup_g04_c03.avi', 'v_ApplyEyeMakeup_g04_c04.avi', 'v_ApplyEyeMakeup_g04_c05.avi', 'v_ApplyEyeMakeup_g04_c06.avi', 'v_ApplyEyeMakeup_g04_c07.avi', 'v_ApplyEyeMakeup_g05_c01.avi', 'v_ApplyEyeMakeup_g05_c02.avi', 'v_ApplyEyeMakeup_g05_c03.avi', 'v_ApplyEyeMakeup_g05_c04.avi', 'v_ApplyEyeMakeup_g05_c05.avi', 'v_ApplyEyeMakeup_g05_c06.avi', 'v_ApplyEyeMakeup_g05_c07.avi', 'v_ApplyEyeMakeup_g06_c01.avi', 'v_Appl