# TensorFlow Hub - Action recognition

- Based on: https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub?hl=pt-br
- Model for detections in videos: https://tfhub.dev/s?module-type=video-classification

# Installing and importing the libraries

In [None]:
!pip install opencv-python

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from google.colab.patches import cv2_imshow
import re # regular expression
import os
import tempfile
import ssl
import cv2
import numpy as np
from urllib import request

# Functions to visualize the videos

In [None]:
url_classes = 'https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt'
with request.urlopen(url_classes) as file:
  #print([row.decode('utf-8').strip() for row in file.readlines()])
  labels = [row.decode('utf-8').strip() for row in file.readlines()]

In [None]:
print(labels)

['abseiling', 'air drumming', 'answering questions', 'applauding', 'applying cream', 'archery', 'arm wrestling', 'arranging flowers', 'assembling computer', 'auctioning', 'baby waking up', 'baking cookies', 'balloon blowing', 'bandaging', 'barbequing', 'bartending', 'beatboxing', 'bee keeping', 'belly dancing', 'bench pressing', 'bending back', 'bending metal', 'biking through snow', 'blasting sand', 'blowing glass', 'blowing leaves', 'blowing nose', 'blowing out candles', 'bobsledding', 'bookbinding', 'bouncing on trampoline', 'bowling', 'braiding hair', 'breading or breadcrumbing', 'breakdancing', 'brush painting', 'brushing hair', 'brushing teeth', 'building cabinet', 'building shed', 'bungee jumping', 'busking', 'canoeing or kayaking', 'capoeira', 'carrying baby', 'cartwheeling', 'carving pumpkin', 'catching fish', 'catching or throwing baseball', 'catching or throwing frisbee', 'catching or throwing softball', 'celebrating', 'changing oil', 'changing wheel', 'checking tires', 'che

In [None]:
len(labels)

400

In [None]:
root_folder = 'https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/'
context = ssl._create_unverified_context()
cache_videos = tempfile.mkdtemp()

In [None]:
def list_videos():
  index = request.urlopen(root_folder, context = context).read().decode('utf-8')
  videos = re.findall('(v_[\w_]+\.avi)', index)
  return list(set(videos))

In [None]:
print(list_videos())

['v_Archery_g06_c02.avi', 'v_PizzaTossing_g23_c03.avi', 'v_BabyCrawling_g01_c04.avi', 'v_GolfSwing_g05_c01.avi', 'v_ShavingBeard_g12_c06.avi', 'v_PlayingDaf_g15_c02.avi', 'v_BaseballPitch_g06_c02.avi', 'v_GolfSwing_g04_c03.avi', 'v_Biking_g11_c03.avi', 'v_HandstandWalking_g07_c02.avi', 'v_BalanceBeam_g01_c02.avi', 'v_HighJump_g22_c05.avi', 'v_JavelinThrow_g17_c02.avi', 'v_SkyDiving_g21_c04.avi', 'v_WallPushups_g23_c02.avi', 'v_CleanAndJerk_g20_c05.avi', 'v_BaseballPitch_g06_c04.avi', 'v_Rafting_g15_c01.avi', 'v_Diving_g16_c01.avi', 'v_PlayingFlute_g02_c03.avi', 'v_JugglingBalls_g16_c01.avi', 'v_JumpRope_g07_c06.avi', 'v_PommelHorse_g24_c02.avi', 'v_PlayingSitar_g14_c04.avi', 'v_BalanceBeam_g18_c03.avi', 'v_BoxingPunchingBag_g22_c05.avi', 'v_HandstandPushups_g16_c06.avi', 'v_Surfing_g16_c02.avi', 'v_Mixing_g22_c03.avi', 'v_Drumming_g07_c01.avi', 'v_Rafting_g01_c03.avi', 'v_BasketballDunk_g22_c04.avi', 'v_CuttingInKitchen_g07_c03.avi', 'v_HorseRace_g16_c03.avi', 'v_PlayingCello_g09_c03.a

In [None]:
len(list_videos())

13320

In [None]:
def save_video(video):
  cache_path = os.path.join(cache_videos, video)
  #print(cache_path)
  if not os.path.exists(cache_path):
    path_url = request.urljoin(root_folder, video)
    #print(path_url)
    data = request.urlopen(path_url, context = context).read()
    open(cache_path, 'wb').write(data)
  return cache_path

In [None]:
save_video('v_PizzaTossing_g23_c03.avi')

'/tmp/tmpsouh7vsa/v_PizzaTossing_g23_c03.avi'

In [None]:
def load_video(path, visualize = False):
  cap = cv2.VideoCapture(path)
  frames = []
  while True:
    connected, frame = cap.read()
    if not connected:
      break
    frame = cv2.resize(frame, (224, 224))
    frames.append(frame)

    if visualize == True:
      cv2_imshow(frame)
  cap.release()
  return np.array(frames) / 255.0

In [None]:
load_video('/tmp/tmpsouh7vsa/v_PizzaTossing_g23_c03.avi', False)

In [None]:
!ffmpeg -i /tmp/tmpsouh7vsa/v_PizzaTossing_g23_c03.avi output.mp4

In [None]:
from IPython.display import HTML
from base64 import b64encode
mp4 = open('/content/output.mp4', 'rb').read()
data_url = 'data:video/mp4;base64,' + b64encode(mp4).decode()

In [None]:
data_url

'data:video/mp4;base64,AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1wNDEAAAAIZnJlZQAGAlRtZGF0AAACrgYF//+q3EXpvebZSLeWLNgg2SPu73gyNjQgLSBjb3JlIDE1MiByMjg1NCBlOWE1OTAzIC0gSC4yNjQvTVBFRy00IEFWQyBjb2RlYyAtIENvcHlsZWZ0IDIwMDMtMjAxNyAtIGh0dHA6Ly93d3cudmlkZW9sYW4ub3JnL3gyNjQuaHRtbCAtIG9wdGlvbnM6IGNhYmFjPTEgcmVmPTMgZGVibG9jaz0xOjA6MCBhbmFseXNlPTB4MzoweDExMyBtZT1oZXggc3VibWU9NyBwc3k9MSBwc3lfcmQ9MS4wMDowLjAwIG1peGVkX3JlZj0xIG1lX3JhbmdlPTE2IGNocm9tYV9tZT0xIHRyZWxsaXM9MSA4eDhkY3Q9MSBjcW09MCBkZWFkem9uZT0yMSwxMSBmYXN0X3Bza2lwPTEgY2hyb21hX3FwX29mZnNldD0tMiB0aHJlYWRzPTMgbG9va2FoZWFkX3RocmVhZHM9MSBzbGljZWRfdGhyZWFkcz0wIG5yPTAgZGVjaW1hdGU9MSBpbnRlcmxhY2VkPTAgYmx1cmF5X2NvbXBhdD0wIGNvbnN0cmFpbmVkX2ludHJhPTAgYmZyYW1lcz0zIGJfcHlyYW1pZD0yIGJfYWRhcHQ9MSBiX2JpYXM9MCBkaXJlY3Q9MSB3ZWlnaHRiPTEgb3Blbl9nb3A9MCB3ZWlnaHRwPTIga2V5aW50PTI1MCBrZXlpbnRfbWluPTI1IHNjZW5lY3V0PTQwIGludHJhX3JlZnJlc2g9MCByY19sb29rYWhlYWQ9NDAgcmM9Y3JmIG1idHJlZT0xIGNyZj0yMy4wIHFjb21wPTAuNjAgcXBtaW49MCBxcG1heD02OSBxcHN0ZXA9NCBpcF9yYXRpbz0xLjQwIGFxPTE6M

In [None]:
HTML("""
  <video controls>
    <source src="%s" type="video/mp4"
  </video>
""" % data_url)

# Action recognition

In [None]:
model = hub.load('https://tfhub.dev/deepmind/i3d-kinetics-400/1').signatures['default']

In [None]:
test_video = load_video('/tmp/tmpsouh7vsa/v_PizzaTossing_g23_c03.avi')

In [None]:
test_video.shape

(148, 224, 224, 3)

In [None]:
test_video = tf.constant(test_video, dtype=tf.float32)[tf.newaxis, ...]
test_video.shape

TensorShape([1, 148, 224, 224, 3])

In [None]:
logits = model(test_video)['default'][0]

In [None]:
logits

In [None]:
probabilities = tf.nn.softmax(logits)
probabilities

In [None]:
np.argmax(probabilities)

188

In [None]:
labels[188]

'making pizza'

In [None]:
print('Top 5 main actions:')
for i in np.argsort(probabilities)[::-1][:5]:
  print(f"{labels[i]:35}: {probabilities[i]*100:5.2f}")

Top 5 main actions:
making pizza                       : 36.47
punching bag                       : 28.43
catching or throwing frisbee       :  6.93
pumping fist                       :  5.17
washing dishes                     :  4.70
