In [35]:
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs

In [None]:
# !pip install tensorflow_hub

In [12]:
#@title Import the necessary modules
# TensorFlow and TF-Hub modules.
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed

logging.set_verbosity(logging.ERROR)

# Some modules to help with reading the UCF101 dataset.
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np

# Some modules to display an animation using imageio.
import imageio
from IPython import display

from urllib import request
# NOTE(review): `model1` is not referenced by any other cell visible in this
# notebook, while predict() relies on a global named `i3d_model` that no visible
# cell defines -- confirm whether this object was meant to be that model.
from model_build import buildmodel
model1=buildmodel()

In [2]:
#@title Helper functions for the UCF101 dataset

# Settings shared by the helpers that fetch videos from the UCF101 dataset.

# Base URL; list_ucf_videos() scans the page served here for .avi file names
# and fetch_ucf_video() resolves individual file names against it.
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"

# Since July 2020 crcv.ucf.edu has served a certificate that the default Colab
# environment does not accept, so requests go through a non-verifying context.
unverified_context = ssl._create_unverified_context()

# Fresh temporary directory that receives the downloaded video files.
_CACHE_DIR = tempfile.mkdtemp()

# Memoized result of list_ucf_videos(); None until the first successful call.
_VIDEO_LIST = None

def list_ucf_videos():
  """Lists videos available in UCF101 dataset.

  The page at UCF_ROOT is downloaded only while the module-level _VIDEO_LIST
  cache is empty; later calls reuse the cached names.

  Returns:
    A new list with the unique video file names found on the page (for
    example "v_ApplyEyeMakeup_g01_c01.avi"), in sorted order. Mutating the
    returned list does not affect the cache.
  """
  global _VIDEO_LIST
  if not _VIDEO_LIST:
    # Close the HTTP response deterministically once the page has been read.
    with request.urlopen(UCF_ROOT, context=unverified_context) as response:
      index = response.read().decode("utf-8")
    # Raw string: "\w" and "\." are regex escapes, not Python string escapes.
    videos = re.findall(r"(v_[\w_]+\.avi)", index)
    _VIDEO_LIST = sorted(set(videos))
  return list(_VIDEO_LIST)

def fetch_ucf_video(video):
  """Fetches a video and caches it on the local filesystem.

  Args:
    video: File name of the video, resolved against UCF_ROOT for download.

  Returns:
    The path os.path.join(_CACHE_DIR, video). The download is skipped when a
    file already exists at that path.
  """
  # urljoin officially lives in urllib.parse; urllib.request only exposes it
  # as an incidental re-import.
  from urllib.parse import urljoin

  cache_path = os.path.join(_CACHE_DIR, video)
  if not os.path.exists(cache_path):
    urlpath = urljoin(UCF_ROOT, video)
    print("Fetching %s => %s" % (urlpath, cache_path))
    # Both the HTTP response and the cache file are closed deterministically.
    with request.urlopen(urlpath, context=unverified_context) as response:
      data = response.read()
    with open(cache_path, "wb") as cache_file:
      cache_file.write(data)
  return cache_path

# Utilities to open video files using CV2
def crop_center_square(frame):
  """Returns the centered square region of `frame`.

  Args:
    frame: Array whose first two axes are (rows, columns); any further axes
      are left untouched.

  Returns:
    A slice of `frame` whose first two axes both have length
    min(rows, columns).
  """
  height, width = frame.shape[:2]
  side = height if height < width else width
  half_side = side // 2
  top = height // 2 - half_side
  left = width // 2 - half_side
  return frame[top:top + side, left:left + side]

def load_video(path, max_frames=0, resize=(224, 224)):
  """Reads a video file into an array of preprocessed frames.

  Each frame is center-cropped to a square, resized, and has its channel axis
  reversed (OpenCV decodes frames as BGR, so the result is RGB).

  Args:
    path: Location of the video, passed to cv2.VideoCapture.
    max_frames: Stop once this many frames were collected. The default 0 never
      equals the frame count, so the whole video is read.
    resize: Target size handed to cv2.resize.

  Returns:
    Array of the collected frames divided by 255.0.

  Raises:
    IOError: If OpenCV cannot open `path`.
  """
  cap = cv2.VideoCapture(path)
  frames = []
  try:
    # Fail loudly here; otherwise an unreadable path silently produces an
    # empty array that only breaks later, inside the model call.
    if not cap.isOpened():
      raise IOError("Could not open video file: %s" % path)
    while True:
      ret, frame = cap.read()
      if not ret:
        break
      frame = crop_center_square(frame)
      frame = cv2.resize(frame, resize)
      frame = frame[:, :, [2, 1, 0]]
      frames.append(frame)

      if len(frames) == max_frames:
        break
  finally:
    # Release the capture handle even if decoding raised.
    cap.release()
  return np.array(frames) / 255.0

def to_gif(images):
  """Writes `images` to ./animation.gif and returns an embed of that file.

  Args:
    images: Array of frames; values are multiplied by 255 and clipped to
      [0, 255] before being cast to uint8.

  Returns:
    Whatever embed.embed_file returns for the written GIF.
  """
  gif_path = './animation.gif'
  scaled = images * 255
  frames_uint8 = np.clip(scaled, 0, 255).astype(np.uint8)
  # NOTE(review): some newer imageio releases reportedly reject `fps=` for GIFs
  # in favour of `duration=` -- confirm against the installed version.
  imageio.mimsave(gif_path, frames_uint8, fps=25)
  return embed.embed_file(gif_path)

In [17]:
#@title Helper functions for the UCF101 dataset

# NOTE(review): this cell repeats the "Helper functions for the UCF101 dataset"
# cell found earlier in the notebook; running both re-creates _CACHE_DIR and
# redefines every helper. Consider keeping a single copy.

# Settings shared by the helpers that fetch videos from the UCF101 dataset.

# Base URL; list_ucf_videos() scans the page served here for .avi file names
# and fetch_ucf_video() resolves individual file names against it.
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"

# Since July 2020 crcv.ucf.edu has served a certificate that the default Colab
# environment does not accept, so requests go through a non-verifying context.
unverified_context = ssl._create_unverified_context()

# Fresh temporary directory that receives the downloaded video files.
_CACHE_DIR = tempfile.mkdtemp()

# Memoized result of list_ucf_videos(); None until the first successful call.
_VIDEO_LIST = None

def list_ucf_videos():
  """Lists videos available in UCF101 dataset.

  The page at UCF_ROOT is downloaded only while the module-level _VIDEO_LIST
  cache is empty; later calls reuse the cached names.

  Returns:
    A new list with the unique video file names found on the page (for
    example "v_ApplyEyeMakeup_g01_c01.avi"), in sorted order. Mutating the
    returned list does not affect the cache.
  """
  global _VIDEO_LIST
  if not _VIDEO_LIST:
    # Close the HTTP response deterministically once the page has been read.
    with request.urlopen(UCF_ROOT, context=unverified_context) as response:
      index = response.read().decode("utf-8")
    # Raw string: "\w" and "\." are regex escapes, not Python string escapes.
    videos = re.findall(r"(v_[\w_]+\.avi)", index)
    _VIDEO_LIST = sorted(set(videos))
  return list(_VIDEO_LIST)

def fetch_ucf_video(video):
  """Fetches a video and caches it on the local filesystem.

  Args:
    video: File name of the video, resolved against UCF_ROOT for download.

  Returns:
    The path os.path.join(_CACHE_DIR, video). The download is skipped when a
    file already exists at that path.
  """
  # urljoin officially lives in urllib.parse; urllib.request only exposes it
  # as an incidental re-import.
  from urllib.parse import urljoin

  cache_path = os.path.join(_CACHE_DIR, video)
  if not os.path.exists(cache_path):
    urlpath = urljoin(UCF_ROOT, video)
    print("Fetching %s => %s" % (urlpath, cache_path))
    # Both the HTTP response and the cache file are closed deterministically.
    with request.urlopen(urlpath, context=unverified_context) as response:
      data = response.read()
    with open(cache_path, "wb") as cache_file:
      cache_file.write(data)
  return cache_path

# Utilities to open video files using CV2
def crop_center_square(frame):
  """Returns the centered square region of `frame`.

  Args:
    frame: Array whose first two axes are (rows, columns); any further axes
      are left untouched.

  Returns:
    A slice of `frame` whose first two axes both have length
    min(rows, columns).
  """
  height, width = frame.shape[:2]
  side = height if height < width else width
  half_side = side // 2
  top = height // 2 - half_side
  left = width // 2 - half_side
  return frame[top:top + side, left:left + side]

def load_video(path, max_frames=0, resize=(224, 224)):
  """Reads a video file into an array of preprocessed frames.

  Each frame is center-cropped to a square, resized, and has its channel axis
  reversed (OpenCV decodes frames as BGR, so the result is RGB).

  Args:
    path: Location of the video, passed to cv2.VideoCapture.
    max_frames: Stop once this many frames were collected. The default 0 never
      equals the frame count, so the whole video is read.
    resize: Target size handed to cv2.resize.

  Returns:
    Array of the collected frames divided by 255.0.

  Raises:
    IOError: If OpenCV cannot open `path`.
  """
  cap = cv2.VideoCapture(path)
  frames = []
  try:
    # Fail loudly here; otherwise an unreadable path silently produces an
    # empty array that only breaks later, inside the model call.
    if not cap.isOpened():
      raise IOError("Could not open video file: %s" % path)
    while True:
      ret, frame = cap.read()
      if not ret:
        break
      frame = crop_center_square(frame)
      frame = cv2.resize(frame, resize)
      frame = frame[:, :, [2, 1, 0]]
      frames.append(frame)

      if len(frames) == max_frames:
        break
  finally:
    # Release the capture handle even if decoding raised.
    cap.release()
  return np.array(frames) / 255.0

def to_gif(images):
  """Writes `images` to ./animation.gif and returns an embed of that file.

  Args:
    images: Array of frames; values are multiplied by 255 and clipped to
      [0, 255] before being cast to uint8.

  Returns:
    Whatever embed.embed_file returns for the written GIF.
  """
  gif_path = './animation.gif'
  scaled = images * 255
  frames_uint8 = np.clip(scaled, 0, 255).astype(np.uint8)
  # NOTE(review): some newer imageio releases reportedly reject `fps=` for GIFs
  # in favour of `duration=` -- confirm against the installed version.
  imageio.mimsave(gif_path, frames_uint8, fps=25)
  return embed.embed_file(gif_path)

In [18]:
#@title Get the kinetics-400 labels
# Download the Kinetics-400 label map from the kinetics-i3d GitHub repository
# and keep one stripped, UTF-8 decoded entry per line of the file.
KINETICS_URL = "https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt"
with request.urlopen(KINETICS_URL) as obj:
  labels = []
  for raw_line in obj.readlines():
    labels.append(raw_line.decode("utf-8").strip())
print("Found %d labels." % len(labels))

Found 400 labels.


In [19]:
# Fetch every video file name in the dataset.
ucf_videos = list_ucf_videos()

# Bucket the file names by action category, preserving listing order.
categories = {}
for video in ucf_videos:
  # Names look like "v_<Category>_gNN_cNN.avi": drop the 2-character "v_"
  # prefix and the 12-character "_gNN_cNN.avi" suffix.
  category = video[2:-12]
  categories.setdefault(category, []).append(video)
print("Found %d videos in %d categories." % (len(ucf_videos), len(categories)))

# One summary line per category, showing its size and first two file names.
for category, sequences in categories.items():
  summary = ", ".join(sequences[:2])
  print("%-20s %4d videos (%s, ...)" % (category, len(sequences), summary))

Found 13320 videos in 101 categories.
ApplyEyeMakeup        145 videos (v_ApplyEyeMakeup_g01_c01.avi, v_ApplyEyeMakeup_g01_c02.avi, ...)
ApplyLipstick         114 videos (v_ApplyLipstick_g01_c01.avi, v_ApplyLipstick_g01_c02.avi, ...)
Archery               145 videos (v_Archery_g01_c01.avi, v_Archery_g01_c02.avi, ...)
BabyCrawling          132 videos (v_BabyCrawling_g01_c01.avi, v_BabyCrawling_g01_c02.avi, ...)
BalanceBeam           108 videos (v_BalanceBeam_g01_c01.avi, v_BalanceBeam_g01_c02.avi, ...)
BandMarching          155 videos (v_BandMarching_g01_c01.avi, v_BandMarching_g01_c02.avi, ...)
BaseballPitch         150 videos (v_BaseballPitch_g01_c01.avi, v_BaseballPitch_g01_c02.avi, ...)
BasketballDunk        131 videos (v_BasketballDunk_g01_c01.avi, v_BasketballDunk_g01_c02.avi, ...)
Basketball            134 videos (v_Basketball_g01_c01.avi, v_Basketball_g01_c02.avi, ...)
BenchPress            160 videos (v_BenchPress_g01_c01.avi, v_BenchPress_g01_c02.avi, ...)
Biking              

In [20]:
# Get a sample cricket video: download it (or reuse the cached copy), then
# decode it into an array of preprocessed frames.
SAMPLE_VIDEO_NAME = "v_CricketShot_g04_c02.avi"
video_path = fetch_ucf_video(SAMPLE_VIDEO_NAME)
sample_video = load_video(video_path)

Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_CricketShot_g04_c02.avi => C:\Users\MEDHAT~1\AppData\Local\Temp\tmpzon5xlct\v_CricketShot_g04_c02.avi


In [21]:
# Inspect the shape of the array returned by load_video for the sample clip.
sample_video.shape

(116, 224, 224, 3)

In [22]:
# from sklearn.model_selection import train_test_split

# # Step 1: Divide the data into X and y
# X = []
# y = []

# for category, sequences in categories.items():
#     for video in sequences:
#         video_path = fetch_ucf_video(video)
#         X.append(video_path)
#         y.append(labels.index(category))

# # Step 2: Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [23]:
# import tensorflow as tf
# from tensorflow.keras.layers import Conv3D, MaxPooling3D, Dense, Flatten, Dropout
# from tensorflow.keras import Model, Input
# from tensorflow.keras.layers import BatchNormalization, Activation

# def i3d_inception_module(x, filters):
#     branch_0 = Conv3D(filters, (1, 1, 1), padding='same')(x)
#     branch_0 = BatchNormalization()(branch_0)
#     branch_0 = Activation('relu')(branch_0)

#     branch_1 = Conv3D(filters, (1, 1, 1), padding='same')(x)
#     branch_1 = BatchNormalization()(branch_1)
#     branch_1 = Activation('relu')(branch_1)
#     branch_1 = Conv3D(filters, (3, 3, 3), padding='same')(branch_1)
#     branch_1 = BatchNormalization()(branch_1)
#     branch_1 = Activation('relu')(branch_1)

#     branch_2 = Conv3D(filters, (1, 1, 1), padding='same')(x)
#     branch_2 = BatchNormalization()(branch_2)
#     branch_2 = Activation('relu')(branch_2)
#     branch_2 = Conv3D(filters, (3, 3, 3), padding='same')(branch_2)
#     branch_2 = BatchNormalization()(branch_2)
#     branch_2 = Activation('relu')(branch_2)

#     branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same')(x)
#     branch_3 = Conv3D(filters, (1, 1, 1), padding='same')(branch_3)
#     branch_3 = BatchNormalization()(branch_3)
#     branch_3 = Activation('relu')(branch_3)

#     x = tf.keras.layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=-1)
#     return x

# def build_i3d_model(input_shape=(64, 224, 224, 3), num_classes=101):
#     inputs = Input(shape=input_shape)

#     x = Conv3D(64, (7, 7, 7), strides=(2, 2, 2), padding='same')(inputs)
#     x = BatchNormalization()(x)
#     x = Activation('relu')(x)
#     x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same')(x)

#     x = Conv3D(64, (1, 1, 1), padding='same')(x)
#     x = BatchNormalization()(x)
#     x = Activation('relu')(x)
#     x = Conv3D(192, (3, 3, 3), padding='same')(x)
#     x = BatchNormalization()(x)
#     x = Activation('relu')(x)
#     x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same')(x)

#     x = i3d_inception_module(x, 64)
#     x = i3d_inception_module(x, 120)
#     x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same')(x)

#     x = i3d_inception_module(x, 128)
#     x = i3d_inception_module(x, 192)
#     x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same')(x)

#     x = i3d_inception_module(x, 192)
#     x = i3d_inception_module(x, 192)
#     x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same')(x)

#     x = i3d_inception_module(x, 256)
#     x = i3d_inception_module(x, 320)

#     x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same')(x)
#     x = Flatten()(x)
#     x = Dense(4096, activation='relu')(x)
#     x = Dropout(0.5)(x)
#     x = Dense(4096, activation='relu')(x)
#     x = Dropout(0.5)(x)
#     outputs = Dense(num_classes, activation='softmax')(x)

#     model = Model(inputs, outputs)
#     return model

# # Build the model
# input_shape = (64, 224, 224, 3)
# num_classes = 101  # Adjust based on your dataset
# model = build_i3d_model(input_shape, num_classes)
# model.summary()

# # Compile the model
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [24]:
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout

# # Model architecture
# model = Sequential([
#     Conv3D(32, (3, 3, 3), activation='relu', input_shape=(10, 224,224, 3)),
#     MaxPooling3D(pool_size=(2, 2, 2)),
#     Conv3D(64, (3, 3, 3), activation='relu'),
#     MaxPooling3D(pool_size=(2, 2, 2)),
#     Conv3D(128, (3, 3, 3), activation='relu'),
#     MaxPooling3D(pool_size=(2, 2, 2)),
#     Flatten(),
#     Dense(512, activation='relu'),
#     Dropout(0.5),
#     Dense(num_classes, activation='softmax')
# ])

# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# model.summary()

In [25]:
# # Function to preprocess videos for the I3D model
# def preprocess_video(frames):
#     frames = tf.convert_to_tensor(frames, dtype=tf.float32)
#     # Resize frames to match the input shape of the I3D model
#     frames = tf.image.resize(frames, (224, 224))
#     # Expand dimensions to create a batch of size 1
#     frames = tf.expand_dims(frames, axis=0)
#     return frames

# # Function to predict action label for a given video
# def predict_video_action(video_frames):
#     # Preprocess video frames
#     processed_frames = preprocess_video(video_frames)
    
#     # Predict action probabilities using the I3D model
#     logits = i3d_model(processed_frames)
#     probabilities = tf.nn.softmax(logits, axis=-1)
    
#     # Get the predicted action label
#     predicted_label_index = tf.argmax(probabilities, axis=-1).numpy()[0]
#     predicted_label = labels[predicted_label_index]
    
#     return predicted_label

In [26]:
# ucf_videos

In [27]:
# labels=[]
# for i in ucf_videos:
#     i=i.split("_")
#     labels.append(i[1])
# labels

In [28]:
# num_classes=len(set(labels))
# num_classes

In [29]:
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Input, Dense, Dropout
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.callbacks import ModelCheckpoint
# import tensorflow_hub as hub

# # Define your model architecture
# def build_model():
#     # Load the pre-trained I3D model from TensorFlow Hub
#     i3d_url = "https://tfhub.dev/deepmind/i3d-kinetics-400/1"
#     i3d_model = hub.KerasLayer(i3d_url, output_key="default", trainable=False)

#     # Define input tensor with appropriate shape
#     input_tensor = Input(shape=(64, 224, 224, 3))

#     # Pass the input tensor directly through the I3D model
#     i3d_features = i3d_model(input_tensor)

#     # Add additional layers as needed
#     x = Dense(256, activation='relu')(i3d_features)
#     x = Dropout(0.5)(x)
#     output_tensor = Dense(num_classes, activation='softmax')(x)  # Adjust num_classes as needed

#     model = Model(inputs=input_tensor, outputs=output_tensor)
#     return model

# # Compile the model
# model = build_model()
# model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# # Prepare your data (load .avi files, preprocess, split into train and validation sets)
# # Example:
# train_videos = ucf_videos # List of paths to training .avi files
# train_labels = labels  # Corresponding labels for training videos
# # validation_videos = [...]  # List of paths to validation .avi files
# # validation_labels = [...]  # Corresponding labels for validation videos

# # Train your model
# checkpoint = ModelCheckpoint('model_checkpoint.h5', save_best_only=True)
# model.fit(train_videos, train_labels, epochs=10, callbacks=[checkpoint])

# # Evaluate your model
# # loss, accuracy = model.evaluate(validation_videos, validation_labels)
# # print("Validation Loss:", loss)
# # print("Validation Accuracy:", accuracy)


In [30]:
def predict(sample_video, model=None, top_k=5):
  """Prints the most probable action labels for one video.

  Args:
    sample_video: Frames of a single video without a batch axis (for example
      the array returned by load_video).
    model: Callable applied to the batched float32 tensor; its result must
      expose the logits under the 'default' key. Defaults to the notebook-level
      `i3d_model`.
    top_k: How many of the highest-probability labels to print.
  """
  if model is None:
    # NOTE(review): no visible cell defines `i3d_model`; it must already exist
    # in the kernel (or be passed explicitly) -- confirm where it comes from.
    model = i3d_model

  # Add a batch axis to the sample video.
  model_input = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]

  # [0] selects the logits of the single video in the batch.
  logits = model(model_input)['default'][0]
  probabilities = tf.nn.softmax(logits)

  print("Top %d actions:" % top_k)
  # argsort is ascending, so reverse it to list the most probable labels first.
  for i in np.argsort(probabilities)[::-1][:top_k]:
    print(f"  {labels[i]:22}: {probabilities[i] * 100:5.2f}%")


In [34]:
# Classify a clip that already lives on the local disk. It is loaded directly:
# fetch_ucf_video() is meant for file names relative to UCF_ROOT, and it only
# handed this absolute path back because the file already existed.
# NOTE(review): absolute, machine-specific path -- consider making it relative
# to a configurable data directory.
video_path = r"C:\Users\Medha Trust\Desktop\Mini_Project\Mini-project\videos\fixing_hair\lL2UelTTRww7181.mp4"
if not os.path.exists(video_path):
  # Stop here instead of failing later inside decoding or the model call.
  raise FileNotFoundError(video_path)
sample_video = load_video(video_path)
predict(sample_video)

Top 5 actions:
  singing               :  9.19%
  pumping fist          :  5.75%
  laughing              :  5.15%
  sneezing              :  4.78%
  taking a shower       :  4.68%


In [110]:
# Show the distinct entries of `labels` (the list loaded from KINETICS_URL).
set(labels)

{'abseiling',
 'air drumming',
 'answering questions',
 'applauding',
 'applying cream',
 'archery',
 'arm wrestling',
 'arranging flowers',
 'assembling computer',
 'auctioning',
 'baby waking up',
 'baking cookies',
 'balloon blowing',
 'bandaging',
 'barbequing',
 'bartending',
 'beatboxing',
 'bee keeping',
 'belly dancing',
 'bench pressing',
 'bending back',
 'bending metal',
 'biking through snow',
 'blasting sand',
 'blowing glass',
 'blowing leaves',
 'blowing nose',
 'blowing out candles',
 'bobsledding',
 'bookbinding',
 'bouncing on trampoline',
 'bowling',
 'braiding hair',
 'breading or breadcrumbing',
 'breakdancing',
 'brush painting',
 'brushing hair',
 'brushing teeth',
 'building cabinet',
 'building shed',
 'bungee jumping',
 'busking',
 'canoeing or kayaking',
 'capoeira',
 'carrying baby',
 'cartwheeling',
 'carving pumpkin',
 'catching fish',
 'catching or throwing baseball',
 'catching or throwing frisbee',
 'catching or throwing softball',
 'celebrating',
 'cha