In [1]:
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs
!pip install tensorflow



  Preparing metadata (setup.py) ... done


In [2]:
import os
import cv2
import ssl
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, MaxPooling3D, TimeDistributed, Dropout, Flatten, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import itertools
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm
import matplotlib.pyplot as plt
import tempfile
from urllib import request
import re


In [3]:
# Base URL under which the UCF101 .avi files (and their index page) are fetched.
UCF_ROOT = "https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/"
# Download cache directory. mkdtemp() creates a brand-new directory each time
# this cell runs, so files cached by a previous run are not reused.
_CACHE_DIR = tempfile.mkdtemp()

# SSL context passed to every urlopen() call below; built with a private `ssl`
# helper that turns off certificate verification.
# NOTE(review): this disables TLS certificate checks for all downloads - confirm
# that is acceptable for this data source.
unverified_context = ssl._create_unverified_context()


def list_ucf_videos():
    """Return the sorted, de-duplicated video file names found at UCF_ROOT.

    Downloads the page at ``UCF_ROOT`` (using the module-level
    ``unverified_context``), decodes it as UTF-8 and extracts every
    substring of the form ``v_<word characters>.avi``.

    Returns:
        list[str]: Unique file names in ascending order.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the page is not valid UTF-8.
    """
    # `with` closes the HTTP response as soon as the body has been read.
    with request.urlopen(UCF_ROOT, context=unverified_context) as response:
        index = response.read().decode("utf-8")
    # Raw string: "\w" and "\." are regex escapes, not Python string escapes
    # (the non-raw form triggers an invalid-escape warning).
    videos = re.findall(r"(v_[\w_]+\.avi)", index)
    # The same name can occur several times in the page, hence the set().
    return sorted(set(videos))

def fetch_ucf_video(video):
    """Download one video into the local cache and return its path.

    If ``<_CACHE_DIR>/<video>`` already exists it is returned as-is and no
    network request is made.

    Args:
        video: File name of the video relative to ``UCF_ROOT``.

    Returns:
        str: Path of the cached file on the local filesystem.

    Raises:
        urllib.error.URLError: If the download fails.
        OSError: If the cache file cannot be written.
    """
    # urljoin lives in urllib.parse; `request.urljoin` only works because
    # urllib.request happens to import it for its own use.
    from urllib.parse import urljoin

    cache_path = os.path.join(_CACHE_DIR, video)
    if not os.path.exists(cache_path):
        urlpath = urljoin(UCF_ROOT, video)
        print("Fetching %s => %s" % (urlpath, cache_path))
        with request.urlopen(urlpath, context=unverified_context) as response:
            data = response.read()
        # Write to a temporary name and rename afterwards, so an interrupted
        # write never leaves a truncated file that later looks like a cache hit.
        partial_path = cache_path + ".part"
        with open(partial_path, "wb") as cache_file:
            cache_file.write(data)
        os.replace(partial_path, cache_path)
    return cache_path

def crop_center_square(frame):
    """Return the largest centered square region of an image array.

    The first two axes of ``frame`` are taken as rows and columns; the side of
    the square equals the smaller of the two. Any trailing axes are kept.
    """
    height, width = frame.shape[:2]
    side = height if height < width else width
    top = height // 2 - side // 2
    left = width // 2 - side // 2
    return frame[top:top + side, left:left + side]

def load_video(path, max_frames=20, resize=(224, 224)):
    """Read frames from a video file and return them as one scaled array.

    Each frame is center-cropped to a square, resized with ``cv2.resize`` to
    ``resize`` and has its three channels reordered from BGR to RGB.

    Args:
        path: Location of the video, handed to ``cv2.VideoCapture``.
        max_frames: Stop after this many frames; ``0`` means read until the
            capture reports no more frames.
        resize: Size argument forwarded to ``cv2.resize``.

    Returns:
        numpy.ndarray: The stacked frames divided by 255.0.
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        ok, image = capture.read()
        while ok:
            square = crop_center_square(image)
            scaled = cv2.resize(square, resize)
            # Index list [2, 1, 0] swaps the first and third channel (BGR -> RGB).
            collected.append(scaled[:, :, [2, 1, 0]])
            if max_frames != 0 and len(collected) == max_frames:
                break
            ok, image = capture.read()
    finally:
        # Always hand the capture back, even if processing a frame failed.
        capture.release()
    return np.array(collected) / 255.0



In [4]:
# Number of videos to keep from the front of the sorted listing.
# NOTE(review): because the listing is sorted by name, a prefix only covers the
# alphabetically first action classes (three in the recorded run).
NUM_VIDEOS = 400

video_list = list_ucf_videos()[:NUM_VIDEOS]

# Show the size and a short preview instead of dumping every file name.
print(f"{len(video_list)} videos selected; first 5: {video_list[:5]}")


['v_ApplyEyeMakeup_g01_c01.avi', 'v_ApplyEyeMakeup_g01_c02.avi', 'v_ApplyEyeMakeup_g01_c03.avi', 'v_ApplyEyeMakeup_g01_c04.avi', 'v_ApplyEyeMakeup_g01_c05.avi', 'v_ApplyEyeMakeup_g01_c06.avi', 'v_ApplyEyeMakeup_g02_c01.avi', 'v_ApplyEyeMakeup_g02_c02.avi', 'v_ApplyEyeMakeup_g02_c03.avi', 'v_ApplyEyeMakeup_g02_c04.avi', 'v_ApplyEyeMakeup_g03_c01.avi', 'v_ApplyEyeMakeup_g03_c02.avi', 'v_ApplyEyeMakeup_g03_c03.avi', 'v_ApplyEyeMakeup_g03_c04.avi', 'v_ApplyEyeMakeup_g03_c05.avi', 'v_ApplyEyeMakeup_g03_c06.avi', 'v_ApplyEyeMakeup_g04_c01.avi', 'v_ApplyEyeMakeup_g04_c02.avi', 'v_ApplyEyeMakeup_g04_c03.avi', 'v_ApplyEyeMakeup_g04_c04.avi', 'v_ApplyEyeMakeup_g04_c05.avi', 'v_ApplyEyeMakeup_g04_c06.avi', 'v_ApplyEyeMakeup_g04_c07.avi', 'v_ApplyEyeMakeup_g05_c01.avi', 'v_ApplyEyeMakeup_g05_c02.avi', 'v_ApplyEyeMakeup_g05_c03.avi', 'v_ApplyEyeMakeup_g05_c04.avi', 'v_ApplyEyeMakeup_g05_c05.avi', 'v_ApplyEyeMakeup_g05_c06.avi', 'v_ApplyEyeMakeup_g05_c07.avi', 'v_ApplyEyeMakeup_g06_c01.avi', 'v_Appl

In [5]:
import pandas as pd

# File names look like v_<Class>_gXX_cYY.avi, so the class name is the second
# underscore-separated token.
labels = list(map(lambda file_name: file_name.split('_')[1], video_list))

# One row per video: its file name and its class name.
df = pd.DataFrame(dict(Video_Path=video_list, Label=labels))

print(df)



                       Video_Path           Label
0    v_ApplyEyeMakeup_g01_c01.avi  ApplyEyeMakeup
1    v_ApplyEyeMakeup_g01_c02.avi  ApplyEyeMakeup
2    v_ApplyEyeMakeup_g01_c03.avi  ApplyEyeMakeup
3    v_ApplyEyeMakeup_g01_c04.avi  ApplyEyeMakeup
4    v_ApplyEyeMakeup_g01_c05.avi  ApplyEyeMakeup
..                            ...             ...
395         v_Archery_g24_c05.avi         Archery
396         v_Archery_g24_c06.avi         Archery
397         v_Archery_g25_c01.avi         Archery
398         v_Archery_g25_c02.avi         Archery
399         v_Archery_g25_c03.avi         Archery

[400 rows x 2 columns]


In [6]:
# Frame height/width in pixels (used as the InceptionV3 input shape) and the
# number of frames a video must yield to be kept by create_dataset().
IMAGE_HEIGHT,IMAGE_WIDTH = 224,224
SEQUENCE_LENGTH = 20

# NOTE(review): despite its name this is the list of video file names, not a
# list of class names; create_dataset() iterates it and downloads each entry.
CLASSES_LIST=video_list

In [7]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GlobalAveragePooling2D


In [8]:
def create_dataset():
  """Download every video in CLASSES_LIST and extract InceptionV3 features.

  For each file name in ``CLASSES_LIST`` the video is fetched (or read from the
  cache), its first ``SEQUENCE_LENGTH`` frames are loaded, and all frames are
  passed through an ImageNet-pretrained InceptionV3 without its top layers.
  Videos that do not yield exactly ``SEQUENCE_LENGTH`` frames are skipped and
  reported.

  Returns:
    tuple: ``(features, labels, video_files_paths)``, index-aligned:
      features: array of shape (kept_videos, SEQUENCE_LENGTH, 1, H, W, C);
        the singleton axis is kept for compatibility with callers
        (the recorded run shows (..., 20, 1, 5, 5, 2048)).
      labels: 1-D integer array, the index of each video's class name in the
        sorted list of class names found in ``CLASSES_LIST``.
      video_files_paths: list of local paths of the kept videos.
  """
  features = []
  labels = []
  video_files_paths = []
  base_model = InceptionV3(include_top=False, weights='imagenet', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3))

  # CLASSES_LIST holds file names (v_<Class>_gXX_cYY.avi). The label must be the
  # class id, not the position of the video in the list, so build a
  # class-name -> id table (sorted order, the same order LabelEncoder uses).
  class_names = sorted({video_name.split('_')[1] for video_name in CLASSES_LIST})
  class_to_index = {name: index for index, name in enumerate(class_names)}

  for video_url in CLASSES_LIST:
    print(f'Extracting features from video: {video_url}')

    # Download the video file (no-op when it is already cached)
    video_path = fetch_ucf_video(video_url)

    # cv2.resize expects (width, height); tie both settings to the constants
    # that also define the model input shape.
    frames = load_video(video_path, max_frames=SEQUENCE_LENGTH, resize=(IMAGE_WIDTH, IMAGE_HEIGHT))
    if len(frames) != SEQUENCE_LENGTH:
      print(f'Skipping {video_url}: expected {SEQUENCE_LENGTH} frames, got {len(frames)}')
      continue

    # load_video() scales pixels to [0, 1]; the ImageNet InceptionV3 weights
    # expect inputs in [-1, 1] (what inception_v3.preprocess_input produces).
    model_input = frames * 2.0 - 1.0

    # One batched, silent forward pass per video instead of one predict() call
    # (with its own progress bar) per frame.
    frame_features = base_model.predict(model_input, verbose=0)

    # Keep features as a sequence for LSTM (avoid flattening); re-insert the
    # per-frame batch axis so the returned shape matches earlier versions.
    features.append(np.expand_dims(frame_features, axis=1))
    labels.append(class_to_index[video_url.split('_')[1]])
    video_files_paths.append(video_path)

  features = np.asarray(features)  # Features remain a sequence of frame features
  labels = np.array(labels)

  return features, labels, video_files_paths


In [9]:
# Run the download + feature-extraction pass over every entry of CLASSES_LIST.
# NOTE: this rebinds `labels`, replacing the list of class-name strings that an
# earlier cell built from the file names.
features,labels,video_files_paths = create_dataset()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Extracting Data of Class: v_ApplyLipstick_g07_c01.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_ApplyLipstick_g07_c01.avi => /tmp/tmpvq751yjv/v_ApplyLipstick_g07_c01.avi
Extracting Data of Class: v_ApplyLipstick_g07_c02.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_ApplyLipstick_g07_c02.avi => /tmp/tmpvq751yjv/v_ApplyLipstick_g07_c02.avi
Extracting Data of Class: v_ApplyLipstick_g07_c03.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_ApplyLipstick_g07_c03.avi => /tmp/tmpvq751yjv/v_ApplyLipstick_g07_c03.avi
Extracting Data of Class: v_ApplyLipstick_g07_c04.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_ApplyLipstick_g07_c04.avi => /tmp/tmpvq751yjv/v_ApplyLipstick_g07_c04.avi
Extracting Data of Class: v_ApplyLipstick_g08_c01.avi
Fetching https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/v_ApplyLipstick_g08_c01.avi => /tmp/tmpvq751yjv/v_ApplyLipstick_g08_c01.avi


In [10]:
# Inspect the label column before it is integer-encoded.
df['Label']

0      ApplyEyeMakeup
1      ApplyEyeMakeup
2      ApplyEyeMakeup
3      ApplyEyeMakeup
4      ApplyEyeMakeup
            ...      
395           Archery
396           Archery
397           Archery
398           Archery
399           Archery
Name: Label, Length: 400, dtype: object

In [12]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

# Always encode from the class name embedded in the file name, never from
# df['Label'] itself: this cell overwrites df['Label'] with integers, so fitting
# on that column would lose the name -> id mapping if the cell is run twice.
class_names = df['Video_Path'].str.split('_').str[1]
df['Label'] = label_encoder.fit_transform(class_names)

# classes_ is sorted and the encoded id of a class is its position in it.
label_mapping = {str(name): index for index, name in enumerate(label_encoder.classes_)}
print("Label Mapping:", label_mapping)


Label Mapping: {'ApplyEyeMakeup': 0, 'ApplyLipstick': 1, 'Archery': 2}


In [13]:
import random
import tensorflow as tf

# Build the targets from the videos that actually ended up in `features`.
# df['Label'] has one row per *listed* video, while create_dataset() drops
# videos without enough frames, so using df['Label'] directly can misalign
# (or mismatch in length with) the feature array.
kept_class_names = [os.path.basename(path).split('_')[1] for path in video_files_paths]
y = label_encoder.transform(kept_class_names)
assert len(y) == len(features), "features and labels are misaligned"

# NOTE(review): a purely random split puts clips from the same UCF101 group
# (the gXX part of the file name) in both train and test, which presumably
# inflates test scores - consider a group-aware split.
x_train, x_test, y_train, y_test = train_test_split(features, y, test_size=0.2, random_state=42)


x_train.shape

(320, 20, 1, 5, 5, 2048)

In [14]:
# Number of distinct label values; used as the width of the softmax layer.
distinct_labels = np.unique(df['Label'])
NUM_CLASSES = distinct_labels.size
print(NUM_CLASSES)

3


In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten, TimeDistributed

# Sequence classifier: flatten the feature map of every time step, feed the
# resulting sequence to an LSTM, apply dropout and classify with a softmax layer.
model = Sequential([
    TimeDistributed(Flatten(), input_shape=x_train.shape[1:]),
    LSTM(units=128),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax'),
])
model.summary()


# Integer class ids are used as targets, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDist  (None, 20, 51200)         0         
 ributed)                                                        
                                                                 
 lstm (LSTM)                 (None, 128)               26280448  
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 3)                 387       
                                                                 
Total params: 26280835 (100.25 MB)
Trainable params: 26280835 (100.25 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
# Train for 10 epochs in batches of 32, holding out 20% of the training data
# for validation; the per-epoch metrics are kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    verbose=1,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Predict class probabilities for the held-out test set
y_pred = model.predict(x_test)

# Convert predictions to class labels (index of the highest probability)
predicted_labels = np.argmax(y_pred, axis=1)

# Ground truth as a plain array, whatever container y_test came in
true_labels = np.asarray(y_test)

# Calculate evaluation metrics. All weighted metrics share the same
# zero_division setting so they treat never-predicted classes consistently.
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='weighted', zero_division=1)
recall = recall_score(true_labels, predicted_labels, average='weighted', zero_division=1)
f1 = f1_score(true_labels, predicted_labels, average='weighted', zero_division=1)

print("Evaluation Metrics:")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")

# Pass the full set of class ids (one per model output) so the matrix always
# has one row/column per class, even if a class is absent from the test split.
conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=np.arange(y_pred.shape[1]))
print(conf_matrix)

Evaluation Metrics:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-Score: 1.0
[[36  0  0]
 [ 0 17  0]
 [ 0  0 27]]
