In [3]:
import tensorflow as tf
import pandas as pd

In [4]:
# Load the training and test annotation tables from the CSV files one directory up.
df_train=pd.read_csv("../archive/Train.csv")
# NOTE(review): this expression is not the last statement of the cell and its
# result is not used, so with Jupyter's default settings nothing is displayed.
df_train.head()
test_df = pd.read_csv("../archive/test.csv")

# Report the number of rows in each table (one row per video, per the messages).
print(f"Total videos for training: {len(df_train)}")
print(f"Total videos for testing: {len(test_df)}")

Total videos for training: 50420
Total videos for testing: 6981


In [5]:
# Show the distinct class names found in the "label" column of the training table.
distinct_label=df_train["label"].unique()
distinct_label

array(['Doing other things', 'Pushing Two Fingers Away',
       'Drumming Fingers', 'Sliding Two Fingers Down',
       'Pushing Hand Away', 'Shaking Hand', 'Pulling Two Fingers In',
       'Stop Sign', 'Zooming In With Two Fingers',
       'Sliding Two Fingers Up', 'Zooming Out With Two Fingers',
       'Zooming In With Full Hand', 'No gesture', 'Swiping Right',
       'Thumb Down', 'Rolling Hand Forward', 'Pulling Hand In',
       'Zooming Out With Full Hand', 'Swiping Left',
       'Rolling Hand Backward', 'Turning Hand Counterclockwise',
       'Swiping Up', 'Turning Hand Clockwise', 'Sliding Two Fingers Left',
       'Swiping Down', 'Thumb Up', 'Sliding Two Fingers Right'],
      dtype=object)

In [6]:
# Count the number of training rows (videos) per class label.
# The result is displayed as the cell output because it is the last expression.
videos_per_class = df_train['label'].value_counts()
videos_per_class

label
Doing other things               4374
Pulling Two Fingers In           1859
Zooming Out With Two Fingers     1847
No gesture                       1844
Pushing Two Fingers Away         1843
Thumb Up                         1841
Sliding Two Fingers Down         1832
Zooming Out With Full Hand       1832
Pulling Hand In                  1829
Swiping Down                     1824
Stop Sign                        1821
Drumming Fingers                 1818
Sliding Two Fingers Left         1816
Pushing Hand Away                1812
Thumb Down                       1810
Zooming In With Two Fingers      1801
Zooming In With Full Hand        1799
Shaking Hand                     1789
Rolling Hand Forward             1788
Sliding Two Fingers Right        1780
Sliding Two Fingers Up           1779
Swiping Up                       1768
Swiping Left                     1762
Swiping Right                    1730
Rolling Hand Backward            1715
Turning Hand Counterclockwise    1380
Turnin

In [7]:
!pip3 install tensorflow_hub
!pip3 install tensorflow_docs



In [8]:
# TensorFlow and TF-Hub modules.
from absl import logging

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
logging.set_verbosity(logging.ERROR)
# Some modules to help with reading the Jester gesture dataset.
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np

# Some modules to display an animation using imageio.
import imageio
from IPython import display

from urllib import request  # requires python3
from keras.applications.densenet import DenseNet121
import keras 

In [9]:
def crop_center_square(frame):
  """Return the largest centered square region of ``frame``.

  The first two axes of ``frame`` are treated as (height, width); any
  trailing axes are carried through unchanged.
  """
  height, width = frame.shape[0:2]
  side = min(height, width)
  top = (height // 2) - (side // 2)
  left = (width // 2) - (side // 2)
  rows = slice(top, top + side)
  cols = slice(left, left + side)
  return frame[rows, cols]

def load_video_from_images(folder_path, max_frames=0, resize=(224, 224)):
    """Load a video that is stored as a folder of frame images.

    The ``.jpg``/``.png`` files of ``folder_path`` are read in sorted filename
    order. Each frame is converted from BGR to RGB, center-cropped to a square
    with ``crop_center_square`` and resized with ``cv2.resize``.

    Args:
        folder_path: Directory that holds the frame images.
        max_frames: Stop after this many frames. With the default 0 the limit
            never triggers, so every matching file is loaded.
        resize: Target size handed to ``cv2.resize``.

    Returns:
        Array of the stacked frames divided by 255.0; an empty array when the
        folder contains no matching file.

    Raises:
        OSError: If a matching file cannot be decoded by ``cv2.imread``.
    """
    frames = []
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(('.jpg', '.png')):
            continue
        frame_path = os.path.join(folder_path, filename)
        frame = cv2.imread(frame_path)
        if frame is None:
            # cv2.imread reports failure by returning None rather than raising;
            # fail here with the offending path instead of inside cvtColor.
            raise OSError(f"Could not read image file: {frame_path}")
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        frame = crop_center_square(frame)
        frame = cv2.resize(frame, resize)
        frames.append(frame)
        if len(frames) == max_frames:
            break
    return np.array(frames) / 255.0

def to_gif(images):
  """Save ``images`` as ./animation.gif and return the notebook embed for it.

  The values are multiplied by 255, clipped to [0, 255] and cast to uint8
  before being written with imageio at 25 fps.
  """
  scaled = images * 255
  frames_uint8 = np.clip(scaled, 0, 255).astype(np.uint8)
  imageio.mimsave('./animation.gif', frames_uint8, fps=25)
  return embed.embed_file('./animation.gif')

In [10]:
# Number of frame slots per video in the feature array built by prepare_all_videos.
MAX_SEQ_LENGTH = 37
# Length of the per-frame feature vector stored in that array
# (presumably the pooled DenseNet121 output width — TODO confirm).
NUM_FEATURES = 1024
# Height and width of the images fed to the feature extractor.
IMG_SIZE = 128
# NOTE(review): not referenced in the visible cells; presumably the number of training epochs.
EPOCHS = 50

In [11]:
#Feature extractor using densenet
def build_feature_extractor():
    """Build a Keras model that maps an image to DenseNet121 features.

    The model takes (IMG_SIZE, IMG_SIZE, 3) inputs, applies the DenseNet
    ``preprocess_input`` function and returns the output of a DenseNet121
    created with ImageNet weights, no classification top and average pooling.
    """
    image_shape = (IMG_SIZE, IMG_SIZE, 3)
    backbone = DenseNet121(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=image_shape,
    )

    inputs = keras.Input(image_shape)
    outputs = backbone(keras.applications.densenet.preprocess_input(inputs))
    return keras.Model(inputs, outputs, name="feature_extractor")

In [12]:
# Label preprocessing with StringLookup.
# The vocabulary is the set of unique training labels; no out-of-vocabulary
# index and no mask token are requested.
label_processor = keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(df_train["label"]), mask_token=None
)
# Print the resulting vocabulary list.
print(label_processor.get_vocabulary())

['Doing other things', 'Drumming Fingers', 'No gesture', 'Pulling Hand In', 'Pulling Two Fingers In', 'Pushing Hand Away', 'Pushing Two Fingers Away', 'Rolling Hand Backward', 'Rolling Hand Forward', 'Shaking Hand', 'Sliding Two Fingers Down', 'Sliding Two Fingers Left', 'Sliding Two Fingers Right', 'Sliding Two Fingers Up', 'Stop Sign', 'Swiping Down', 'Swiping Left', 'Swiping Right', 'Swiping Up', 'Thumb Down', 'Thumb Up', 'Turning Hand Clockwise', 'Turning Hand Counterclockwise', 'Zooming In With Full Hand', 'Zooming In With Two Fingers', 'Zooming Out With Full Hand', 'Zooming Out With Two Fingers']


In [13]:
# Calculates the number of frames in each video 
def class_process(dir_path):
    """Count the frame files of every video folder and record the count.

    For each non-hidden sub-directory of ``dir_path``, the number of files
    whose name contains ``'00'`` is written to a file named ``n_frames`` inside
    that sub-directory, and one progress line is printed.

    Args:
        dir_path: Directory whose sub-directories each hold one video's frames.

    Returns:
        None. Nothing is done when ``dir_path`` is not a directory.
    """
    if not os.path.isdir(dir_path):
        return

    for file_name in os.listdir(dir_path):
        if file_name.startswith('.'):  # Skip hidden files
            continue

        video_dir_path = os.path.join(dir_path, file_name)
        if not os.path.isdir(video_dir_path):
            # A stray regular file next to the video folders would make the
            # os.listdir() call below raise NotADirectoryError.
            continue

        # Only the number of frames is needed, so matching files are counted
        # directly; parsing an index out of each name added nothing and could
        # raise ValueError on an unexpected file name.
        n_frames = sum(
            1
            for image_file_name in os.listdir(video_dir_path)
            if '00' in image_file_name
        )

        if n_frames == 0:
            print('no image files', video_dir_path)
        else:
            print(video_dir_path, n_frames)
        with open(os.path.join(video_dir_path, 'n_frames'), 'w') as dst_file:
            dst_file.write(str(n_frames))


# Run the frame count over the training folder; this writes an n_frames file
# into every video sub-directory and prints one line per video.
dir_path = "../archive/Train/"
class_process(dir_path)


../archive/Train/90107 37
../archive/Train/134434 37
../archive/Train/100601 37
../archive/Train/146014 37
../archive/Train/86062 37
../archive/Train/128857 37
../archive/Train/143566 37
../archive/Train/62822 37
../archive/Train/83510 37
../archive/Train/127023 37
../archive/Train/146828 37
../archive/Train/105373 37
../archive/Train/53759 37
../archive/Train/74947 37
../archive/Train/131146 37
../archive/Train/1855 37
../archive/Train/30805 37
../archive/Train/34499 37
../archive/Train/111541 37
../archive/Train/125774 37
../archive/Train/80395 37
../archive/Train/107424 37
../archive/Train/30039 37
../archive/Train/60749 37
../archive/Train/29928 37
../archive/Train/114033 37
../archive/Train/111773 37
../archive/Train/81075 37
../archive/Train/17429 37
../archive/Train/141003 37
../archive/Train/107616 37
../archive/Train/120808 37
../archive/Train/29326 37
../archive/Train/97110 37
../archive/Train/124494 37
../archive/Train/79456 37
../archive/Train/98964 37
../archive/Train/8450

In [16]:
import pandas as pd

# Assuming main_path is the main directory where all videos are stored
# and df contains columns 'video_id' and 'label'
def create_video_path_df(df, main_path):
    """Build a table of video folder paths from a table of video ids.

    Args:
        df: DataFrame with ``video_id`` and ``label`` columns. It is left
            unmodified.
        main_path: Directory that contains one entry per video id.

    Returns:
        New DataFrame with the columns ``video_id``, ``video_path`` and
        ``label``, where ``video_path`` is ``main_path + "/" + str(video_id)``.
    """
    # assign() works on a copy, so the caller's DataFrame does not silently
    # gain a 'video_path' column (and no SettingWithCopyWarning on slices).
    with_paths = df.assign(
        video_path=main_path + "/" + df['video_id'].astype(str)
    )

    # Return only the video_id, video_path and label columns.
    return with_paths[['video_id', 'video_path', 'label']]


#application
# Resolve the frames directory relative to the working directory, like the CSV
# paths used earlier, instead of hard-coding one machine's absolute path.
main_path = os.path.abspath("../archive/Train")
video_path_df = create_video_path_df(df_train, main_path)
print(video_path_df['video_path'].iloc[2])


/Users/mac/Documents/2CS_IASD/S2/PROJET_2CS/Sprint1_arsl/Jester_classification/archive/Train/6


In [None]:

# build_feature_extractor() is already defined in an earlier cell; the
# identical copy that used to live here silently shadowed it, so only the
# instantiation is kept.
feature_extractor = build_feature_extractor()


# label_processor was already created in an earlier cell from the same
# df_train["label"] vocabulary; it is reused here instead of being rebuilt.
print(label_processor.get_vocabulary())

# `layers` is never imported in this notebook (NameError); reach the layer
# through the `keras` module, which is imported.
center_crop_layer = keras.layers.CenterCrop(IMG_SIZE, IMG_SIZE)


def crop_center(frame):
    """Center-crop a single frame with ``center_crop_layer``.

    A leading axis is added for the layer call; the layer output is converted
    with ``keras.ops.convert_to_numpy`` and then passed through
    ``keras.ops.squeeze``, whose result is returned.
    """
    batched = frame[None, ...]
    as_numpy = keras.ops.convert_to_numpy(center_crop_layer(batched))
    return keras.ops.squeeze(as_numpy)


def load_video(path, max_frames=0, offload_to_cpu=False):
    """Read the frames of a video file with OpenCV.

    Every frame has its channel axis reordered with the indices [2, 1, 0] and
    is then passed through ``crop_center``.

    Args:
        path: Passed to ``cv2.VideoCapture``.
        max_frames: Stop once this many frames were collected. With the default
            0 the limit never triggers, so reading continues until the capture
            reports no further frame.
        offload_to_cpu: When true and the Keras backend is "torch", frames are
            moved with ``.to("cpu")``.

    Returns:
        ``np.array`` built from the collected frames.
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        has_frame, image = capture.read()
        while has_frame:
            image = crop_center(image[:, :, [2, 1, 0]])
            if offload_to_cpu and keras.backend.backend() == "torch":
                image = image.to("cpu")
            collected.append(image)

            if len(collected) == max_frames:
                break
            has_frame, image = capture.read()
    finally:
        # Always free the capture handle, even if a frame failed to process.
        capture.release()

    if not (offload_to_cpu and keras.backend.backend() == "torch"):
        return np.array(collected)
    return np.array([item.to("cpu").numpy() for item in collected])






In [None]:
import os
from PIL import Image
import numpy as np

from PIL import Image
import numpy as np

def load_frame(frame_path, target_size=(128, 128)):
    """Load one image file as a normalized RGB array.

    Args:
        frame_path: Path of the image file.
        target_size: Size handed to ``Image.resize``.

    Returns:
        Float array with three channels and pixel values in the range [0, 1].
    """
    # Image.open keeps the file open until the image is closed; the context
    # manager releases the handle once the pixels have been copied out.
    with Image.open(frame_path) as img:
        # Force three channels so grayscale, palette or RGBA files yield the
        # same array layout as ordinary RGB frames.
        img_resized = img.convert("RGB").resize(target_size)
        # Convert to a NumPy array and normalize pixel values to [0, 1].
        img_array = np.array(img_resized) / 255.0
    return img_array


def prepare_all_videos(df, root_dir):
    """Extract per-frame CNN features for every video listed in ``df``.

    Args:
        df: DataFrame with a ``video_path`` column (folder of frame images,
            joined onto ``root_dir`` with ``os.path.join``) and a ``label``
            column.
        root_dir: Base directory the video paths are joined onto.

    Returns:
        Tuple ``(frame_features, labels)``. ``frame_features`` is a float32
        array of shape (len(df), MAX_SEQ_LENGTH, NUM_FEATURES); slots without a
        frame, or whose frame is entirely black, stay all zeros. ``labels`` is
        the label column converted with ``label_processor``.
    """
    image_suffixes = (".jpg", ".jpeg", ".png")

    num_samples = len(df)
    video_paths = df["video_path"].tolist()
    labels = df["label"].to_numpy()
    labels = label_processor(labels[..., None]).numpy()

    # `frame_features` are what we will feed to our sequence model.
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    for idx, path in enumerate(video_paths):
        video_dir = os.path.join(root_dir, path)

        # sorted(): os.listdir() returns entries in arbitrary order, but the
        # features must follow the temporal order of the frames.
        # Suffix filter: the folder may also hold non-image entries (the
        # `n_frames` file written by class_process, hidden files) that cannot
        # be opened as images.
        frame_names = [
            name
            for name in sorted(os.listdir(video_dir))
            if name.lower().endswith(image_suffixes)
        ][:MAX_SEQ_LENGTH]
        if not frame_names:
            continue  # No frames: the row stays all zeros.

        frames = np.array(
            [
                load_frame(
                    os.path.join(video_dir, name), target_size=(IMG_SIZE, IMG_SIZE)
                )
                for name in frame_names
            ]
        )

        # Entirely black frames are treated like padding and keep zero features.
        has_content = frames.mean(axis=(1, 2, 3)) > 0.0
        if not has_content.any():
            continue

        # load_frame returns pixels in [0, 1], while the feature extractor
        # applies DenseNet's preprocess_input, which expects [0, 255]; scale
        # back up so the normalization is not applied twice.
        # One predict() call per video instead of one per frame.
        features = feature_extractor.predict(frames[has_content] * 255.0, verbose=0)
        frame_features[idx, np.flatnonzero(has_content)] = features

    return frame_features, labels


In [None]:
# Extract features for the first 300 videos only. main_path (set where the
# video path table is built) replaces a second copy of the absolute directory.
frame_feature, labels = prepare_all_videos(video_path_df[:300], main_path)

In [None]:
# Print the extracted feature array for a quick visual check.
print(frame_feature)

In [None]:
# Reshape the 3D array into 2D: one row per video, all frame features side by side.
reshaped_array = frame_feature.reshape(frame_feature.shape[0], -1)

# np.savetxt does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('./preprocessed', exist_ok=True)

# Save the reshaped array to a text file
np.savetxt('./preprocessed/features.txt', reshaped_array)
