In [1]:
import requests
import zipfile
import io

# Download the THETIS RGB video archive and unpack it under /tennis.
url = "http://thetis.image.ece.ntua.gr/databases/VIDEO_RGB.zip"
# (connect, read) timeouts so a stalled server cannot hang the cell forever.
response = requests.get(url, timeout=(10, 600))
# Fail loudly on HTTP errors instead of handing an error page to ZipFile,
# which would otherwise surface as an unrelated BadZipFile exception.
response.raise_for_status()
# The whole archive is held in memory (response.content) before extraction.
with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_ref:
    zip_ref.extractall("/tennis")

In [2]:
import os


# Folders whose files are renamed by the loop at the end of this cell.
# The same three paths are used as the class folders (labels 0, 1, 2)
# by edge_from_path and get_video_tensor further down.
folder_paths = [
    "/tennis/VIDEO_RGB/flat_service",
    "/tennis/VIDEO_RGB/forehand_volley",
    "/tennis/VIDEO_RGB/backhand_slice"
]

# Function to rename files sequentially within a folder
def rename_files(folder_path):
    """Rename every entry of ``folder_path`` to ``1.avi``, ``2.avi``, ...

    Entries are numbered in lexicographic order of their current names so the
    result is deterministic (``os.listdir`` returns entries in arbitrary
    order). The rename happens in two phases so that a target name such as
    ``1.avi`` can never overwrite a file that already carries that name but
    has not been renamed yet.

    Args:
        folder_path: Directory whose entries are renamed in place. Every
            entry returned by ``os.listdir`` is renamed, whatever its
            current extension.

    Note:
        If the process is interrupted between the two phases, entries are
        left under their temporary ``.rename-<token>-<n>`` names.
    """
    # Local import: keeps this cell runnable without touching the import cell.
    import uuid

    # Sorted so the numbering does not depend on filesystem listing order.
    files = sorted(os.listdir(folder_path))

    # Phase 1: park every entry under a unique temporary name. After this
    # step no final name ("<i>.avi") is occupied by a pending file.
    token = uuid.uuid4().hex
    staged = []
    for i, filename in enumerate(files, start=1):
        old_filepath = os.path.join(folder_path, filename)
        temp_filepath = os.path.join(folder_path, f".rename-{token}-{i}")
        new_filepath = os.path.join(folder_path, f"{i}.avi")
        os.rename(old_filepath, temp_filepath)
        staged.append((temp_filepath, new_filepath))

    # Phase 2: move the parked entries to their final sequential names.
    for temp_filepath, new_filepath in staged:
        os.rename(temp_filepath, new_filepath)


# Rename files in each folder
# Apply the sequential renaming to each class folder, printing "Done" after
# each one. NOTE(review): later cells write "<n>out.avi" files into the same
# folders, so re-running this cell after that step would renumber those too.
for folder_path in folder_paths:
    rename_files(folder_path)
    print("Done\n")


Done

Done

Done



In [1]:
import numpy as np

# Fixed seed so the train/val/test split is identical after every kernel
# restart; the features extracted later are only meaningful for one split.
SPLIT_SEED = 42
rng = np.random.default_rng(SPLIT_SEED)

# Number of videos per class (files are named 1.avi .. 165.avi)
total_items_per_class = 165

# Number of classes
num_classes = 3

# Total number of videos across all classes
total_items = total_items_per_class * num_classes

# One (item, class_label) tuple per video; items are 1-based to match the
# sequential file names.
all_indices = [(item, class_label) for class_label in range(num_classes)
               for item in range(1, total_items_per_class + 1)]

# Shuffle the global list (only affects the order videos are processed in)
rng.shuffle(all_indices)

# Per-class split sizes: 80% train, 10% val, remainder test
total_samples_per_class = total_items_per_class
train_size_per_class = int(0.8 * total_samples_per_class)
val_size_per_class = int(0.1 * total_samples_per_class)
test_size_per_class = total_samples_per_class - train_size_per_class - val_size_per_class

# Split containers
train_idx = []
val_idx = []
test_idx = []

# Stratified split: shuffle and slice each class separately so every split
# has the same number of videos per class.
for class_label in range(num_classes):
    class_indices = [(item, label) for item, label in all_indices if label == class_label]
    rng.shuffle(class_indices)

    train_end = train_size_per_class
    val_end = train_end + val_size_per_class

    train_idx.extend(class_indices[:train_end])
    val_idx.extend(class_indices[train_end:val_end])
    test_idx.extend(class_indices[val_end:total_samples_per_class])

# Sanity check: every video landed in exactly one split.
assert len(train_idx) + len(val_idx) + len(test_idx) == total_items
assert len(set(train_idx) | set(val_idx) | set(test_idx)) == total_items

print("Train Index:", train_idx)
print("Val Index:", val_idx)
print("Test Index:", test_idx)

Train Index: [(163, 0), (84, 0), (63, 0), (67, 0), (5, 0), (76, 0), (81, 0), (162, 0), (156, 0), (152, 0), (144, 0), (105, 0), (10, 0), (1, 0), (77, 0), (147, 0), (80, 0), (96, 0), (62, 0), (53, 0), (94, 0), (109, 0), (21, 0), (56, 0), (135, 0), (157, 0), (41, 0), (161, 0), (3, 0), (28, 0), (47, 0), (68, 0), (49, 0), (54, 0), (134, 0), (133, 0), (55, 0), (59, 0), (26, 0), (23, 0), (38, 0), (61, 0), (104, 0), (148, 0), (48, 0), (101, 0), (16, 0), (98, 0), (50, 0), (137, 0), (165, 0), (116, 0), (103, 0), (27, 0), (146, 0), (64, 0), (42, 0), (115, 0), (112, 0), (32, 0), (2, 0), (17, 0), (159, 0), (100, 0), (74, 0), (128, 0), (145, 0), (19, 0), (149, 0), (12, 0), (150, 0), (57, 0), (130, 0), (99, 0), (164, 0), (129, 0), (120, 0), (72, 0), (82, 0), (127, 0), (113, 0), (95, 0), (107, 0), (83, 0), (6, 0), (51, 0), (35, 0), (125, 0), (8, 0), (110, 0), (132, 0), (60, 0), (29, 0), (118, 0), (91, 0), (40, 0), (108, 0), (121, 0), (111, 0), (43, 0), (25, 0), (139, 0), (44, 0), (18, 0), (117, 0), (8

In [4]:
import cv2
import os

def edge_from_path(element_tuple):
  """Write a foreground-motion-mask version of one video next to the original.

  The input ``<idx>.avi`` in the class folder is run frame by frame through a
  MOG2 background subtractor; each mask is median-filtered and written to
  ``<idx>out.avi`` in the same folder.

  Args:
    element_tuple: ``(idx, class_label)`` where ``idx`` is the numeric file
      stem and ``class_label`` is 0, 1 or 2 (key into ``class_folder``).

  Raises:
    IOError: if the input video cannot be opened. Without this check an
      unreadable input would silently produce an empty output file.
    KeyError: if ``class_label`` is not one of the known classes.
  """
  idx, class_label = element_tuple
  class_folder = {0: "/tennis/VIDEO_RGB/flat_service",
                    1: "/tennis/VIDEO_RGB/forehand_volley",
                    2: "/tennis/VIDEO_RGB/backhand_slice"}

  video_path_in = os.path.join(class_folder[class_label], f"{idx}.avi")
  video_path_out = os.path.join(class_folder[class_label], f"{idx}out.avi")

  cap = cv2.VideoCapture(video_path_in)
  if not cap.isOpened():
    cap.release()
    raise IOError(f"Could not open input video: {video_path_in}")

  out = None
  try:
    # Negative learning rate: OpenCV picks the rate automatically.
    LEARNING_RATE = -1
    fgbg = cv2.createBackgroundSubtractorMOG2()

    # The output keeps the input's frame size and frame rate.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(video_path_out, fourcc, cap.get(cv2.CAP_PROP_FPS), (width, height))

    while True:
      # Capture frame-by-frame; stop at end of stream
      ret, frame = cap.read()
      if not ret:
        break

      # Foreground mask from the MOG2 background model
      motion_mask = fgbg.apply(frame, LEARNING_RATE)

      # Median filter (5x5) to remove salt-and-pepper noise from the mask
      motion_mask_smooth = cv2.medianBlur(motion_mask, 5)

      # The mask is single-channel; convert to 3-channel BGR before writing
      out.write(cv2.cvtColor(motion_mask_smooth, cv2.COLOR_GRAY2BGR))
  finally:
    # Always release the capture and writer, even if processing fails midway.
    # No GUI windows are created here, so cv2.destroyAllWindows() is not needed.
    cap.release()
    if out is not None:
      out.release()


# Produce the "<idx>out.avi" motion-mask video for every entry of all_indices.
# Note that `idx` here is a full (item, class_label) tuple, not just a number.
# NOTE(review): depends on `all_indices` from the split cell already being
# defined in the kernel.
for idx in all_indices:
  edge_from_path(idx)

In [2]:
import os
import numpy as np
import cv2


def count_frames(video_path):
    """Return the frame count that OpenCV reports for ``video_path``.

    The value comes from the ``CAP_PROP_FRAME_COUNT`` property, truncated
    to an ``int``.
    """
    capture = cv2.VideoCapture(video_path)
    reported_count = capture.get(cv2.CAP_PROP_FRAME_COUNT)
    capture.release()
    return int(reported_count)




def get_video_tensor(element_tuple, num_frames=30):
    """Sample ``num_frames`` evenly spaced grayscale frames from a mask video.

    Reads ``<idx>out.avi`` from the class folder, keeps every
    ``total_frames // num_frames``-th frame, converts it to grayscale and
    resizes it to 299x299.

    Args:
        element_tuple: ``(idx, class_label)``; ``class_label`` is 0, 1 or 2.
        num_frames: Number of frames to return (must be positive).

    Returns:
        ``np.ndarray`` of shape ``(num_frames, 299, 299)``. If the video
        yields fewer sampled frames than requested, the last frame is
        repeated so the shape is always the same; callers reshape the result
        assuming a fixed frame count.

    Raises:
        ValueError: if ``num_frames`` is not positive or no frame can be read
            from the video (missing or corrupt file).
    """
    if num_frames <= 0:
        raise ValueError(f"num_frames must be positive, got {num_frames}")

    idx, class_label = element_tuple
    class_folder = {0: "/tennis/VIDEO_RGB/flat_service",
                    1: "/tennis/VIDEO_RGB/forehand_volley",
                    2: "/tennis/VIDEO_RGB/backhand_slice"}

    video_path = os.path.join(class_folder[class_label], f"{idx}out.avi")
    cap = cv2.VideoCapture(video_path)

    frames = []
    try:
        # Read the frame count from the capture we already have open rather
        # than opening the file a second time.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Sampling stride. Clamped to 1 so clips shorter than num_frames do
        # not cause a modulo-by-zero below.
        per_frame = max(total_frames // num_frames, 1)

        frame_count = 0
        while len(frames) < num_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # Keep frames at even index distances
            if frame_count % per_frame == 0:
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_gray = cv2.resize(frame_gray, (299, 299), interpolation=cv2.INTER_AREA)
                frames.append(frame_gray)

            frame_count += 1
    finally:
        cap.release()

    if not frames:
        raise ValueError(f"No frames could be read from {video_path}")

    # Pad short clips by repeating the final frame so every video yields
    # exactly num_frames frames.
    while len(frames) < num_frames:
        frames.append(frames[-1])

    return np.array(frames)



In [3]:
from tensorflow.keras.applications.xception import preprocess_input
# Sample 16 frames from every training video.
X_train_list = [get_video_tensor(data, 16) for data in train_idx]

# Flatten videos into individual single-channel 299x299 frames and apply the
# Xception input preprocessing.
X_train = preprocess_input(np.array(X_train_list).reshape(-1, 299, 299, 1))

# One label per frame: each video's class label is repeated 16 times, then
# one-hot encoded.
y_train = np.repeat([label for _, label in train_idx], 16)
y_train = np.eye(num_classes)[y_train]

In [4]:
# Sample 16 frames from every validation video.
X_val_list = [get_video_tensor(data, 16) for data in val_idx]

# Flatten videos into individual single-channel 299x299 frames and run them
# through preprocess_input.
X_val = preprocess_input(np.array(X_val_list).reshape(-1, 299, 299, 1))

# One label per frame: each video's class label is repeated 16 times, then
# one-hot encoded.
y_val = np.repeat([label for _, label in val_idx], 16)
y_val = np.eye(num_classes)[y_val]

In [5]:
# Drop the per-video frame lists; the stacked arrays are what training uses.
X_train_list, X_val_list = [], []

In [6]:
from keras.models import Model, Sequential, load_model
from keras.layers import Input, LSTM, Dense, TimeDistributed, Lambda, Dropout, Concatenate
from keras import backend as K
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalMaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

In [7]:
# Enable on-demand GPU memory allocation on every visible GPU, or report
# that none is available.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
if not gpu_devices:
    print("No GPU available, switching to CPU.")
else:
    print("Using GPU.")
    for gpu in gpu_devices:
        tf.config.experimental.set_memory_growth(gpu, True)


# Training hyper-parameters for fine-tuning the frame-level CNN
NUM_CLASSES = 3  # must match num_classes used when the one-hot labels were built
LEARNING_RATE = 0.0001
BATCH_SIZE = 16
EPOCHS = 5



# Frames are 299x299 with a single (grayscale) channel
input_shape = (299, 299, 1)  # matches the resize in get_video_tensor and the reshape of X_train / X_val

# Input layer for the single-channel frames
input_layer = Input(shape=input_shape)

# Replicate the single channel three times so the input has 3 channels
expanded_input = Concatenate()([input_layer, input_layer, input_layer])

# Xception without its classification head (include_top=False), initialised
# with ImageNet weights and fed from the replicated grayscale input
base_model = tf.keras.applications.Xception(weights='imagenet', include_top=False, input_tensor=expanded_input)

# NOTE(review): no layer is frozen here, so the whole network is trained.



# Classification head: global max pooling, a 2048-unit ReLU layer (later
# reused as the per-frame feature vector), and a softmax over the classes
x = base_model.output
x = GlobalMaxPooling2D()(x)
x = Dense(2048, activation='relu')(x)
predictions = Dense(NUM_CLASSES, activation='softmax')(x)


# Full model: Xception backbone plus the custom head
model_fine_tuned = Model(inputs=base_model.input, outputs=predictions)



# Compile with Adam and categorical cross-entropy (labels are one-hot)
model_fine_tuned.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
              loss='categorical_crossentropy',  # Use 'sparse_categorical_crossentropy' for integer labels
              metrics=['accuracy'])

# Train on individual frames (each frame carries its video's label),
# validating on the frames of the validation videos after every epoch
model_fine_tuned.fit(
    X_train, y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val)
)

# Report the final frame-level loss and accuracy on the validation set
loss, accuracy = model_fine_tuned.evaluate(X_val, y_val)
print("Validation Loss:", loss)
print("Validation Accuracy:", accuracy)

# Save the trained model to the working directory in HDF5 (.h5) format
model_fine_tuned.save('fine_tuned_xception.h5')


Using GPU.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Validation Loss: 0.7271984219551086
Validation Accuracy: 0.765625


  saving_api.save_model(


In [8]:
# Release the large frame-level arrays; these names are rebuilt later from
# the extracted features.
X_train = X_val = y_train = y_val = None

In [9]:
# Feature extractor: same input as the fine-tuned model, but ending at its
# second-to-last layer (the layer just before the softmax classifier).
penultimate_layer = model_fine_tuned.layers[-2]
outputs = penultimate_layer.output

cnn_model = Model(
    inputs=model_fine_tuned.input,
    outputs=outputs,
)

In [10]:
from tensorflow.keras.applications.xception import preprocess_input
# Per-frame CNN features for the training videos, in train_idx order
# (16 consecutive entries per video).
seq = []
for data in train_idx:
  vid = get_video_tensor(data, 16)
  # Add the trailing channel axis explicitly instead of relying on Keras to
  # adjust the input rank.
  batch = preprocess_input(vid[..., np.newaxis])
  # One silent, batched predict per video instead of one call per frame:
  # far fewer calls and no progress-bar flood in the cell output.
  features = cnn_model.predict(batch, verbose=0)
  seq.extend(features)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [None]:
# Persist the training features to the working directory.
path = os.path.join('', 'train_features.npy')
np.save(file=path, arr=seq)

In [11]:
# Per-frame CNN features for the validation videos, in val_idx order
# (16 consecutive entries per video).
seq_val = []
for data in val_idx:
  vid = get_video_tensor(data, 16)
  # Add the trailing channel axis explicitly instead of relying on Keras to
  # adjust the input rank.
  batch = preprocess_input(vid[..., np.newaxis])
  # One silent, batched predict per video instead of one call per frame.
  features = cnn_model.predict(batch, verbose=0)
  seq_val.extend(features)



In [None]:
# Persist the validation features to the working directory.
path = os.path.join('', 'val_features.npy')
np.save(file=path, arr=seq_val)

In [12]:
# Per-frame CNN features for the test videos, in test_idx order
# (16 consecutive entries per video).
seq_test = []
for data in test_idx:
  vid = get_video_tensor(data, 16)
  # Add the trailing channel axis explicitly instead of relying on Keras to
  # adjust the input rank.
  batch = preprocess_input(vid[..., np.newaxis])
  # One silent, batched predict per video instead of one call per frame.
  features = cnn_model.predict(batch, verbose=0)
  seq_test.extend(features)



In [None]:
# Persist the test features to the working directory.
path = os.path.join('', 'test_features.npy')
np.save(file=path, arr=seq_test)

In [13]:
# Regroup the flat per-frame features into (videos, 16 frames, 2048 features)
# tensors and build one one-hot label per video, split by split.

# --- train ---
X_train = np.array(seq).reshape(len(train_idx), 16, 2048)
y_train = np.eye(num_classes)[np.array([label for _, label in train_idx])]

# --- test ---
X_test = np.array(seq_test).reshape(len(test_idx), 16, 2048)
y_test = np.eye(num_classes)[np.array([label for _, label in test_idx])]

# --- validation ---
X_val = np.array(seq_val).reshape(len(val_idx), 16, 2048)
y_val = np.eye(num_classes)[np.array([label for _, label in val_idx])]

In [14]:
class LSTM_model():
    """Sequence classifier over per-frame feature vectors.

    Architecture (stored in ``self.model`` as a Keras ``Sequential``):
    input dropout -> LSTM returning one output per time step -> per-step
    dropout -> per-step softmax over the classes -> average of the per-step
    class probabilities over time.
    """

    def __init__(self, num_features=2048, hidden_units=256, dense_units=256, reg=1e-1, dropout_rate=1e-1, seq_length=16, num_classes=3):
        """Build the (uncompiled) model.

        Args:
            num_features: Size of each per-frame feature vector.
            hidden_units: Dimension of the LSTM cell.
            dense_units: Accepted for interface compatibility but currently
                unused (the extra fully connected layer is disabled).
            reg: Accepted for interface compatibility but currently unused;
                no regularizer is attached to any layer.
            dropout_rate: Dropout rate used before and after the LSTM.
            seq_length: Number of time steps (frames) per sequence.
            num_classes: Number of output classes.
        """
        # Local import so this cell does not depend on editing the import
        # cell; the other layers come from the same keras.layers package.
        from keras.layers import GlobalAveragePooling1D

        model = Sequential()

        # Dropout on the input features fed to the LSTM
        model.add(Dropout(dropout_rate, input_shape=(seq_length, num_features)))

        # return_sequences=True: emit the hidden state at every time step,
        # which the TimeDistributed layers below require
        model.add(LSTM(hidden_units, return_sequences=True))

        # Dropout between the LSTM and the softmax, applied per time step
        model.add(TimeDistributed(Dropout(dropout_rate)))

        # Additional per-step FC layer intentionally left out for now:
        # model.add(TimeDistributed(Dense(dense_units)))

        # Per-time-step class probabilities
        model.add(TimeDistributed(Dense(num_classes, activation="softmax")))

        # Average the per-step probabilities over the time axis. This is the
        # same mean over axis 1 as before, using a built-in layer instead of
        # a Lambda wrapping a Python lambda around a backend function.
        model.add(GlobalAveragePooling1D())

        self.model = model

In [15]:
# Hyper-parameters handed to the LSTM model
hidden_units = 128
dense_units = 128
reg = 0.1

# Build the sequence model
init = LSTM_model(
    hidden_units=hidden_units,
    dense_units=dense_units,
    reg=reg,
    dropout_rate=0.3,
    seq_length=16,
    num_classes=num_classes,
)
model = init.model

# Performance metric reported during training and evaluation
metrics = ['categorical_accuracy']
# Adam optimizer
optimizer = Adam(learning_rate=1e-6)

model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=metrics,
)

In [16]:
# Train the LSTM on the per-video feature sequences, validating each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [18]:
# Evaluate the trained LSTM on each split in turn (train, val, test) and
# print its loss and accuracy with that split's message templates.
for split_x, split_y, loss_msg, acc_msg in (
    (X_train, y_train,
     "Train Loss using Classic Methods for all poses: %2.3f",
     "Train Accuracy using Classic Methods for all poses: %1.3f\n"),
    (X_val, y_val,
     "Val Loss using Classic Methods for all poses: %2.3f",
     "Val Accuracy using Classic Methods for all poses: %1.3f\n"),
    (X_test, y_test,
     "Test Loss using Classic Methods for all poses: %2.3f",
     "Test Accuracy using Classic Methods for all poses: %1.3f\n"),
):
    score = model.evaluate(x=split_x, y=split_y, verbose=1)
    print(loss_msg % score[0])
    print(acc_msg % score[1])

Train Loss using Classic Methods for all poses: 0.288
Train Accuracy using Classic Methods for all poses: 1.000

Val Loss using Classic Methods for all poses: 0.471
Val Accuracy using Classic Methods for all poses: 0.979

Test Loss using Classic Methods for all poses: 0.485
Test Accuracy using Classic Methods for all poses: 0.941

