In [None]:
# Import necessary libraries
import cv2
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
import optuna
import os
import pandas as pd
import pickle
import seaborn as sns
import tensorflow as tf
from collections import defaultdict, deque
from IPython.display import Video
from moviepy.editor import *
from pytube import YouTube
from sklearn.metrics import auc, classification_report, confusion_matrix, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from tensorflow import keras
from tensorflow.keras import mixed_precision
from tqdm import tqdm
from urllib.request import urlretrieve
from yt_dlp import YoutubeDL
import yt_dlp as youtube_dl

In [None]:
# Seed TensorFlow and NumPy so that repeated runs draw the same random numbers
tf.random.set_seed(42)
np.random.seed(42)

# Define constants
SEQUENCE_LENGTH = 30  # Number of frames to use for each video
NUM_JOINTS = 33  # Number of joints in MediaPipe Pose
# Smaller 3-class alternative: ['Kayaking', 'Basketball', 'JumpRope']
# A class's position in this list is used as its integer label (see load_dataset).
SELECTED_CLASSES = ['Kayaking', 'Basketball', 'JumpRope', 'Diving', 'HorseRace', 'PullUps','MilitaryParade']
DATASET_DIR = 'workspace/UCF50'  # Root folder; load_dataset expects one sub-folder per class name
BATCH_SIZE = 32  # Batch size of the module-level tf.data pipelines
NUM_EPOCHS = 30
LEARNING_RATE = 0.0001

# Initialize MediaPipe Pose
# This module-level `pose` object is the one extract_pose() runs every frame through.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)


In [None]:
# Function to extract pose from a single frame using MediaPipe
def extract_pose(frame):
    """Return the flattened (x, y, z) pose landmarks found in one BGR frame.

    The frame is converted from BGR to RGB and run through the module-level
    MediaPipe ``pose`` object.

    Returns:
        A 1-D array holding x, y, z for every detected landmark, or a zero
        vector of length ``NUM_JOINTS * 3`` when no pose is detected.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    detection = pose.process(rgb_frame)

    # Guard clause: no person found in this frame.
    if not detection.pose_landmarks:
        return np.zeros(NUM_JOINTS * 3)

    coords = [[point.x, point.y, point.z] for point in detection.pose_landmarks.landmark]
    return np.array(coords).flatten()

# Function to extract pose sequence from a video
def extract_pose_sequence(video_path):
    """Sample ``SEQUENCE_LENGTH`` evenly spaced frames from a video and extract their poses.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        Array of shape ``(SEQUENCE_LENGTH, NUM_JOINTS * 3)``. Rows for frames
        that could not be read are all zeros, so an unreadable or empty video
        yields an all-zero sequence instead of raising.
    """
    cap = cv2.VideoCapture(video_path)
    poses = []
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Stride between sampled frames; at least 1 so short videos still advance.
        skip_frames_window = max(frame_count // SEQUENCE_LENGTH, 1)

        for frame_idx in range(SEQUENCE_LENGTH):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx * skip_frames_window)
            success, frame = cap.read()
            if not success:
                break
            poses.append(extract_pose(frame))
    finally:
        # Release the capture even if pose extraction raises.
        cap.release()

    # Pad with zero poses. The pad shape comes from the constants rather than
    # from poses[0], which does not exist when no frame could be read.
    missing = SEQUENCE_LENGTH - len(poses)
    poses.extend(np.zeros(NUM_JOINTS * 3) for _ in range(missing))

    return np.array(poses)


In [None]:
# Function to preprocess the entire dataset
def preprocess_dataset(video_paths, labels):
    """Turn every video into a pose sequence, keeping labels aligned with the data.

    Args:
        video_paths: Sequence of video file paths.
        labels: Labels paired position-by-position with ``video_paths``.

    Returns:
        ``(data, labels)`` as NumPy arrays, with one pose sequence per video.
    """
    progress = tqdm(zip(video_paths, labels), desc="Preprocessing dataset", total=len(video_paths))
    pairs = [(extract_pose_sequence(path), target) for path, target in progress]
    sequences = [sequence for sequence, _ in pairs]
    targets = [target for _, target in pairs]
    return np.array(sequences), np.array(targets)

# Function to save preprocessed dataset
def save_preprocessed_dataset(data, labels, filename):
    """Pickle ``(data, labels)`` as a single tuple into ``filename``."""
    payload = pickle.dumps((data, labels))
    with open(filename, 'wb') as handle:
        handle.write(payload)
    print(f"Preprocessed dataset saved to {filename}")

# Function to load preprocessed dataset
def load_preprocessed_dataset(filename):
    """Read back the ``(data, labels)`` tuple written by ``save_preprocessed_dataset``.

    Note: the file is unpickled, so only load cache files you created yourself.
    """
    with open(filename, 'rb') as handle:
        payload = pickle.loads(handle.read())
    data, labels = payload
    print(f"Preprocessed dataset loaded from {filename}")
    return data, labels

# Function to load all video paths and labels
def load_dataset(dataset_dir=None, classes=None):
    """Collect the ``.avi`` files of every selected class together with integer labels.

    Args:
        dataset_dir: Root folder holding one sub-folder per class. Defaults to
            the module-level ``DATASET_DIR``.
        classes: Class folder names; a class's index in this sequence is its
            label. Defaults to the module-level ``SELECTED_CLASSES``.

    Returns:
        ``(video_paths, labels)``: two lists of equal length, grouped by class
        in the order given and sorted by file name within each class.
    """
    root = DATASET_DIR if dataset_dir is None else dataset_dir
    class_names = SELECTED_CLASSES if classes is None else classes

    video_paths = []
    labels = []
    for class_idx, class_name in enumerate(class_names):
        class_dir = os.path.join(root, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # dataset order (and therefore any seeded split) reproducible.
        for video_name in sorted(os.listdir(class_dir)):
            if video_name.endswith('.avi'):
                video_paths.append(os.path.join(class_dir, video_name))
                labels.append(class_idx)
    return video_paths, labels

In [None]:
# Load and preprocess dataset
print("Loading and preprocessing dataset...")
preprocessed_file = 'preprocessed_pose_dataset_tens-7-32.pkl'

# Pose extraction only runs when no cache file exists yet; afterwards the
# pickle is reused. The cache is identified by file name alone, so delete it
# after changing SELECTED_CLASSES or SEQUENCE_LENGTH.
if not os.path.exists(preprocessed_file):
    video_paths, labels = load_dataset()
    preprocessed_data, preprocessed_labels = preprocess_dataset(video_paths, labels)
    save_preprocessed_dataset(preprocessed_data, preprocessed_labels, preprocessed_file)
else:
    preprocessed_data, preprocessed_labels = load_preprocessed_dataset(preprocessed_file)


In [None]:
# Split dataset into train and test sets (stratified so both keep the class balance)
train_data, test_data, train_labels, test_labels = train_test_split(
    preprocessed_data, preprocessed_labels, test_size=0.2, random_state=42, stratify=preprocessed_labels
)

# Convert labels to one-hot encoded format.
# The width is pinned to the number of selected classes: without num_classes,
# to_categorical sizes the encoding from the largest label it sees, which can
# make the train/test label widths disagree with each other and with the
# model's output layer.
train_labels = keras.utils.to_categorical(train_labels, num_classes=len(SELECTED_CLASSES))
test_labels = keras.utils.to_categorical(test_labels, num_classes=len(SELECTED_CLASSES))

# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))

# Prepare datasets for training (only the training set is shuffled)
train_dataset = train_dataset.shuffle(buffer_size=len(train_data)).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


In [None]:

def objective(trial):
    """Optuna objective: train one LSTM configuration and return its best validation accuracy.

    Model selection (the returned score and the early-stopping epoch) uses a
    validation split carved out of the training data, so the test set stays
    untouched for the final evaluation.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The highest validation accuracy reached before early stopping. The
        epoch at which it was reached is stored on the trial both as the user
        attribute ``"best_epoch"`` and as an intermediate value at step
        ``max_epochs``.
    """
    # Hyperparameters
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    lstm_units_1 = trial.suggest_int("lstm_units_1", 64, 256)
    lstm_units_2 = trial.suggest_int("lstm_units_2", 64, 256)
    dense_units = trial.suggest_int("dense_units", 32, 128)

    # Enable mixed precision (this sets the global Keras policy)
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_global_policy(policy)

    # Model creation
    input_shape = (SEQUENCE_LENGTH, NUM_JOINTS * 3)
    num_classes = len(SELECTED_CLASSES)
    model = create_lstm_model(input_shape, num_classes, lstm_units_1, lstm_units_2, dense_units, dropout_rate)

    # Optimizer and loss function
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    optimizer = mixed_precision.LossScaleOptimizer(optimizer)
    loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)

    # Hold out part of the training data for model selection. Tuning against
    # the test set would leak it into the hyperparameters and inflate the
    # final test accuracy. train_labels is one-hot here, hence the argmax for
    # stratification.
    fit_data, val_data, fit_labels, val_labels = train_test_split(
        train_data, train_labels, test_size=0.2, random_state=42,
        stratify=np.argmax(train_labels, axis=1),
    )

    # Create datasets
    train_dataset = tf.data.Dataset.from_tensor_slices((fit_data, fit_labels))
    val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_labels))
    train_dataset = train_dataset.shuffle(buffer_size=len(fit_data)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    # Compile the model
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # Training loop
    best_val_accuracy = 0
    best_epoch = 0
    patience = 10  # Early stopping patience
    max_epochs = 100  # Maximum number of epochs to try

    for epoch in range(max_epochs):
        # Train the model for a single epoch so it can be evaluated in between
        history = model.fit(train_dataset, epochs=1, verbose=0)

        # Evaluate the model on the validation split
        val_loss, val_accuracy = model.evaluate(val_dataset, verbose=0)

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_epoch = epoch + 1

        print(f"Epoch {epoch+1}, Loss: {history.history['loss'][0]:.4f}, Val Accuracy: {val_accuracy:.4f}")

        # Early stopping: no improvement for `patience` consecutive epochs
        if epoch + 1 - best_epoch >= patience:
            print(f"Early stopping triggered at epoch {epoch+1}")
            break

    # Record the best epoch. The user attribute keeps it as an int; the
    # intermediate value is kept for code that reads it from there.
    trial.set_user_attr("best_epoch", best_epoch)
    trial.report(best_epoch, step=max_epochs)

    return best_val_accuracy


In [None]:
# Run Optuna study
def run_optuna_study(n_trials=200):
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    print("Number of finished trials: ", len(study.trials))
    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Get the best number of epochs
    best_epoch = trial.intermediate_values[max(trial.intermediate_values.keys())]
    print(f"  Best number of epochs: {best_epoch}")

    # Add best_epoch to the best parameters
    best_params = trial.params
    best_params['best_epoch'] = best_epoch

    return best_params

# Run the Optuna study
best_params = run_optuna_study(n_trials=200)  

In [None]:
# Function to create LSTM model
def create_lstm_model(input_shape, num_classes, lstm_units_1, lstm_units_2, dense_units, dropout_rate):
    model = keras.Sequential([
        keras.layers.LSTM(lstm_units_1, return_sequences=True, input_shape=input_shape),
        keras.layers.LSTM(lstm_units_2),
        keras.layers.Dense(dense_units, activation='relu'),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(num_classes, activation='softmax')
    ])
    return model

In [None]:
def train_final_model(best_params):
    """Train the final model with the tuned hyperparameters and save it to disk.

    Args:
        best_params: Dict with the keys ``lstm_units_1``, ``lstm_units_2``,
            ``dense_units``, ``dropout_rate``, ``lr`` and ``batch_size``.
            If ``best_epoch`` is present (and non-zero) it sets the number of
            training epochs; otherwise ``NUM_EPOCHS`` is used.

    Returns:
        ``(model, history)``: the trained model and the Keras history object.
    """
    # Create the model with best parameters
    input_shape = (SEQUENCE_LENGTH, NUM_JOINTS * 3)
    num_classes = len(SELECTED_CLASSES)
    model = create_lstm_model(input_shape, num_classes,
                              best_params['lstm_units_1'],
                              best_params['lstm_units_2'],
                              best_params['dense_units'],
                              best_params['dropout_rate'])

    # Compile the model
    optimizer = tf.keras.optimizers.Adam(learning_rate=best_params['lr'])
    optimizer = mixed_precision.LossScaleOptimizer(optimizer)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    # Prepare datasets
    train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
    test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))
    train_dataset = train_dataset.shuffle(buffer_size=len(train_data)).batch(best_params['batch_size']).prefetch(tf.data.AUTOTUNE)
    test_dataset = test_dataset.batch(best_params['batch_size']).prefetch(tf.data.AUTOTUNE)

    # Train for the epoch count found by the study instead of a hard-coded
    # value. int() is needed because the study may hand the value back as a
    # float; a missing or zero value falls back to NUM_EPOCHS.
    num_epochs = int(best_params.get('best_epoch') or NUM_EPOCHS)
    history = model.fit(train_dataset, epochs=num_epochs, validation_data=test_dataset, verbose=1)

    # Save the trained model
    model.save('best_model_optuna.h5')
    print("Training completed. Best model saved as 'best_model_optuna.h5'")

    return model, history

# Train the final model with the hyperparameters returned by the Optuna study
final_model, final_history = train_final_model(best_params)

# Print model summary (layers, output shapes, parameter counts)
final_model.summary()

In [None]:
# Plotting training curves
def plot_training_curves(history):
    """Save side-by-side loss and accuracy curves to ``training_curves.png``.

    Args:
        history: Keras history object whose ``.history`` dict contains
            ``loss``, ``val_loss``, ``accuracy`` and ``val_accuracy``.

    The figure is closed after saving, so nothing is shown inline.
    """
    fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # (axes, train key, validation key, train legend, validation legend, y label)
    panels = (
        (loss_ax, 'loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss'),
        (accuracy_ax, 'accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy'),
    )
    for ax, train_key, val_key, train_label, val_label, y_label in panels:
        ax.plot(history.history[train_key], label=train_label)
        ax.plot(history.history[val_key], label=val_label)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()

    fig.tight_layout()
    fig.savefig('training_curves.png')
    plt.close(fig)
    
# Plot the final run's loss/accuracy curves (written to training_curves.png)
plot_training_curves(final_history)
    


In [None]:
# Evaluate the model on the test set

# Prepare test dataset (not shuffled, so predictions stay aligned with test_labels)
test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))
test_dataset = test_dataset.batch(best_params['batch_size']).prefetch(tf.data.AUTOTUNE)

# Run inference with one predict() call over the whole dataset. Calling
# predict() once per batch inside a Python loop repeats its setup cost for
# every batch and prints a separate progress bar each time.
test_probabilities = final_model.predict(test_dataset, verbose=1)
all_predictions = np.argmax(test_probabilities, axis=1)
all_labels = np.argmax(test_labels, axis=1)

# Calculate and print overall accuracy
accuracy = 100 * np.mean(all_predictions == all_labels)
print(f"Overall Test Accuracy: {accuracy:.2f}%")

In [None]:
# Plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, classes):
    """Save a row-normalised (percentage) confusion matrix to ``confusion_matrix_percent.png``.

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        classes: Class names; index ``i`` names class ``i`` and labels row/column ``i``.
    """
    # Pin the label set so the matrix always has one row/column per class
    # name, even when a class is missing from both y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))

    # Row-normalise to percentages. Rows with no true samples stay at 0
    # instead of turning into NaN through a division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_percent = np.divide(
        cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0
    ) * 100

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_percent, annot=True, fmt='.1f', cmap='Blues', xticklabels=classes, yticklabels=classes)
    plt.title('Confusion Matrix (Percentages)')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig('confusion_matrix_percent.png')
    plt.close()

# Save the test-set confusion matrix (written to confusion_matrix_percent.png)
plot_confusion_matrix(all_labels, all_predictions, SELECTED_CLASSES)


In [None]:
# Plot classification report
def plot_classification_report(y_true, y_pred, classes):
    """Render per-class precision/recall/F1 as a table in ``classification_report.png``.

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        classes: Class names; index ``i`` names class ``i``.
    """
    # Passing labels explicitly keeps the report aligned with `classes` even
    # when a class never occurs in y_true/y_pred (otherwise target_names and
    # the detected labels disagree and scikit-learn raises).
    report = classification_report(
        y_true, y_pred,
        labels=list(range(len(classes))),
        target_names=classes,
        output_dict=True,
        zero_division=0,
    )
    df = pd.DataFrame(report).transpose()
    # Keep only the per-class rows. Selecting them by name works whichever
    # summary rows ('accuracy', 'micro avg', ...) the report contains.
    df = df.loc[list(classes)]
    df = df.drop('support', axis=1)  # Remove the support column

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.axis('off')
    table = ax.table(cellText=df.values.round(2),
                     rowLabels=df.index,
                     colLabels=df.columns,
                     cellLoc='center',
                     loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.5)

    # Style the header row (row 0 of the table holds the column labels)
    for col_idx in range(len(df.columns)):
        cell = table[0, col_idx]
        cell.set_text_props(weight='bold', color='white')
        cell.set_facecolor('#4C72B0')

    plt.title('Classification Report', fontsize=16, fontweight='bold', pad=20)
    plt.tight_layout()
    plt.savefig('classification_report.png', dpi=300, bbox_inches='tight')
    plt.close()
    
# Save the per-class precision/recall/F1 table (written to classification_report.png)
plot_classification_report(all_labels, all_predictions, SELECTED_CLASSES)


In [None]:
# Plot ROC curve
def plot_roc_curve(all_labels, all_predictions, SELECTED_CLASSES):
    """Save one-vs-rest ROC curves for every class to ``roc_curve.png``.

    Args:
        all_labels: True class indices.
        all_predictions: Either a 2-D array of per-class scores/probabilities
            with shape ``(n_samples, n_classes)`` (preferred: gives a real ROC
            curve), or a 1-D sequence of predicted class indices (still
            accepted; each curve then has a single operating point).
        SELECTED_CLASSES: Class names; index ``i`` names class ``i``.
    """
    class_ids = list(range(len(SELECTED_CLASSES)))
    y_true = label_binarize(all_labels, classes=class_ids)

    scores = np.asarray(all_predictions)
    if scores.ndim == 1:
        # Hard predictions: binarise them as before.
        y_score = label_binarize(scores, classes=class_ids)
    else:
        # Continuous scores let roc_curve sweep over all thresholds.
        y_score = scores.astype(float)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in class_ids:
        fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    plt.figure(figsize=(12, 8))
    # plt.get_cmap is used because matplotlib.cm.get_cmap has been removed
    # from recent Matplotlib releases.
    colors = plt.get_cmap('Set1')(np.linspace(0, 1, len(SELECTED_CLASSES)))
    for i, color in zip(class_ids, colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=2,
                 label=f'ROC curve of {SELECTED_CLASSES[i]} (area = {roc_auc[i]:0.2f})')

    plt.plot([0, 1], [0, 1], 'k--', lw=2)  # chance diagonal
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right", fontsize='small')
    plt.savefig('roc_curve.png', dpi=300, bbox_inches='tight')
    plt.close()

# Score the curves with the model's class probabilities rather than its hard
# predictions. test_data is in the same order as all_labels.
plot_roc_curve(all_labels, final_model.predict(test_data, verbose=0), SELECTED_CLASSES)

In [None]:
#Function to predict and analyze video
def predict_and_analyze_video(video_file_path, output_video_path, output_graph_path, final_model, SEQUENCE_LENGTH, SELECTED_CLASSES):
    """Annotate a video with per-frame action predictions and plot them over time.

    Every frame is run through MediaPipe Pose. Once ``SEQUENCE_LENGTH`` poses
    have been buffered, the model classifies the sliding window ending at the
    current frame. The annotated video is written to ``output_video_path`` and
    the frame-by-frame graph to ``output_graph_path``.

    Args:
        video_file_path: Input video path.
        output_video_path: Path of the annotated output video (mp4v codec).
        output_graph_path: Path of the saved analysis figure.
        final_model: Model exposing ``predict`` on a batch of pose sequences.
        SEQUENCE_LENGTH: Number of consecutive poses per model input.
        SELECTED_CLASSES: Class names in model-output order.

    Returns:
        ``(frame_predictions, class_probabilities, fps)``.
        ``frame_predictions`` has one entry per frame read (``None`` until the
        window is full). ``class_probabilities`` maps each class name to a
        per-frame list (0 until the window is full).

    Raises:
        IOError: If the input video cannot be opened.
    """
    # GPU setup
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            print("Using GPU for predictions")
        except RuntimeError as e:
            print(e)
    else:
        print("Using CPU for predictions")

    # Initialize MediaPipe Pose
    mp_pose = mp.solutions.pose
    mp_drawing = mp.solutions.drawing_utils

    # Video setup
    video_reader = cv2.VideoCapture(video_file_path)
    if not video_reader.isOpened():
        video_reader.release()
        raise IOError(f"Could not open video file: {video_file_path}")

    fps = int(video_reader.get(cv2.CAP_PROP_FPS))
    if fps <= 0:
        # The frame rate is missing or reported as 0. Fall back to a nominal
        # rate so the writer and the time axis stay usable.
        fps = 30
    frame_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                   fps, (original_video_width, original_video_height))

    # Initialize variables for prediction and analysis
    pose_sequence = deque(maxlen=SEQUENCE_LENGTH)  # sliding window of the latest poses
    frame_predictions = []
    # Pre-create one list per class so every class has an entry in the result.
    class_probabilities = defaultdict(list, {class_name: [] for class_name in SELECTED_CLASSES})
    predicted_class_name = ''

    try:
        # Process video frames. The context managers close the Pose graph and
        # the progress bar; the finally block releases the reader and writer
        # even if a frame fails.
        with mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5,
                          min_tracking_confidence=0.5) as pose, \
                tqdm(total=frame_count, desc="Processing video") as pbar:
            while video_reader.isOpened():
                ok, frame = video_reader.read()
                if not ok:
                    break

                # Extract pose from the frame
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = pose.process(frame_rgb)

                if results.pose_landmarks:
                    landmarks = np.array([[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark])
                    pose_data = landmarks.flatten()
                else:
                    pose_data = np.zeros(NUM_JOINTS * 3)

                pose_sequence.append(pose_data)

                if len(pose_sequence) == SEQUENCE_LENGTH:
                    # Prepare the pose sequence for the model (batch of one)
                    input_sequence = np.array([list(pose_sequence)])

                    # Get predictions
                    outputs = final_model.predict(input_sequence, verbose=0)
                    probabilities = outputs[0]
                    predicted_class_index = np.argmax(probabilities)
                    predicted_class_name = SELECTED_CLASSES[predicted_class_index]

                    # Store predictions and probabilities
                    frame_predictions.append(predicted_class_name)
                    for i, class_name in enumerate(SELECTED_CLASSES):
                        class_probabilities[class_name].append(probabilities[i])
                else:
                    # Window not full yet: record placeholders so all lists
                    # stay aligned with the frame index.
                    frame_predictions.append(None)
                    for class_name in SELECTED_CLASSES:
                        class_probabilities[class_name].append(0)

                # Draw the pose on the frame
                mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                # Write predicted class name on top of the frame
                cv2.putText(frame, predicted_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Write the frame into the disk using the VideoWriter Object
                video_writer.write(frame)

                # Update progress bar
                pbar.update(1)
    finally:
        video_reader.release()
        video_writer.release()

    # No re-padding is needed: the warm-up frames already hold None / 0
    # placeholders, and re-padding made the lists longer than the video for
    # clips shorter than SEQUENCE_LENGTH - 1 frames.

    # Plot frame-by-frame results
    plot_frame_by_frame_results(frame_predictions, class_probabilities, fps, output_graph_path)

    return frame_predictions, class_probabilities, fps

def plot_frame_by_frame_results(frame_predictions, class_probabilities, fps, output_path):
    """Save a two-panel figure of class probabilities and predicted class per frame.

    Args:
        frame_predictions: Per-frame predicted class name, or ``None`` where
            no prediction exists.
        class_probabilities: Mapping of class name to a per-frame probability
            list (same length as ``frame_predictions``).
        fps: Frames per second, used to convert frame indices to seconds. If
            it is missing or not positive, the x-axis shows frame indices.
        output_path: Where the figure is saved.
    """
    frame_count = len(frame_predictions)
    if fps and fps > 0:
        x_axis = np.arange(frame_count) / fps
        x_label = "Time (seconds)"
    else:
        # Avoid dividing by zero when the frame rate is unknown.
        x_axis = np.arange(frame_count)
        x_label = "Frame"

    plt.figure(figsize=(15, 10))

    # Plot class probabilities
    plt.subplot(2, 1, 1)
    for class_name, probs in class_probabilities.items():
        plt.plot(x_axis, probs, label=class_name)
    plt.title("Class Probabilities Over Time")
    plt.xlabel(x_label)
    plt.ylabel("Probability")
    plt.legend()
    plt.grid(True)

    # Plot predicted classes
    plt.subplot(2, 1, 2)
    # Sort the class names so the y-axis order is the same on every run
    # (iteration order of a set of strings is not stable across runs).
    unique_classes = sorted({cls for cls in frame_predictions if cls is not None})
    class_to_num = {cls: i for i, cls in enumerate(unique_classes)}
    # Frames without a prediction are plotted at -1, below the labelled ticks.
    numeric_predictions = [class_to_num[cls] if cls is not None else -1 for cls in frame_predictions]
    plt.scatter(x_axis, numeric_predictions, marker='.')
    plt.yticks(range(len(unique_classes)), unique_classes)
    plt.title("Predicted Class Over Time")
    plt.xlabel(x_label)
    plt.ylabel("Predicted Class")
    plt.grid(True)

    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()

In [None]:
def process_test_video(final_model, SEQUENCE_LENGTH, SELECTED_CLASSES,
                       test_videos_directory='test_videos', video_title='kayaking'):
    """Run ``predict_and_analyze_video`` on ``<test_videos_directory>/<video_title>.mp4``.

    Args:
        final_model: Trained model passed through to the analysis function.
        SEQUENCE_LENGTH: Window length passed through; also embedded in the
            output video's file name.
        SELECTED_CLASSES: Class names passed through.
        test_videos_directory: Folder containing the input video; outputs are
            written next to it.
        video_title: Input file name without the ``.mp4`` extension.

    Raises:
        FileNotFoundError: If the input video does not exist.
    """
    input_video_file_path = os.path.join(test_videos_directory, f'{video_title}.mp4')
    output_video_file_path = os.path.join(
        test_videos_directory, f'{video_title}-Output-SeqLen{SEQUENCE_LENGTH}.mp4'
    )
    output_graph_path = os.path.join(test_videos_directory, f'{video_title}_frame_analysis.png')

    # Fail early with a clear message instead of producing empty outputs.
    if not os.path.isfile(input_video_file_path):
        raise FileNotFoundError(f"Test video not found: {input_video_file_path}")

    predict_and_analyze_video(
        input_video_file_path, output_video_file_path, output_graph_path,
        final_model, SEQUENCE_LENGTH, SELECTED_CLASSES
    )

    print(f"Processed video saved to: {output_video_file_path}")
    print(f"Frame-by-frame analysis graph saved to: {output_graph_path}")

# Annotate the test video with the trained model (run only after final_model has been trained)
process_test_video(final_model, SEQUENCE_LENGTH, SELECTED_CLASSES)

In [None]:
#Extra
#Function to download youtube videos
def download_yt_videos(yt_url_list, save_dir):
    """Download each URL into ``save_dir`` as an mp4 named after the video title.

    Downloads are best-effort: a failure is printed and the remaining URLs
    are still attempted.

    Args:
        yt_url_list: Iterable of video URLs.
        save_dir: Target directory; created if it does not exist.

    Returns:
        List of the URLs that were downloaded without raising.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    ydl_opts = {
        'outtmpl': os.path.join(save_dir, '%(title)s.%(ext)s'),
        'format': 'bestvideo+bestaudio/best',
        'merge_output_format': 'mp4'
    }

    downloaded = []
    for url in yt_url_list:
        try:
            with YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
        except Exception as e:
            # Deliberately broad: one bad URL must not abort the whole batch.
            print(f"Failed to download {url}: {e}")
        else:
            print(f"Downloaded: {url}")
            downloaded.append(url)
    return downloaded

# URLs of the videos to fetch
yt_url_list = [
    'https://www.youtube.com/shorts/5sUhFYATYeM'
    
]
# NOTE(review): downloads go to 'test_data', whereas process_test_video reads
# from 'test_videos' and expects '<video_title>.mp4' — confirm which directory
# and file name are intended.
save_dir = 'test_data'
download_yt_videos(yt_url_list, save_dir)