In [None]:
# Import necessary libraries
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import seaborn as sns
import tensorflow as tf
from collections import defaultdict, deque
from IPython.display import Video
from moviepy.editor import *
from pytube import YouTube
from sklearn.metrics import auc, classification_report, confusion_matrix, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.utils.class_weight import compute_class_weight
from tensorflow import keras
from tensorflow.keras import mixed_precision
from tqdm import tqdm
from urllib.request import urlretrieve
from yt_dlp import YoutubeDL
import yt_dlp as youtube_dl

In [None]:
# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
np.random.seed(42)
tf.random.set_seed(42)

# Dataset and preprocessing constants
DATASET_DIR = 'workspace/UCF50'
SELECTED_CLASSES = ['Kayaking', 'Basketball', 'JumpRope']
#SELECTED_CLASSES = ['Kayaking', 'Basketball', 'JumpRope', 'Diving', 'HorseRace', 'PullUps','MilitaryParade']
SEQUENCE_LENGTH = 15  # Number of frames to use for each video
IMG_HEIGHT = 128  # Frame height after resizing
IMG_WIDTH = 128  # Frame width after resizing

# Training constants
BATCH_SIZE = 16
NUM_EPOCHS = 7
LEARNING_RATE = 0.0001

# Report the device in use; on GPU, let TensorFlow grow memory on demand
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("Using CPU")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Using GPU")
    except RuntimeError as e:
        # Printed instead of raised so the notebook keeps running
        print(e)

In [None]:
# Function to extract image sequence from a video
def extract_image_sequence(video_path):
    """Sample ``SEQUENCE_LENGTH`` RGB frames from a video file.

    Frames are read at positions ``0, step, 2 * step, ...`` where ``step`` is
    the reported frame count divided by ``SEQUENCE_LENGTH`` (at least 1).
    Each frame is resized to ``(IMG_WIDTH, IMG_HEIGHT)`` and converted from
    BGR to RGB. Reading stops at the first frame that cannot be decoded; the
    remaining slots stay all-zero (black) padding.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` and shape
        ``(SEQUENCE_LENGTH, IMG_HEIGHT, IMG_WIDTH, 3)``.
    """
    # Start from an all-zero buffer: any slot that is never filled is the padding.
    frames = np.zeros((SEQUENCE_LENGTH, IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.uint8)

    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        skip_frames_window = max(frame_count // SEQUENCE_LENGTH, 1)

        for frame_index in range(SEQUENCE_LENGTH):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index * skip_frames_window)
            success, frame = cap.read()
            if not success:
                break
            frame = cv2.resize(frame, (IMG_WIDTH, IMG_HEIGHT))
            frames[frame_index] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Release the capture even if decoding or resizing raised.
        cap.release()

    return frames

In [None]:
# Persist a preprocessed dataset to disk
def save_preprocessed_dataset(data, labels, filename):
    """Pickle ``(data, labels)`` as a single tuple into ``filename``.

    Args:
        data: Preprocessed samples.
        labels: Labels matching ``data``.
        filename: Destination path; opened in binary write mode.
    """
    payload = (data, labels)
    with open(filename, 'wb') as out_file:
        pickle.dump(payload, out_file)
    print(f"Preprocessed dataset saved to {filename}")

# Read back a pickled (data, labels) pair
def load_preprocessed_dataset(filename):
    """Unpickle a ``(data, labels)`` tuple from ``filename``.

    Args:
        filename: Path of the pickle file; opened in binary read mode.

    Returns:
        Tuple ``(data, labels)`` exactly as stored in the file.
    """
    # NOTE(review): pickle.load can execute arbitrary code, so only open
    # cache files that this notebook (or another trusted source) produced.
    with open(filename, 'rb') as in_file:
        stored = pickle.load(in_file)
    data, labels = stored
    print(f"Preprocessed dataset loaded from {filename}")
    return data, labels

# Function to load all video paths and labels
def load_dataset():
    """Collect the ``.avi`` files of every class in ``SELECTED_CLASSES``.

    Each class is expected to be a sub-directory of ``DATASET_DIR`` named
    after the class. The label of a video is the index of its class in
    ``SELECTED_CLASSES``.

    Returns:
        Tuple ``(video_paths, labels)`` of two equally long lists, ordered by
        class and then by file name.

    Raises:
        OSError: Propagated from ``os.listdir`` when a class directory cannot
            be listed (for example because it does not exist).
    """
    video_paths = []
    labels = []
    for class_idx, class_name in enumerate(SELECTED_CLASSES):
        class_dir = os.path.join(DATASET_DIR, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and so any seeded split of it) stable across runs.
        for video_name in sorted(os.listdir(class_dir)):
            if video_name.endswith('.avi'):
                video_paths.append(os.path.join(class_dir, video_name))
                labels.append(class_idx)
    return video_paths, labels

In [None]:
#Function to apply data augmentation
def augment_image_sequence(image_sequence):
    """Apply one random augmentation, shared by all frames, to a clip.

    The previous version drew a fresh flip/brightness/contrast for every
    frame, so neighbouring frames of the same clip could be mirrored or lit
    differently. Here each random choice is drawn once and applied to the
    whole sequence, which keeps the clip temporally consistent.

    Args:
        image_sequence: Frames stacked along the first axis, i.e. a 4-D
            array/tensor ``(frames, height, width, channels)``.

    Returns:
        A ``tf.Tensor`` with the same shape as the stacked input frames.
    """
    sequence = tf.convert_to_tensor(image_sequence)

    def flip_whole_clip(clip, flip_fn):
        # A single coin toss decides for the entire clip. The batched
        # tf.image.random_flip_* ops would decide per frame instead.
        return tf.cond(tf.random.uniform([]) < 0.5,
                       lambda: flip_fn(clip),
                       lambda: clip)

    sequence = flip_whole_clip(sequence, tf.image.flip_left_right)
    sequence = flip_whole_clip(sequence, tf.image.flip_up_down)
    # Each of these ops samples one delta / one factor per call, so calling
    # them on the stacked clip applies the same adjustment to every frame.
    sequence = tf.image.random_brightness(sequence, max_delta=0.2)
    sequence = tf.image.random_contrast(sequence, lower=0.8, upper=1.2)
    return sequence

# Turn every video into a fixed-length frame sequence (optionally augmented)
def preprocess_dataset(video_paths, labels, augment=False):
    """Extract a frame sequence for each video and stack the results.

    Args:
        video_paths: Paths passed one by one to ``extract_image_sequence``.
        labels: Labels paired positionally with ``video_paths``.
        augment: When true, each extracted sequence is passed through
            ``augment_image_sequence`` before being stored.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays built from the per-video
        sequences and their labels.
    """
    sequences, targets = [], []
    progress = tqdm(zip(video_paths, labels), desc="Preprocessing dataset", total=len(video_paths))
    for path, target in progress:
        frames = extract_image_sequence(path)
        sequences.append(augment_image_sequence(frames) if augment else frames)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Reuse the cached dataset when it exists; otherwise build it from the videos and cache it
print("Loading and preprocessing dataset...")
preprocessed_file = 'preprocessed_image_dataset-tens-3.pkl'
#preprocessed_file = 'preprocessed_image_dataset-tens-7.pkl'

# NOTE(review): augment=True is applied to every clip before the train/test
# split performed later in the notebook, so the held-out clips are augmented
# as well, and the random distortions are frozen into the cache file.
# Confirm this is intended.
if not os.path.exists(preprocessed_file):
    video_paths, labels = load_dataset()
    preprocessed_data, preprocessed_labels = preprocess_dataset(video_paths, labels, augment=True)
    save_preprocessed_dataset(preprocessed_data, preprocessed_labels, preprocessed_file)
else:
    preprocessed_data, preprocessed_labels = load_preprocessed_dataset(preprocessed_file)


In [None]:
# this code to visualize class distribution
def visualize_class_distribution(labels, class_names):
    """Save a pie chart of how many samples each class has.

    Args:
        labels: Array-like of class labels. Assumed to be integer indices
            into ``class_names`` (that is how the wedges are named).
        class_names: Sequence of class names indexed by label.

    Side effects:
        Writes ``class_distribution_pie.png`` to the working directory.
    """
    unique, counts = np.unique(labels, return_counts=True)
    # Name only the classes that actually occur, so the wedge labels always
    # line up with ``counts`` even when some class has no samples.
    present_names = [class_names[int(class_idx)] for class_idx in unique]

    # Pie chart
    plt.figure(figsize=(10, 10))
    plt.pie(counts, labels=present_names, autopct='%1.1f%%', startangle=90)
    plt.title('Class Distribution in Dataset')
    plt.axis('equal')
    plt.tight_layout()
    plt.savefig('class_distribution_pie.png')
    plt.close()

    # Only the pie chart is produced; the message no longer mentions a bar chart.
    print("Class distribution visualization saved as 'class_distribution_pie.png'")

# Chart the label distribution of the dataset prepared above
visualize_class_distribution(labels=preprocessed_labels, class_names=SELECTED_CLASSES)

In [None]:
# Compute class weights
present_classes = np.unique(preprocessed_labels)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=preprocessed_labels
)
# Key each weight by its actual class id. enumerate() would silently attach
# weights to the wrong classes whenever the ids present are not exactly
# 0..n-1 (for example if one class has no samples).
class_weight_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights)
}
print("Class weights:", class_weight_dict)

In [None]:
# Split dataset into train and test sets
train_data, test_data, train_labels, test_labels = train_test_split(
    preprocessed_data, preprocessed_labels, test_size=0.2, random_state=42, stratify=preprocessed_labels
)

# Convert labels to one-hot encoded format.
# num_classes is pinned to len(SELECTED_CLASSES) so both splits always get the
# same number of columns as the model has outputs, instead of a width inferred
# from the largest label that happens to be present in a split.
train_labels = keras.utils.to_categorical(train_labels, num_classes=len(SELECTED_CLASSES))
test_labels = keras.utils.to_categorical(test_labels, num_classes=len(SELECTED_CLASSES))

# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))

# Prepare datasets for training: only the training set is shuffled, the test
# set keeps its original order.
train_dataset = train_dataset.shuffle(buffer_size=len(train_data)).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [None]:
# Switch Keras to the mixed_float16 policy for everything built from here on
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Optimisation settings. These re-assign LEARNING_RATE and BATCH_SIZE from the
# configuration cell; the tf.data pipelines were already batched before this
# point, using the BATCH_SIZE value in effect at that time.
LEARNING_RATE = 0.0003
BATCH_SIZE = 16

# Architecture settings read by create_cnn_lstm_model
NUM_CONV_LAYERS = 1
NUM_FILTERS = 71
KERNEL_SIZE = 3
LSTM_UNITS = 69
DROPOUT_RATE = 0.32

# Build the CNN-LSTM classifier
def create_cnn_lstm_model(input_shape, num_classes):
    """Assemble a Sequential model: per-frame CNN, two LSTMs, dense head.

    The layer sizes come from the module-level constants ``NUM_CONV_LAYERS``,
    ``NUM_FILTERS``, ``KERNEL_SIZE``, ``LSTM_UNITS`` and ``DROPOUT_RATE``.

    Args:
        input_shape: Shape of one sample, passed as ``input_shape`` to the
            first ``TimeDistributed`` layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The uncompiled ``keras.Sequential`` model.
    """
    layers = keras.layers
    model = keras.Sequential()

    def add_conv_block(**wrapper_kwargs):
        # Conv -> BatchNorm -> MaxPool, each applied to every frame independently.
        conv = layers.Conv2D(NUM_FILTERS, (KERNEL_SIZE, KERNEL_SIZE), activation='relu', padding='same')
        model.add(layers.TimeDistributed(conv, **wrapper_kwargs))
        model.add(layers.TimeDistributed(layers.BatchNormalization()))
        model.add(layers.TimeDistributed(layers.MaxPooling2D((2, 2))))

    # The first block is always present and carries the input shape;
    # the remaining NUM_CONV_LAYERS - 1 blocks are identical apart from that.
    add_conv_block(input_shape=input_shape)
    for _ in range(NUM_CONV_LAYERS - 1):
        add_conv_block()

    # Flatten each frame's feature map so the LSTMs see one vector per frame
    model.add(layers.TimeDistributed(layers.Flatten()))

    # Recurrent part: the first LSTM emits the full sequence, the second its last state
    model.add(layers.LSTM(LSTM_UNITS, return_sequences=True))
    model.add(layers.Dropout(DROPOUT_RATE))
    model.add(layers.LSTM(LSTM_UNITS))
    model.add(layers.Dropout(DROPOUT_RATE))

    # Classification head; the output layer is forced to float32
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dropout(DROPOUT_RATE))
    model.add(layers.Dense(num_classes, activation='softmax', dtype='float32'))

    return model

In [None]:
# Initialize the model
input_shape = (SEQUENCE_LENGTH, IMG_HEIGHT, IMG_WIDTH, 3)
num_classes = len(SELECTED_CLASSES)
model = create_cnn_lstm_model(input_shape, num_classes)

# Compile the model with a mixed precision optimizer
optimizer = mixed_precision.LossScaleOptimizer(keras.optimizers.Adam(learning_rate=LEARNING_RATE))
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'],
              weighted_metrics=['accuracy'])

# Training loop
print("Starting training...")
best_accuracy = 0
train_losses, train_accuracies = [], []
val_losses, val_accuracies = [], []

# Epoch metrics are sample-weighted means of the per-batch values: each batch
# contributes in proportion to its size, so a smaller final batch no longer
# skews the reported loss/accuracy.
# NOTE(review): this assumes train_on_batch/test_on_batch report metrics for
# the current batch only (not a running average) - confirm for the installed
# Keras version.
for epoch in range(NUM_EPOCHS):
    # Training
    train_loss = 0.0
    train_accuracy = 0.0
    train_samples = 0
    for batch_frames, batch_targets in tqdm(train_dataset, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} - Training"):
        metrics = model.train_on_batch(
            batch_frames,
            batch_targets,
            class_weight=class_weight_dict
        )
        batch_samples = int(batch_frames.shape[0])
        train_loss += float(metrics[0]) * batch_samples
        train_accuracy += float(metrics[1]) * batch_samples  # Assuming accuracy is the second metric
        train_samples += batch_samples

    # max(..., 1) avoids a ZeroDivisionError on an empty dataset
    train_loss /= max(train_samples, 1)
    train_accuracy /= max(train_samples, 1)

    # Validation (no class weights here)
    val_loss = 0.0
    val_accuracy = 0.0
    val_samples = 0
    for batch_frames, batch_targets in tqdm(test_dataset, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} - Validation"):
        metrics = model.test_on_batch(
            batch_frames,
            batch_targets
        )
        batch_samples = int(batch_frames.shape[0])
        val_loss += float(metrics[0]) * batch_samples
        val_accuracy += float(metrics[1]) * batch_samples  # Assuming accuracy is the second metric
        val_samples += batch_samples

    val_loss /= max(val_samples, 1)
    val_accuracy /= max(val_samples, 1)

    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    print(f"Epoch {epoch+1}/{NUM_EPOCHS}")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}")
    print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}")

    # Keep a copy of the weights from the best-scoring epoch on disk.
    # Note that `model` itself keeps training, so after the loop it holds the
    # last epoch's weights, not necessarily the saved best ones.
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        model.save('best_model_image.h5')
        print(f"New best model saved with accuracy: {best_accuracy:.2f}")

print("Training completed.")

In [None]:
# Draw loss (left) and accuracy (right) per epoch and save them as one image
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(train_losses, label='Train Loss')
loss_ax.plot(val_losses, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

accuracy_ax.plot(train_accuracies, label='Train Accuracy')
accuracy_ax.plot(val_accuracies, label='Validation Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

fig.tight_layout()
fig.savefig('training_curves.png')
# The figure is written to disk only; close it so it is not kept in memory
plt.close(fig)


In [None]:
# After training, evaluate the model on the test set
all_predictions = []
all_labels = []

# The loop variables are deliberately not called `frames`/`labels`, so this
# cell does not overwrite the notebook-level `labels` list from dataset loading.
for batch_frames, batch_targets in tqdm(test_dataset, desc="Testing"):
    # verbose=0 keeps Keras' own per-call progress bar from interleaving with tqdm
    outputs = model.predict(batch_frames, verbose=0)
    predicted = np.argmax(outputs, axis=1)
    all_predictions.extend(predicted)
    # Targets are one-hot, so argmax recovers the integer class index
    all_labels.extend(np.argmax(batch_targets, axis=1))

# Calculate and print overall accuracy
accuracy = 100 * np.mean(np.array(all_predictions) == np.array(all_labels))
print(f"Overall Test Accuracy: {accuracy:.2f}%")



In [None]:
# Plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, classes):
    """Save a row-normalised (percentage) confusion matrix heat map.

    Args:
        y_true: True class labels. Assumed to be integer indices into
            ``classes``.
        y_pred: Predicted class labels, same encoding as ``y_true``.
        classes: Class names used for both axes' tick labels.

    Side effects:
        Writes ``confusion_matrix.png`` to the working directory.
    """
    # Fix the label set so the matrix always has one row/column per entry of
    # ``classes``, even if some class never shows up in y_true or y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))

    # Row-normalise to percentages; rows without any true sample stay 0
    # instead of turning into NaN through a division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_percentage = np.divide(
        cm.astype('float'),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    ) * 100

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_percentage, annot=True, fmt='.2f', cmap='Blues', xticklabels=classes, yticklabels=classes)
    plt.title('Confusion Matrix (Percentage)')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig('confusion_matrix.png')
    plt.close()

plot_confusion_matrix(all_labels, all_predictions, SELECTED_CLASSES)

In [None]:
# Per-class precision / recall / F1 as a dictionary
report = classification_report(all_labels, all_predictions, target_names=SELECTED_CLASSES, output_dict=True)

# One row per class: drop the summary rows and the support column
df = (
    pd.DataFrame(report)
    .transpose()
    .drop(['accuracy', 'macro avg', 'weighted avg'])
    .drop('support', axis=1)
)

# Render the frame as a matplotlib table on an otherwise empty axis
fig, ax = plt.subplots(figsize=(12, 8))
ax.axis('off')

table = ax.table(cellText=df.values.round(2),
                 rowLabels=df.index,
                 colLabels=df.columns,
                 cellLoc='center',
                 loc='center')

# Fixed, slightly small font so more classes fit; stretch the cells a little
table.auto_set_font_size(False)
table.set_fontsize(9)
table.scale(1.2, 1.5)

# Row 0 of the table is the header row: bold white text on blue
for column_position in range(len(df.columns)):
    header_cell = table[0, column_position]
    header_cell.set_text_props(weight='bold', color='white')
    header_cell.set_facecolor('#4C72B0')

ax.set_title('Classification Report', fontsize=16, fontweight='bold', pad=20)

# Write the image and release the figure
fig.tight_layout()
fig.savefig('classification_report.png', dpi=300, bbox_inches='tight')
plt.close(fig)



In [None]:
# ROC Curve
roc_class_count = len(SELECTED_CLASSES)

# One-hot ground truth with exactly one column per class. Indexing an identity
# matrix also works for two classes, where label_binarize would return a
# single column.
y_true = np.eye(roc_class_count, dtype=int)[np.asarray(all_labels, dtype=int)]

# ROC needs continuous scores: with hard 0/1 predictions each curve collapses
# to a single operating point. test_dataset is batched without shuffling, so
# the rows of y_score are in the same order as all_labels.
y_score = model.predict(test_dataset, verbose=0)

# Compute ROC curve and ROC area for each class (one-vs-rest)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(roc_class_count):
    fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curve
plt.figure(figsize=(12, 8))  # Increased figure size
# plt.get_cmap replaces the deprecated plt.cm.get_cmap accessor
colors = plt.get_cmap('Set1')(np.linspace(0, 1, roc_class_count))
for i, color in zip(range(roc_class_count), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label=f'ROC curve of {SELECTED_CLASSES[i]} (area = {roc_auc[i]:0.2f})')

# Diagonal = performance of a random classifier
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right", fontsize='small')  # Smaller font size for legend
plt.savefig('roc_curve.png', dpi=300, bbox_inches='tight')
plt.close()

In [None]:
# Function to predict on video and generate frame-by-frame analysis
def predict_and_analyze_video(video_file_path, output_video_path, output_graph_path, model, SEQUENCE_LENGTH, SELECTED_CLASSES):
    """Classify a video with a sliding window and write an annotated copy.

    Every frame is resized to ``(IMG_WIDTH, IMG_HEIGHT)`` (module-level
    constants) and pushed into a window of the last ``SEQUENCE_LENGTH``
    frames. Once the window is full, the model is run on it for every new
    frame. The most recent predicted class name is drawn on each output frame
    (an empty string until the first prediction exists).

    Args:
        video_file_path: Input video path.
        output_video_path: Path of the annotated ``mp4v`` video to write.
        output_graph_path: Path of the analysis figure, produced by
            ``plot_frame_by_frame_results``.
        model: Object whose ``predict`` accepts a batch of one frame window
            and returns one row of per-class probabilities.
        SEQUENCE_LENGTH: Number of frames per window.
        SELECTED_CLASSES: Class names, indexed by the model's output position.

    Returns:
        Tuple ``(frame_predictions, class_probabilities, fps)``.
        ``frame_predictions`` has one entry per frame read (``None`` while
        the window is still filling). ``class_probabilities`` maps each class
        name to a per-frame list (``0`` while the window is filling).

    Raises:
        IOError: If the input video cannot be opened.
    """
    # GPU setup
    physical_devices = tf.config.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        try:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
        except RuntimeError as e:
            # TensorFlow refuses to change device settings once the GPU has
            # been initialised; report it and carry on, as the notebook's
            # device-setup cell does.
            print(e)
        print("GPU is available and will be used for predictions.")
        prediction_device = '/GPU:0'
    else:
        print("No GPU found. Using CPU for predictions.")
        prediction_device = '/CPU:0'

    # Suppress TensorFlow logging
    tf.get_logger().setLevel('ERROR')

    # Video setup
    video_reader = cv2.VideoCapture(video_file_path)
    if not video_reader.isOpened():
        video_reader.release()
        # Fail loudly rather than silently writing an empty video and graph
        raise IOError(f"Could not open video file: {video_file_path}")

    # Initialize variables for prediction and analysis
    frame_queue = deque(maxlen=SEQUENCE_LENGTH)
    frame_predictions = []
    class_probabilities = defaultdict(list)
    predicted_class_name = ''

    video_writer = None
    try:
        fps = int(video_reader.get(cv2.CAP_PROP_FPS))
        frame_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                       fps, (original_video_width, original_video_height))

        # Process video frames
        with tqdm(total=frame_count, desc="Processing video") as pbar:
            while video_reader.isOpened():
                ok, frame = video_reader.read()
                if not ok:
                    break

                # Preprocess the frame
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame_resized = cv2.resize(frame_rgb, (IMG_WIDTH, IMG_HEIGHT))
                frame_queue.append(frame_resized)

                if len(frame_queue) == SEQUENCE_LENGTH:
                    # Stack the window into a batch of one: (1, frames, H, W, 3)
                    input_frames = np.expand_dims(np.stack(list(frame_queue)), axis=0)

                    # Get predictions
                    with tf.device(prediction_device):
                        outputs = model.predict(input_frames, verbose=0)
                    probabilities = outputs[0]
                    predicted_class_index = np.argmax(probabilities)
                    predicted_class_name = SELECTED_CLASSES[predicted_class_index]

                    # Store predictions and probabilities
                    frame_predictions.append(predicted_class_name)
                    for i, class_name in enumerate(SELECTED_CLASSES):
                        class_probabilities[class_name].append(probabilities[i])
                else:
                    # Window still filling: record placeholders so every list
                    # keeps exactly one entry per frame read.
                    frame_predictions.append(None)
                    for class_name in SELECTED_CLASSES:
                        class_probabilities[class_name].append(0)

                # Write predicted class name on top of the frame
                cv2.putText(frame, predicted_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Write the frame into the disk using the VideoWriter Object
                video_writer.write(frame)

                # Update progress bar
                pbar.update(1)
    finally:
        # Release both handles even when decoding or prediction fails
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

    # No extra padding is needed: the loop above already stored None / 0 for
    # the warm-up frames. Re-padding here made the lists longer than the
    # video whenever it had fewer than SEQUENCE_LENGTH - 1 frames.

    # Plot frame-by-frame results
    plot_frame_by_frame_results(frame_predictions, class_probabilities, fps, output_graph_path)

    return frame_predictions, class_probabilities, fps
def plot_frame_by_frame_results(frame_predictions, class_probabilities, fps, output_path):
    """Save a two-panel figure of per-frame probabilities and predictions.

    Args:
        frame_predictions: One entry per frame: a class name, or ``None``
            where no prediction exists.
        class_probabilities: Mapping of class name to a per-frame list of
            probabilities, each as long as ``frame_predictions``.
        fps: Frames per second; frame indices are divided by it to obtain
            the time axis in seconds.
        output_path: File the figure is saved to.
    """
    frame_count = len(frame_predictions)
    time_axis = np.arange(frame_count) / fps

    plt.figure(figsize=(15, 10))

    # Plot class probabilities
    plt.subplot(2, 1, 1)
    for class_name, probs in class_probabilities.items():
        plt.plot(time_axis, probs, label=class_name)
    plt.title("Class Probabilities Over Time")
    plt.xlabel("Time (seconds)")
    plt.ylabel("Probability")
    plt.legend()
    plt.grid(True)

    # Plot predicted classes
    plt.subplot(2, 1, 2)
    # Order the predicted classes by first appearance. A plain set() has no
    # stable iteration order for strings, so the y-axis layout could differ
    # from one run to the next.
    unique_classes = list(dict.fromkeys(cls for cls in frame_predictions if cls is not None))
    class_to_num = {cls: i for i, cls in enumerate(unique_classes)}
    # Frames without a prediction are drawn at -1, below the first class tick
    numeric_predictions = [class_to_num[cls] if cls is not None else -1 for cls in frame_predictions]
    plt.scatter(time_axis, numeric_predictions, marker='.')
    plt.yticks(range(len(unique_classes)), unique_classes)
    plt.title("Predicted Class Over Time")
    plt.xlabel("Time (seconds)")
    plt.ylabel("Predicted Class")
    plt.grid(True)

    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()
def process_test_video(model, SEQUENCE_LENGTH, SELECTED_CLASSES,
                       test_videos_directory='test_videos', video_title='basketball'):
    """Run the frame-by-frame analysis on one video and report the outputs.

    Reads ``<test_videos_directory>/<video_title>.mp4`` and writes the
    annotated video and the analysis graph next to it.

    Args:
        model: Model forwarded to ``predict_and_analyze_video``.
        SEQUENCE_LENGTH: Window length forwarded to
            ``predict_and_analyze_video``; also part of the output file name.
        SELECTED_CLASSES: Class names forwarded to
            ``predict_and_analyze_video``.
        test_videos_directory: Directory holding the input video and
            receiving the outputs. Defaults to the previously hard-coded
            ``'test_videos'``.
        video_title: File name of the video without the ``.mp4`` extension.
            Defaults to the previously hard-coded ``'basketball'``.
    """
    input_video_file_path = f'{test_videos_directory}/{video_title}.mp4'
    output_video_file_path = f'{test_videos_directory}/{video_title}-Output-SeqLen{SEQUENCE_LENGTH}.mp4'
    output_graph_path = f'{test_videos_directory}/{video_title}_frame_analysis.png'

    # The results are not returned on purpose: as the last statement of a
    # notebook cell a return value would be echoed in full.
    predict_and_analyze_video(
        input_video_file_path, output_video_file_path, output_graph_path,
        model, SEQUENCE_LENGTH, SELECTED_CLASSES
    )

    print(f"Processed video saved to: {output_video_file_path}")
    print(f"Frame-by-frame analysis graph saved to: {output_graph_path}")

# Run the video analysis with the trained model (needs the test video on disk)
process_test_video(model=model, SEQUENCE_LENGTH=SEQUENCE_LENGTH, SELECTED_CLASSES=SELECTED_CLASSES)

In [None]:
#EXTRA
#Function to download youtube videos
def download_yt_videos(yt_url_list, save_dir):
    """Download each URL into ``save_dir`` as an mp4 named after its title.

    Downloads are best-effort: a failure for one URL is printed and the
    remaining URLs are still attempted.

    Args:
        yt_url_list: Iterable of video URLs.
        save_dir: Target directory; created (with parents) when missing.
    """
    # exist_ok=True replaces the separate exists() check, so there is no gap
    # between checking for the directory and creating it.
    os.makedirs(save_dir, exist_ok=True)

    ydl_opts = {
        'outtmpl': os.path.join(save_dir, '%(title)s.%(ext)s'),
        'format': 'bestvideo+bestaudio/best',
        'merge_output_format': 'mp4'
    }

    for url in yt_url_list:
        try:
            with YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
                print(f"Downloaded: {url}")
        except Exception as e:
            # Deliberately broad: report the failure and move on to the next URL
            print(f"Failed to download {url}: {e}")

# Videos to fetch and the directory they are stored in
save_dir = 'test_data'
yt_url_list = ['https://www.youtube.com/shorts/7fj2-7o2XAg']
download_yt_videos(yt_url_list=yt_url_list, save_dir=save_dir)