In [5]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import cv2
from tqdm import tqdm

# Root of the dataset; the loader expects "train", "val" and "test" sub-directories here
BASE_DIR = "/kaggle/input/ucf101-action-recognition/"

# Sequence, image and training hyper-parameters
SEQ_LENGTH = 20  # Frames read from the start of each video (shorter videos are skipped)
INPUT_LENGTH = 10  # Leading frames fed to the model; the remaining frames are the target
IMG_WIDTH = 64  # Frame width after resizing, in pixels
IMG_HEIGHT = 64  # Frame height after resizing, in pixels
IMG_CHANNEL = 1  # Frames are converted to single-channel grayscale
BATCH_SIZE = 16
EPOCHS = 50
LR = 0.001  # Learning rate passed to the Adam optimizer

# Load video data with proper handling of train/val/test split directories
def load_videos_from_split_directory(base_dir, split, seq_length, img_width, img_height):
    """
    Load the first ``seq_length`` grayscale frames of every ``.avi`` video in a split.

    The directory ``<base_dir>/<split>`` is expected to contain one
    sub-directory per category, each holding ``.avi`` files. Categories and
    videos are visited in sorted order so that the returned array (and any
    slice taken from it) is reproducible across runs and machines.

    Args:
        base_dir: Dataset root directory.
        split: Name of the split sub-directory (e.g. "train", "val", "test").
        seq_length: Number of leading frames to keep per video.
        img_width: Target frame width in pixels.
        img_height: Target frame height in pixels.

    Returns:
        float32 array of shape (num_videos, seq_length, img_height, img_width)
        holding unnormalised pixel values. Videos that yield fewer than
        ``seq_length`` frames (including ones that cannot be read at all) are
        skipped. If no video qualifies, the array has zero rows but keeps the
        trailing dimensions so downstream reshapes still work.
    """
    split_dir = os.path.join(base_dir, split)
    videos = []
    # os.listdir() order is arbitrary; sort for a deterministic sample order
    categories = sorted(os.listdir(split_dir))
    for category in tqdm(categories, desc=f"Processing {split} categories"):
        category_path = os.path.join(split_dir, category)
        if not os.path.isdir(category_path):  # Skip non-directory files
            continue

        video_files = sorted(f for f in os.listdir(category_path) if f.endswith(".avi"))
        for video_name in video_files:
            cap = cv2.VideoCapture(os.path.join(category_path, video_name))
            frames = []
            try:
                while len(frames) < seq_length:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame = cv2.resize(frame, (img_width, img_height))
                    frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))  # Convert to grayscale
            finally:
                # Always free the decoder, even if a frame fails to convert
                cap.release()
            if len(frames) == seq_length:  # Only keep videos with enough frames
                videos.append(np.array(frames, dtype=np.float32))

    if not videos:
        return np.empty((0, seq_length, img_height, img_width), dtype=np.float32)
    return np.stack(videos)

# Load each split, divide pixel values by 255 and append the channel axis
print("Loading training data...")
train_data = (
    load_videos_from_split_directory(BASE_DIR, "train", SEQ_LENGTH, IMG_WIDTH, IMG_HEIGHT) / 255.0
).reshape((-1, SEQ_LENGTH, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL))
print(f"Training data shape: {train_data.shape}")

print("Loading validation data...")
val_data = (
    load_videos_from_split_directory(BASE_DIR, "val", SEQ_LENGTH, IMG_WIDTH, IMG_HEIGHT) / 255.0
).reshape((-1, SEQ_LENGTH, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL))
print(f"Validation data shape: {val_data.shape}")

print("Loading test data...")
test_data = (
    load_videos_from_split_directory(BASE_DIR, "test", SEQ_LENGTH, IMG_WIDTH, IMG_HEIGHT) / 255.0
).reshape((-1, SEQ_LENGTH, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL))
print(f"Test data shape: {test_data.shape}")

# Split every sequence along the time axis: the first INPUT_LENGTH frames are
# the model input, the remaining frames are the prediction target
x_train = train_data[:, :INPUT_LENGTH]
y_train = train_data[:, INPUT_LENGTH:]
x_val = val_data[:, :INPUT_LENGTH]
y_val = val_data[:, INPUT_LENGTH:]



Loading training data...


Processing train categories:  25%|██▍       | 25/101 [00:17<00:52,  1.44it/s]


KeyboardInterrupt: 

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model

# Custom pixel-level accuracy metric
def pixel_accuracy(y_true, y_pred, threshold=0.5):
    """
    Fraction of elements whose absolute prediction error is below ``threshold``.

    Both tensors are cast to float32 before comparison; the result is the mean
    over all elements of the 0/1 "within threshold" indicator.
    """
    abs_error = tf.abs(tf.cast(y_true, tf.float32) - tf.cast(y_pred, tf.float32))
    within_threshold = tf.cast(abs_error < threshold, tf.float32)
    return tf.reduce_mean(within_threshold)

# PredRNN Cell Class
class PredRNNCell(tf.keras.layers.Layer):
    """
    Convolutional recurrent cell computing ``h_next = relu(conv_x(x) + conv_h(h))``.

    Despite its name, the cell has no gates and no separate memory state: it is
    a plain ReLU recurrence built from two ``Conv2D`` layers.

    Args:
        filters: Number of output channels of both convolutions.
        kernel_size: Kernel size of both convolutions.
        stride: Stride of the input convolution. The recurrent convolution
            always uses stride 1, because the hidden state already has the
            output resolution; striding it again would shrink the state on
            every time step and break the addition with ``conv_x(x)``.
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, filters, kernel_size, stride=1, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.stride = stride
        self.conv_x = layers.Conv2D(
            self.filters, self.kernel_size, strides=self.stride, padding="same", activation="relu"
        )
        self.conv_h = layers.Conv2D(
            self.filters, self.kernel_size, strides=1, padding="same", activation="relu"
        )

    def build(self, input_shape):
        """Create the weights of both convolutions for inputs of ``input_shape``."""
        self.conv_x.build(input_shape)
        # The hidden state has the shape of conv_x's output, not of the raw input
        self.conv_h.build(self.conv_x.compute_output_shape(input_shape))
        super().build(input_shape)

    def call(self, inputs, states=None):
        """
        Advance the recurrence by one time step.

        Args:
            inputs: Input tensor for the current time step.
            states: Previous hidden state, or ``None`` to start from zeros.

        Returns:
            Tuple ``(output, new_state)``; both entries are the same tensor.
        """
        xh = self.conv_x(inputs)
        if states is None:
            # A zero state must match the resolution and width of conv_x's output
            states = tf.zeros_like(xh)
        hh = self.conv_h(states)
        h_next = tf.nn.relu(xh + hh)
        return h_next, h_next

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "filters": self.filters,
                "kernel_size": self.kernel_size,
                "stride": self.stride,
            }
        )
        return config

# PredRNN Model Class
class PredRNN(tf.keras.Model):
    """
    Stack of ``PredRNNCell`` layers unrolled over time, followed by a 3x3 output conv.

    For every input time step the frame is passed through all cells in order
    (each cell keeps its own hidden state across time), and the top cell's
    output is projected to ``output_channels`` by a linear convolution. The
    model therefore emits exactly one output frame per input frame.

    Args:
        input_shape: Accepted for interface compatibility; it is not used, the
            actual shape is taken from ``build()`` / the call inputs.
        hidden_size: Number of filters in every recurrent cell.
        output_channels: Number of channels of each predicted frame.
        num_layers: Number of stacked recurrent cells (at least 1).
        **kwargs: Forwarded to ``tf.keras.Model`` (e.g. ``name``).
    """

    def __init__(self, input_shape, hidden_size, output_channels, num_layers=3, **kwargs):
        super().__init__(**kwargs)
        if num_layers < 1:
            raise ValueError(f"num_layers must be at least 1, got {num_layers}")
        self.layers_list = [PredRNNCell(hidden_size, (3, 3)) for _ in range(num_layers)]
        self.conv_output = layers.Conv2D(output_channels, (3, 3), padding="same")

    def build(self, input_shape):
        """Build every cell and the output conv for inputs of shape (batch, time, H, W, C)."""
        time_steps, height, width, channels = input_shape[1:]
        for layer in self.layers_list:
            layer.build((None, height, width, channels))
            channels = layer.filters  # Update channels for the next layer
        self.conv_output.build((None, height, width, channels))
        super().build(input_shape)

    def call(self, inputs):
        """
        Run the stacked recurrence over all time steps.

        Args:
            inputs: Tensor of shape (batch, time, height, width, channels) with
                statically known time, height and width.

        Returns:
            Tensor of shape (batch, time, height, width, output_channels).
        """
        time_steps, height, width = inputs.shape[1], inputs.shape[2], inputs.shape[3]
        if time_steps is None:
            # The Python loop below is unrolled, so the length must be known up front
            raise ValueError("PredRNN requires a statically known number of time steps")
        batch_size = tf.shape(inputs)[0]

        # One zero-initialised hidden state per cell, created once before unrolling
        states = [
            tf.zeros((batch_size, height, width, layer.filters), dtype=tf.float32)
            for layer in self.layers_list
        ]
        outputs = []
        for t in range(time_steps):
            x = inputs[:, t]
            for i, layer in enumerate(self.layers_list):
                x, states[i] = layer(x, states[i])
            outputs.append(x)
        outputs = tf.stack(outputs, axis=1)

        # Fold time into the batch axis so a single Conv2D projects every frame
        channels = self.layers_list[-1].filters
        outputs = tf.reshape(outputs, (-1, height, width, channels))
        outputs = self.conv_output(outputs)
        return tf.reshape(outputs, (batch_size, time_steps, height, width, -1))

# Build and compile the model
hidden_size = 64
output_channels = IMG_CHANNEL
input_shape = (INPUT_LENGTH, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL)

# The constructor receives the per-frame shape; build() receives the batched sequence shape
model = PredRNN(input_shape[1:], hidden_size, output_channels)
model.build((None, *input_shape))

model.compile(
    loss="mse",
    metrics=[pixel_accuracy],  # Adding pixel accuracy metric
    optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
)

# Train on the input frames to predict the target frames, validating on the val split
model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_val, y_val),
)

# Persist the full model and, separately, its weights
SAVE_DIR = "/kaggle/working/checkpoints/"
os.makedirs(SAVE_DIR, exist_ok=True)
model.save(os.path.join(SAVE_DIR, "predrnn_model.h5"))
model.save_weights(os.path.join(SAVE_DIR, "predrnn_model_weights.weights.h5"))

# Test and Save Results
predictions = model.predict(x_val[:5])
RESULTS_DIR = "/kaggle/working/results/"
os.makedirs(RESULTS_DIR, exist_ok=True)

for i, prediction in enumerate(predictions):
    output_dir = os.path.join(RESULTS_DIR, f"sample_{i}")
    os.makedirs(output_dir, exist_ok=True)
    for t, frame in enumerate(prediction):
        # The model's output convolution has no activation, so predictions can
        # fall outside [0, 1]; clip first, otherwise the uint8 cast wraps around
        frame_u8 = (np.clip(frame.squeeze(), 0.0, 1.0) * 255).astype(np.uint8)
        cv2.imwrite(os.path.join(output_dir, f"frame_{t + 1}.png"), frame_u8)

print(f"Results saved in: {RESULTS_DIR}")


In [None]:
import imageio

# Function to create a GIF
def create_gif(frames, gif_path):
    """
    Write a sequence of frames to ``gif_path`` as a GIF at 10 FPS.

    Args:
        frames: Iterable of image arrays. ``uint8`` frames are taken to be
            final 0-255 images and are written unchanged. Frames of any other
            dtype are treated as intensities in [0, 1]: they are scaled by 255
            and clipped to [0, 255] before the ``uint8`` cast.
        gif_path: Destination file path.
    """
    gif_frames = []
    for frame in frames:
        frame = np.asarray(frame)
        if frame.dtype != np.uint8:
            # Clip so out-of-range values saturate instead of wrapping around
            frame = np.clip(frame * 255, 0, 255).astype(np.uint8)
        # uint8 input is already on the 0-255 scale; scaling it again would overflow
        gif_frames.append(frame)
    imageio.mimsave(gif_path, gif_frames, fps=10)  # Save GIF at 10 FPS

# Save results with ground truth comparison
RESULTS_DIR = "/kaggle/working/results/"
os.makedirs(RESULTS_DIR, exist_ok=True)

for i, (prediction, ground_truth) in enumerate(zip(predictions, y_val[:5])):
    output_dir = os.path.join(RESULTS_DIR, f"sample_{i}")
    os.makedirs(output_dir, exist_ok=True)

    prediction_frames = []
    ground_truth_frames = []
    combined_frames = []

    for t in range(prediction.shape[0]):
        # Predictions are not bounded to [0, 1]; clip before any uint8
        # conversion so out-of-range values saturate instead of wrapping around
        pred_unit = np.clip(prediction[t].squeeze(), 0.0, 1.0)
        gt_unit = np.clip(ground_truth[t].squeeze(), 0.0, 1.0)

        pred_frame = (pred_unit * 255).astype(np.uint8)  # Convert prediction to uint8
        gt_frame = (gt_unit * 255).astype(np.uint8)  # Convert ground truth to uint8

        # Save individual frames for debugging
        cv2.imwrite(os.path.join(output_dir, f"pred_frame_{t + 1}.png"), pred_frame)
        cv2.imwrite(os.path.join(output_dir, f"gt_frame_{t + 1}.png"), gt_frame)

        # Store frames for GIF creation. create_gif() performs the 0-255
        # scaling itself, so it must receive the [0, 1] float frames rather
        # than the already-scaled uint8 ones
        prediction_frames.append(pred_unit)
        ground_truth_frames.append(gt_unit)

        # Create side-by-side comparison
        combined_frames.append(np.hstack((gt_unit, pred_unit)))  # Horizontal stack

    # Save GIFs
    create_gif(prediction_frames, os.path.join(output_dir, "prediction.gif"))
    create_gif(ground_truth_frames, os.path.join(output_dir, "ground_truth.gif"))
    create_gif(combined_frames, os.path.join(output_dir, "comparison.gif"))

print(f"GIFs and frames saved in: {RESULTS_DIR}")


In [None]:
from skimage.metrics import structural_similarity as ssim
from sklearn.metrics import mean_squared_error
import cv2
import os

# Function to compute average MSE and SSIM from saved prediction and ground truth frames
def compute_metrics_from_saved_frames(results_dir):
    """
    Compute average MSE and SSIM from saved prediction and ground truth frames.

    Every sub-directory of ``results_dir`` is scanned for files whose names
    start with ``pred_frame`` and ``gt_frame``. The two name-sorted lists are
    paired element-wise and each pair is loaded as a grayscale image.

    Args:
        results_dir: Directory containing one sub-directory per sample.

    Returns:
        Tuple ``(avg_mse, avg_ssim)`` averaged over all compared frame pairs.
        The MSE is measured on the pixel scale of the loaded images.

    Raises:
        ValueError: If a frame cannot be read, the two frames of a pair differ
            in shape, or no frame pair is found at all.
    """
    total_mse = 0.0
    total_ssim = 0.0
    total_frames = 0

    # Iterate through each sample directory
    for sample_dir in os.listdir(results_dir):
        sample_path = os.path.join(results_dir, sample_dir)
        if not os.path.isdir(sample_path):
            continue

        # List the directory once; both lists share the same sort order
        file_names = sorted(os.listdir(sample_path))
        pred_frames = [os.path.join(sample_path, f) for f in file_names if f.startswith("pred_frame")]
        gt_frames = [os.path.join(sample_path, f) for f in file_names if f.startswith("gt_frame")]

        for pred_file, gt_file in zip(pred_frames, gt_frames):
            pred_frame = cv2.imread(pred_file, cv2.IMREAD_GRAYSCALE)
            gt_frame = cv2.imread(gt_file, cv2.IMREAD_GRAYSCALE)

            # cv2.imread signals failure by returning None rather than raising
            if pred_frame is None or gt_frame is None:
                raise ValueError(f"Could not read frame pair: {pred_file} and {gt_file}")
            if pred_frame.shape != gt_frame.shape:
                raise ValueError(f"Shape mismatch: {pred_file} and {gt_file}")

            # Cast to float first: differences of 8-bit unsigned pixels would
            # otherwise wrap around and produce a meaningless MSE
            total_mse += mean_squared_error(
                gt_frame.astype("float64").ravel(), pred_frame.astype("float64").ravel()
            )

            # Only the mean SSIM is needed, so the full similarity map is not requested
            total_ssim += ssim(gt_frame, pred_frame)

            total_frames += 1

    if total_frames == 0:
        raise ValueError(f"No pred_frame/gt_frame pairs found under {results_dir}")

    # Calculate averages
    return total_mse / total_frames, total_ssim / total_frames

# Calculate metrics over every frame pair saved under RESULTS_DIR
print("Calculating MSE and SSIM...")
avg_mse, avg_ssim = compute_metrics_from_saved_frames(RESULTS_DIR)

# Report both averages with four decimal places
print(f"Average MSE: {avg_mse:.4f}")
print(f"Average SSIM: {avg_ssim:.4f}")
