In [45]:
# install librarires
import os
import tensorflow as tf
import numpy as np
import random

In [46]:
# global variables
# set dataset directories


DATASET_DIR = "/kaggle/input/msrdailyactivity3d-rgb-videos-only"
ALL_CLASS_NAMES = os.listdir(DATASET_DIR)

# Global constant variables -> 
NO_OF_CLASSES = 12
CLASSES_LIST = ALL_CLASS_NAMES[:NO_OF_CLASSES]

# Model Configuration
IMAGE_HEIGHT, IMAGE_WIDTH = 128, 128
SEQUENCE_LENGTH = 15

# set drop out rate
DROPOUT_RATE = 0.3

# set datas
MAX_VIDEO_PER_CLASS = 20

# split dataset
TEST_SIZE = 0.20

# model fit parameters
EPOCHS = 50
BATCH_SIZE = 16
VALIDATION_SPLIT = 0.20


# give a name of the model to save
MODEL_NAME = "Xception"

print(f"There are total {len(ALL_CLASS_NAMES)} classes, selected {NO_OF_CLASSES} classes")
print(f"Setting {MAX_VIDEO_PER_CLASS}/class to train the model.")
print(f"Image size {IMAGE_HEIGHT}x{IMAGE_WIDTH} with {SEQUENCE_LENGTH} sequence length")
print(f"Dropout rate: {DROPOUT_RATE}")
print(f"Train-Test split ratio {int((1-TEST_SIZE)*100)}/{int(TEST_SIZE*100)}")
print(f"Validation data from Train set {VALIDATION_SPLIT*100}%")

There are total 16 classes, selected 12 classes
Setting 20/class to train the model.
Image size 128x128 with 15 sequence length
Dropout rate: 0.3
Train-Test split ratio 80/20
Validation data from Train set 20.0%


In [47]:
# set seeed to get similar values
seed_constant = 27
np.random.seed(seed_constant)
random.seed(seed_constant)
tf.random.set_seed(seed_constant)

In [48]:
import cv2
from concurrent.futures import ThreadPoolExecutor

def resize_and_normalize_frame(frame, image_height, image_width):
    try:
        resized_frame = cv2.resize(frame, (image_width, image_height), interpolation=cv2.INTER_LINEAR)
        normalized_frame = resized_frame / 255.0
        return normalized_frame
    except Exception as e:
        print(f"Error processing frame: {e}")
        return None

def frames_extraction(video_path, 
                      sequence_length=SEQUENCE_LENGTH, 
                      image_height=IMAGE_HEIGHT, 
                      image_width=IMAGE_WIDTH):
    # Declare a list to store video frames
    frames_list = []

    # Check if video file exists
    if not os.path.exists(video_path):
        print(f"Error: Video file not found at {video_path}")
        return None

    # Read the video file using VideoCapture with optimized settings
    video_reader = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)

    # Check if the video was opened successfully
    if not video_reader.isOpened():
        print(f"Error: Could not open video file {video_path}")
        video_reader.release()
        return None

    # Get the total number of frames in the video
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    # Ensure the video has enough frames
    if video_frames_count < sequence_length:
        print(f"Warning: Video {video_path} has only {video_frames_count} frames, less than required {sequence_length}")
        video_reader.release()
        return None

    # Calculate the interval after which frames will be sampled
    skip_frames_window = max(int(video_frames_count / sequence_length), 1)

    # Pre-allocate frame indices to extract
    frame_indices = [i * skip_frames_window for i in range(sequence_length)]

    # Read and process frames in parallel
    frames = []
    for idx in frame_indices:
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, idx)
        success, frame = video_reader.read()
        if not success or frame is None:
            print(f"Warning: Failed to read frame at index {idx} from {video_path}")
            break
        frames.append(frame)

    # Release the VideoCapture object early
    video_reader.release()

    # Ensure the correct number of frames is read
    if len(frames) != sequence_length:
        print(f"Warning: Read {len(frames)} frames instead of {sequence_length} from {video_path}")
        return None

    # Process frames in parallel using ThreadPoolExecutor
    with ThreadPoolExecutor() as executor:
        processed_frames = list(executor.map(
            lambda f: resize_and_normalize_frame(f, image_height, image_width), 
            frames
        ))

    # Check for any failed frame processing
    if any(f is None for f in processed_frames):
        print(f"Warning: Some frames failed to process in {video_path}")
        return None

    # Convert to NumPy array
    frames_array = np.array(processed_frames, dtype=np.float32)

    return frames_array

In [49]:
# RUN create dataset function definition
def create_dataset(dataset_dir,
                   classes_list, 
                   sequence_length=SEQUENCE_LENGTH, 
                   image_height=IMAGE_HEIGHT, 
                   image_width=IMAGE_WIDTH, 
                   max_videos_per_class=None,
                   augmentations=False
                  ):
    
    # Initialize lists to store features, labels, and video file paths
    features = []
    labels = []

    # Check if dataset directory exists
    if not os.path.exists(dataset_dir):
        raise FileNotFoundError(f"Dataset directory not found: {dataset_dir}")

    # Iterate through all classes in the classes list
    for class_index, class_name in enumerate(classes_list):
        class_path = os.path.join(dataset_dir, class_name)
        
        # Check if class directory exists
        if not os.path.exists(class_path):
            print(f"Warning: Class directory not found: {class_path}")
            continue

        print(f'Extracting Data of Class: {class_name}')

        # Get the list of video files in the class directory
        files_list = os.listdir(class_path)

        # Limit the number of videos if specified
        if max_videos_per_class is not None:
            files_list = files_list[:max_videos_per_class]

        # Iterate through all video files
        for file_name in files_list:
            video_file_path = os.path.join(class_path, file_name)

            # Extract frames using the updated frames_extraction function
            frames = frames_extraction(video_file_path, sequence_length, image_height, image_width)

            # Skip videos where frame extraction failed
            if frames is None:
                print(f"Skipping video {video_file_path} due to frame extraction failure")
                continue

            # Append the data to respective lists
            features.append(frames)
            labels.append(class_index)

    # Convert lists to numpy arrays
    if not features:
        raise ValueError("No valid videos were processed. Check dataset or parameters.")
    features = np.asarray(features)
    labels = np.array(labels)

    print(f"Dataset created with {len(features)} videos")
    print(f"Features shape: {features.shape}")
    print(f"Labels shape: {labels.shape}")

    return features, labels

In [50]:
# RUN Create the dataset with explicit parameters
try:
    features, labels = create_dataset(
        dataset_dir=DATASET_DIR,
        classes_list=CLASSES_LIST,
        sequence_length=SEQUENCE_LENGTH,
        image_height=IMAGE_HEIGHT,
        image_width=IMAGE_WIDTH,
        # Limit to 10 videos per class to manage memory
        augmentations=True,
        max_videos_per_class=MAX_VIDEO_PER_CLASS
    )
except FileNotFoundError as e:
    print(f"Error: {e}")
except ValueError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")

features.shape, labels.shape

Extracting Data of Class: write on a paper
Extracting Data of Class: use laptop
Extracting Data of Class: read book
Extracting Data of Class: sit still
Extracting Data of Class: drink
Extracting Data of Class: sit down
Extracting Data of Class: use vacuum cleaner
Extracting Data of Class: eat
Extracting Data of Class: play guitar
Extracting Data of Class: lie down on sofa
Extracting Data of Class: stand up
Extracting Data of Class: toss paper
Dataset created with 240 videos
Features shape: (240, 15, 128, 128, 3)
Labels shape: (240,)


((240, 15, 128, 128, 3), (240,))

In [51]:
from tensorflow.keras.utils import Sequence
import numpy as np

class VideoDataGenerator(Sequence):
    def __init__(self, video_frames, labels, batch_size):
        self.video_frames = video_frames  # List of sequences, shape: (sequence_length, height, width, channels)
        self.labels = labels  # List or array of labels
        self.batch_size = batch_size
        self.indices = np.arange(len(self.video_frames))  # For shuffling

        # Convert inputs to NumPy arrays, assuming frames are already normalized to [0, 1]
        self.video_frames = [np.array(seq, dtype=np.float32) for seq in self.video_frames]
        self.labels = np.array(self.labels)

    def __len__(self):
        # Return the number of batches per epoch
        return int(np.ceil(len(self.video_frames) / self.batch_size))

    def __getitem__(self, idx):
        # Get batch indices
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
    
        # Handle case where batch_indices is empty
        if len(batch_indices) == 0:
            return np.array([], dtype=np.float32), np.array([], dtype=np.float32)
    
        # Select batch data
        batch_video_frames = [self.video_frames[i] for i in batch_indices]
        batch_labels = self.labels[batch_indices]
    
        # Initialize lists to store frames
        all_frames = []
        all_labels = []
        
        # Process each sequence in the batch
        for i, sequence in enumerate(batch_video_frames):
            # Original frames only
            all_frames.append(sequence)
            all_labels.append(batch_labels[i])
    
        # Convert to numpy arrays
        all_frames = np.array(all_frames, dtype=np.float32)
        all_labels = np.array(all_labels)
    
        return all_frames, all_labels

    def on_epoch_end(self):
        # Shuffle indices at the end of each epoch
        np.random.shuffle(self.indices)

    def as_dataset(self):
        def generator():
            for idx in range(len(self)):
                frames, labels = self[idx]
                # Skip empty batches
                if frames.size == 0:
                    continue
                # Yield batches with shape (batch_size, sequence_length, height, width, channels)
                yield frames, labels

        # Create a tf.data.Dataset
        dataset = tf.data.Dataset.from_generator(
            generator,
            output_types=(tf.float32, tf.float32),
            output_shapes=(
                (None, self.video_frames[0].shape[0], self.video_frames[0].shape[1], 
                 self.video_frames[0].shape[2], self.video_frames[0].shape[3]),
                (None,) + self.labels.shape[1:]
            )
        )
        dataset = dataset.prefetch(tf.data.AUTOTUNE)
        return dataset

In [52]:
from tensorflow.keras.utils import to_categorical
# RUN Using Keras's to_categorical method to convert labels into one-hot-encoded vectors
one_hot_encoded_labels = to_categorical(labels)

In [53]:
# delele to free memory
#del features_train, labels_train, train_video_frames, train_labels, val_video_frames, val_labels, augmented_features, augmented_labels

In [54]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import TimeDistributed, Flatten, LSTM, Dropout, Dense

def create_xception_lstm_model(sequence_length, image_height, image_width, classes_list=None):
    if classes_list is None:
        raise ValueError("classes_list must be provided to define the output layer size")

    try:
        # Load Xception model with pre-trained ImageNet weights
        print("Loading Xception base model...")
        xception = Xception(
            weights='imagenet',
            include_top=False,
            input_shape=(image_height, image_width, 3),
            name="Xception"
        )
        # Freeze Xception layers
        for layer in xception.layers[:-20]:  # Unfreeze last 20 layers
            layer.trainable = False
        for layer in xception.layers[-20:]:
            layer.trainable = True
        # Define the Sequential model
        model = Sequential([
            TimeDistributed(
                xception,
                input_shape=(sequence_length, image_height, image_width, 3),
                name="TimeDistributed_Xception"
            ),
            TimeDistributed(GlobalAveragePooling2D(), name="global_avg_pooling"),  # Reduces to (sequence_length, 2048)
            LSTM(256, activation="tanh", return_sequences=False, name="LSTM"),
            Dropout(DROPOUT_RATE, name="Dropout"),
            Dense(len(classes_list), activation="softmax", name="Output")
        ])
        
        # Print model summary
        print("Model architecture created successfully!")
        model.summary()

        return model

    except Exception as e:
        print(f"Error creating model: {e}")
        return None

In [55]:
# donwload model weights
from tensorflow.keras.applications import Xception
print("Pre-loading Xception weights...")
base_model = Xception(weights='imagenet', include_top=False, input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3))
print("Weights loaded successfully!")

Pre-loading Xception weights...
Weights loaded successfully!


In [56]:
# Clear previous session to free memory
tf.keras.backend.clear_session()

In [57]:
# Create the model
xlstm_model = create_xception_lstm_model(
    sequence_length=SEQUENCE_LENGTH,
    image_height=IMAGE_HEIGHT,
    image_width=IMAGE_WIDTH,
    classes_list=CLASSES_LIST
)

# Check if model was created successfully
if xlstm_model is None:
    print("Failed to create model. Check error messages above.")
else:
    print("Model Created Successfully!")

Loading Xception base model...
Model architecture created successfully!


  super().__init__(**kwargs)


Model Created Successfully!


In [58]:
# Plot the structure of the contructed model.
from tensorflow.keras.utils import plot_model

plot_model(xlstm_model, to_file = f'{MODEL_NAME}_model_Plot.png', show_shapes = True, show_layer_names = True)

print(f"{MODEL_NAME} Model Plot saved successfully...")

Xception Model Plot saved successfully...


In [59]:
# RUN Split the Data into Train ( 75% ) and Test Set ( 25% ).
from sklearn.model_selection import train_test_split
features_train, features_test, labels_train, labels_test = train_test_split(features,
                                                                            one_hot_encoded_labels,
                                                                            test_size = TEST_SIZE,
                                                                            shuffle = True,
                                                                            random_state = seed_constant)

features_train.shape, features_test.shape

((192, 15, 128, 128, 3), (48, 15, 128, 128, 3))

In [60]:
# Assuming features_train and labels_train are defined
train_video_frames, val_video_frames = features_train[:int(0.8 * len(features_train))], features_train[int(0.8 * len(features_train)):]
train_labels, val_labels = labels_train[:int(0.8 * len(labels_train))], labels_train[int(0.8 * len(labels_train)):]

train_video_frames.shape, val_video_frames.shape

((153, 15, 128, 128, 3), (39, 15, 128, 128, 3))

In [61]:
# Create the training and validation generators
train_gen = VideoDataGenerator(
    video_frames=train_video_frames, 
    labels=train_labels, 
    batch_size=BATCH_SIZE, 
)

val_gen = VideoDataGenerator(
    video_frames=val_video_frames, 
    labels=val_labels, 
    batch_size=BATCH_SIZE
)

In [62]:
# Create an Instance of Early Stopping Callback
from tensorflow.keras.callbacks import EarlyStopping

early_stopping_callback = EarlyStopping(monitor = 'val_loss', 
                                        patience = 7, 
                                        mode = 'min', 
                                        restore_best_weights = True)

# Compile the model and specify loss function, optimizer and metrics values to the model
xlstm_model.compile(loss = 'categorical_crossentropy', 
                    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                    metrics = ["accuracy"])


In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

lr_scheduler = ReduceLROnPlateau(monitor='val_loss', 
                                 factor=0.5, 
                                 patience=3, 
                                 min_lr=1e-6)


xlstm_model.fit(
    train_gen.as_dataset(),
    epochs=EPOCHS,
    validation_data=val_gen.as_dataset(),
    callbacks=[early_stopping_callback, lr_scheduler]
)

Epoch 1/50
     10/Unknown [1m193s[0m 9s/step - accuracy: 0.1145 - loss: 2.5272



[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 14s/step - accuracy: 0.1160 - loss: 2.5277 - val_accuracy: 0.1538 - val_loss: 2.4316 - learning_rate: 1.0000e-04
Epoch 2/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 11s/step - accuracy: 0.5603 - loss: 2.0252 - val_accuracy: 0.2308 - val_loss: 2.2681 - learning_rate: 1.0000e-04
Epoch 3/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 11s/step - accuracy: 0.7423 - loss: 1.7201 - val_accuracy: 0.2564 - val_loss: 2.2146 - learning_rate: 1.0000e-04
Epoch 4/50
[1m 5/10[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m45s[0m 9s/step - accuracy: 0.8512 - loss: 1.3503

In [None]:
# previous code
model_evaluation_history = xlstm_model.evaluate(features_test, labels_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Create test generator with batch_size matching test set size
test_gen = VideoDataGenerator(
    video_frames=features_test,
    labels=labels_test,
    batch_size=4,  # Process all test samples at once
)

# Get predictions
predictions = xlstm_model.predict(test_gen.as_dataset())
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(labels_test, axis=1)

# Verify shapes
print(f"True classes shape: {true_classes.shape}")
print(f"Predicted classes shape: {predicted_classes.shape}")
assert len(true_classes) == len(predicted_classes), "Sample counts do not match!"

# Generate classification report
print("Classification Report:")
print(classification_report(true_classes, predicted_classes, target_names=CLASSES_LIST))

# Plot confusion matrix
cm = confusion_matrix(true_classes, predicted_classes)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=CLASSES_LIST, yticklabels=CLASSES_LIST)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()