In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

# **Coursework 10 CNN-RNN**

In [None]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from keras.models import Model
from keras.layers import Input, Activation
from keras.layers import Conv2D, MaxPooling2D,TimeDistributed, Concatenate
from keras.layers import Dense, Dropout, Flatten
from keras.optimizers import Adam

from tensorflow.keras.utils import to_categorical


### Part 1 :: import the dataset and convert the videos into NumPy arrays


*   3 classes: waving, boxing, clapping
*   show 2 images for each class



In [None]:
import cv2
import numpy as np

# Video frame
def video_to_frames(video_path, color_mode=cv2.COLOR_BGR2GRAY, max_frames=10):
    """Decode the leading frames of a video into a NumPy array.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        color_mode: OpenCV colour-conversion code applied to every frame
            through ``cv2.cvtColor`` (default: BGR -> grayscale).
        max_frames: Maximum number of frames to keep. ``None`` or a negative
            value means "read until no more frames are returned".

    Returns:
        ``np.ndarray`` built from the list of converted frames, in reading
        order. The array is empty when no frame could be read.
    """
    unlimited = max_frames is None or max_frames < 0
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # Check the frame budget *before* reading, so no frame is decoded
        # only to be thrown away once the limit has been reached.
        while unlimited or len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.cvtColor(frame, color_mode))
    finally:
        # Always free the capture handle, even if a conversion fails.
        cap.release()
    return np.array(frames)

def plot_first_two_frames(frames):
    """Display ``frames[0]`` and ``frames[1]`` side by side in grayscale.

    When ``frames`` holds fewer than two entries along its first axis, a
    message is printed and nothing is plotted.
    """
    # Guard clause: two frames are required for the side-by-side figure
    if frames.shape[0] < 2:
        print("The array does not contain enough frames.")
        return

    # One row, two panels
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    for idx, title in enumerate(('First Frame', 'Second Frame')):
        panel = axes[idx]
        panel.imshow(frames[idx], cmap='gray')
        panel.set_title(title)
        panel.axis('off')  # Hide axes ticks

    # Render the figure
    plt.show()


# Example usage: one sample video per action class
video_path_wave = '/content/drive/MyDrive/MICRO-573/cw10/data/handwaving/person01_handwaving_d1_uncomp.avi'
video_path_clap = '/content/drive/MyDrive/MICRO-573/cw10/data/handclapping/person01_handclapping_d1_uncomp.avi'
video_path_box = '/content/drive/MyDrive/MICRO-573/cw10/data/boxing/person01_boxing_d1_uncomp.avi'

# Decode the three samples (wave, then box, then clap) with the default frame limit
frames_wave, frames_box, frames_clap = [
    video_to_frames(sample_path)
    for sample_path in (video_path_wave, video_path_box, video_path_clap)
]

# Output the shape to verify the decoding
print(frames_wave.shape)


In [None]:
# Show the first two frames of the sample video of every class
class_previews = (
    ('Plotting the first two frame of class : waving', frames_wave),
    ('Plotting the first two frame of class : boxing', frames_box),
    ('Plotting the first two frame of class : claping', frames_clap),
)
for banner, clip in class_previews:
    print(banner)
    plot_first_two_frames(clip)

## Part 2 :: CNN-RNN Training and testing


*   Implement the network depicted in the figure below, with 32 neurons in the hidden layer that comes just before the output layer.
* Use only two frames/sequences.
*    Report your test accuracy.



In [None]:
# Preprocess the dataset for the part 2

"""
This part will have focus on the creation of a train/test dataset as well as the labelling of the data.
"""

def load_data_from_folders(folder_list, labels,
                           frames_per_video=2,
                           max_frames=100):
    """Build fixed-length clips and their labels from folders of ``.avi`` videos.

    Every video is decoded with ``video_to_frames`` (at most ``max_frames``
    frames) and cut into consecutive, non-overlapping clips of
    ``frames_per_video`` frames. Each clip receives the label paired with the
    folder its video came from.

    Only complete clips are kept: the clip boundaries are derived from the
    number of frames that were actually decoded, so a video shorter than
    ``max_frames`` (or a frame count that is not a multiple of
    ``frames_per_video``) never yields a short or empty clip that would make
    the stacked array ragged.

    Args:
        folder_list: Directories to scan for ``.avi`` files.
        labels: One label per directory, paired positionally with
            ``folder_list``.
        frames_per_video: Number of frames in one clip (one sample).
        max_frames: Upper bound on the number of frames decoded per video.

    Returns:
        Tuple ``(data, label_data)`` of NumPy arrays: the stacked clips and the
        label of each clip, in matching order.
    """
    data = []
    label_data = []

    for folder, label in zip(folder_list, labels):
        # os.listdir gives no ordering guarantee; sort for a reproducible dataset order
        video_names = sorted(f for f in os.listdir(folder) if f.endswith('.avi'))
        for video_name in video_names:
            video_frames = video_to_frames(os.path.join(folder, video_name),
                                           max_frames=max_frames)
            n_frames = len(video_frames)
            # Stop at the last start index that still leaves a full clip
            for start in range(0, n_frames - frames_per_video + 1, frames_per_video):
                data.append(video_frames[start:start + frames_per_video])
                label_data.append(label)

    return np.array(data), np.array(label_data)

# Example folder paths and labels
folders = ['/content/drive/MyDrive/MICRO-573/cw10/data/handwaving',
           '/content/drive/MyDrive/MICRO-573/cw10/data/handclapping',
           '/content/drive/MyDrive/MICRO-573/cw10/data/boxing']
# Kept under its own name: the per-sample `labels` array created below must not
# overwrite the class ids, otherwise re-running this cell would pair the three
# folders with the first three sample labels instead of 0/1/2.
class_labels = [0, # 0=Waving,
                1, # 1=Clapping,
                2] # 2=Boxing

# Create the dataset
data, labels = load_data_from_folders(folders, class_labels)

# One-hot encode labels
labels_one_hot = to_categorical(labels)
# Add extra dimension for channel CNN
data_with_channel = np.expand_dims(data, axis=-1)
# Normalize the pixel values to [0, 1]; compute directly in float32 so the
# dataset takes half the memory of the default float64 result
data_with_channel_norm = np.divide(data_with_channel, 255.0, dtype=np.float32)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the clips for testing; the fixed random_state keeps the split repeatable.
# NOTE(review): the split is done per clip, so clips cut from the same video can end up
# in both the train and the test set — consider splitting per video to avoid leakage.
X_train, X_test, y_train, y_test = train_test_split(data_with_channel_norm, labels_one_hot, test_size=0.2, random_state=42)

print('Here is a summary of our dataset : \n')

print('the shape of the TRAIN labels : ',y_train.shape)

print('the shape of the TRAIN data (EXTRA DIM FOR CNN) : ',X_train.shape)
print('the shape of the TEST data : ',X_test.shape)

# Check the test-set size instead of announcing it unconditionally
min_test_samples = 200
n_test_samples = X_test.shape[0]
if n_test_samples > min_test_samples:
    print('\nWe have more than 200 test samples :: GOOD')
else:
    print(f'\nWARNING : only {n_test_samples} test samples (more than {min_test_samples} expected)')

In [None]:
# Visual sanity check on the first TRAINING sample (its two frames) ...
plot_first_two_frames(X_train[0])
# ... together with its one-hot encoded label
print('the label is : ',y_train[0])

In [None]:
# Pixel values of the first frame of the first training sample (channel axis dropped),
# displayed to check the normalisation applied earlier
X_train[0,0,:,:,0]

### **Part 2: model creation and training**

In [None]:
from tensorflow.keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation, Concatenate
from tensorflow.keras.models import Model
# Take the optimizer from the same Keras namespace as the layers and the model,
# so that objects from `keras` and `tensorflow.keras` are never mixed in one graph
from tensorflow.keras.optimizers import Adam


input_shape = (2, 120, 160, 1)  # (frames per sample, height, width, channels)
num_frames = input_shape[0]  # number of time steps handled by the TimeDistributed layers
num_classes = 3  # Number of classes to predict
dropout_ratio = 0.5  # Dropout rate

max_epochs = 30  # maximum number of epochs to be iterated
batch_size = 8   # batch size for the training data set
batch_shuffle = True   # shuffle the training data prior to batching before each epoch
num_hidden_nodes = 32 # number of nodes in hidden fully connected layer

loss = 'categorical_crossentropy'  # loss (cost) function to be minimised by the optimiser
metrics = ['categorical_accuracy']  # network accuracy metric to be determined after each epoch

optimizer_type = Adam(learning_rate=0.0001)  # optimisation algorithm: Adam
validtrain_split_ratio = 0.2  # % of the seen dataset to be put aside for validation, rest is for training


# MODEL STARTING POINT
inputs = Input(shape=input_shape)

# Convolutional and pooling layers (the same weights are applied to every frame)
down_01 = TimeDistributed(Conv2D(filters=4, kernel_size=(3, 3), strides=(1, 1), padding='same'))(inputs)
down_01 = TimeDistributed(Activation('relu'))(down_01)
down_01_pool = TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))(down_01)

down_02 = TimeDistributed(Conv2D(filters=8, kernel_size=(3, 3), strides=(1, 1), padding='same'))(down_01_pool)
down_02 = TimeDistributed(Activation('relu'))(down_02)
down_02_pool = TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))(down_02)

down_03 = TimeDistributed(Conv2D(filters=12, kernel_size=(3, 3), strides=(1, 1), padding='same'))(down_02_pool)
down_03 = TimeDistributed(Activation('relu'))(down_03)
down_03_pool = TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))(down_03)

# Flatten and dense layers
flatten = TimeDistributed(Flatten())(down_03_pool)
dense_01 = TimeDistributed(Dropout(dropout_ratio))(flatten)
dense_01 = TimeDistributed(Dense(num_hidden_nodes))(dense_01)
dense_01 = TimeDistributed(Activation('sigmoid'))(dense_01)

# Concatenation of the per-frame features from the last time-distributed dense layer.
# Built from `num_frames` so the same code follows `input_shape` when more frames per
# sample are used (Concatenate needs at least two inputs, i.e. num_frames >= 2).
concat = Concatenate(axis=1)([dense_01[:, t, :] for t in range(num_frames)])

# Final dense layers for classification
dense_02 = Dense(num_classes)(concat)
outputs = Activation('softmax')(dense_02)
# MODEL ENDING POINT


# Create the model
model = Model(inputs=inputs, outputs=outputs)

In [None]:
# Compile model
model.compile(optimizer=optimizer_type, loss=loss, metrics=metrics)
# Display a summary of the compiled neural network.
# summary() prints the table itself and returns None, so wrapping it in print()
# would add a stray "None" line to the output.
model.summary()
print()

In [None]:
# Announce the start of the training run
print('* Training the compiled network *')
print()

# Fit on the training split; part of it is held out for validation
# (line continuations are not needed inside the call parentheses)
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=max_epochs,
    validation_split=validtrain_split_ratio,
    shuffle=batch_shuffle,
)

# Announce the end of the training run
print()
print('Training completed')
print()

### **Part 3: optimize the model to get better performance**

There are multiple ways to optimize a CNN-RNN. The ones we are going to focus on are the following:



*   Use more than 2 frames per sample to capture more information
*   Adding neurons to the network
*   Concatenate with the two previous outputs

