In [None]:
import datetime as dt
import os

import numpy as np
import tensorflow as tf

In [None]:
from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

In [None]:
# Root folder of the UCF11 dataset (one sub-folder per action category).
data_dir = '/kaggle/input/ucf11-action-recognize/UCF11_updated_mpg'
# Only directories count as categories: load_groups() ignores plain files too,
# so a stray file in data_dir must not inflate num_classes. Sorted so the
# category order does not depend on the filesystem's listing order.
categories = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
num_classes = len(categories)

In [None]:
# Number of frames sampled from every video; videos yielding fewer are dropped.
SEQUENCE_LENGTH = 8
# Target frame size handed to cv2.resize, which interprets it as (width, height).
DIM=(64,64)

def load_groups(input_folder):
    """Collect every group folder below ``input_folder`` with its label.

    The expected layout is ``input_folder/<label>/<group>/``. Entries that are
    not directories are skipped at both levels, and so is any entry named
    ``Annotation``.

    Args:
        input_folder: Path of the dataset root directory.

    Returns:
        A list of ``[group_folder_path, label_folder_name]`` pairs, ordered by
        label name and then by group folder name.
    """
    groups = []
    for label_folder in sorted(os.listdir(input_folder)):
        label_folder_path = os.path.join(input_folder, label_folder)
        if not os.path.isdir(label_folder_path):
            continue
        # Sorted so the result does not depend on the filesystem's listing order.
        for group_folder in sorted(os.listdir(label_folder_path)):
            if group_folder == 'Annotation':
                continue
            group_folder_path = os.path.join(label_folder_path, group_folder)
            # A stray file here would later fail when its "videos" are listed.
            if os.path.isdir(group_folder_path):
                groups.append([group_folder_path, label_folder])
    return groups
def frames_extraction(video_path):
    """Sample up to SEQUENCE_LENGTH evenly spaced frames from a video file.

    Each sampled frame is resized to DIM and divided by 255. Reading stops at
    the first frame that cannot be decoded, so the returned list may be shorter
    than SEQUENCE_LENGTH (or empty); callers are expected to check its length.

    Args:
        video_path: Path of the video file to read.

    Returns:
        A list of resized, scaled frames in sampling order.
    """
    # NOTE(review): `cv2` is not imported in the visible cells - confirm an
    # `import cv2` exists before this cell runs.
    frames_list = []
    video_reader = cv2.VideoCapture(video_path)
    try:
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        # Step between sampled frames; at least 1 so very short videos still advance.
        skip_frames_window = max(int(video_frames_count / SEQUENCE_LENGTH), 1)

        for frame_counter in range(SEQUENCE_LENGTH):
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
            success, frame = video_reader.read()
            if not success:
                break
            resized_frame = cv2.resize(frame, DIM)
            normalized_frame = resized_frame / 255
            frames_list.append(normalized_frame)
    finally:
        # Release the capture handle even if decoding or resizing raises.
        video_reader.release()
    return frames_list

def split_into_train_test(groups):
    """Load every video of every group and split the clips into train/test sets.

    Videos that do not yield exactly SEQUENCE_LENGTH frames are skipped. Labels
    are mapped to integer indices in order of first appearance and then one-hot
    encoded.

    Args:
        groups: Iterable of ``[group_folder_path, label]`` pairs, as returned
            by load_groups().

    Returns:
        A tuple ``(train_data, test_data, train_labels, test_labels,
        original_labels, encoded_labels)`` where ``original_labels`` is the list
        of labels of every kept clip and ``encoded_labels`` is the one-hot
        matrix for all kept clips (before splitting).

    Raises:
        ValueError: If no video produced a full frame sequence.
    """
    # NOTE(review): `tqdm` is not imported in the visible cells - confirm an
    # import exists before this cell runs.
    data = []
    labels = []
    original_labels = []
    label_to_index = {}  # label -> integer class index, in order of first appearance

    for group in tqdm(groups):
        video_files = os.listdir(group[0])
        np.random.shuffle(video_files)
        for video_file in video_files:
            video_file_path = os.path.abspath(os.path.join(group[0], video_file))
            frames = frames_extraction(video_file_path)
            if len(frames) != SEQUENCE_LENGTH:
                continue
            label = group[1]
            data.append(frames)
            original_labels.append(label)
            if label not in label_to_index:
                label_to_index[label] = len(label_to_index)
            labels.append(label_to_index[label])

    # Everything below must run once, after ALL groups are loaded; it previously
    # sat inside the loop and returned after the first group.
    if not data:
        raise ValueError("no video yielded a full frame sequence; nothing to split")

    num_classes = len(label_to_index)
    data = np.asarray(data)
    labels = np.asarray(labels)

    encoded_labels = to_categorical(labels, num_classes=num_classes)
    # NOTE(review): the split is per clip, so clips from the same group folder
    # can end up in both train and test - confirm that is acceptable.
    train_data, test_data, train_labels, test_labels = train_test_split(
        data,
        encoded_labels,
        test_size=0.2,
        random_state=19,
        stratify=encoded_labels,
    )
    return train_data, test_data, train_labels, test_labels, original_labels, encoded_labels


In [None]:
# Collect the [group_folder_path, label] pairs found under data_dir.
groups = load_groups(data_dir)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, Activation, SpatialDropout1D, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import Model

In [None]:
def Inception3D(input_tensor):
    """Run ``input_tensor`` through a four-branch 3D Inception block.

    The branches are: a 1x1x1 convolution; a 1x1x1 reduction followed by a
    3x3x3 convolution; a 1x1x1 reduction followed by a 5x5x5 convolution; and a
    3x3x3 max-pool followed by a 1x1x1 projection. All layers use 'same'
    padding, so the branch outputs can be concatenated along the last axis
    (64 + 128 + 32 + 32 filters).

    Args:
        input_tensor: Tensor accepted by Conv3D / MaxPooling3D.

    Returns:
        The concatenation of the four branch outputs along the last axis.
    """
    def relu_conv(filters, kernel, tensor):
        # Same-padded Conv3D with ReLU, applied to `tensor`.
        return Conv3D(filters, kernel, padding='same', activation='relu')(tensor)

    # Branch 1: plain pointwise convolution.
    branch_pointwise = relu_conv(64, (1, 1, 1), input_tensor)

    # Branch 2: pointwise reduction, then 3x3x3 convolution.
    branch_3x3 = relu_conv(128, (3, 3, 3), relu_conv(96, (1, 1, 1), input_tensor))

    # Branch 3: pointwise reduction, then 5x5x5 convolution.
    branch_5x5 = relu_conv(32, (5, 5, 5), relu_conv(16, (1, 1, 1), input_tensor))

    # Branch 4: stride-1 max-pool, then pointwise projection.
    pooled = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same')(input_tensor)
    branch_pool = relu_conv(32, (1, 1, 1), pooled)

    return tf.keras.layers.concatenate(
        [branch_pointwise, branch_3x3, branch_5x5, branch_pool],
        axis=-1,
    )

In [None]:
class TemporalConvNet(Layer):
    """Stack of causal, dilated Conv1D layers applied one after another.

    One Conv1D (ReLU activation, 'he_normal' initializer, causal padding) is
    created per entry of ``dilations`` and the layers are applied in that order.

    Args:
        num_filters: Number of filters of every Conv1D layer.
        kernel_size: Kernel size of every Conv1D layer.
        dilations: Iterable of dilation rates, one Conv1D layer per rate.
        **kwargs: Standard Keras ``Layer`` arguments (e.g. ``name``).
    """

    def __init__(self, num_filters, kernel_size, dilations, **kwargs):
        super(TemporalConvNet, self).__init__(**kwargs)
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.dilations = dilations
        # Sub-layers are created once here so their weights are tracked by the layer.
        self.conv_layers = [
            Conv1D(filters=num_filters,
                   kernel_size=kernel_size,
                   dilation_rate=dilation,
                   padding='causal',
                   activation='relu',
                   kernel_initializer='he_normal')
            for dilation in dilations
        ]

    def call(self, inputs):
        """Apply the convolution stack to ``inputs`` and return the result."""
        x = inputs
        for layer in self.conv_layers:
            # Each Conv1D already applies ReLU; a second ReLU would be a no-op,
            # so no extra Activation layer is built on every call.
            x = layer(x)
        return x

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super(TemporalConvNet, self).get_config()
        config.update({
            'num_filters': self.num_filters,
            'kernel_size': self.kernel_size,
            'dilations': list(self.dilations),
        })
        return config

In [None]:
# Hyper-parameters passed to both TemporalConvNet layers of the model.
num_filters = 32
kernel_size = 3
# One Conv1D layer is created per dilation rate, applied in this order.
dilations = [1, 2, 4]

In [None]:
# Frame geometry and class count were referenced here but never defined in the
# notebook. cv2.resize(frame, DIM) treats DIM as (width, height), so the frames
# produced by frames_extraction() have shape (height, width, 3).
IMAGE_WIDTH, IMAGE_HEIGHT = DIM
NUM_CLASSES = num_classes

input_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)

input_tensor = Input(shape=input_shape)

# --- I3D-style branch: strided stem, two conv stages, two Inception blocks ---
I3D_conv1 = Conv3D(64, (7, 7, 7), strides=(2, 2, 2), padding='same', activation='relu')(input_tensor)
I3D_maxpool1 = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same')(I3D_conv1)
dropout1 = tf.keras.layers.Dropout(0.4)(I3D_maxpool1)

I3D_conv2 = Conv3D(64, (1, 1, 1), padding='same', activation='relu')(dropout1)
I3D_conv3 = Conv3D(192, (3, 3, 3), padding='same', activation='relu')(I3D_conv2)
I3D_maxpool2 = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same')(I3D_conv3)
dropout2 = tf.keras.layers.Dropout(0.4)(I3D_maxpool2)

I3D_inception3a = Inception3D(dropout2)
I3D_inception3b = Inception3D(I3D_inception3a)
I3D_gap = GlobalAveragePooling3D()(I3D_inception3b)

# --- TCN branch, fed directly from the raw input tensor ---
# NOTE(review): the Conv1D layers inside TemporalConvNet receive the 5-D video
# tensor here, so they presumably convolve along the second-to-last (image
# width) axis rather than along time - confirm this is the intended behavior.
tcn_layer_1 = TemporalConvNet(num_filters, kernel_size, dilations)(input_tensor)
tcn_layer_1 = tf.keras.layers.BatchNormalization()(tcn_layer_1)
tcn_layer_2 = TemporalConvNet(num_filters, kernel_size, dilations)(tcn_layer_1)
tcn_layer_2 = tf.keras.layers.BatchNormalization()(tcn_layer_2)
tcn_gap = GlobalAveragePooling3D()(tcn_layer_2)

# --- Fusion and classification head ---
combined_features = tf.keras.layers.concatenate([I3D_gap, tcn_gap])


fc1 = Dense(512, activation='relu')(combined_features)
output = Dense(NUM_CLASSES, activation='softmax')(fc1)

model_com = tf.keras.Model(inputs=input_tensor, outputs=output)

model_com.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model_com.summary()

In [None]:
# Extract frames for every video in `groups` and build the train/test split.
train_data, test_data, train_labels, test_labels, original_labels, encoded_labels = split_into_train_test(groups)

In [None]:
# Train on the arrays returned by split_into_train_test(). A slice of the
# training data is held out for validation so that the history also records
# `val_loss` / `val_accuracy`, which the plotting cells read; the test split
# stays untouched for final evaluation.
training_history = model_com.fit(
    x=train_data,
    y=train_labels,
    validation_split=0.2,
    epochs=90,
)

In [None]:
# Both panels are drawn in ONE cell on ONE figure: with the inline backend a
# figure is rendered at the end of the cell that created it, so a subplot added
# from a later cell would land on a fresh, separate figure.
# NOTE(review): `plt` (matplotlib.pyplot) is not imported in the visible cells -
# confirm an import exists before this cell runs.
history = training_history.history

fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 7))
fig.suptitle("Model performance")

for ax, metric, y_label, title in (
    (loss_ax, "loss", "Loss", 'Training and Validation Loss'),
    (accuracy_ax, "accuracy", "Accuracy", 'Training and Validation Accuracy'),
):
    ax.plot(history[metric], label=f"training {metric}")
    # Validation curves exist only when fit() was given validation data.
    validation_key = f"val_{metric}"
    if validation_key in history:
        ax.plot(history[validation_key], label=f"validation {metric}")
    ax.set_xlabel("Epochs")
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()

plt.show()