In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and echo the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing Libraries


In [None]:
import tensorflow as tf
import keras
import cv2
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, ConvLSTM3D,AveragePooling3D, MaxPooling3D
from tensorflow.keras.layers import Bidirectional, ConvLSTM2D,AveragePooling2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Flatten, TimeDistributed, ZeroPadding3D,Dropout
from tensorflow.keras.optimizers import Adam,Adagrad,Adadelta,SGD
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2,l1,l1_l2
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical,plot_model
%matplotlib inline

In [None]:
# Root folder of the dataset; load_groups uses the name of each sub-folder as a class label.
data_dir = '/kaggle/input/ucf11-action-recognize/UCF11_updated_mpg'
# Keep only directories (a stray file must not be counted as a class) and sort
# them, because os.listdir returns entries in arbitrary order. load_groups
# applies the same isdir/sorted handling, so num_classes agrees with the labels
# it produces.
categories = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
num_classes = len(categories)


In [None]:
# Display the class folder names found in data_dir.
categories

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm


# Number of frames sampled from every video.
SEQUENCE_LENGTH = 15
# Target size handed to cv2.resize for every sampled frame.
DIM = (64, 64)

def load_groups(input_folder):
    """Collect every video-group folder under ``input_folder`` with its class label.

    ``input_folder`` is expected to contain one sub-folder per class, and each
    class folder one sub-folder per group of videos. Entries named
    ``'Annotation'`` and anything that is not a directory are skipped.

    Args:
        input_folder: Path of the dataset root directory.

    Returns:
        A list of ``[group_folder_path, label]`` pairs, where ``label`` is the
        name of the class folder. Pairs are ordered by class name, then by
        group folder name, so the result is the same on every run.
    """
    groups = []
    for label_folder in sorted(os.listdir(input_folder)):
        label_folder_path = os.path.join(input_folder, label_folder)
        if not os.path.isdir(label_folder_path):
            continue
        # Sort the groups too: os.listdir returns entries in arbitrary order.
        for group_folder in sorted(os.listdir(label_folder_path)):
            group_folder_path = os.path.join(label_folder_path, group_folder)
            # The returned paths are meant to be listed as directories, so plain
            # files are left out along with the 'Annotation' entry.
            if group_folder == 'Annotation' or not os.path.isdir(group_folder_path):
                continue
            groups.append([group_folder_path, label_folder])
    return groups

def frames_extraction(video_path):
    """Sample up to ``SEQUENCE_LENGTH`` evenly spaced frames from a video file.

    Every sampled frame is resized with ``cv2.resize(frame, DIM)`` and divided
    by 255.

    Args:
        video_path: Path of the video file to read.

    Returns:
        A list of the processed frames. Reading stops at the first frame that
        cannot be read, so the list may hold fewer than ``SEQUENCE_LENGTH``
        entries (possibly none); callers should check its length.
    """
    frames_list = []
    video_reader = cv2.VideoCapture(video_path)
    try:
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        # Distance between sampled frames; at least 1 so short videos still advance.
        skip_frames_window = max(video_frames_count // SEQUENCE_LENGTH, 1)

        for frame_counter in range(SEQUENCE_LENGTH):
            # Jump straight to the next sampling position before reading.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
            success, frame = video_reader.read()
            if not success:
                break
            resized_frame = cv2.resize(frame, DIM)
            frames_list.append(resized_frame / 255)
    finally:
        # Release the capture handle even when processing a frame raises.
        video_reader.release()
    return frames_list

def split_into_train_test(groups):
    """Load the videos of every group and split the samples into train and test sets.

    Only videos for which ``frames_extraction`` returns exactly
    ``SEQUENCE_LENGTH`` frames are kept. Class indices are assigned in the
    order in which classes first contribute a kept video.

    Args:
        groups: List of ``[group_folder_path, label]`` pairs, as returned by
            ``load_groups``.

    Returns:
        A tuple ``(train_data, test_data, train_labels, test_labels,
        label_to_index, index_to_label)``: the frame sequences of both splits,
        their one-hot encoded labels, and the two dicts mapping class names to
        integer indices and back.

    Raises:
        ValueError: If no video yields a full sequence of frames.
    """
    data = []
    labels = []
    label_to_index = {}  # class name -> integer index
    index_to_label = {}  # integer index -> class name

    for group_path, label in tqdm(groups):
        video_files = os.listdir(group_path)
        np.random.shuffle(video_files)
        for video_file in video_files:
            video_file_path = os.path.abspath(os.path.join(group_path, video_file))
            frames = frames_extraction(video_file_path)
            # Videos that could not deliver every requested frame are dropped.
            if len(frames) != SEQUENCE_LENGTH:
                continue
            data.append(frames)
            if label not in label_to_index:
                index = len(label_to_index)
                label_to_index[label] = index
                index_to_label[index] = label
            labels.append(label_to_index[label])

    if not data:
        raise ValueError("No video produced a full sequence of frames; nothing to split.")

    class_count = len(label_to_index)
    data = np.asarray(data)
    # Convert the list of integer class indices (not the last class name).
    labels = np.asarray(labels)

    encoded_labels = to_categorical(labels, num_classes=class_count)

    # Split the data and the encoded labels, keeping class proportions in both parts.
    train_data, test_data, train_labels, test_labels = train_test_split(
        data, encoded_labels, test_size=0.2, random_state=19, stratify=encoded_labels
    )

    return train_data, test_data, train_labels, test_labels, label_to_index, index_to_label


def make_csv_file(items, labels, file_path):
    """Write one CSV row per sample with its flattened frames and class index.

    Args:
        items: Sequence of array-likes, one per sample; each is flattened to 1-D.
        labels: Either one-hot rows (2-D) or integer class indices (1-D). One-hot
            rows are reduced to the index of their largest entry so that the
            ``Label`` column can be read back as a number and mapped through
            ``index_to_label``.
        file_path: Destination of the CSV file.

    Note:
        The ``Video_Frames`` cells hold the text form of each flattened array.
        NOTE(review): NumPy abbreviates large arrays when converting them to
        text, so this column is presumably not a lossless copy of the frames -
        use ``np.save`` if the pixel data must be restored later.
    """
    flat_items = [np.asarray(item).reshape(-1) for item in items]
    label_array = np.asarray(labels)
    if label_array.ndim >= 2:
        # One-hot rows -> integer class index.
        class_indices = label_array.argmax(axis=-1)
    else:
        class_indices = label_array
    df = pd.DataFrame({'Video_Frames': flat_items, 'Label': class_indices})
    df.to_csv(file_path, index=False)



In [None]:
# Collect the video-group folders, then load their videos and build the train/test split.
groups = load_groups(data_dir)
train_data, test_data, train_labels, test_labels, label_to_index, index_to_label = split_into_train_test(groups)

In [None]:
# Print the one-hot encoded labels of the test split.
print(test_labels)

In [None]:
# Write both splits to CSV files; the relative paths resolve against the current working directory.
make_csv_file(train_data, train_labels, 'final_train_data.csv')
make_csv_file(test_data, test_labels, 'final_test_data.csv')

## Data Visualization of Training and Testing Datasets

In [None]:
# Read back the two files written with make_csv_file. The same relative paths
# are used for writing and reading, so this works from any working directory
# rather than only when it happens to be /kaggle/working.
train_df = pd.read_csv("final_train_data.csv")
test_df = pd.read_csv("final_test_data.csv")

In [None]:
# Show the column names of both frames.
train_df.columns,test_df.columns

In [None]:
# Call head() so the first rows are displayed rather than the bound method itself.
train_df.head()

In [None]:
# Call head() so the first rows are displayed rather than the bound method itself.
test_df.head()

In [None]:
# Report the (rows, columns) shape of each frame; the second line describes the test frame.
print(f"Dimension of Training Datset is : {train_df.shape}")
print(f"Dimension of Testing Datset is : {test_df.shape}")

### Training Dataset

In [None]:
# Translate each label index to its class name and count the rows per class.
train_class_counts = train_df['Label'].map(index_to_label).value_counts()

# Bar chart of the per-class counts, drawn through an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 10))
ax.bar(train_class_counts.index, train_class_counts.values)
ax.set_xlabel('Class Labels')
ax.set_ylabel('Class Counts')
ax.set_title('Class Distribution in Training Dataset')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Mean of the per-class counts of the training frame.
train_average_count = train_class_counts.mean()
print(f'Average number of videos in each category for Training Dataset: {train_average_count:.0f}')

### Testing Dataset

In [None]:
# Translate each label index to its class name and count the rows per class.
test_class_counts = test_df['Label'].map(index_to_label).value_counts()

# Bar chart of the per-class counts of the test frame.
plt.figure(figsize=(10, 6))
plt.bar(test_class_counts.index, test_class_counts.values)
plt.xlabel('Class Labels')
plt.ylabel('Class Counts')
# This figure shows the test frame, so the title names the testing dataset.
plt.title('Class Distribution in Testing Dataset')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Mean of the per-class counts of the test frame.
test_average_count = test_class_counts.mean()
print(f'Average number of videos in each category for Testing Dataset: {test_average_count:.0f}')

## Making CNN+LSTM Model for Training 

##### Defining the Dimensions of the Input

In [None]:
# Input dimensions used for the model's input_shape: (frames, height, width, 3).
frames = SEQUENCE_LENGTH
# cv2.resize takes its target size as (width, height), so frames resized with
# DIM have DIM[1] rows and DIM[0] columns. With the square DIM used here the
# values are unchanged, but a non-square DIM now yields the matching shape.
height = DIM[1]
width = DIM[0]

In [None]:
from tensorflow.keras.layers import Dropout

In [None]:
def SEQ_Model():
    """Build the stacked ConvLSTM2D classifier and print its summary.

    Reads the module-level ``frames``, ``height``, ``width`` and
    ``num_classes``.

    Returns:
        The ``Sequential`` model; it is not compiled here.
    """
    model = Sequential()

    # Three ConvLSTM2D stages; only the first one declares the input shape.
    for stage, n_filters in enumerate((8, 20, 30)):
        extra = {'input_shape': (frames, height, width, 3)} if stage == 0 else {}
        model.add(ConvLSTM2D(filters=n_filters, kernel_size=(3, 3), strides=(1, 1),
                             padding='same', activation='tanh', return_sequences=True,
                             recurrent_dropout=0.2, **extra))
        # Pool size 1 along the first pooled axis leaves that axis at full length.
        model.add(MaxPooling3D(pool_size=(1, 2, 2), padding='same'))
        model.add(TimeDistributed(Dropout(0.2)))

    model.add(Flatten())

    # Dense head as (units, dropout rate applied after the layer, or None).
    for units, drop_rate in ((72, 0.3), (48, 0.3), (24, None)):
        model.add(Dense(units, activation='relu', kernel_regularizer=l2(l2=0.01)))
        if drop_rate is not None:
            model.add(Dropout(drop_rate))
    model.add(Dense(num_classes, activation='softmax'))

    model.summary()

    return model
    

In [None]:
# Build the model; SEQ_Model prints the layer summary itself.
print("Model Architecture : ")
final_model = SEQ_Model()

## Structure of Model Architecture

In [None]:
# Render the architecture diagram to model.png and display the returned object in the cell output.
model_str = plot_model(final_model, to_file='model.png', show_shapes=True, show_layer_names=True)
model_str

## Compile and Train the Model

In [None]:
# Stop once the validation loss has not improved for 10 epochs and restore the
# weights of the best epoch.
early_stopper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    mode='min',
    restore_best_weights=True,
)

optimizer = Adagrad(learning_rate=0.009)

final_model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 20% of the training samples are held out for validation.
# use_multiprocessing is deliberately not passed: Keras documents it as applying
# to generator / Sequence input only, and newer Keras releases no longer accept
# the argument. callbacks is given as a list, which is the documented form.
model_history = final_model.fit(
    train_data,
    train_labels,
    batch_size=15,
    epochs=100,
    verbose='auto',
    validation_split=0.2,
    shuffle=True,
    callbacks=[early_stopper],
)

## Model Evaluation


In [None]:
# Evaluate on the held-out test split; the two returned values are the loss and the accuracy metric set in compile().
test_loss,test_accuracy = final_model.evaluate(test_data,test_labels)

In [None]:
# Both panels are drawn in a single cell: the inline backend closes a figure
# when its cell finishes, so a subplot added from a later cell would land in a
# new, separate figure instead of next to the loss curves.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 7))
fig.suptitle("Model performance")

# Left panel: loss per epoch.
loss_ax.plot(model_history.history["loss"], label="training loss")
loss_ax.plot(model_history.history["val_loss"], label="validation loss")
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

# Right panel: accuracy per epoch.
acc_ax.plot(model_history.history['accuracy'], label="training accuracy")
acc_ax.plot(model_history.history['val_accuracy'], label="validation accuracy")
acc_ax.set_xlabel("Epochs")
acc_ax.set_ylabel("Accuracy")
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()

plt.show()

## Saving the Model

In [None]:
# Save the fitted model to the file AdadeltaHAR.h5.
# NOTE(review): Keras documents Model.save as returning None, so
# Adadelta_HARmodel presumably ends up holding None - confirm nothing relies on
# it. The names say "Adadelta" while this notebook trains with Adagrad.
Adadelta_HARmodel = final_model.save("AdadeltaHAR.h5")