<a href="https://colab.research.google.com/github/rudyhendrawn/traditional-dance-video-classification/blob/main/vgg16_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Recursively and forcibly delete the `sample_data` directory from the working
# directory, listing each removed path (-v).
# NOTE(review): presumably the sample data that Colab ships by default — confirm.
!rm -rfv sample_data

### Mounting Repository

Clone the repository and move its contents into the working directory so that all the data (directories and utility files) is ready to use.

In [None]:
# Fetch the project repository into a sub-directory of the working directory.
!git clone https://github.com/rudyhendrawn/traditional-dance-video-classification.git
# Move the cloned contents up one level so project paths resolve relative to the
# notebook (presumably this is where the `lib` package imported later comes from).
# NOTE(review): under default shell globbing `*` does not match dot-files, so
# hidden files stay behind and are deleted by the next command.
!mv traditional-dance-video-classification/* .
# Remove the now-unneeded clone directory, listing each removed path (-v).
!rm -rfv traditional-dance-video-classification

### Mounting Google Drive

Google Drive needs to be set up and mounted for this project. Use the code below to mount it.

In [None]:
# Colab-only helper: this import is only available inside a Google Colab runtime.
from google.colab import drive
# Mount the user's Google Drive at /content/drive.
# NOTE(review): presumably DATASET_DIR is expected to point somewhere below this mount — confirm.
drive.mount('/content/drive')

### Initial Setup 🧑‍💻

Set up the project and import the required dependencies.

In [None]:
import os
import warnings
import time

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import lib.helpers as helpers

from lib.keras_video import VideoFrameGenerator

from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

from tensorflow.keras.layers import GlobalAveragePooling2D, LSTM, Dense, Dropout, TimeDistributed
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.vgg16 import VGG16

from IPython import get_ipython

# Programmatic form of the `%matplotlib inline` magic: render figures in the cell output.
get_ipython().run_line_magic("matplotlib", "inline")
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# imported libraries — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

### Pre-Defining Global Variable

Define the global variables used throughout the notebook.

In [None]:
# --- Paths -------------------------------------------------------------------
# Platform-specific path separator.
DS = os.sep
# Root directory of the dataset; replace the placeholder with your own location.
DATASET_DIR = "/path/to/your/dataset_drive_dir" # Change with the correct path to your dataset

# --- Input geometry ----------------------------------------------------------
# Frames sampled per video, frame size, and colour channels per frame.
NB_FRAMES = 30
RESOLUTION = (224, 224)
NB_COLOR_CHANNELS = 3

# --- Batching ----------------------------------------------------------------
# Number of videos per batch handed to the model.
BATCH_SIZE = 2

### Generating Class Names & Glob Pattern

Generate the class names from the `train` split under `DATASET_DIR`, and define the glob patterns used to locate the train, test, and validation files.

In [None]:
# Class labels are generated from the "train" split of the dataset directory.
class_names = helpers.get_generated_class_names(DATASET_DIR, "train")

# One glob pattern per dataset split, generated in the order train -> test -> val.
train_glob_pattern, test_glob_pattern, val_glob_pattern = [
    helpers.get_generated_glob_pattern(DATASET_DIR, split_name)
    for split_name in ("train", "test", "val")
]


### Dataset Setup

Setup the dataset with `keras_video.VideoFrameGenerator` to do the dataset extraction

In [None]:
# Options for the training-split frame generator. The seed is fixed at 42
# (presumably for reproducible sampling — confirm in `lib.keras_video`);
# no transformation object is supplied and the frame cache is switched off.
train_generator_options = {
    "glob_pattern": train_glob_pattern,
    "classes": class_names,
    "batch_size": BATCH_SIZE,
    "nb_frames": NB_FRAMES,
    "nb_channel": NB_COLOR_CHANNELS,
    "target_shape": RESOLUTION,
    "seed": 42,
    "transformation": None,
    "use_frame_cache": False,
}

train_dataset_generator = VideoFrameGenerator(**train_generator_options)

In [None]:
# Options for the test-split frame generator; identical to the other splits
# except for the glob pattern that selects the files.
test_generator_options = {
    "glob_pattern": test_glob_pattern,
    "classes": class_names,
    "batch_size": BATCH_SIZE,
    "nb_frames": NB_FRAMES,
    "nb_channel": NB_COLOR_CHANNELS,
    "target_shape": RESOLUTION,
    "seed": 42,
    "transformation": None,
    "use_frame_cache": False,
}

test_dataset_generator = VideoFrameGenerator(**test_generator_options)

In [None]:
# Options for the validation-split frame generator; identical to the other
# splits except for the glob pattern that selects the files.
val_generator_options = {
    "glob_pattern": val_glob_pattern,
    "classes": class_names,
    "batch_size": BATCH_SIZE,
    "nb_frames": NB_FRAMES,
    "nb_channel": NB_COLOR_CHANNELS,
    "target_shape": RESOLUTION,
    "seed": 42,
    "transformation": None,
    "use_frame_cache": False,
}

val_dataset_generator = VideoFrameGenerator(**val_generator_options)

In [None]:
# Shape of one video sample: (frames, height, width, channels) built from the
# frame count, the RESOLUTION pair, and the channel count.
input_shape = (NB_FRAMES, *RESOLUTION, NB_COLOR_CHANNELS)

### Prepare `vgg16` Layer

Preparing the configuration to create the `vgg16` layer to add to the created model

In [None]:
# The backbone processes a single frame, so drop the leading frame-count axis.
frame_shape = input_shape[1:]

# ImageNet-pretrained VGG16 without its classification head.
vgg16_model = VGG16(weights="imagenet", include_top=False, input_shape=frame_shape)

# Freeze the backbone so its weights are not updated during training.
vgg16_model.trainable = False

### Model Creation

Create a `Sequential` model and add the `vgg16` backbone, an `lstm` layer, and the remaining layers to it.

In [None]:
# Video classifier: the VGG16 backbone is applied to every frame, each frame's
# feature map is average-pooled, an LSTM consumes the resulting sequence, and a
# dense head ends in a softmax with one unit per class.
model = Sequential([
    TimeDistributed(vgg16_model, input_shape=input_shape),
    TimeDistributed(GlobalAveragePooling2D()),
    LSTM(256),
    Dense(1024, activation="relu"),
    Dropout(0.2),
    Dense(len(class_names), activation="softmax"),
])

model.summary()

### Compiling & Fitting Setup

Configuration for compiling and fitting the model: the number of epochs, early stopping, checkpointing, and the resulting list of callbacks.

In [None]:
# Upper bound on the number of training epochs.
model_epochs = 25

# Stop training once `val_loss` has gone 3 epochs without improving.
model_earlystopping = EarlyStopping(patience=3, monitor="val_loss")

# Write a checkpoint only when `val_loss` reaches a new minimum; the epoch
# number and the loss value are formatted into the file name.
checkpoint_options = dict(
    filepath="checkpoint/vgg16_lstm-{epoch:02d}-{val_loss:.2f}.h5",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)
model_checkpoint = ModelCheckpoint(**checkpoint_options)

model_callbacks = [model_earlystopping, model_checkpoint]

### Model Compile

Compiling model with pre-defined configuration

In [None]:
# Multi-class setup: categorical cross-entropy loss with the Adam optimizer.
# The metric is registered as "acc", which is the key read back from the
# training history when plotting.
compile_options = {
    "optimizer": "adam",
    "loss": "categorical_crossentropy",
    "metrics": ["acc"],
}
model.compile(**compile_options)

### Model Training/Fitting

Fit the model on the training dataset, using the epochs and callbacks defined above.

In [None]:
# Timestamp taken immediately before training starts.
start_time = time.time()

# Train on the training generator and validate on the validation generator
# after every epoch; the callbacks handle early stopping and checkpointing.
fit_options = {
    "epochs": model_epochs,
    "validation_data": val_dataset_generator,
    "callbacks": model_callbacks,
}
model_history = model.fit(train_dataset_generator, **fit_options)

# Elapsed wall-clock time of the whole fit call, in seconds.
end_time = time.time()
exec_time = end_time - start_time

print("Fitting execution time : {}s".format(exec_time))

### Save Model

Saving model file into `model` directory

In [None]:
# Make sure the target directory exists before saving: writing the file into a
# missing directory would raise only after training has already finished.
os.makedirs("model/dance", exist_ok=True)

# Persist the trained model (architecture + weights) as a single HDF5 file.
model.save("model/dance/vgg16-lstm-5e.h5")

### Acc Visualization

Visualizing acc data with Matplotlib graph

In [None]:
# Plot per-epoch accuracy for the training and validation sets.
# The second curve is `val_acc`, measured on the validation generator passed to
# `fit`, so it is labelled "validation" rather than "test".
helpers.get_visualized_graph(
  plots=[model_history.history["acc"], model_history.history["val_acc"]],
  title="Model Accuracy",
  x_label="Epoch",
  y_label="Accuracy",
  legend=["train", "validation"]
).show()

### Loss Visualization

Visualizing loss data with Matplotlib graph

In [None]:
# Plot per-epoch loss for the training and validation sets.
# The second curve is `val_loss`, measured on the validation generator passed to
# `fit`, so it is labelled "validation" rather than "test".
helpers.get_visualized_graph(
  plots=[model_history.history["loss"], model_history.history["val_loss"]],
  title="Model Loss",
  x_label="Epoch",
  y_label="Loss",
  legend=["train", "validation"]
).show()

### Export Dataframe From Model

Export dataframe to `.csv` file from the model history via Pandas library

In [None]:
# One row per epoch, one column per recorded metric.
model_history_dataframe = pd.DataFrame(model_history.history)
model_history_fpath = "history/dance/vgg16-lstm-5e.csv"

# Create the output directory if needed so the export cannot fail on a missing path.
os.makedirs(os.path.dirname(model_history_fpath), exist_ok=True)

# Give pandas the path rather than an open handle: pandas then manages the file
# itself, including newline handling (a handle opened without newline="" can
# produce blank lines between rows on Windows).
model_history_dataframe.to_csv(model_history_fpath)

### Model Evaluation

Evaluating model with test dataset

In [None]:
# Evaluate the trained model on the test generator. As the last expression of
# the cell, the returned value (the loss followed by the compiled "acc" metric,
# per Keras' `evaluate` contract) is shown as the cell output.
model.evaluate(test_dataset_generator)

### Populate Y Data

Populating Y's `prediction` and `test` data with test dataset

In [None]:
# Collect the predicted labels and the matching ground-truth labels for the
# test set using the trained model.
# NOTE(review): the exact label format is defined by
# `helpers.get_populated_y_data` — presumably integer class indices; confirm.
y_prediction_max, y_true = helpers.get_populated_y_data(
    model=model,
    generator=test_dataset_generator,
    batch_size=BATCH_SIZE,
)

### Score Visualization

Displaying several scores calculated for the model: `accuracy`, `precision`, `recall`, and `f1`.

In [None]:
# Summary scores computed from the true and predicted labels of the test set.
score_accuracy, score_precision, score_recall, score_f1 = helpers.get_calculated_score(y_true, y_prediction_max)

# Print each score rounded to three decimals, one per line.
score_rows = (
    ("Accuracy Score", score_accuracy),
    ("Precision Score", score_precision),
    ("Recall Score", score_recall),
    ("F1 Score", score_f1),
)
for score_label, score_value in score_rows:
    print(f"{score_label}\t: {np.round(score_value, 3)}")

### Classification Report Visualization

Visualizing classification report of test dataset

In [None]:
# Class names as held by the test generator, used to label the report rows.
test_class_names = test_dataset_generator.classes

# Per-class precision/recall/F1 table for the test set.
test_report = classification_report(y_true, y_prediction_max, target_names=test_class_names)
print(test_report)

### Confusion Matrix Visualization

Visualizing confusion matrix with heatmap table

In [None]:
# Rows are true labels, columns are predicted labels.
confusion_matrix_result = confusion_matrix(y_true, y_prediction_max)

# Annotated heatmap of the raw counts in a blue colour scale.
heatmap_style = {"annot": True, "cmap": "Blues"}
sns.heatmap(confusion_matrix_result, **heatmap_style)

### AUC Score Visualization

Visualization of AUC score calculated with FPR and TPR

In [None]:
# One-vs-rest ROC for a single class (label 6).
positive_class = 6

# `roc_curve` ranks samples by `y_score`. Passing the predicted class labels
# directly would rank samples by their class index, which is arbitrary for a
# multi-class problem. Use a 0/1 indicator of "predicted as the positive class"
# instead, so the score is meaningful for this class.
# NOTE(review): this is a hard-decision ROC (a single operating point); a full
# curve needs per-class probabilities, which are not available in this cell.
y_score_positive = (np.asarray(y_prediction_max) == positive_class).astype(int)

fpr, tpr, _ = roc_curve(y_true, y_score_positive, pos_label=positive_class)
score_auc = auc(fpr, tpr)

print(f"AUC Score\t: {np.round(score_auc, 3)}")

### True/False Positive Rate Visualization

Plotting the ROC curve (true positive rate against false positive rate) with Matplotlib, using the FPR and TPR computed above.

In [None]:
# ROC curve drawn on an explicit Axes, with the dashed diagonal as reference.
roc_fig, roc_ax = plt.subplots()

roc_ax.plot(fpr, tpr, marker=".")
roc_ax.plot([0, 1], [0, 1], color="navy", linestyle="--")

roc_ax.set_xlabel("False Positive Rate")
roc_ax.set_ylabel("True Positive Rate")

plt.show()

### Visualizing Checkpoint Model

Visualizing all the score/calculated score from the checkpoint model

In [None]:
import glob

from tensorflow.keras.models import load_model

# The checkpoint callback writes files named
# "checkpoint/vgg16_lstm-<epoch>-<val_loss>.h5", so a fixed
# "checkpoint/vgg16-lstm.h5" is never produced by training. Because only
# improvements are saved (save_best_only=True), the most recently written
# checkpoint is the best one of the run.
checkpoint_candidates = glob.glob("checkpoint/vgg16_lstm-*.h5")
if checkpoint_candidates:
    checkpoint_path = max(checkpoint_candidates, key=os.path.getmtime)
else:
    # Fall back to the original fixed name in case such a file was provided manually.
    checkpoint_path = "checkpoint/vgg16-lstm.h5"

checkpoint_model = load_model(checkpoint_path)

# Evaluate the restored checkpoint on the test generator; the metrics are shown
# as the cell output.
checkpoint_model.evaluate(test_dataset_generator)

### Populate Y Data

Populating checkpoint model Y's `prediction` and `test` data with test dataset

In [None]:
# Collect predictions and ground-truth labels for the test set using the
# restored checkpoint. This must be `checkpoint_model`: passing `model` here
# would repeat the final model's results in every checkpoint metric below.
y_prediction_max, y_true = helpers.get_populated_y_data(
    batch_size=BATCH_SIZE,
    generator=test_dataset_generator,
    model=checkpoint_model
)

### Score Visualization

Displaying several scores calculated for the checkpoint model: `accuracy`, `precision`, `recall`, and `f1`.

In [None]:
# Summary scores computed from the labels populated in the checkpoint section.
score_accuracy, score_precision, score_recall, score_f1 = helpers.get_calculated_score(y_true, y_prediction_max)

# Print each score rounded to three decimals, one per line.
checkpoint_score_rows = (
    ("Accuracy Score", score_accuracy),
    ("Precision Score", score_precision),
    ("Recall Score", score_recall),
    ("F1 Score", score_f1),
)
for score_label, score_value in checkpoint_score_rows:
    print(f"{score_label}\t: {np.round(score_value, 3)}")

### Classification Report Visualization

Visualizing checkpoint model classification report of test dataset

In [None]:
# Class names as held by the test generator, used to label the report rows.
test_class_names = test_dataset_generator.classes

# Per-class precision/recall/F1 table for the checkpoint section's predictions.
checkpoint_report = classification_report(y_true, y_prediction_max, target_names=test_class_names)
print(checkpoint_report)

### Confusion Matrix Visualization

Visualizing checkpoint model confusion matrix with heatmap table

In [None]:
# Rows are true labels, columns are predicted labels.
confusion_matrix_result = confusion_matrix(y_true, y_prediction_max)

# Annotated heatmap of the raw counts in a blue colour scale.
checkpoint_heatmap_style = {"annot": True, "cmap": "Blues"}
sns.heatmap(confusion_matrix_result, **checkpoint_heatmap_style)

### AUC Score Visualization

Visualization of checkpoint model AUC score calculated with FPR and TPR

In [None]:
# One-vs-rest ROC for a single class (label 6).
positive_class = 6

# `roc_curve` ranks samples by `y_score`. Passing the predicted class labels
# directly would rank samples by their class index, which is arbitrary for a
# multi-class problem. Use a 0/1 indicator of "predicted as the positive class"
# instead, so the score is meaningful for this class.
# NOTE(review): this is a hard-decision ROC (a single operating point); a full
# curve needs per-class probabilities, which are not available in this cell.
y_score_positive = (np.asarray(y_prediction_max) == positive_class).astype(int)

fpr, tpr, _ = roc_curve(y_true, y_score_positive, pos_label=positive_class)
score_auc = auc(fpr, tpr)

print(f"AUC Score\t: {np.round(score_auc, 3)}")

### True/False Positive Rate Visualization

Plotting the checkpoint model's ROC curve (true positive rate against false positive rate) with Matplotlib, using the FPR and TPR computed above.

In [None]:
# ROC curve drawn on an explicit Axes, with the dashed diagonal as reference.
checkpoint_roc_fig, checkpoint_roc_ax = plt.subplots()

checkpoint_roc_ax.plot(fpr, tpr, marker=".")
checkpoint_roc_ax.plot([0, 1], [0, 1], color="navy", linestyle="--")

checkpoint_roc_ax.set_xlabel("False Positive Rate")
checkpoint_roc_ax.set_ylabel("True Positive Rate")

plt.show()