In [None]:
import torch
import numpy as np
import torch.optim as optim
from torch.utils import data
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os
import cv2
import h5py
import re
from glob import glob
from dataset_loader import create_multimodal_pytorch_dataset
import parameters
from functions import get_total_performance_metrics
from functions import get_global_performance_metrics
from functions import get_performance_metrics, get_multimodal_performance_metrics

# Local aliases for the settings defined in the project's `parameters` module.
# Grouped by theme; several of them are not referenced in the cells visible here
# but are kept so that any other cell relying on them keeps working.

# --- Locations and dataset selection ---
project_directory = parameters.project_directory
dataset_directory = parameters.dataset_directory
dataset_category = parameters.dataset_category
TOD = parameters.TOD  # NOTE(review): meaning not visible in this notebook - confirm in parameters.py
ht = parameters.ht
wd = parameters.wd

# --- Windowing and batching ---
window_len = parameters.window_len
stride = parameters.stride
fair_comparison = parameters.fair_comparison
chunk_size = parameters.chunk_size
forward_chunk_size = parameters.forward_chunk_size

# --- Pre-processing switches (printed by the demo pipeline) ---
key_frame_extraction = parameters.key_frame_extraction
key_frame_extraction_algorithm = parameters.key_frame_extraction_algorithm
feature_extraction = parameters.feature_extraction
background_subtraction = parameters.background_subtraction
background_subtraction_algorithm = parameters.background_subtraction_algorithm
data_augmentation = parameters.data_augmentation
frame_rate_adjusted_dataset = parameters.frame_rate_adjusted_dataset
synchronise_video = parameters.synchronise_video
pad_video = parameters.pad_video

# --- Model, loss and optimisation settings ---
device = parameters.device
anomaly_detection_model = parameters.anomaly_detection_model
fusion_type = parameters.fusion_type
dropout = parameters.dropout
learning_rate = parameters.learning_rate
num_epochs = parameters.num_epochs
spatial_temporal_loss = parameters.spatial_temporal_loss
loss_fn = parameters.loss_fn
w1 = parameters.w1  # presumably a loss weight - TODO confirm
w2 = parameters.w2  # presumably a loss weight - TODO confirm

In [None]:
def _windows_to_frames(windows):
    """Flatten a windowed sequence back into a frame-level array.

    Takes the first element of every window except the last, then every element
    of the last window, and finally repeats the last element once (the windowing
    code produces one window fewer than required, so the tail is duplicated).

    NOTE(review): this is only frame-exact if consecutive windows are offset by a
    single frame - confirm against the `stride` used to build the windows.

    Args:
        windows: Indexable, sliceable sequence of windows (e.g. a numpy array whose
            first axis is the window index). Must hold at least one non-empty window.

    Returns:
        np.ndarray with one entry per frame (or per frame label).
    """
    frames = [window[0] for window in windows[:-1]]
    # All elements of the final window.
    frames.extend(windows[-1])
    # Duplicate the last element to make up for the missing window.
    frames.append(frames[-1])
    return np.array(frames)


def _to_display_frame(frame, display_size, scale=None):
    """Resize a frame, cast it to float32 and make sure it has three channels.

    Args:
        frame: Image as a numpy array (2-D grayscale or 3-channel).
        display_size: Target size as ``(width, height)``, the order cv2.resize expects.
        scale: Optional divisor applied after the float32 cast (255 for uint8 images).

    Returns:
        float32 array with three channels, ready to be tiled with the other panels.
    """
    resized = cv2.resize(frame, display_size)
    resized = np.asarray(resized, dtype=np.float32)
    if scale is not None:
        resized = resized / scale
    # cv2.resize returns a 2-D array for single-channel input; expand it so that every
    # panel can be concatenated with the colour ones.
    if resized.ndim == 2:
        resized = cv2.cvtColor(resized, cv2.COLOR_GRAY2RGB)
    return resized


def display_videos(
    names,
    dsets,
    paths,
    vid_folder_list,
    original_indices_list,
    input_video_list,
    reconstructed_video_list,
    original_labels_list,
):
    """Play the original, h5py (preprocessed), input and reconstructed videos side by side.

    One row is shown per modality (two modalities), with four panels per row:
    original image, preprocessed frame from the h5 file, model input and model output.
    "Fall at Frame - N" is printed whenever both modalities label frame N as 1.
    Playback stops early when the 'esc' key is pressed.

    Reads the module-level settings `dataset_directory`, `dataset_category` and
    `background_subtraction`.

    Args:
        names: Per-modality dataset names; used as the top-level group in the h5 file.
        dsets: Per-modality folder names inside the raw dataset directory. The second
            modality is flipped horizontally when it equals "ONI_IR".
        paths: Per-modality paths of the h5 files.
        vid_folder_list: Per-modality name of the video folder being displayed.
        original_indices_list: Per-modality indices of the frames kept after
            synchronisation, indexing into the raw / h5 frame sequences.
        input_video_list: Per-modality windowed model input.
        reconstructed_video_list: Per-modality windowed model output.
        original_labels_list: Per-modality windowed frame labels.
    """
    num_modalities = 2
    display_ht = 256
    display_wd = 256
    # cv2.resize expects (width, height).
    display_size = (display_wd, display_ht)
    # The data is adjusted to 8 fps (125 ms per frame); the demo deliberately uses a
    # shorter delay, i.e. playback is faster than real time.
    ms_per_frame = 50  # millisecond per frame
    # Number of leading frames dropped when background subtraction is on
    # (background subtraction history is 120, so those initial frames are black).
    bg_history = 120
    window_title = "Original, Preprocessed, Modified, Output"

    # Extract if fall folder or ADL folder. Can be taken from any modality
    dir_type = re.findall("[a-zA-Z]+", vid_folder_list[0])[0]

    # Original Video
    original_video_list = []
    for i in range(num_modalities):
        # Build the path with os.path.join instead of a backslash-laden literal: the
        # old literal relied on invalid escape sequences and only worked on Windows.
        vid_dir = os.path.join(
            dataset_directory,
            "Dataset",
            "Fall-Data",
            str(dataset_category),
            str(dsets[i]),
            dir_type,
            str(vid_folder_list[i]),
        )
        frame_paths = glob(os.path.join(vid_dir, "*.jpg")) + glob(os.path.join(vid_dir, "*.png"))
        # Natural sort so that e.g. frame 10 comes after frame 9.
        frame_paths.sort(key=lambda var: [int(x) if x.isdigit() else x for x in re.findall(r"[^0-9]|[0-9]+", var)])
        original_video = []
        for filename in frame_paths:
            img = cv2.imread(filename, cv2.IMREAD_ANYCOLOR)
            if img is not None:
                original_video.append(img)
        # Pick only the frames shortlisted in frame synchronisation
        original_video_list.append([original_video[int(j)] for j in original_indices_list[i]])

    # Preprocessed Video
    preprocessed_video_list = []
    for i in range(num_modalities):
        with h5py.File(paths[i], "r") as hf:
            data_dict = hf["{}/Processed/Split_by_video".format(names[i])]
            preprocessed_video = data_dict[vid_folder_list[i]]["Data"][:]
        # Pick only the frames shortlisted in frame synchronisation
        preprocessed_video_list.append([preprocessed_video[int(j)] for j in original_indices_list[i]])

    # Modified (input) video, reconstructed output video and labels are all windowed;
    # convert each of them back to frame level.
    modified_video_list = [_windows_to_frames(input_video_list[i]) for i in range(num_modalities)]
    output_video_list = [_windows_to_frames(reconstructed_video_list[i]) for i in range(num_modalities)]
    labels_list = [_windows_to_frames(original_labels_list[i]) for i in range(num_modalities)]

    if background_subtraction:
        for i in range(num_modalities):
            # [-1,1] Normalisation. Only apply if background_subtraction is turned on.
            value_range = np.ptp(output_video_list[i])
            # A constant reconstruction has zero range; skip the rescale instead of
            # dividing by zero and filling the video with NaNs.
            if value_range > 0:
                output_video_list[i] = (
                    2.0 * (output_video_list[i] - np.min(output_video_list[i])) / value_range - 1
                )

            # Remove the leading frames that are black because of the
            # background subtraction history.
            original_video_list[i] = original_video_list[i][bg_history:]
            preprocessed_video_list[i] = preprocessed_video_list[i][bg_history:]
            modified_video_list[i] = modified_video_list[i][bg_history:]
            output_video_list[i] = output_video_list[i][bg_history:]
            labels_list[i] = labels_list[i][bg_history:]

    # Only play the frames available in every stream, so that a length mismatch
    # between the streams cannot raise an IndexError mid-playback.
    num_frames = min(
        len(video)
        for stream in (
            original_video_list,
            preprocessed_video_list,
            modified_video_list,
            output_video_list,
            labels_list,
        )
        for video in stream
    )

    try:
        for index in range(num_frames):
            # uint8 originals are scaled to [0, 1]; the other streams are only cast to float32.
            original_frame_list = [
                _to_display_frame(original_video_list[i][index], display_size, scale=255)
                for i in range(num_modalities)
            ]
            preprocessed_frame_list = [
                _to_display_frame(preprocessed_video_list[i][index], display_size) for i in range(num_modalities)
            ]
            modified_frame_list = [
                _to_display_frame(modified_video_list[i][index], display_size) for i in range(num_modalities)
            ]
            output_frame_list = [
                _to_display_frame(output_video_list[i][index], display_size) for i in range(num_modalities)
            ]

            modality_1_frames = np.concatenate(
                [original_frame_list[0], preprocessed_frame_list[0], modified_frame_list[0], output_frame_list[0]],
                axis=1,
            )

            # Rotate Images of Modality 2 by 90 Degree Clockwise
            # Modified_frame and output_frame will already be in correct orientation
            original_frame_list[1] = cv2.rotate(original_frame_list[1], cv2.ROTATE_90_CLOCKWISE)
            preprocessed_frame_list[1] = cv2.rotate(preprocessed_frame_list[1], cv2.ROTATE_90_CLOCKWISE)
            # If it is ONI_IR, Then Flip Horizontally
            if dsets[1] == "ONI_IR":
                original_frame_list[1] = cv2.flip(original_frame_list[1], 1)
                preprocessed_frame_list[1] = cv2.flip(preprocessed_frame_list[1], 1)

            modality_2_frames = np.concatenate(
                [original_frame_list[1], preprocessed_frame_list[1], modified_frame_list[1], output_frame_list[1]],
                axis=1,
            )

            vertical_concatenation = np.concatenate([modality_1_frames, modality_2_frames], axis=0)

            cv2.imshow(window_title, vertical_concatenation)

            if labels_list[0][index] == labels_list[1][index] == 1:
                print("Fall at Frame - {}".format(index))

            k = cv2.waitKey(ms_per_frame) & 0xFF
            # Exit on 'esc' key
            if k == 27:
                break
    finally:
        # Always close the window, even if a frame fails to render.
        cv2.destroyAllWindows()


def demo_pipeline_multimodality(names, dsets, paths, modelpath):
    """Run a saved multimodal model over the test videos and play back the results.

    Builds the test dataloader, loads the first model class listed in
    `parameters.multi_modal_models` with the weights stored under
    `<project_directory>/Output/Models/Demo/<modelpath>`, prints the active
    configuration and then, for every test video, reconstructs both modalities
    chunk by chunk and hands the result to `display_videos`.

    Args:
        names: Per-modality dataset names, forwarded to the dataset loader and to
            `display_videos`.
        dsets: Per-modality folder names, forwarded likewise.
        paths: Per-modality h5 file paths, forwarded likewise.
        modelpath: File name of the saved model weights inside the Demo model folder.
    """
    (
        _test_dataset,
        test_dataloader,
        _multi_x_data_test,
        _multi_y_data_test,
        multi_x_info_test,
    ) = create_multimodal_pytorch_dataset(names, dsets, paths, window_len, fair_comparison, stride)

    # os.path.join avoids the invalid escape sequences of a backslash-laden literal
    # and keeps the path valid on non-Windows systems as well.
    filepath_model = os.path.join(project_directory, "Output", "Models", "Demo", modelpath)

    # Prepare GPU
    torch.cuda.empty_cache()
    # Load the model. Using MultiModal_3DCAE
    model = parameters.multi_modal_models[0]().to(device)
    # map_location lets weights saved on a GPU be loaded when `device` is the CPU.
    model.load_state_dict(torch.load(filepath_model, map_location=device))  # Load saved model weights
    model.eval()  # Sets the model in testing mode.

    # format() also works when `device` is a torch.device rather than a string.
    print("Device Used - {}".format(device))
    print("Key Frame Extraction - {}".format(key_frame_extraction))
    if key_frame_extraction:
        print("Key Frame Extraction Algorithm - {}".format(key_frame_extraction_algorithm))
    print("Feature Extraction - {}".format(feature_extraction))
    if feature_extraction:
        print("Background Subtraction - {}".format(background_subtraction))
        if background_subtraction:
            print("Background Subtraction Algorithm - {}".format(background_subtraction_algorithm))
    print("Data Augmentation - {}".format(data_augmentation))
    print("Frame rate adjusted dataset - {}".format(frame_rate_adjusted_dataset))
    print("Synchronise Video - {}".format(synchronise_video))
    if synchronise_video:
        print("Video length adjustment method - Not Applicable")
    elif pad_video:
        print("Video length adjustment method - Pad Minimum")
    else:
        print("Video length adjustment method - Trim Maximum")
    print("Window Length = {}\n".format(window_len))

    # Video folder names, indexed as [modality, video]; built once instead of per batch.
    video_folders = np.array(multi_x_info_test)

    with torch.no_grad():
        for i, (sample, labels, original_indices_list) in enumerate(test_dataloader):
            # Get vid_folder across both modalities
            vid_folder_list = video_folders[:, i]
            # Both folders will be same
            print("{} - {}, {} - {}".format(dsets[0], vid_folder_list[0], dsets[1], vid_folder_list[1]))

            # forward pass to get output
            print("Forward Pass Initiated")

            torch.cuda.empty_cache()

            # Axis 1 of `sample` selects the modality.
            sample1 = sample[:, 0, :, :, :, :].to(device, dtype=torch.float)
            sample2 = sample[:, 1, :, :, :, :].to(device, dtype=torch.float)

            # Feed the windows through the model in chunks to bound memory usage.
            chunks1 = torch.split(sample1, chunk_size, dim=1)
            chunks2 = torch.split(sample2, chunk_size, dim=1)

            recon_vid1 = []
            recon_vid2 = []

            for chunk1, chunk2 in zip(chunks1, chunks2):
                output1, output2 = model(chunk1, chunk2)
                # Move each reconstructed chunk off the device right away so that
                # empty_cache() below can actually release its memory.
                recon_vid1.append(output1.permute(1, 0, 2, 3, 4).cpu())
                recon_vid2.append(output2.permute(1, 0, 2, 3, 4).cpu())
                del output1, output2
                torch.cuda.empty_cache()

            output = torch.stack((torch.cat(recon_vid1, dim=1), torch.cat(recon_vid2, dim=1)), dim=1)

            # convert tensors to numpy arrays for easy manipulations
            sample = sample.detach().cpu().numpy()
            output = output.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()

            print("Forward Pass Completed")

            # Accessing Index 0 as batch_size = 1
            # original_indices_list - By frame synchronisation, frames are equalised (and some frames are removed).
            # This list contains the selected frames original indices in the original / h5py data (for both the modalities).
            # Sample - Windowed Modified Input
            # Output - Windowed Reconstructed Output
            # Labels - Windowed Class Labels

            display_videos(
                names, dsets, paths, vid_folder_list, original_indices_list[0], sample[0], output[0], labels[0]
            )


# Multimodality
# Each entry pairs the two modalities shown together: list_of_files holds the raw
# dataset folder names, list_of_datasets the matching h5 dataset names.
list_of_files = [["Thermal", "ONI_IR"], ["Thermal", "IP"]]
list_of_datasets = [["Thermal_T3", "ONI_IR_T"], ["Thermal_T3", "IP_T"]]

# These are file paths for MultiModal_3DCAE model only. Cannot be used for other MultiModal models
if background_subtraction:
    # Both are FPS Adjusted, Default GMG, Smooth L1 Loss, Synchronise Video
    thermal_oni_ir_model = "MultiModal_Thermal_T3_ONI_IR_T_2024-04-17-14-58-26"  # Trial-14
    thermal_ip_model = "MultiModal_Thermal_T3_IP_T_2024-04-18-09-19-42"  # Trial-8
else:
    # Both are FPS Adjusted, L1 Loss, Synchronise Video
    thermal_oni_ir_model = "MultiModal_Thermal_T3_ONI_IR_T_2024-04-24-16-37-55"  # Trial-7
    thermal_ip_model = "MultiModal_Thermal_T3_IP_T_2024-04-24-17-38-22"  # Trial-4

list_of_models = [thermal_oni_ir_model, thermal_ip_model]

# Selects which modality pair to demo: 0 -> Thermal + ONI_IR, 1 -> Thermal + IP
modalities_index = 0  # 0 or 1

dsets = list_of_files[modalities_index]
names = list_of_datasets[modalities_index]
modelpath = list_of_models[modalities_index]
# Built with os.path.join: the previous backslash-laden f-string relied on invalid
# escape sequences ("\D", "\H", "\{") and only produced a usable path on Windows.
paths = [
    os.path.join(project_directory, "Dataset", "H5PY", f"{dataset_category}_Data_set-{name}-imgdim64x64.h5")
    for name in names
]

demo_pipeline_multimodality(names, dsets, paths, modelpath)