# Imports:

In [18]:
# Imports:
import os
import math
import pandas as pd
import numpy as np
import torch.optim as optim
import torchmetrics
import pytorch_lightning as pl

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import time
from numpy import genfromtxt

# Dataset & Dataloader:

In [47]:
# Dataset & Dataloader:

# can be None:
MEDIA_PIPE_SELECTION = [468,473,282,52,4,0,16,40,90,270,320,199]
# cannot be None:
OPENFACE_FEATURE_SELECTED = ["AU01","AU02","AU04","AU05","AU06","AU07","AU09","AU10","AU11","AU12","AU14","AU15","AU17","AU20","AU23","AU24","AU25","AU26","AU28","AU43","anger","disgust","fear","happiness","sadness","surprise","neutral"]

if not (MEDIA_PIPE_SELECTION is None):
    MEDIA_PIPE_SELECTION_LEN = len(MEDIA_PIPE_SELECTION)
else:
    MEDIA_PIPE_SELECTION_LEN = 498  # all
    
OPENFACE_FEATURE_SELECTED_LEN = len(OPENFACE_FEATURE_SELECTED)



def load_coord_data(sample_data_path, frame_cap):
    
    if sample_data_path.endswith(".npy"):
        load_sample = np.load(sample_data_path)[:frame_cap]
        # print("Original shape:", load_sample.shape)
        if MEDIA_PIPE_SELECTION:
            load_sample = load_sample[:,MEDIA_PIPE_SELECTION,:]
        # print("After selection:", load_sample.shape)
    elif sample_data_path.endswith(".csv"):
        load_sample = pd.read_csv(sample_data_path)[:frame_cap]
        if OPENFACE_FEATURE_SELECTED:
            load_sample = load_sample[OPENFACE_FEATURE_SELECTED]
        load_sample = load_sample.values
        return load_sample
        # load_sample = load_sample.values()
    else:
        print(">>> WARNING: No support for {sample_data_path}. Returning None.")
        load_sample = None
    
    return load_sample



class DeceptionDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, mode, collection_type, data_dir, csv_dir,
                 class_to_num={"truth": 0, "lie": 1}, num_to_class={0: "truth", 1: "lie"}, transform=None,
                 seconds_input_size=3, fps_min=29, keypoints_quantity=478, coordinate_amount=3):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data_dir = data_dir
        self.data_df = pd.read_csv(os.path.join(csv_dir, f"{mode}_DARE.csv"))
        # print("frame_sum:", sum(self.data_df["total_frame_count"].tolist()))
        self.transform = transform
        if collection_type == "MediaPipe":
            self.sample_postfix = "_MP_coord.npy"
        elif collection_type == "OpenFace":
            self.sample_postfix = ".csv"
        else:
            print(">>> WARNING: No such collection type was used before. Sample postfix append was set to blank.")
            self.sample_postfix = ""
        self.class_to_num = class_to_num
        self.num_to_class = num_to_class
        self.frame_cap = fps_min * seconds_input_size
        self.input_dim = (self.frame_cap*keypoints_quantity*coordinate_amount)  # torch.Size([4, 87, 478, 3])
        print(f"{mode}'s defined input shape:", self.input_dim)
        self.keypoints_quantity = keypoints_quantity
        self.coordinate_amount = coordinate_amount
        # 126, 87 (29FPS*3=87), *45*
        # 87
            

    def __len__(self):
        return len(self.data_df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row_data = self.data_df.iloc[idx]
        
        # get path to data itself:
        base_name = row_data["video_name"].split(".")[0]
        data_path = os.path.join(self.data_dir,
                                 f"{base_name}{self.sample_postfix}").replace("\\", "/")
        
        # get label:
        label = self.class_to_num[row_data["label"]]        
        data  = load_coord_data(data_path, self.frame_cap)
        
        # if FPS is lower, duplicate every frame to increase (generate slow video as workaround):        
        if data.shape[0] < self.frame_cap:
            repeat_for = int(math.ceil(self.frame_cap / data.shape[0]))
            data = np.repeat(data, repeat_for, axis=0)[:self.frame_cap]
            # add additional edge case carry out:
            if len(data) < self.frame_cap:
                extra_needed = self.frame_cap - len(data)
                extra_array = np.zeros((extra_needed, data.shape[1], 3), dtype=float)
                data = np.concatenate((data, extra_array))
        # print("Before flatten keypoints X coord shape:", data.shape)  # (87, 478, 3)  # 87*478*3 = 124758
        data = data.reshape(self.frame_cap*self.keypoints_quantity * self.coordinate_amount)
        # print("After flattening keypoints X coord shape:", data.shape)
                
        return data, label

# LSTM Model Declaration:

In [85]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.layer_dim = layer_dim

        # Building your LSTM
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True).double()

        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim).double()

    def forward(self, x):
        # Initialize hidden state with zeros
        # h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        h0 = torch.zeros(self.layer_dim, self.hidden_dim).requires_grad_().double()

        # Initialize cell state
        # c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.layer_dim, self.hidden_dim).requires_grad_().double()

        # One time step
        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

        # Index hidden state of last time step
        # out.size() --> 100, 28, 100
        # out[:, -1, :] --> 100, 100 --> just want last time step hidden states! 
        # out = self.fc(out[:, -1, :])  # WAS
        # print("out.shape:", out[-1].shape)
        # print("out.shape:", out.shape)
        out = self.fc(out)  # out[-1])
        # out.size() --> 100, 10
        m = nn.Sigmoid()
        return m(out)

# Training:

In [98]:
def train(data_type="MediaPipe",
          data_root=os.path.join(os.getcwd(), "mediaPipe_keypoints_data_UPD").replace("\\", "/"),
          csv_root=os.path.join("../data/").replace("\\", "/"),
          keypoints_quantity=MEDIA_PIPE_SELECTION_LEN,
          coordinate_amount=3,
          desired_fps=29,
          input_in_seconds=3,
          batch_size=32,
          n_iters = 3000):
    
    # STEP 1: LOADING DATASET
    CAUT_train_dataset = DeceptionDataset(mode="train",
                                          collection_type=data_type,  # "MediaPipe",
                                          data_dir=data_root,  # 'mediaPipe_keypoints_data_UPD'),
                                          csv_dir=csv_root,  # audio_data_UPD,
                                          class_to_num={"truth": 0, "lie": 1},
                                          num_to_class={0: "truth", 1: "lie"},
                                          transform=None,
                                          seconds_input_size=input_in_seconds,
                                          fps_min=desired_fps,
                                          keypoints_quantity=keypoints_quantity,  # 12,  # 478,
                                          coordinate_amount=coordinate_amount)  # 3)
    CAUT_val_dataset = DeceptionDataset(mode="val",
                                        collection_type=data_type,  # "MediaPipe",
                                        data_dir=data_root,  # 'mediaPipe_keypoints_data_UPD'),
                                        csv_dir=csv_root,  # audio_data_UPD,
                                        class_to_num={"truth": 0, "lie": 1},
                                        num_to_class={0: "truth", 1: "lie"},
                                        transform=None,
                                        seconds_input_size=input_in_seconds,
                                        fps_min=desired_fps,
                                        keypoints_quantity=keypoints_quantity,  # 12,  # 478,
                                        coordinate_amount=coordinate_amount)  # 3)
    
    # STEP 2: MAKING DATASET ITERABLE   
    train_loader = torch.utils.data.DataLoader(dataset=CAUT_train_dataset, 
                                               batch_size=batch_size, 
                                               shuffle=True,
                                               drop_last=True)

    test_loader = torch.utils.data.DataLoader(dataset=CAUT_val_dataset, 
                                              batch_size=batch_size, 
                                              shuffle=False,
                                              drop_last=True)
    
    # STEP 3: DECLARE LSTM CLASS (ALREADY DECLARED ABOVE)
    
    # STEP 4: INSTANTIATE MODEL CLASS
    input_dim = CAUT_train_dataset.input_dim
    hidden_dim = 100
    layer_dim = 3  # ONLY CHANGE IS HERE FROM ONE LAYER TO TWO LAYER
    output_dim = 1
    
    model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
    
    # JUST PRINTING MODEL & PARAMETERS 
    print(model)
    print(len(list(model.parameters())))
    for i in range(len(list(model.parameters()))):
        print(list(model.parameters())[i].size())
    
    
    # STEP 5: INSTANTIATE LOSS CLASS
    criterion = nn.BCELoss()  # CrossEntropyLoss()
    
    
    # STEP 6: INSTANTIATE OPTIMIZER CLASS
    learning_rate = 0.1
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    
    # STEP 7: TRAIN THE MODEL
    iter = 0
    num_epochs = n_iters / (len(CAUT_train_dataset) / batch_size)
    num_epochs = int(num_epochs)
    
    for epoch in range(num_epochs):
        # for i, (images, labels) in enumerate(train_loader):
        for i, (data, labels) in enumerate(train_loader):
            data = data.double()
            labels = labels.double()
            # print(f"data.shape: {data.shape}")
            # print(f"labels.shape: {labels.shape}")
            # Load images as Variable
            # images = images.view(-1, seq_dim, input_dim).requires_grad_()
            # data = data.requires_grad()

            # Clear gradients w.r.t. parameters
            optimizer.zero_grad()

            # Forward pass to get output/logits
            # outputs.size() --> 100, 10
            outputs = model(data).squeeze()
            # print(f"outputs.shape: {outputs.shape}")
            # print(f"outputs: {outputs}")

            # Calculate Loss: softmax --> cross entropy loss
            loss = criterion(outputs, labels)

            # Getting gradients w.r.t. parameters
            loss.backward()

            # Updating parameters
            optimizer.step()

            iter += 1

            if iter % 50 == 0:
                # Calculate Accuracy         
                correct = 0
                total = 0
                # Iterate through test dataset
                for data, labels in test_loader:
                    data = data.double()
                    labels = labels.double()
                    # print(f"data.shape: {data.shape}")
                    # print(f"labels.shape: {labels.shape}")
                    # Load images to a Torch Variable
                    # images = images.view(-1, seq_dim, input_dim).requires_grad_()
                    # data = data.requires_grad()

                    # Forward pass only to get logits/output
                    outputs = model(data).squeeze()
                    # print(f"outputs.shape: {outputs.shape}")
                    # print(f"outputs: {outputs}")

                    # Get predictions from the maximum value
                    # _, predicted = torch.max(outputs.data, 1)
                    _, predicted = torch.max(outputs.data, 0)

                    # Total number of labels
                    total += labels.size(0)

                    # Total correct predictions
                    correct += (predicted == labels).sum()

                accuracy = 100 * correct / total

                # Print Loss
                print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

    
    
    
    
    
    # for i, (data, labels) in enumerate(train_loader):
        
        
        
        
train()    

train's defined input shape: 3132
val's defined input shape: 3132
LSTMModel(
  (lstm): LSTM(3132, 100, num_layers=3, batch_first=True)
  (fc): Linear(in_features=100, out_features=1, bias=True)
)
14
torch.Size([400, 3132])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([400, 100])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([400, 100])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([1, 100])
torch.Size([1])
Iteration: 50. Loss: 0.6947583280561123. Accuracy: 0.0
Iteration: 100. Loss: 0.6946104392013721. Accuracy: 0.0
Iteration: 150. Loss: 0.6743771189122384. Accuracy: 0.0
Iteration: 200. Loss: 0.684663355590329. Accuracy: 0.0
Iteration: 250. Loss: 0.6881782222720956. Accuracy: 0.0
Iteration: 300. Loss: 0.6994213170482642. Accuracy: 0.0
Iteration: 350. Loss: 0.6958855084979003. Accuracy: 0.0
Iteration: 400. Loss: 0.690952617687733. Accuracy: 0.0
Iteration: 450. Loss: 0.6987961126153708. Accuracy: 0.0
Iteration: 500. 