# Import libraries

In [2]:
import keras
import tensorflow_hub as hub
import tensorflow as tf
from tensorflow.data import Dataset
from keras import layers
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import datetime

# Declaring constants

In [3]:
# Side length in pixels of the square that every video frame is finally resized to.
IMG_SIZE = 224
# Number of frames kept per video: longer clips are trimmed, shorter ones are zero-padded.
MAX_SEQUENCE_LEN = 32

# Processing the video files
Code from: https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/vision/ipynb/video_transformers.ipynb#scrollTo=qidBV4ha1T1V

In [3]:
# Shared cropping layer, configured with cropping=(10, 70)
# (symmetric height/width crop amounts per the Keras Cropping2D convention).
image_crop = layers.Cropping2D(cropping=(10, 70))


def crop_image(frame):
    """Crop one frame with the module-level ``image_crop`` layer.

    The frame gets a leading axis of length 1 before it is passed through the
    layer; the layer output is converted to NumPy and every length-1 axis is
    squeezed away again before returning.
    """
    batched = frame[None, ...]
    return image_crop(batched).numpy().squeeze()

def preprocess_video(path):
    """Load a video and turn it into a fixed-length stack of square frames.

    The first 40 decoded frames are discarded. Every remaining frame is resized
    to 240x368, cropped with ``crop_image``, has its channel axis reversed
    (OpenCV delivers BGR, so this yields RGB), and is resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` integers. The clip is then trimmed or
    zero-padded to exactly ``MAX_SEQUENCE_LEN`` frames.

    Args:
        path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        A tuple ``(frames, mask)``:
        * ``frames`` - array laid out as (channels, MAX_SEQUENCE_LEN, IMG_SIZE, IMG_SIZE).
        * ``mask`` - float array of length ``MAX_SEQUENCE_LEN`` with 1 at positions
          that hold a real frame and 0 at zero-padded positions.

    Raises:
        ValueError: If no frame is left after skipping the start of the clip
            (e.g. the file could not be opened or is too short).
    """
    print("Start", path)

    # Cut first ~1.3 seconds of the video
    skip_frames = 40

    # Open the capture *before* the try block so `cap` is always bound when
    # `finally` runs; otherwise a failing constructor would surface as NameError.
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        frame_cnt = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_cnt < skip_frames:
                frame_cnt += 1
                continue

            frame = tf.image.resize(frame, (240, 368))
            frame = crop_image(frame)
            frame = frame[:, :, [2, 1, 0]]  # reverse channel order
            frame = tf.image.resize(frame, (IMG_SIZE, IMG_SIZE)).numpy().astype(int)
            frames.append(frame)
    finally:
        cap.release()

    if not frames:
        # Fail with a clear message instead of an opaque error from np.pad.
        raise ValueError(
            f"No usable frames in {path!r}: the file could not be read or has "
            f"no frames after the first {skip_frames}."
        )

    frames = np.array(frames)
    mask = np.zeros((MAX_SEQUENCE_LEN,))
    mask[:len(frames)] = 1

    if len(frames) > MAX_SEQUENCE_LEN:
        # Drop about one third of the surplus from the start and the rest from the end.
        # The window is expressed as start + length rather than with a negative stop:
        # a negative stop of -0 is just 0, which would turn the slice into an
        # empty array whenever exactly one frame is surplus.
        difference = len(frames) - MAX_SEQUENCE_LEN
        start = int(np.ceil(difference / 3))
        frames = frames[start:start + MAX_SEQUENCE_LEN, :, :, :]
    elif len(frames) < MAX_SEQUENCE_LEN:
        # Zero-pad along the frame axis only.
        frames = np.pad(
            frames,
            pad_width=((0, MAX_SEQUENCE_LEN - len(frames)), (0, 0), (0, 0), (0, 0)),
            mode="constant",
        )

    # (frames, height, width, channels) -> (channels, frames, height, width)
    frames = np.transpose(frames, [3, 0, 1, 2])
    return frames, mask



# Creating the dataset

In [6]:
# Load data/train.csv into `df`.
# NOTE(review): the writes to data/train.csv visible in this notebook are commented out,
# so this cell relies on the file already existing on disk - confirm which cell produced it.
df = pd.read_csv(os.path.join("data","train.csv"))

In [5]:
# Build one row per video file found under data/VideoFaceEmotion/train/<Emotion>/.
train_folder_prefix = os.path.join("data", "VideoFaceEmotion", "train")

# os.listdir returns entries in arbitrary order, so sort them to get the same row
# order (and therefore the same index) on every run. Non-directory entries are
# skipped because listing them below would raise.
emotion_dirs = sorted(
    entry
    for entry in os.listdir(train_folder_prefix)
    if os.path.isdir(os.path.join(train_folder_prefix, entry))
)

# Collect plain records and build the frame once instead of growing it with
# pd.concat inside the loop (which re-copies all previous rows on every iteration).
records = [
    {
        "video_name": video_name,
        "emotion": emotion_dir.lower(),  # the folder name, lower-cased, is the label
        "dataset": "train",
        "video_path": os.path.join(train_folder_prefix, emotion_dir, video_name),
    }
    for emotion_dir in emotion_dirs
    for video_name in sorted(os.listdir(os.path.join(train_folder_prefix, emotion_dir)))
]

# Explicit columns keep the schema stable even when no video was found.
df = pd.DataFrame(records, columns=["video_name", "emotion", "dataset", "video_path"])

#df.to_csv(os.path.join("data","train.csv"),index=False)


In [5]:
# Reload the video index from data/train_validation.csv and print it.
df = pd.read_csv(os.path.join("data", "train_validation.csv"))
print(df)

               video_name   emotion     dataset  \
0       trainAnger001.avi     anger       train   
1       trainAnger002.avi     anger  validation   
2       trainAnger003.avi     anger  validation   
3       trainAnger004.avi     anger       train   
4       trainAnger005.avi     anger       train   
..                    ...       ...         ...   
420  trainSurprise067.avi  surprise       train   
421  trainSurprise068.avi  surprise       train   
422  trainSurprise069.avi  surprise  validation   
423  trainSurprise070.avi  surprise  validation   
424  trainSurprise071.avi  surprise       train   

                                            video_path  
0    data\VideoFaceEmotion\train\Anger\trainAnger00...  
1    data\VideoFaceEmotion\train\Anger\trainAnger00...  
2    data\VideoFaceEmotion\train\Anger\trainAnger00...  
3    data\VideoFaceEmotion\train\Anger\trainAnger00...  
4    data\VideoFaceEmotion\train\Anger\trainAnger00...  
..                                           

## Creating a validation dataset

In [179]:
# Move a fixed share of every emotion's rows into the validation split.
# A fixed seed makes the draw reproducible and makes this cell idempotent: without it,
# every re-run samples again from all rows of the emotion and moves additional rows
# into "validation".
# NOTE(review): a train_validation.csv written by an earlier unseeded run will not
# match this seeded split - regenerate the CSV after applying this change.
VALIDATION_FRACTION = 0.2
SPLIT_SEED = 42

for emotion in ("anger", "happiness", "disgust", "surprise", "fear", "sadness"):
    validation_index = (
        df[df["emotion"] == emotion]
        .sample(frac=VALIDATION_FRACTION, random_state=SPLIT_SEED)
        .index
    )
    df.loc[validation_index, "dataset"] = "validation"

In [180]:
# Sanity check: share of rows per split (normalize=True gives fractions instead of counts).
df["dataset"].value_counts(normalize=True)

train         0.797647
validation    0.202353
Name: dataset, dtype: float64

In [181]:
# Write `df` to data/train_validation.csv; index=False leaves the row index out of the file.
df.to_csv(os.path.join("data", "train_validation.csv"),index=False)

In [5]:
# Reload `df` from data/train_validation.csv (the file written by the to_csv cell above).
df = pd.read_csv(os.path.join("data", "train_validation.csv"))

# Saving the datasets

In [8]:
# Partition the index by its "dataset" column; .copy() gives each split its own
# independent frame instead of a view-like slice of `df`.
train_df = df.loc[df["dataset"].eq("train")].copy()
validation_df = df.loc[df["dataset"].eq("validation")].copy()

#train_df.to_csv(os.path.join("data", "train.csv"),index=False)
#validation_df.to_csv(os.path.join("data", "validation.csv"),index=False)

# Creating the Tensorflow datasets

In [4]:
# Load the two splits from their CSV files.
# NOTE(review): the lines visible in this notebook that would write data/train.csv and
# data/validation.csv are commented out - confirm the files on disk match the current split.
train_df = pd.read_csv(os.path.join("data", "train.csv"))
validation_df = pd.read_csv(os.path.join("data", "validation.csv"))

In [5]:
def create_label_processor(classes):
    """Build a ``StringLookup`` layer that maps class names to integer indices.

    Args:
        classes: Sequence of class-name strings used as the layer's vocabulary.

    Returns:
        A ``layers.StringLookup`` created with ``num_oov_indices=0``, i.e. without
        any slot reserved for out-of-vocabulary strings.
    """
    return layers.StringLookup(num_oov_indices=0, vocabulary=classes)


# Build the vocabulary from the two splits loaded in this section rather than from
# `df`, which this section does not load itself (it would only exist as leftover
# kernel state). np.unique returns the sorted distinct names, so the label indices
# are the same as long as both sources contain the same set of emotions.
emotion_classes = np.unique(pd.concat([train_df["emotion"], validation_df["emotion"]]))
label_processor = create_label_processor(emotion_classes)

## Train dataset

In [None]:
# Preprocess every training video and stack the clips into one float array.
# Building the array straight from the list of per-video arrays avoids the detour
# through an object-dtype Series and a separate astype(float) copy.
x = np.array(
    [preprocess_video(video_path)[0] for video_path in train_df["video_path"]],
    dtype=float,
)

# Encode all labels with a single lookup call instead of one call per row,
# then add a trailing axis so every label is a length-1 vector.
y = label_processor(tf.convert_to_tensor(train_df["emotion"].tolist(), dtype=tf.string))
y = y[..., None]

print(x.shape, y.shape)
t_df = Dataset.from_tensor_slices((x, y))
t_df.save(os.path.join("data", "prepared_train_dataset"))

## Validation dataset

In [None]:
# Preprocess every validation video and stack the clips into one float array.
# Building the array straight from the list of per-video arrays avoids the detour
# through an object-dtype Series and a separate astype(float) copy.
x = np.array(
    [preprocess_video(video_path)[0] for video_path in validation_df["video_path"]],
    dtype=float,
)

# Encode all labels with a single lookup call instead of one call per row,
# then add a trailing axis so every label is a length-1 vector.
y = label_processor(tf.convert_to_tensor(validation_df["emotion"].tolist(), dtype=tf.string))
y = y[..., None]

print(x.shape, y.shape)
v_df = Dataset.from_tensor_slices((x, y))
v_df.save(os.path.join("data", "prepared_validation_dataset"))