# Declaring constants

In [4]:
import tensorflow as tf 
from keras import layers
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [6]:
# Side length, in pixels, that every frame is resized to (frames end up square).
IMG_SIZE = 224
# Number of frames each clip is trimmed or zero-padded to.
MAX_SEQUENCE_LEN = 32

# Preparing the Data
Code adapted from the Keras `video_transformers` example notebook: https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/vision/ipynb/video_transformers.ipynb#scrollTo=qidBV4ha1T1V

In [17]:
import tensorflow as tf 
from keras import layers
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt


# Shared cropping layer: symmetric crop of 10 rows (top and bottom) and
# 70 columns (left and right).
image_crop = layers.Cropping2D(cropping=(10, 70))


def crop_image(frame):
    """Crop a single frame with the shared ``image_crop`` layer.

    The layer operates on batches, so the frame is given a temporary leading
    batch axis; ``squeeze()`` removes it again (together with any other
    size-1 axis the result may have).

    Args:
        frame: One image as an array or tensor. Presumably laid out as
            (height, width, channels), which is what ``preprocess_video``
            passes in.

    Returns:
        The cropped frame as a NumPy array.
    """
    batched = frame[None, ...]
    return image_crop(batched).numpy().squeeze()

def preprocess_video(path):
    """Decode a video into a fixed-length stack of preprocessed frames.

    The first 40 decoded frames are skipped. Every later frame is resized to
    240x368, cropped with ``crop_image``, has its channel order reversed
    (OpenCV decodes BGR, so this yields RGB) and is resized to
    IMG_SIZE x IMG_SIZE. The clip is then trimmed or zero-padded to exactly
    MAX_SEQUENCE_LEN frames.

    Args:
        path: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        A ``(frames, mask)`` tuple.
        frames: integer array of shape
            (3, IMG_SIZE, IMG_SIZE, MAX_SEQUENCE_LEN), i.e. the stacked
            (time, height, width, channel) frames transposed with
            ``[3, 1, 2, 0]``.
        mask: float array of shape (MAX_SEQUENCE_LEN,) holding 1 for
            positions backed by a real frame and 0 for padded positions.

    Raises:
        ValueError: If no frame is left after skipping the intro (unreadable
            file or a clip of 40 frames or fewer).
    """
    print("Start", path)
    # Open the capture before the try block so that `cap` is always bound
    # when the finally clause releases it.
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        frame_cnt = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Cut first ~1.3 seconds of the video
            if frame_cnt < 40:
                frame_cnt += 1
                continue

            frame = tf.image.resize(frame, (240, 368))
            frame = crop_image(frame)
            frame = frame[:, :, [2, 1, 0]]
            frame = tf.image.resize(frame, (IMG_SIZE, IMG_SIZE)).numpy().astype(int)
            frames.append(frame)
    finally:
        cap.release()

    if not frames:
        # Without this guard the 4-axis padding below fails with an opaque
        # ValueError on an empty 1-D array.
        raise ValueError(f"No frames left after skipping the intro of video: {path}")

    frames = np.array(frames)
    mask = np.zeros((MAX_SEQUENCE_LEN,))
    mask[:len(frames)] = 1
    if len(frames) > MAX_SEQUENCE_LEN:
        # Drop roughly one third of the surplus at the start and the rest at
        # the end. Slicing by start + length (rather than with a negative end
        # index) keeps exactly MAX_SEQUENCE_LEN frames for every surplus: a
        # negative end of "-0" would select nothing, and surpluses with
        # remainder 2 modulo 3 would leave one frame too many.
        difference = len(frames) - MAX_SEQUENCE_LEN
        start = int(np.ceil(difference / 3))
        frames = frames[start:start + MAX_SEQUENCE_LEN]
    if len(frames) < MAX_SEQUENCE_LEN:
        # Zero-pad along the time axis only.
        frames = np.pad(
            frames,
            pad_width=((0, MAX_SEQUENCE_LEN - len(frames)), (0, 0), (0, 0), (0, 0)),
            mode="constant",
        )
    # NOTE(review): this yields (channels, height, width, time), while the
    # model input in this notebook is declared as
    # (3, MAX_SEQUENCE_LEN, IMG_SIZE, IMG_SIZE) -- confirm the intended axis
    # order before training.
    frames = np.transpose(frames, [3, 1, 2, 0])
    return frames, mask



In [8]:
import pandas as pd
# Load the table of videos stored in data/train.csv.
df = pd.read_csv(os.path.join("data","train.csv"))

In [5]:
import pandas as pd

train_folder_prefix = os.path.join("data", "VideoFaceEmotion", "train")
# Sorted so that row order (and therefore the index) does not depend on the
# arbitrary order in which the OS lists directory entries.
emotion_dirs = sorted(os.listdir(train_folder_prefix))

# One record per video file. Each sub-folder name, lower-cased, is used as the
# emotion label of the videos inside it; every row starts in the "train" split.
records = [
    {
        "video_name": video_name,
        "emotion": emotion_dir.lower(),
        "dataset": "train",
        "video_path": os.path.join(train_folder_prefix, emotion_dir, video_name),
    }
    for emotion_dir in emotion_dirs
    for video_name in sorted(os.listdir(os.path.join(train_folder_prefix, emotion_dir)))
]

# Build the frame in one go instead of growing it with pd.concat per folder;
# the explicit column list keeps the schema even when no video is found.
df = pd.DataFrame(records, columns=["video_name", "emotion", "dataset", "video_path"])

#df.to_csv(os.path.join("data","train.csv"),index=False)


In [9]:
# Reload the table that carries the train/validation assignment and print it.
df = pd.read_csv(os.path.join("data", "train_validation.csv"))
print(df)

               video_name   emotion     dataset  \
0       trainAnger001.avi     anger       train   
1       trainAnger002.avi     anger  validation   
2       trainAnger003.avi     anger  validation   
3       trainAnger004.avi     anger       train   
4       trainAnger005.avi     anger       train   
..                    ...       ...         ...   
420  trainSurprise067.avi  surprise       train   
421  trainSurprise068.avi  surprise       train   
422  trainSurprise069.avi  surprise  validation   
423  trainSurprise070.avi  surprise  validation   
424  trainSurprise071.avi  surprise       train   

                                            video_path  
0    data\VideoFaceEmotion\train\Anger\trainAnger00...  
1    data\VideoFaceEmotion\train\Anger\trainAnger00...  
2    data\VideoFaceEmotion\train\Anger\trainAnger00...  
3    data\VideoFaceEmotion\train\Anger\trainAnger00...  
4    data\VideoFaceEmotion\train\Anger\trainAnger00...  
..                                           

## Create a validation set

In [179]:
# Move a stratified share of every emotion's videos into the validation split.
# A fixed seed makes the split reproducible and the cell idempotent: re-running
# it selects the same rows instead of sampling again and growing the
# validation share.
VALIDATION_FRACTION = 0.2
VALIDATION_SEED = 42

for emotion in df["emotion"].dropna().unique():
    validation_index = (
        df[df["emotion"] == emotion]
        .sample(frac=VALIDATION_FRACTION, random_state=VALIDATION_SEED)
        .index
    )
    df.loc[validation_index, "dataset"] = "validation"

In [180]:
# Sanity check: share of rows assigned to each split.
df["dataset"].value_counts(normalize=True)

train         0.797647
validation    0.202353
Name: dataset, dtype: float64

In [181]:
# Persist the table together with its train/validation assignment.
df.to_csv(os.path.join("data", "train_validation.csv"),index=False)

In [114]:
# Reload the saved table, including the split assignment, from disk.
df = pd.read_csv(os.path.join("data", "train_validation.csv"))

# Building the model
Code adapted from the Keras `video_classification` example notebook: https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/vision/ipynb/video_classification.ipynb#scrollTo=8PXw88Y1_s2x

In [10]:
import keras
from keras import layers
import tensorflow_hub as hub


def create_label_processor(classes):
    """Build the layer that maps label strings to integer class ids.

    Args:
        classes: Vocabulary of label strings handed to ``StringLookup``.

    Returns:
        A ``layers.StringLookup`` configured with ``classes`` as its
        vocabulary and without any out-of-vocabulary indices.
    """
    lookup = layers.StringLookup(vocabulary=classes, num_oov_indices=0)
    return lookup



def create_transformer_model(classes):
    """Build the emotion classifier on top of a frozen video feature extractor.

    A TF-Hub video Swin layer (marked non-trainable) produces features that
    are fed through a small Conv3D / MaxPool3D / Dense head ending in a
    softmax over ``classes``.

    Args:
        classes: Sequence of class labels; only its length is used, to size
            the output layer.

    Returns:
        An uncompiled ``keras.Model`` taking one input of shape
        (3, MAX_SEQUENCE_LEN, IMG_SIZE, IMG_SIZE) per sample and returning
        ``len(classes)`` softmax probabilities.
    """
    feature_extractor = hub.KerasLayer("https://tfhub.dev/shoaib6174/swin_small_patch244_window877_kinetics400_1k/1")
    # Freeze the pretrained backbone; only the head below is trained.
    feature_extractor.trainable = False

    video_input = keras.Input((3, MAX_SEQUENCE_LEN, IMG_SIZE, IMG_SIZE))
    x = feature_extractor(video_input)
    # NOTE(review): the extractor output looks channels-first
    # (None, 768, 16, 7, 7), but Conv3D / MaxPool3D use their default
    # channels-last layout, so the 768 axis is convolved as a spatial axis and
    # the trailing 7 is treated as channels -- confirm this is intended.
    x = layers.Conv3D(filters=16, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(units=16, activation="relu")(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(8, activation="relu")(x)
    output = layers.Dense(len(classes), activation="softmax")(x)

    return keras.Model([video_input], output, name="RNN_Emotion_Recognition")


In [11]:
import numpy as np

# Sorted, de-duplicated emotion labels; they size the model's output layer and
# serve as the label vocabulary.
classes = np.unique(df["emotion"])

transformer_model = create_transformer_model(classes=classes)
transformer_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

label_processor = create_label_processor(classes)


In [12]:
# Print the layer-by-layer overview (output shapes and parameter counts).
transformer_model.summary()

Model: "RNN_Emotion_Recognition"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 3, 32, 224, 224)  0         
                             ]                                   
                                                                 
 keras_layer_1 (KerasLayer)  (None, 768, 16, 7, 7)     53197014  
                                                                 
 conv3d_2 (Conv3D)           (None, 766, 14, 5, 16)    3040      
                                                                 
 max_pooling3d_1 (MaxPooling  (None, 383, 7, 2, 16)    0         
 3D)                                                             
                                                                 
 flatten (Flatten)           (None, 85792)             0         
                                                                 
 dense (Dense)               (None, 16)    

In [128]:
# Split the table by its "dataset" column; .copy() makes each part an
# independent frame rather than a view of df.
train_df = df.loc[df["dataset"].eq("train")].copy()
validation_df = df.loc[df["dataset"].eq("validation")].copy()

# Persist both splits. Note that this overwrites data/train.csv.
train_csv_path = os.path.join("data", "train.csv")
train_df.to_csv(train_csv_path, index=False)
validation_df.to_csv(os.path.join("data", "validation.csv"), index=False)

In [10]:
# Reload both splits from the CSV files written to the data folder.
train_df, validation_df = (
    pd.read_csv(os.path.join("data", f"{split}.csv"))
    for split in ("train", "validation")
)

In [11]:
import tensorflow as tf
from tensorflow.data import Dataset


def dataset_entry_to_preprocessed_video(path, label):
    """Turn one (path, label) dataset entry into preprocessed video data.

    Args:
        path: Eager string tensor holding the video path; it is converted to
            a Python ``str`` before being passed to ``preprocess_video``.
        label: Label belonging to the video; passed through untouched.

    Returns:
        A ``(video, mask, label)`` tuple, where ``video`` and ``mask`` are
        the two values returned by ``preprocess_video``.
    """
    decoded_path = path.numpy().decode()
    frames, frame_mask = preprocess_video(decoded_path)
    return (frames, frame_mask, label)
  


def _build_video_dataset(split_df):
    """Create a dataset of (video, mask, label) entries for one split.

    Args:
        split_df: DataFrame with a "video_path" and an "emotion" column.

    Returns:
        A ``tf.data.Dataset`` whose elements are produced by running
        ``dataset_entry_to_preprocessed_video`` on every (path, label) pair
        through ``tf.py_function``.
    """
    paths_and_labels = Dataset.from_tensor_slices(
        (split_df["video_path"].values, split_df["emotion"].values)
    )
    return paths_and_labels.map(
        lambda path, label: tf.py_function(
            dataset_entry_to_preprocessed_video,
            inp=[path, label],
            Tout=[tf.float32, tf.float32, tf.string],
        )
    )


# Saving iterates the dataset, so every video of the split is decoded and
# preprocessed here; expect these calls to take a while.
t_df = _build_video_dataset(train_df)
t_df.save(os.path.join("data", "prepared_train_dataset"))

v_df = _build_video_dataset(validation_df)
v_df.save(os.path.join("data", "prepared_validation_dataset"))



data\VideoFaceEmotion\train\Anger\trainAnger001.avi
data\VideoFaceEmotion\train\Anger\trainAnger004.avi
data\VideoFaceEmotion\train\Anger\trainAnger005.avi
data\VideoFaceEmotion\train\Anger\trainAnger007.avi
data\VideoFaceEmotion\train\Anger\trainAnger008.avi
data\VideoFaceEmotion\train\Anger\trainAnger009.avi
data\VideoFaceEmotion\train\Anger\trainAnger010.avi
data\VideoFaceEmotion\train\Anger\trainAnger012.avi
data\VideoFaceEmotion\train\Anger\trainAnger013.avi
data\VideoFaceEmotion\train\Anger\trainAnger014.avi
data\VideoFaceEmotion\train\Anger\trainAnger015.avi
data\VideoFaceEmotion\train\Anger\trainAnger016.avi
data\VideoFaceEmotion\train\Anger\trainAnger017.avi
data\VideoFaceEmotion\train\Anger\trainAnger018.avi
data\VideoFaceEmotion\train\Anger\trainAnger020.avi
data\VideoFaceEmotion\train\Anger\trainAnger023.avi
data\VideoFaceEmotion\train\Anger\trainAnger024.avi
data\VideoFaceEmotion\train\Anger\trainAnger025.avi
data\VideoFaceEmotion\train\Anger\trainAnger026.avi
data\VideoFa

tf.Tensor(
[[[[216. 216. 216. ... 216. 216. 216.]
   [216. 216. 216. ... 216. 216. 216.]
   [215. 217. 217. ... 216. 216. 216.]
   ...
   [205. 205. 206. ... 207. 207. 207.]
   [205. 205. 205. ... 205. 205. 205.]
   [205. 205. 205. ... 205. 205. 205.]]

  [[216. 216. 216. ... 216. 216. 216.]
   [216. 216. 216. ... 217. 217. 216.]
   [216. 219. 217. ... 217. 217. 217.]
   ...
   [204. 204. 206. ... 207. 207. 207.]
   [204. 204. 205. ... 205. 205. 205.]
   [204. 204. 204. ... 205. 205. 205.]]

  [[215. 215. 215. ... 215. 215. 215.]
   [215. 215. 215. ... 215. 215. 215.]
   [217. 217. 217. ... 215. 215. 217.]
   ...
   [204. 206. 206. ... 205. 205. 205.]
   [204. 205. 205. ... 205. 205. 205.]
   [204. 204. 204. ... 205. 205. 205.]]

  ...

  [[204. 204. 205. ... 205. 205. 204.]
   [204. 204. 205. ... 206. 204. 204.]
   [203. 205. 205. ... 207. 204. 205.]
   ...
   [196. 196. 196. ... 197. 197. 197.]
   [194. 194. 195. ... 197. 197. 197.]
   [194. 194. 195. ... 197. 197. 197.]]

  [[203. 2

KeyboardInterrupt: 