# 08 Classify videos by joke type.

Each video demonstrates a single joke type. We have this as meta-data. Can we train a classifier based on the movement data?

Each video shows one joke from a set of five possibilities [Peekaboo,TearingPaper,NomNomNom,ThatsNotAHat,ThatsNotACat].

We will use TensorFlow to train a classifier to predict the joke type from the movement data.


In [1]:
import os
import pandas as pd
import numpy as np

# Make numpy values easier to read.
np.set_printoptions(precision=4, suppress=True)

import tensorflow as tf
from tensorflow.keras import layers

# local imports
import utils
import display
import calcs




## 8.1 Load the data

### Use either small demo 
Consists of 54 videos. From 4 families (parent and baby) demoing five jokes three times each. (Some missing)

In [8]:
videos_in = os.path.join("..","LookitLaughter.test")
demo_data = os.path.join("..","data", "demo")
temp_out = os.path.join("..","data","0_temp")
data_out = os.path.join("..","data","1_interim")
videos_out = os.path.join("..","data","2_final")

### Or the Full set

Consists of 1425 videos. From 90 familes (parent and baby) demoing approximately five jokes three times each. Some repetitions and omissions.

In [4]:
videos_in = os.path.join("..","..","LookitLaughter.full.videos")
temp_out = os.path.join("..","..","LookitLaughter.full.data","0_temp")
data_out = os.path.join("..","..","LookitLaughter.full.data","1_interim")
videos_out = os.path.join("..","..","LookitLaughter.full.data","2_final")


In [9]:
processedvideos = utils.getprocessedvideos(data_out)
minFrames = processedvideos['Frames'].min()
maxFrames = processedvideos['Frames'].max()
print(f"We have {len(processedvideos)} processed videos.")
print(f"Min Frames: {minFrames}\nMax Frames: {maxFrames}")
processedvideos.head()

Found existing processedvideos.xlsx
We have 54 processed videos.
Min Frames: 47
Max Frames: 586


Unnamed: 0,VideoID,ChildID,JokeType,Joke.Label,JokeNum,JokeRep,JokeTake,HowFunny,LaughYesNo,Frames,...,Speech.file,Speech.when,Objects.file,Objects.when,Understand.file,Understand.when,Faces.normed,Keypoints.normed,annotatedVideo,annotated.when
0,2UWdXP.joke1.rep2.take1.Peekaboo.mp4,2UWdXP,Peekaboo,2,1,2,1,Slightly funny,No,217,...,../data/1_interim/2UWdXP.joke1.rep2.take1.Peek...,2023-09-20 16:58:38,,,,,../data/1_interim/2UWdXP.joke1.rep2.take1.Peek...,../data/1_interim/2UWdXP.joke1.rep2.take1.Peek...,../data/2_final/2UWdXP.joke1.rep2.take1.Peekab...,2024-02-16 11:03:50
1,2UWdXP.joke1.rep3.take1.Peekaboo.mp4,2UWdXP,Peekaboo,2,1,3,1,Slightly funny,No,152,...,../data/1_interim/2UWdXP.joke1.rep3.take1.Peek...,2023-09-20 16:58:39,,,,,../data/1_interim/2UWdXP.joke1.rep3.take1.Peek...,../data/1_interim/2UWdXP.joke1.rep3.take1.Peek...,../data/2_final/2UWdXP.joke1.rep3.take1.Peekab...,2024-02-16 11:03:51
2,2UWdXP.joke2.rep1.take1.NomNomNom.mp4,2UWdXP,NomNomNom,1,2,1,1,Funny,No,95,...,../data/1_interim/2UWdXP.joke2.rep1.take1.NomN...,2023-09-20 16:58:40,,,,,../data/1_interim/2UWdXP.joke2.rep1.take1.NomN...,../data/1_interim/2UWdXP.joke2.rep1.take1.NomN...,../data/2_final/2UWdXP.joke2.rep1.take1.NomNom...,2024-02-16 11:03:52
3,2UWdXP.joke2.rep2.take1.NomNomNom.mp4,2UWdXP,NomNomNom,1,2,2,1,Slightly funny,No,97,...,../data/1_interim/2UWdXP.joke2.rep2.take1.NomN...,2023-09-20 16:58:40,,,,,../data/1_interim/2UWdXP.joke2.rep2.take1.NomN...,../data/1_interim/2UWdXP.joke2.rep2.take1.NomN...,../data/2_final/2UWdXP.joke2.rep2.take1.NomNom...,2024-02-16 11:03:53
4,2UWdXP.joke2.rep3.take1.NomNomNom.mp4,2UWdXP,NomNomNom,1,2,3,1,Slightly funny,No,133,...,../data/1_interim/2UWdXP.joke2.rep3.take1.NomN...,2023-09-20 16:58:48,,,,,../data/1_interim/2UWdXP.joke2.rep3.take1.NomN...,../data/1_interim/2UWdXP.joke2.rep3.take1.NomN...,../data/2_final/2UWdXP.joke2.rep3.take1.NomNom...,2024-02-16 11:03:54


## 8.2 Load and preprocess the data

1. Load normed movement data. 
2. Pad all sequences to the same length. 
3. Interpolate missing values (up to last frame of real data).
4. Replace final missing values with zeros.
5. Add to tf.data.Dataset.

In [10]:
def createMovementDataset(processedVideos, maxFrames = None, ragged = False):
    """
    Creates a movement dataset from processed videos.

    Args:
        processedVideos (pandas.DataFrame): A DataFrame containing processed video data.
        maxFrames (int, optional): The maximum number of frames to include in the dataset. Defaults to max of all videos.
        ragged (bool, optional): Whether to create a ragged tensor. Defaults to False (TODO: not implemented yet)

    Returns:
        tf.data.Dataset: A TensorFlow Dataset containing features and labels.
    """
    if maxFrames is None:
        maxFrames = processedVideos["Frames"].max()
    if ragged:
        raise NotImplementedError("Ragged tensors not implemented yet.")    
    dataset = []
    labels = []
    # for each row of processedVideos, we add one timeseries to the dataset
    for index, r in processedvideos.iterrows():
        df = pd.read_csv(r['Keypoints.normed'])
        df = utils.padMovementData(df, maxFrames)
        df = utils.interpolateMovementData(df)
        df = df.replace(np.nan, 0)
        df = utils.flattenMovementDataset(df)
        
        features = tf.convert_to_tensor(df.values, dtype=tf.float32)
        label = r["Joke.Label"]
        dataset.append(features)
        labels.append(label)
    
    return tf.data.Dataset.from_tensor_slices((dataset, labels))

In [11]:
frames = round((maxFrames + minFrames) / 2)
tfdataset = createMovementDataset(processedvideos,frames)

train, test = tf.keras.utils.split_dataset(tfdataset, left_size=0.8)

InvalidArgumentError: {{function_node __wrapped__Pack_N_54_device_/job:localhost/replica:0/task:0/device:CPU:0}} Shapes of all inputs must match: values[0].shape = [317,115] != values[28].shape = [585,115] [Op:Pack] name: component_0

In [None]:
#get first element of dataset so we can grab its dimensions
keyPoints = next(iter(train))[0]

#let's build a simple model
model = tf.keras.Sequential([
    layers.Input(shape=(keyPoints.shape[0], keyPoints.shape[1])),
    layers.LSTM(8),
    layers.Dense(1)
])

# Compile the model
model.compile(loss=tf.losses.MeanSquaredError(),
              sample_weight_mode='temporal',
              optimizer=tf.optimizers.Adam(),
              metrics=[tf.metrics.MeanAbsoluteError()])

# Train the model
model.fit(train.batch(32), epochs=10)


In [None]:
#let's evaluate the model
model.evaluate(test.batch(32))

#table of predictions
predictions = model.predict(test.batch(32))