# 08 Classify videos by joke type.

Each video demonstrates a single joke type, which is recorded in the metadata. Can we train a classifier to predict the joke type from the movement data?

Each video shows one joke from a set of five possibilities [Peekaboo,TearingPaper,NomNomNom,ThatsNotAHat,ThatsNotACat].

We will use TensorFlow to train a classifier to predict the joke type from the movement data.


In [None]:
import os
import pandas as pd
import numpy as np

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
from tensorflow.keras import layers

import utils
import display
import calcs

### Either small demo set

In [None]:
# Folder layout used by this notebook, all relative to the parent directory.
videos_in = os.path.join("..", "LookitLaughter.test")

# Everything else lives under ../data, one sub-folder per variable.
_data_root = os.path.join("..", "data")
demo_data, temp_out, data_out, videos_out = (
    os.path.join(_data_root, leaf)
    for leaf in ("demo", "0_temp", "1_interim", "2_final")
)



In [None]:
# Load the table of processed videos and report the range of its 'Frames' column.
processedvideos = utils.getprocessedvideos(data_out)
frame_counts = processedvideos['Frames']
minFrames, maxFrames = frame_counts.min(), frame_counts.max()
print(f"We have {len(processedvideos)} processed videos.")
print(f"Min Frames: {minFrames}\nMax Frames: {maxFrames}")
# Last expression of the cell: shows the first rows in the notebook output.
processedvideos.head()

In [None]:
def appendDictToDf(df, dict_to_append):
    """
    Return df with the rows of dict_to_append added at the end.

    df is not modified. dict_to_append may be a DataFrame (appended as is)
    or anything accepted by pd.DataFrame.from_records, e.g. a list of dicts.
    The result always gets a fresh 0..n-1 index.
    """
    if isinstance(dict_to_append, pd.DataFrame):
        # Passing a DataFrame to from_records is deprecated in recent pandas,
        # and it is already in the shape we need, so use it directly.
        new_rows = dict_to_append
    else:
        new_rows = pd.DataFrame.from_records(dict_to_append)
    return pd.concat([df, new_rows], ignore_index=True)

def padMovementData(keyPoints, maxFrames = None):
    """
    We pad the keyPoints table so that for each person [0,1] there is a row
    for every frame number from 0 to maxFrames (inclusive).

    Rows that had to be added contain NaN in every column except "frame",
    "index" and "person", which are set to the frame number and the person
    index. Existing rows are kept unchanged, including any whose frame
    number is larger than maxFrames.

    keyPoints: DataFrame with at least the columns "frame" and "index".
    maxFrames: last frame number to pad to. If None, the largest frame
               number present in keyPoints is used.

    Returns a new DataFrame sorted by frame, then person index.
    """
    if maxFrames is None:
        # Pad only up to the last frame that actually occurs in this video.
        # (An empty table has no frames at all, hence -1 -> empty range.)
        maxFrames = -1 if keyPoints.empty else int(keyPoints["frame"].max())

    # a list of frame numbers
    frameNumbers = pd.Index(np.arange(0, maxFrames + 1), name="frame")

    # There are two people indexed 0 and 1; collect the original and the
    # filler rows of both and concatenate them once at the end.
    pieces = []
    for idx in range(2):
        thisperson = keyPoints[keyPoints["index"] == idx]
        pieces.append(thisperson)

        missing_frames = frameNumbers.difference(thisperson["frame"])
        if missing_frames.empty:
            continue

        # Build the filler as float NaN (not object dtype) so numeric columns
        # stay numeric after concatenation and can be interpolated later.
        add_df = pd.DataFrame(np.nan, index=missing_frames, columns=thisperson.columns)
        add_df["frame"] = missing_frames
        add_df["index"] = idx
        add_df["person"] = idx
        pieces.append(add_df)

    paddedKeyPoints = pd.concat(pieces, ignore_index=True)
    return paddedKeyPoints.sort_values(by=["frame", "index"])
    

In [None]:
def interpolateMovementData(keyPoints, maxFrames=None):
    """
    We interpolate the keyPoints table so that for each person [0,1]:
    1. There is exactly one row for each frame number from 0 to maxFrames
       (inclusive); frames outside that range are dropped.
    2. Missing values are filled by linear interpolation along the frame
       axis. Filling runs backwards, so gaps before the first valid value
       are filled too, while gaps after the last valid value stay NaN.

    keyPoints: DataFrame with at least the columns "frame" and "index";
               each person must have at most one row per frame.
    maxFrames: last frame number to cover. If None, the largest frame
               number present in keyPoints is used.

    Returns a new DataFrame sorted by frame, then person index.
    """
    if maxFrames is None:
        # Derive the range from the data instead of a notebook-level global.
        maxFrames = -1 if keyPoints.empty else int(keyPoints["frame"].max())

    # a list of frame numbers
    frameNumbers = pd.Index(np.arange(0, maxFrames + 1), name="frame")

    # Lead with an empty copy of the input so the original column order
    # survives the concatenation below.
    pieces = [keyPoints.iloc[:0]]

    # There are two people indexed 0 and 1.
    # We need to interpolate both arrays
    for idx in range(2):
        thisperson = keyPoints[keyPoints["index"] == idx]
        thisperson = thisperson.set_index("frame").reindex(frameNumbers)
        thisperson = thisperson.interpolate(method='linear', axis=0, limit_direction='backward')
        # Restore the identifying columns: "frame" was moved into the index,
        # and the other two may have been NaN on newly created rows.
        thisperson["frame"] = thisperson.index
        thisperson["index"] = idx
        thisperson["person"] = idx
        pieces.append(thisperson)

    interpolatedKeyPoints = pd.concat(pieces, ignore_index=True)
    return interpolatedKeyPoints.sort_values(by=["frame", "index"])

def flattenMovementDataset(kps):
    """
    Reshape the keypoints from one row per (frame, person) to one row per
    frame, with both people side by side.

    Every column other than "frame" and "index" is split into one column per
    person index, named "<person index>_<original column name>". "frame"
    comes back as an ordinary first column.
    """
    wide = kps.pivot(index='frame', columns='index')

    # pivot yields two-level column labels (original name, person index);
    # collapse each pair into a single flat string label.
    flat_names = []
    for measure, person in wide.columns:
        flat_names.append(str(person) + "_" + measure)
    wide.columns = flat_names

    return wide.reset_index()

In [None]:
# Demo: read the keypoints of one video (presumably the one named by
# VIDEO_FILE - confirm against utils.readKeyPointsFromCSV) and pad them so
# both people have a row for every frame number 0..400.
# NOTE(review): VIDEO_FILE is not defined anywhere in the visible notebook;
# it has to be set before this cell can run.
kps = utils.readKeyPointsFromCSV(processedvideos,VIDEO_FILE,True)
 
paddedKps = padMovementData(kps, 400)
paddedKps.head()

In [None]:
# Demo: fill the NaN rows created by the padding step using linear
# interpolation, then preview the first rows.
interpolatedKps = interpolateMovementData(paddedKps)
interpolatedKps.head()

In [None]:
def createMovementDataset(processedVideos):
    """
    Build a tf.data.Dataset with one (features, label) pair per video.

    processedVideos: DataFrame with one row per video and the columns
        "Frames" (used to find the common padding length),
        "Keypoints.normed" (path of the keypoints CSV to read) and
        "Joke.Label" (the label stored with the features).

    For each row the CSV is read, padded to the largest "Frames" value in
    processedVideos, interpolated, has its remaining NaNs replaced by 0 and
    is flattened to one row per frame. All columns of that table (including
    "frame") become a float32 tensor.
    """
    # Pad every video to the same length so the tensors can be stacked.
    maxFrames = processedVideos["Frames"].max()

    dataset = []
    labels = []
    # for each row of processedVideos, we add one timeseries to the dataset
    for _, r in processedVideos.iterrows():
        df = pd.read_csv(r['Keypoints.normed'])
        df = padMovementData(df, maxFrames)
        df = interpolateMovementData(df)
        # Interpolation leaves trailing gaps as NaN; the model needs numbers.
        df = df.replace(np.nan, 0)
        df = flattenMovementDataset(df)

        dataset.append(tf.convert_to_tensor(df.values, dtype=tf.float32))
        labels.append(r["Joke.Label"])

    return tf.data.Dataset.from_tensor_slices((dataset, labels))

In [None]:
# Demo: pivot the interpolated keypoints to one row per frame, with the
# columns of both people side by side, then preview the first rows.
flattenedKps = flattenMovementDataset(interpolatedKps)
flattenedKps.head()

In [None]:
# Build the full dataset (one padded time series plus label per video).
tfdataset = createMovementDataset(processedvideos)

# Hold out 20% of the videos for evaluation.
# NOTE(review): no shuffle argument is passed, so the split presumably follows
# the row order of processedvideos - confirm the classes are mixed in that order.
train, test = tf.keras.utils.split_dataset(tfdataset, left_size=0.8)

In [None]:
# Get the first element of the dataset so we can grab its dimensions:
# element [0] of a (features, label) pair is the features tensor.
keyPoints = next(iter(train))[0]

# Let's build a simple model: a small LSTM reads the whole time series and a
# single dense unit turns its final state into one number.
# NOTE(review): the stated goal is a five-way joke-type classifier, but a
# single output trained with mean squared error is a regression set-up.
# Switching to a 5-unit softmax head with a categorical loss depends on how
# "Joke.Label" is encoded - confirm before changing.
model = tf.keras.Sequential([
    layers.Input(shape=(keyPoints.shape[0], keyPoints.shape[1])),
    layers.LSTM(8),
    layers.Dense(1)
])

# Compile the model.
# sample_weight_mode='temporal' was dropped: no sample weights are passed to
# fit() and the model emits one output per sequence, so it had nothing to act
# on, and current Keras releases do not accept it as a compile() argument.
model.compile(loss=tf.losses.MeanSquaredError(),
              optimizer=tf.optimizers.Adam(),
              metrics=[tf.metrics.MeanAbsoluteError()])

# Train the model
model.fit(train.batch(32), epochs=10)


In [None]:
# Let's evaluate the model on the held-out split, in batches of 32.
# The result holds the loss followed by the metrics given to compile().
model.evaluate(test.batch(32))