In [1]:
import numpy as np
from tensorflow.keras import Model as Model_
from tensorflow.keras.layers import Input, PReLU, LeakyReLU, MaxPooling2D, Dropout, concatenate, UpSampling2D, ReLU, Conv2D, Flatten, Reshape, Conv1D, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend
from tensorflow.keras.models import Sequential
import tensorflow as tf
from tensorflow.keras.layers import Conv2DTranspose, Dense, BatchNormalization
from tensorflow.keras.datasets import mnist, cifar10
from tqdm.auto import tqdm

import matplotlib.pyplot as plt
import pandas as pd


# torch.manual_seed(0) # Set for testing purposes, please do not change!

print(tf.__version__)

2.1.0


In [20]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Loading and preprocessing data 

In [2]:
#Function to downsample the dataset to half of its size
def downsample_dataset(dataset) :
    return np.delete(dataset, np.s_[::2], axis=0)

In [3]:
import os
import re

class Dataset_Preprocessing:
    def __init__(self, dir_path, include_dimension = 2, sample_size = 50, total_classes = 17):
        
        #Dataset Directory path
        self.dir_path = dir_path
        
        #Which Dimension file to include, possible values: 2 and 3
        self.include_dimension = include_dimension
        
        #Total frames in one Sample
        self.sample_size = sample_size
        
        #Activity classes to include
        self.classes = ['SittingDown', 'Walking', 'Directions', 'Discussion', 'Sitting', 'Phoning', 'Eating', 'Posing', 'Greeting', 'Smoking']
        
        #Total activity classes
        self.total_classes = len(self.classes)
        
        #Subject Folders names in the Dataset
        self.internal_folders = ['S1', 'S5','S6','S7','S8','S9','S11']
    
    def read_dataset(self):
        try:
            #Contains all the different activity vectors
            activity_vector = {}
            
            #Contains the overall dataset
            sampled_data = None
            
            #Based on dimensions, which folder to use for extracting the dataset files
            data_folder = 'Poses_D2_Positions' if self.include_dimension == 2 else 'Poses_D3_Positions'
            
            #Checking if the dataset path is valid
            if not os.path.exists(self.dir_path):
                print('The Data Directory Does not Exist!')
                return None

            #Iterating over all the subject folders
            for fld in self.internal_folders:
                #Iterating for each file in the specified folder
                for file in os.listdir(os.path.join(self.dir_path, fld, data_folder)):
                    #Extracting the activity from the filename
                    activity = self.__extract_activity(file)
                    
                    if activity not in self.classes:
                        continue
                    
                    #Reading the CSV file using Pandas
                    data = pd.read_csv(os.path.join(self.dir_path, fld, data_folder, file), header=None)

                    #Formulating the activity vector using one hot encoding
                    if activity not in activity_vector:
                        total_keys = len(activity_vector.keys())
                        activity_vector[activity] = np.zeros(self.total_classes)
                        activity_vector[activity][total_keys] = 1
                    vector = activity_vector[activity]
                    
                    #Sampling the dataset
                    grouped_sample = self.__group_samples(data, self.sample_size, vector)
                    sampled_data = grouped_sample if sampled_data is None else np.append(sampled_data, grouped_sample, axis=0)
                    
            return sampled_data
        except Exception as e:
            print(e)
    
    def __extract_activity(self, filename):
        try:
            #Extracting the filename and excluding the extension
            name = os.path.splitext(filename)[0]
            
            #Substituting the empty string with characters other than english alphabets
            activity = re.sub('[^A-Za-z]+' , '' , name)
            return activity
        except Exception as e:
            print(e)
    
    def __group_samples(self, dataset, sample_size, activity):
        try:
            #Checking if the dataset is a Pandas Dataframe
            if not isinstance(dataset, pd.DataFrame):
                print('Expecting Pandas Dataframe, but got {}'.format(type(dataset)))
                return None
            
            #Appending activity class to each row in the dataset
            dataset = pd.concat([dataset, pd.DataFrame(np.tile(activity, (dataset.shape[0],1)))], axis=1)
            
            #Reshaping the dataset into sample batches
            total_samples = dataset.shape[0]//sample_size
            total_features = dataset.shape[1]
            grouped_rows = dataset.to_numpy()[:total_samples*self.sample_size].reshape((-1,self.sample_size, total_features))
            
            return grouped_rows
        except Exception as e:
            print(e)

In [10]:
#For short term prediction, we need a sample size of 20(10 frames input sequance, 10 frames predicted sequance)
sampled_data = Dataset_Preprocessing('/content/drive/MyDrive/Colab Notebooks/H3.6csv', sample_size=20).read_dataset()

In [11]:
sampled_data.shape

(77144, 20, 74)

In [12]:
print('Total Samples: {}'.format(sampled_data.shape[0]))
print('Total Frames: {}'.format(sampled_data.shape[1]))
print('Total Features: {}'.format(sampled_data.shape[2]))

Total Samples: 77144
Total Frames: 20
Total Features: 74


In [13]:
#Split the data into training, validation and test 
def split_data(data_array, validation_size=0.1, test_size=0.1) :
    assert validation_size+test_size < 0.5 , f"total size of validation and testing set should not exceed half of the dataset"
    assert validation_size > 0, f"validation size should be greater than zero"
    assert test_size > 0, f"test size should be greater than zero"
    
    validation_step = int(1/validation_size)
    test_step = int(1/test_size)
    
    
    mask = np.ones(data_array.shape, dtype=bool)
    validation_data = data_array[np.s_[::validation_step], :, :]
    mask[np.s_[1::validation_step],:,:] = False
    test_data = data_array[np.s_[1::test_step], :, :]
    mask[np.s_[::test_step],:,:] = False
    training_data = data_array[mask]
    training_data_size = data_array.shape[0] - (validation_data.shape[0]+test_data.shape[0])
    training_data = training_data.reshape((training_data_size, data_array.shape[1], data_array.shape[2]))
    return training_data, validation_data, test_data

In [14]:
def split_to_features_labels(dataset, input_sequance_size=10) :
    assert input_sequance_size < dataset.shape[1], f"input sequance should be smaller than the total sample size"
    features = dataset[:, np.s_[0:input_sequance_size], :]
    labels = dataset[:,np.s_[input_sequance_size:], :]
    
    return features, labels

In [15]:
#Default parameters make up for 80%-10%-10% split
training_data, validation_data, test_data = split_data(sampled_data)

In [16]:
print("Training data shape is ", training_data.shape)
print("Validation data shape is ", validation_data.shape)
print("Test data shape is ", test_data.shape)

Training data shape is  (61714, 20, 74)
Validation data shape is  (7715, 20, 74)
Test data shape is  (7715, 20, 74)


In [17]:
training_dataX, training_dataY = split_to_features_labels(training_data, input_sequance_size=10)

In [18]:
training_dataX.shape

(61714, 10, 74)

In [19]:
training_dataY.shape

(61714, 10, 74)

In [21]:
validation_dataX, validation_dataY = split_to_features_labels(validation_data, input_sequance_size=10)
test_dataX, test_dataY = split_to_features_labels(test_data, input_sequance_size=10)

# Start of model implementation

## Short term model implementation (Glogen only)

In [22]:
class GloGen_Bidirectional_RNN_encoder(Model_):
    def __init__(self, num_recurrent_neurons=200):
        super(GloGen_Bidirectional_RNN_encoder, self).__init__()

        #Return Sequances=True to assure return of output corresponding to each timestep
        self.recurrent_layer = LSTM(num_recurrent_neurons, return_sequences=True)

    def call(self, input_x):
        output = self.recurrent_layer(input_x)
        return output

In [23]:
class GloGen_Bidirectional_RNN_decoder(Model_):
  def __init__(self, num_recurrent_neurons=64):
    super(GloGen_Bidirectional_RNN_decoder, self).__init__()
    #Return Sequances=True to assure return of output corresponding to each timestep
    self.recurrent_layer = LSTM(num_recurrent_neurons, return_sequences=True)

  def call(self, encoder_output):
    glogen_output = self.recurrent_layer(encoder_output)
    #output = tf.one_hot(tf.argmax(output, axis = 1), depth=3)
    return glogen_output

In [24]:
class GloGen(Model_):
    def __init__(self, embedding_diminisions=200, decoder_output_diminsions=64):
        super(GloGen, self).__init__()
        self.encoder = GloGen_Bidirectional_RNN_encoder(embedding_diminisions)
        self.decoder = GloGen_Bidirectional_RNN_decoder(decoder_output_diminsions)

    def call(self, input_sequance):
        encoder_states = self.encoder(input_sequance)
        glogen_output = self.decoder(encoder_states)
        return glogen_output

In [25]:
glogen_bidirectional_RNN_encoder = GloGen_Bidirectional_RNN_encoder()

In [26]:
#simulating on validation data as training data is too larget to run on one go
output_encoder = glogen_bidirectional_RNN_encoder(validation_dataX)

In [27]:
#shape of 10x200 predictions for each sample inside the batch
output_encoder.shape

TensorShape([7715, 10, 200])

In [28]:
#Simulating the output shape for the decoder
glogen_decoder = GloGen_Bidirectional_RNN_decoder()

In [29]:
glogen_sparse_output_predictions = glogen_decoder(output_encoder)

In [30]:
#Prediction for 32 joints position for the next 10 timesteps for each sample inside the batch
glogen_sparse_output_predictions.shape

TensorShape([7715, 10, 64])

In [31]:
glogen_model= GloGen()

In [32]:
glogen_sparse_output_predictions = glogen_model(validation_dataX)

In [33]:
glogen_sparse_output_predictions.shape

TensorShape([7715, 10, 64])

In [66]:
class Optimizer:
    def __init__(self, model, mb = 8, lr = 0.002, opt=tf.keras.optimizers.Adam, lambda_1=0.5, lambda_2=0.5):
        self.model     = model
        self.optimizer = opt(learning_rate = lr)
        self.mb        = mb

        self.lambda_1 = lambda_1
        self.lambda_2 = lambda_2

        self.train_loss     = tf.keras.metrics.Mean(name='train_loss')
        self.test_loss     = tf.keras.metrics.Mean(name='test_loss')
  
    @tf.function
    def train_step(self, x , y):
        with tf.GradientTape() as tape:
            predictions = self.model(x)
            #TODO Get the features in a proper way
            loss = self.total_loss(y[:,:,:64], predictions)

        #print(self.model.trainable_variables)
        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        self.train_loss(loss)
        return loss

    @tf.function
    def test_step(self, x , y):
        predictions = self.model(x)
        #TODO Get the features in a proper way
        loss = self.total_loss(y[:,:,:64], predictions)
        self.test_loss(loss)
        
    def train(self):
        for mbX, mbY in self.train_ds:
            self.train_step(mbX, mbY)

    def test(self):
        for mbX, mbY in self.test_ds:
            self.test_step(mbX, mbY)    

    def run(self, dataX, dataY, testX, testY, epochs, verbose=2):
        historyTR = []
        historyTS = []
        template = '{} {}, {}: {}, {}: {}'
        self.train_ds = tf.data.Dataset.from_tensor_slices((dataX, dataY)).shuffle(16000).batch(self.mb)
        self.test_ds  = tf.data.Dataset.from_tensor_slices((testX,testY)).batch(self.mb)
        for i in range(epochs):

            self.train ()
            #   print(lossTR)
            self.test  ()
            if verbose > 0:
                print(template.format("epoch: ", i+1,
                              " TRAIN LOSS: ", self.train_loss.result(),
                              " TEST LOSS: " , self.test_loss.result()))
                               #" TRAIN ACC: " , self.train_accuracy.result()*100,
                               #" TEST ACC: "  , self.test_accuracy.result()*100) )

            temp = '{}'
            historyTR.append(float(temp.format(self.train_loss.result())))
            historyTS.append(float(temp.format(self.test_loss.result() )))

            self.train_loss.reset_states()
            self.test_loss.reset_states()
        return historyTR, historyTS

    #Defining the loss function utilities
    def loss_joint(self, predicted_sequance_batch, target_sequance_batch) :
        diff_norm_2 = tf.math.reduce_sum(tf.square(tf.subtract(predicted_sequance_batch, target_sequance_batch)), axis=2)
        return tf.reduce_sum(diff_norm_2, axis=1) 

    def loss_motion_flow(self, predicted_sequance_batch, target_sequance_batch) :
        predictions_tomporal_diffs = tf.experimental.numpy.diff(predicted_sequance_batch, axis=1)
        real_tomporal_diffs = tf.experimental.numpy.diff(target_sequance_batch, axis=1)
        prediction_motion_flow_diff_norm_2 = tf.reduce_sum(tf.square(tf.subtract(predictions_tomporal_diffs, real_tomporal_diffs)), axis=2)
        return tf.reduce_sum(prediction_motion_flow_diff_norm_2, axis=1)
    
    
    def total_loss(self, predicted_sequance_batch, target_sequance_batch) :
        joints_loss = self.loss_joint(predicted_sequance_batch, target_sequance_batch)
        motion_flow_loss = self.loss_motion_flow(predicted_sequance_batch, target_sequance_batch)
        return self.lambda_1*joints_loss + self.lambda_2*motion_flow_loss

In [38]:
training_dataX = np.float32(training_dataX)
training_dataY = np.float32(training_dataY)
validation_dataX = np.float32(validation_dataX)
validation_dataY = np.float32(validation_dataY)
test_dataX = np.float32(test_dataX)
test_dataY = np.float32(test_dataY)


In [67]:
#Running experiment for short term prediction training 
glogen_model = GloGen()
optimizer = Optimizer(glogen_model, mb=100, lr=0.002)

In [63]:
#Running with low learning rate
training_loss_history, validation_loss_history = optimizer.run(training_dataX, training_dataY, validation_dataX, validation_dataY, 10)

epoch:  1,  TRAIN LOSS: : 77421632.0,  TEST LOSS: : 77381944.0
epoch:  2,  TRAIN LOSS: : 77375424.0,  TEST LOSS: : 77351376.0
epoch:  3,  TRAIN LOSS: : 77353496.0,  TEST LOSS: : 77336920.0
epoch:  4,  TRAIN LOSS: : 77342992.0,  TEST LOSS: : 77329032.0
epoch:  5,  TRAIN LOSS: : 77336920.0,  TEST LOSS: : 77324376.0
epoch:  6,  TRAIN LOSS: : 77333144.0,  TEST LOSS: : 77321240.0
epoch:  7,  TRAIN LOSS: : 77330424.0,  TEST LOSS: : 77318952.0
epoch:  8,  TRAIN LOSS: : 77328376.0,  TEST LOSS: : 77317200.0
epoch:  9,  TRAIN LOSS: : 77326888.0,  TEST LOSS: : 77315856.0
epoch:  10,  TRAIN LOSS: : 77325648.0,  TEST LOSS: : 77314696.0


## Long term prediction model (Adding interpolation and Locgen) 

In [None]:
#returns a list of the interpolated frames between x0 and x1
def interpolateFrames(glogen_sparse_output_predictions, num_of_frames=5) :
  batch_size = glogen_sparse_output_predictions.shape[0]
  timesteps = glogen_sparse_output_predictions.shape[1]
  features = glogen_sparse_output_predictions.shape[2]
  interpolated_frames = np.zeros((batch_size, timesteps, num_of_frames, features))
  for batch in range(glogen_sparse_output_predictions.shape[0]) :
    for t in range(glogen_sparse_output_predictions.shape[1]-1) :
      for j in range(num_of_frames) :
        X_i0 = glogen_sparse_output_predictions[batch, t]
        X_i1 = glogen_sparse_output_predictions[batch, t+1]
        
        alpha_j = j/num_of_frames
        current_frame = alpha_j*X_i0 + (1-alpha_j)*X_i1
        interpolated_frames[batch, t, j] = current_frame

  return interpolated_frames 

In [None]:
interpolated_frames = interpolateFrames(glogen_sparse_output_predictions)

In [None]:
#Shape of interpolated frames should be 10(batch size) x 10(timesteps)
#x 5(num of interpolated frames between these timesteps) x 64(num_features)
interpolated_frames.shape

(10, 10, 5, 64)

In [None]:
#timesteps and interpolated frames diminsion can be flattened to 10(batch size) x 50(dense frames) x 64(features)
interpolated_frames = np.reshape(interpolated_frames, (10, 50, 64))

In [None]:
#Simulating the locgen phase with batches as well
locgen_encoder = LocGen_Bidirectional_RNN_encoder()
locgen_encoder_output = locgen_encoder(interpolated_frames)
print("loc gen encoder output is ", locgen_encoder_output.shape)

locgen_decoder = LocGen_Bidirectional_RNN_decoder()
final_predictions = locgen_decoder(locgen_encoder_output)

print("final predictions shape is ", final_predictions.shape)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

loc gen encoder output is  (10, 50, 200)
final predictions shape is  (10, 50, 64)


In [None]:
class LocGen_Bidirectional_RNN_encoder(Model_) :
  def __init__(self, num_recurrent_neurons=200) :
    super(LocGen_Bidirectional_RNN_encoder, self).__init__()
    #Return Sequances=True to assure return of output corresponding to each timestep
    self.recurrent_layer = LSTM(num_recurrent_neurons, return_sequences=True)


  def call(self, interpolated_output) :
    predictions = self.recurrent_layer(interpolated_output)
    return predictions

In [None]:
class LocGen_Bidirectional_RNN_decoder(Model_) :
  def __init__(self, num_recurrent_neurons=64) :
    super(LocGen_Bidirectional_RNN_decoder, self).__init__()
    #Return Sequances=True to assure return of output corresponding to each timestep
    self.recurrent_layer = LSTM(num_recurrent_neurons, return_sequences=True)

  def call(self, encoder_output) :
    final_predictions = self.recurrent_layer(encoder_output)
    return final_predictions