In [1]:
import numpy as np
from tensorflow.keras import Model as Model_
from tensorflow.keras.layers import Input, PReLU, LeakyReLU, MaxPooling2D, Dropout, concatenate, UpSampling2D, ReLU, Conv2D, Flatten, Reshape, Conv1D, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend
from tensorflow.keras.models import Sequential
import tensorflow as tf
from tensorflow.keras.layers import Conv2DTranspose, Dense, BatchNormalization
from tensorflow.keras.datasets import mnist, cifar10
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import pandas as pd


# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling repeat across "Restart & Run All".
# (This notebook uses TensorFlow, so the seed is set through Keras rather
# than through torch.)
SEED = 0
tf.keras.utils.set_random_seed(SEED)

# Record the Keras version this notebook was executed with.
print(tf.keras.__version__)

2.8.0


In [2]:
!pip install tensorflow_model_remediation

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_model_remediation
  Downloading tensorflow_model_remediation-0.1.7.1-py3-none-any.whl (142 kB)
[K     |████████████████████████████████| 142 kB 27.7 MB/s 
Collecting mock
  Downloading mock-4.0.3-py3-none-any.whl (28 kB)
Installing collected packages: mock, tensorflow-model-remediation
Successfully installed mock-4.0.3 tensorflow-model-remediation-0.1.7.1


In [3]:
import tensorflow_model_remediation

In [4]:
# Google Colab only: mount Google Drive at /content/drive so that the dataset
# directory read later in this notebook (under /content/drive/MyDrive) is reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os
import re

class Dataset_Preprocessing:
    """Read per-subject pose CSV files and cut them into fixed-length labelled samples.

    Files are looked up under
    ``<dir_path>/<subject>/<Poses_D2_Positions | Poses_D3_Positions>``.
    The activity name is derived from each file name; files whose activity is
    not listed in ``self.classes`` are skipped. Every row of a kept file gets
    the activity's one-hot vector appended as extra columns, and the rows are
    then grouped into non-overlapping windows of ``sample_size`` rows.
    """

    def __init__(self, dir_path, include_dimension = 2, sample_size = 50, total_classes = 17):
        """Store the dataset location and sampling options.

        Args:
            dir_path: Root directory containing one sub-folder per subject.
            include_dimension: 2 selects the ``Poses_D2_Positions`` folders;
                any other value selects ``Poses_D3_Positions``.
            sample_size: Number of consecutive rows (frames) per sample.
            total_classes: Accepted for backward compatibility only. The
                value is ignored: the number of classes is always
                ``len(self.classes)``.
        """
        #Dataset Directory path
        self.dir_path = dir_path

        #Which Dimension file to include, possible values: 2 and 3
        self.include_dimension = include_dimension

        #Total frames in one Sample
        self.sample_size = sample_size

        #Activity classes to include
        self.classes = ['SittingDown', 'Walking', 'Directions', 'Discussion', 'Sitting', 'Phoning', 'Eating', 'Posing', 'Greeting', 'Smoking']

        #Total activity classes (derived from the list above, not from the argument)
        self.total_classes = len(self.classes)

        #Subject Folders names in the Dataset
        self.internal_folders = ['S1', 'S5','S6','S7','S8','S9','S11']

    def read_dataset(self):
        """Load every matching CSV file and return all samples as one array.

        Returns:
            A NumPy array of shape ``(samples, sample_size, features + classes)``,
            or ``None`` when the dataset directory is missing, when no file
            matches ``self.classes``, or when reading fails (the error is
            printed in that case).
        """
        #Based on dimensions, which folder to use for extracting the dataset files
        data_folder = 'Poses_D2_Positions' if self.include_dimension == 2 else 'Poses_D3_Positions'

        #Checking if the dataset path is valid
        if not os.path.exists(self.dir_path):
            print('The Data Directory Does not Exist!')
            return None

        try:
            # Per-file sample arrays are collected and concatenated once at the
            # end; appending to a growing array inside the loop would copy the
            # whole dataset on every file.
            sample_chunks = []

            #Iterating over all the subject folders
            for fld in self.internal_folders:
                folder_path = os.path.join(self.dir_path, fld, data_folder)

                # os.listdir returns entries in arbitrary order; sorting keeps
                # the sample order identical from run to run.
                for file in sorted(os.listdir(folder_path)):
                    #Extracting the activity from the filename
                    activity = self.__extract_activity(file)

                    if activity not in self.classes:
                        continue

                    #Reading the CSV file using Pandas
                    data = pd.read_csv(os.path.join(folder_path, file), header=None)

                    vector = self.__one_hot(activity)

                    #Sampling the dataset
                    sample_chunks.append(self.__group_samples(data, self.sample_size, vector))

            if not sample_chunks:
                return None
            return np.concatenate(sample_chunks, axis=0)
        except Exception as e:
            # Public contract kept from the original: report the problem and
            # hand back None instead of raising.
            print(e)
            return None

    def __one_hot(self, activity):
        """Return the one-hot vector for ``activity``.

        The hot index is the activity's position in ``self.classes``, so the
        encoding does not depend on the order in which files are discovered.
        """
        vector = np.zeros(self.total_classes)
        vector[self.classes.index(activity)] = 1
        return vector

    def __extract_activity(self, filename):
        """Return the activity name encoded in ``filename``.

        The extension is dropped and every character that is not an English
        letter is removed, e.g. ``'Walking 1.csv'`` -> ``'Walking'``.
        """
        #Extracting the filename and excluding the extension
        name = os.path.splitext(filename)[0]

        #Removing every character other than english alphabets
        return re.sub('[^A-Za-z]+' , '' , name)

    def __group_samples(self, dataset, sample_size, activity):
        """Append the activity vector to every row and group rows into samples.

        Args:
            dataset: ``pandas.DataFrame`` holding one recording.
            sample_size: Number of rows per sample. This argument (not
                ``self.sample_size``) drives both the truncation and the
                reshape, so the two can never disagree.
            activity: 1-D one-hot vector appended to every row.

        Returns:
            Array of shape ``(rows // sample_size, sample_size, columns + len(activity))``.
            Trailing rows that do not fill a whole sample are dropped.

        Raises:
            TypeError: ``dataset`` is not a pandas DataFrame.
            ValueError: ``sample_size`` is not positive.
        """
        #Checking if the dataset is a Pandas Dataframe
        if not isinstance(dataset, pd.DataFrame):
            raise TypeError('Expecting Pandas Dataframe, but got {}'.format(type(dataset)))
        if sample_size <= 0:
            raise ValueError('sample_size must be a positive integer, but got {}'.format(sample_size))

        #Appending activity class to each row in the dataset
        frames = dataset.to_numpy()
        labels = np.tile(activity, (frames.shape[0], 1))
        labelled = np.hstack([frames, labels])

        #Reshaping the dataset into sample batches
        total_samples = labelled.shape[0] // sample_size
        total_features = labelled.shape[1]
        usable_rows = labelled[:total_samples * sample_size]
        return usable_rows.reshape((total_samples, sample_size, total_features))

In [6]:
# Short-term prediction uses 20-frame samples: a 10-frame input sequence
# followed by the 10-frame sequence to be predicted.
sampled_data = Dataset_Preprocessing('/content/drive/MyDrive/Colab Notebooks/H3.6csv', sample_size=20).read_dataset()

In [7]:
# (number of samples, frames per sample, columns per frame incl. one-hot activity)
sampled_data.shape

(77144, 20, 74)

In [8]:
def split_to_features_labels(dataset, input_sequance_size=10, label_feature_count=64) :
    """Split every sample into an input sequence and a target sequence.

    Args:
        dataset: Array indexed as ``[sample, frame, feature]``.
        input_sequance_size: Number of leading frames used as model input.
            Must be smaller than the number of frames per sample.
        label_feature_count: Number of leading feature columns kept in the
            targets. Defaults to 64, the value that was previously
            hard-coded; pass ``None`` to keep every column.

    Returns:
        ``(features, labels)`` where ``features`` is
        ``dataset[:, :input_sequance_size, :]`` and ``labels`` is
        ``dataset[:, input_sequance_size:, :label_feature_count]``.

    Raises:
        AssertionError: ``input_sequance_size`` does not leave at least one
            frame for the targets.
    """
    assert input_sequance_size < dataset.shape[1], "input sequance should be smaller than the total sample size"
    features = dataset[:, :input_sequance_size, :]
    labels = dataset[:, input_sequance_size:, :label_feature_count]

    return features, labels

In [9]:
# Inputs: the first 10 frames of every sample (all columns).
# Targets: the remaining frames of every sample.
sampled_dataX, sampled_dataY = split_to_features_labels(sampled_data, input_sequance_size=10)

In [10]:
# Report the size of each axis of the target array, one line per axis.
shape_report = ('Total Samples: {}', 'Total Frames: {}', 'Total Features: {}')
for axis_index, line_template in enumerate(shape_report):
    print(line_template.format(sampled_dataY.shape[axis_index]))

Total Samples: 77144
Total Frames: 10
Total Features: 64


In [11]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Bidirectional

# Encoder: an LSTM with 200 units over variable-length sequences of
# 74-value frames (74 matches the last axis of `sampled_data` shown above).
encoder_inputs = Input(shape=(None, 74))
encoder = LSTM(200, return_state=True, return_sequences=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# Only the final hidden/cell states are wired into the model graph below.
# `encoder_outputs` is not connected to the decoder here; the training cell
# instead calls `encoder(...)` on the data and feeds the result as decoder input.
encoder_states = [state_h, state_c]

# Decoder: takes 200-value frames as input and starts from `encoder_states`.
decoder_inputs = Input(shape=(None, 200))
# The decoder returns the full output sequence plus its final states.
# The returned states are discarded here.
# NOTE(review): no separate inference model that reuses these states is
# defined in the part of the notebook visible here — confirm whether one is needed.
decoder_lstm = LSTM(200, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)
# Project every decoder step to 64 values (the width of the targets).
# NOTE(review): the ReLU activation cannot emit negative values — fine only
# if every target coordinate is non-negative; confirm against the data.
decoder_dense = Dense(64, activation='relu')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: maps [encoder input sequence, decoder input sequence]
# to the predicted target sequence.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [12]:
def loss_joint(predicted_sequance_batch, target_sequance_batch):
    """Sum of squared differences between predicted and target sequences.

    The squared error is summed over axis 2 and then over axis 1, leaving one
    value per entry along axis 0.
    Assumes inputs are indexed as (sample, frame, feature), like the arrays
    built earlier in this notebook — TODO confirm for other callers.
    """
    residual = tf.subtract(predicted_sequance_batch, target_sequance_batch)
    squared_error_per_frame = tf.math.reduce_sum(tf.square(residual), axis=2)
    squared_error_per_sample = tf.reduce_sum(squared_error_per_frame, axis=1)
    return squared_error_per_sample

def loss_motion_flow(predicted_sequance_batch, target_sequance_batch):
    """Sum of squared differences between predicted and target frame-to-frame changes.

    Both inputs are first differenced along axis 1; the squared difference of
    those differences is summed over axis 2 and then over axis 1, leaving one
    value per entry along axis 0.
    Assumes inputs are indexed as (sample, frame, feature) — TODO confirm.
    """
    step_difference = tf.experimental.numpy.diff
    predicted_flow = step_difference(predicted_sequance_batch, axis=1)
    target_flow = step_difference(target_sequance_batch, axis=1)

    squared_flow_error = tf.square(tf.subtract(predicted_flow, target_flow))
    flow_error_per_step = tf.reduce_sum(squared_flow_error, axis=2)
    return tf.reduce_sum(flow_error_per_step, axis=1)


def total_loss(target_sequance_batch, predicted_sequance_batch):
    """Equal-weight blend of the joint loss and the motion-flow loss.

    Arguments come in (target, prediction) order, which is the order Keras
    uses when calling a loss; they are swapped when forwarded because the two
    component losses take (prediction, target).
    """
    weighted_terms = (
        0.5 * loss_joint(predicted_sequance_batch, target_sequance_batch),
        0.5 * loss_motion_flow(predicted_sequance_batch, target_sequance_batch),
    )
    return weighted_terms[0] + weighted_terms[1]

In [25]:
# Decoder inputs are the encoder LSTM's per-frame outputs, evaluated once
# from the encoder's current weights and then fed to the model as plain data.
# NOTE(review): because these values are input data rather than part of the
# model graph, they are not refreshed while the encoder trains — confirm this
# is the intended design.
# NOTE(review): this call evaluates the whole dataset in a single eager call;
# if memory becomes a problem, compute it in batches instead.
decoder_input_data = encoder(sampled_dataX)[0]

# Any loss that accepts (true values, predictions) can be used here.
# `tensorflow_model_remediation.min_diff.losses.MMDLoss` is deliberately not
# passed as a metric: it is a MinDiff loss that is called with
# (membership, predictions), not with (y_true, y_pred). Mean absolute error
# gives a directly interpretable per-value regression metric instead.
model.compile(optimizer='adam', loss=total_loss, metrics=['mae'])
history = model.fit([sampled_dataX, decoder_input_data], sampled_dataY,
          batch_size=100,
          epochs=10)

InternalError: ignored

In [21]:
# Sanity check: predict on the training inputs (decoder input = encoder
# outputs computed from the same data) and inspect the output shape.
model.predict([sampled_dataX, encoder(sampled_dataX)[0]]).shape

(77144, 10, 64)

In [23]:
# Target shape, for comparison with the prediction shape.
sampled_dataY.shape

(77144, 10, 64)

In [17]:
# Per-epoch values recorded by `model.fit`.
# NOTE(review): the output stored with this cell contains 'accuracy' and
# 'val_*' entries, which the fit call visible in this notebook (no validation
# data, no accuracy metric) would not produce — presumably it comes from an
# earlier version of the training cell; re-run to refresh.
history.history

{'accuracy': [0.024643927812576294,
  0.0598541684448719,
  0.0598541684448719,
  0.0598541684448719,
  0.0598541684448719,
  0.0598541684448719,
  0.0598541684448719,
  0.061210401356220245,
  0.08420481532812119,
  0.0946350172162056],
 'loss': [61417832.0,
  37850928.0,
  23338742.0,
  14991470.0,
  10690170.0,
  8801425.0,
  8113604.0,
  7633512.5,
  6673252.0,
  5815005.0],
 'val_accuracy': [0.02505023032426834,
  0.02505023032426834,
  0.02505023032426834,
  0.02505023032426834,
  0.02505023032426834,
  0.02505023032426834,
  0.02505023032426834,
  0.026573335751891136,
  0.04958195611834526,
  0.0511309877038002],
 'val_loss': [47519920.0,
  29205120.0,
  18277972.0,
  12349246.0,
  9546026.0,
  8474323.0,
  7995638.0,
  7606455.5,
  6257934.0,
  5380754.0]}