# Get data into dictionary

In [1]:
from sklearn import metrics

In [2]:
import scipy.io
import os
import numpy as np
# Directory holding one ground-truth .mat file per video.
path = 'groundTruthmat/'
dataset = []
data_dict = {}
# Divide into four folds (by the subject id in the file name)
# s1: P03 – P15
# s2: P16 – P28
# s3: P29 – P41
# s4: P42 – P54
# Keep only .mat files so stray directory entries (e.g. .DS_Store or
# .ipynb_checkpoints) never reach loadmat, and sort the names so the
# insertion order of data_dict is the same on every run and platform.
files = sorted(name for name in os.listdir(path) if name.endswith('.mat'))
for file in files:
    read = scipy.io.loadmat(os.path.join(path, file))
    # 'labseqid' is the per-frame label column stored in each ground-truth file.
    data_dict[file] = read['labseqid']

# Helper functions

In [3]:
def one_hot():
    """Build the lookup table from an action label to its one-hot vector.

    Returns:
        dict: keys 0..47 map to a 48-element list with a single 1 at the
        key's position; the padding label -1 maps to 48 zeros.
    """
    n_classes = 48
    table = {
        label: [1 if position == label else 0 for position in range(n_classes)]
        for label in range(n_classes)
    }
    # The padding label gets an all-zero vector.
    table[-1] = [0] * n_classes
    return table

frame_label = one_hot()

In [4]:
def get_input_frames(data_dict, trainProportion):
    """Keep only the leading share of every video's frames.

    Args:
        data_dict: mapping of file name to that video's frame sequence.
        trainProportion: fraction of each sequence to keep; the cut-off
            index is round(len(frames) * trainProportion).

    Returns:
        dict: a new mapping of file name to the sliced prefix of its frames.
    """
    return {
        filename: frames[:round(len(frames) * trainProportion)]
        for filename, frames in data_dict.items()
    }

In [5]:
def get_input_y(data_dict, input_dict):
    """Collect, per video, the actions that follow the observed frames.

    Args:
        data_dict: mapping of file name to the full frame sequence; each
            frame is indexable and frame[0] is its label.
        input_dict: mapping of file name to the observed frame prefix
            (frame-level, same frame layout as data_dict).

    Returns:
        list: one list per video (in data_dict iteration order) holding the
        labels of the remaining frames with consecutive repeats collapsed.
        The action still running at the last observed frame is not included.
    """
    targets = []
    for filename, frames in data_dict.items():
        observed = input_dict[filename]
        n_observed = len(observed)
        # Label of the last observed frame: a new target starts only when
        # the label changes away from it.
        previous = observed[n_observed - 1][0]
        upcoming = []
        for frame in frames[n_observed:]:
            label = frame[0]
            if label != previous:
                upcoming.append(label)
                previous = label
        targets.append(upcoming)
    return targets

In [6]:
# convert successive frames of the same action into a single action
""" data_dict structure example
    {'P25_stereo01_P25_sandwich.mat': array([[ 0],
        [ 0],
        [ 0],
        ...,
        [39],
        [39],
        [39]], dtype=int32),"""

def frames_to_action(input_dict):
    """Collapse each frame sequence into its sequence of distinct actions.

    Runs of consecutive frames with the same label (frame[0]) become a
    single entry. The dictionary is rewritten in place and also returned.

    Args:
        input_dict: mapping of file name to a sequence of frames.

    Returns:
        dict: the same dictionary object, now mapping file name to a list of
        action labels.
    """
    for filename in list(input_dict):
        collapsed = []
        for frame in input_dict[filename]:
            label = frame[0]
            # Record a label only when it starts a new run.
            if not collapsed or label != collapsed[-1]:
                collapsed.append(label)
        input_dict[filename] = collapsed
    return input_dict

In [7]:
# add padding, each video has same length of action input
def add_padding(data_dict):
    """Left-pad every action list with -1 up to the longest list's length.

    The dictionary is updated in place and also returned.

    Args:
        data_dict: mapping of file name to a list of action labels.

    Returns:
        tuple: (the same dictionary with padded lists, the common length).
    """
    maxLen = max((len(actions) for actions in data_dict.values()), default=0)
    for filename in list(data_dict):
        actions = data_dict[filename]
        missing = maxLen - len(actions)
        data_dict[filename] = [-1] * missing + actions
    return data_dict, maxLen
            

In [8]:
def add_padding_to_y(trainY):
    """Left-pad every target list with -1 up to the longest target's length.

    Args:
        trainY: list of per-video lists of action labels.

    Returns:
        tuple: (a new list of padded lists, the common length).
    """
    maxLen = max(map(len, trainY), default=0)
    new_trainY = [[-1] * (maxLen - len(actions)) + actions for actions in trainY]
    return new_trainY, maxLen

In [9]:
def feature_encoding(input_dict):
    """Replace every action label with its vector from `frame_label`.

    The dictionary is rewritten in place and also returned. The vectors are
    the objects stored in the module-level `frame_label` table, not copies.

    Args:
        input_dict: mapping of file name to a list of action labels.

    Returns:
        dict: the same dictionary, now mapping file name to a list of vectors.
    """
    for filename in list(input_dict):
        input_dict[filename] = [frame_label[action] for action in input_dict[filename]]
    return input_dict

In [18]:
def label_encoding(trainY):
    """Encode every target label with its vector from `frame_label`.

    Args:
        trainY: list of per-video lists of action labels.

    Returns:
        list: a new list with the same nesting where each label is replaced
        by the corresponding entry of the module-level `frame_label` table.
    """
    return [[frame_label[action] for action in actions] for actions in trainY]

# LSTM

In [22]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, RepeatVector
from keras.layers import Dropout, Masking, TimeDistributed, Activation
def lstm_model(frame_len, max_timesteps):
    """Build and compile the LSTM encoder-decoder used to predict future actions.

    The encoder reads `frame_len` one-hot action vectors (48 classes), the
    decoder emits `max_timesteps` softmax distributions over the 48 classes.

    Args:
        frame_len: number of time steps in every (padded) input sequence.
        max_timesteps: number of time steps in every (padded) target sequence.

    Returns:
        The compiled keras `Sequential` model. Its summary is printed as a
        side effect.
    """
    model = Sequential()
    # Skip input steps whose 48 features are all zero (the vector used for
    # the padding label). A scalar mask_value is the documented form and is
    # equivalent to the previous list of 48 zeros.
    model.add(Masking(mask_value=0.0, input_shape=(frame_len, 48)))
    # Encoder: two stacked LSTMs, the second returns only its final state.
    model.add(LSTM(512, activation='tanh', return_sequences=True))
    model.add(LSTM(256))
    # Feed the encoded state to the decoder once per output step.
    model.add(RepeatVector(max_timesteps))
    model.add(LSTM(100, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(48, activation="softmax")))
    # The targets are one-hot class vectors and the output is a softmax, so
    # the matching loss is categorical cross-entropy rather than MAE.
    # All-zero (padding) target steps contribute nothing to this loss, whereas
    # MAE would push the softmax towards an all-zero output it cannot produce.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'], sample_weight_mode="temporal")
    model.summary()
    return model


# 4-fold cross validation

In [23]:
import tensorflow as tf
from keras import backend as K
def train_model(trainX, trainY, model):
    """Fit `model` on the given sequences for 20 epochs with batch size 128.

    Args:
        trainX: nested sequence of encoded inputs; converted with np.array.
        trainY: nested sequence of encoded targets; converted with np.array.
        model: object exposing a keras-style `fit` method; trained in place.
    """
    features = np.array(trainX)
    targets = np.array(trainY)
    model.fit(features, targets, epochs=20, batch_size=128)
    

In [15]:
def evaluation(testX, testY, model, max_timesteps):
    """Compute and print the prediction accuracy for each output time step.

    Args:
        testX: inputs passed unchanged to `model.predict`.
        testY: list of per-video arrays of 48-element target vectors.
            Modified in place: as many rows as there are all-zero (padding)
            rows are removed from the front of each video.
        model: object exposing a keras-style `predict` method.
        max_timesteps: number of output positions to score.

    Returns:
        list: for each position, correct / valid over the videos that still
        have a target there, or 0 when no video does.
    """
    predictions = model.predict(testX)
    accuracy_list = []

    # Turn every predicted distribution into a hard one-hot vector.
    results = []
    for video_prediction in predictions:
        hardened = []
        for step_prediction in video_prediction:
            choice = np.array([0] * 48)
            choice[step_prediction.argmax(axis=-1)] = 1
            hardened.append(choice)
        results.append(hardened)

    # delete paddings: drop as many leading steps as there are all-zero
    # target rows, from both the targets and the predictions.
    for video in range(len(testY)):
        n_padding = sum(1 for row in testY[video] if (row == 48*[0]).all())
        testY[video] = testY[video][n_padding:]
        results[video] = results[video][n_padding:]

    # Score each output position over the videos long enough to reach it.
    for step in range(max_timesteps):
        correct = 0
        valid = 0
        for truth, guess in zip(testY, results):
            if len(truth) <= step:
                continue
            if (guess[step] == 48*[0]).all():
                continue
            valid += 1
            if (truth[step] == guess[step]).all():
                correct += 1
        score = correct/valid if valid != 0 else 0
        accuracy_list.append(score)
        print("timestpe", step+1, ":",  score)
    return (accuracy_list)

In [16]:
import copy
from operator import add
def cross_validation(input_dict, encoded_y, model, max_timesteps):
    """Run 4-fold, subject-wise cross validation and average the accuracies.

    Videos are assigned to folds by the two-digit subject id at characters
    1-2 of the file name (e.g. 'P25_...'):
        s1: id <= 15, s2: 16-28, s3: 29-41, s4: 42-54.
    Files with an id above 54 are left out of every fold.

    Args:
        input_dict: mapping of file name to its encoded input sequence.
        encoded_y: encoded targets, indexed in the iteration order of
            `input_dict`.
        model: compiled keras-style model (needs get_weights / set_weights);
            it is retrained from its initial weights for every fold.
        max_timesteps: number of output positions scored by `evaluation`.

    Returns:
        list: per-position accuracy averaged over the four folds (also printed).
    """
    n_folds = 4
    # Highest subject id (inclusive) belonging to each fold.
    fold_upper_bounds = (15, 28, 41, 54)
    splits_x = [[] for _ in range(n_folds)]
    splits_y = [[] for _ in range(n_folds)]

    for file_count, filename in enumerate(input_dict):
        subject = int(filename[1:3])
        for fold, upper_bound in enumerate(fold_upper_bounds):
            if subject <= upper_bound:
                splits_x[fold].append(input_dict[filename])
                splits_y[fold].append(encoded_y[file_count])
                break

    # Snapshot the starting weights so each fold trains from the same state.
    # Reusing the trained model would evaluate later folds on videos it has
    # already seen during an earlier fold's training. (Optimizer state is
    # not reset by this.)
    initial_weights = model.get_weights()

    final_acc = []
    for i in range(n_folds):
        model.set_weights(initial_weights)

        # Select the training folds by index. Comparing the fold lists by
        # value raises on equally sized lists of arrays and drops folds that
        # happen to have identical content.
        trainX, trainY = [], []
        for j in range(n_folds):
            if j != i:
                trainX += splits_x[j]
                trainY += splits_y[j]

        # evaluation() strips padding from its targets in place, so hand it copies.
        testX = copy.deepcopy(splits_x[i])
        testY = copy.deepcopy(splits_y[i])

        train_model(trainX, trainY, model)
        fold_acc = evaluation(testX, testY, model, max_timesteps)
        if not final_acc:
            final_acc = fold_acc
        else:
            final_acc = list(map(add, final_acc, fold_acc))

    final_acc = [acc / n_folds for acc in final_acc]
    print(final_acc)
    return (final_acc)

In [24]:
from keras.backend import clear_session
# Fractions of each video's frames that are given to the model as input.
input_proportion = [0.1, 0.2, 0.3, 0.4, 0.5]
# Cross-validated per-timestep accuracies, keyed by input proportion.
results = {}
for proportion in input_proportion:
    input_dict = get_input_frames(data_dict, proportion)
    # The targets need the frame-level prefix lengths, so they must be built
    # before frames_to_action() collapses input_dict in place.
    train_y = get_input_y(data_dict, input_dict)
    input_dict = frames_to_action(input_dict)
    input_dict, frame_len = add_padding(input_dict)
    input_dict = feature_encoding(input_dict)

    train_y, max_timesteps = add_padding_to_y(train_y)
    encoded_y = label_encoding(train_y)
    encoded_y = np.array(encoded_y)
    model = lstm_model(frame_len, max_timesteps)
    result = cross_validation(input_dict, encoded_y, model, max_timesteps)
    # Keep the outcome so it can be inspected once the loop has finished.
    results[proportion] = result
    # A new model is built for every proportion; release the finished one's
    # Keras state so memory does not grow across iterations.
    clear_session()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_2 (Masking)          (None, 6, 48)             0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 6, 512)            1148928   
_________________________________________________________________
lstm_6 (LSTM)                (None, 256)               787456    
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 23, 256)           0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 23, 100)           142800    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 23, 48)            4848      
Total params: 2,084,032
Trainable params: 2,084,032
Non-trainable params: 0
____________________________________________

KeyboardInterrupt: 

In [None]:
# Display the `results` dictionary defined in the experiment cell above.
results