In [1]:
import numpy as np
import pandas as pd
##_____________________________

def get_ds_infos(path="data_subjects_info.csv"):
    """Load the per-subject attribute table.

    Parameters
    ----------
    path : str, optional
        Location of the comma-separated subjects file. The default,
        "data_subjects_info.csv" in the working directory, keeps calls
        without arguments reading the same file as before.

    Returns
    -------
    numpy.ndarray
        Every row of the file except the first one.
        Columns are 0:Code, 1:Weight, 2:Height, 3:Age, 4:Gender
    """
    dss = np.genfromtxt(path, delimiter=',')
    ## The first row of the file is the column header, not a subject; drop it.
    dss = dss[1:]
    print("----> Data subjects information is imported.")
    return dss
##____________

def creat_time_series(num_features, num_act_labels, num_gen_labels, label_codes, trial_codes):
    """Build labelled training and test time-series from the per-trial CSV files.

    For every subject returned by get_ds_infos(), every activity in
    `label_codes` and every trial listed for that activity in `trial_codes`,
    the file 'A_DeviceMotion_data/<act>_<trial>/sub_<id>.csv' is read and its
    rows are extended with label columns.

    Parameters
    ----------
    num_features : int
        Number of sensor columns in each CSV (after 'Unnamed: 0' is dropped).
    num_act_labels : int
        Number of activity label columns appended after the features.
    num_gen_labels : int
        Number of gender label columns appended after the activity columns.
    label_codes : dict
        Maps an activity code (e.g. "dws") to the column index that is set
        to 1 for rows of that activity.
    trial_codes : dict
        Maps an activity code to the list of trial numbers to load.

    Returns
    -------
    (train_data, test_data) : tuple of numpy.ndarray
        Two arrays with num_features+num_act_labels+num_gen_labels columns.
        Rows from trials numbered above 10 go to test_data, all others to
        train_data.
    """
    dataset_columns = num_features+num_act_labels+num_gen_labels
    ds_list = get_ds_infos()
    ## Gather the per-file blocks in lists and concatenate once at the end.
    ## Growing an array with np.append inside the loop would copy everything
    ## accumulated so far for every single file.
    ## The empty seed block keeps the result shape (0, dataset_columns) when
    ## no file falls into a split.
    train_parts = [np.zeros((0,dataset_columns))]
    test_parts = [np.zeros((0,dataset_columns))]
    for i, sub_id in enumerate(ds_list[:,0]):
        for act in label_codes:
            for trial in trial_codes[act]:
                fname = 'A_DeviceMotion_data/'+act+'_'+str(trial)+'/sub_'+str(int(sub_id))+'.csv'
                raw_data = pd.read_csv(fname)
                raw_data = raw_data.drop(['Unnamed: 0'], axis=1)
                unlabel_data = raw_data.values
                label_data = np.zeros((len(unlabel_data), dataset_columns))
                ## Sensor readings fill the leading columns
                label_data[:,:-(num_act_labels + num_gen_labels)] = unlabel_data
                ## One-hot activity label
                label_data[:,label_codes[act]] = 1
                ## Gender of the current subject (column 4 of the subjects table)
                label_data[:,-(num_gen_labels)] = int(ds_list[i,4])
                ## We consider long trials as training dataset and short trials as test dataset
                if trial > 10:
                    test_parts.append(label_data)
                else:
                    train_parts.append(label_data)
    train_data = np.concatenate(train_parts, axis = 0)
    test_data = np.concatenate(test_parts, axis = 0)
    return train_data , test_data
#________________________________


print("--> Start...")
## Parameters for turning the "(A)DeviceMotion_data" recordings into a labelled time-series
num_features = 12 # attitude(roll, pitch, yaw); gravity(x, y, z); rotationRate(x, y, z); userAcceleration(x,y,z)
num_act_labels = 4 # dws, ups, wlk, jog
num_gen_labels = 1 # 0/1(female/male)
## Every activity gets its own label column, placed right after the feature columns
label_codes = {act: num_features + offset for offset, act in enumerate(("dws", "ups", "wlk", "jog"))}
## Trial numbers to load for each activity
trial_codes = {"dws":[1,2,11], "ups":[3,4,12], "wlk":[7,8,15], "jog":[9,16]}
## Build the two labelled time-series with 'creat_time_series()'
print("--> Building Training and Test Datasets...")
train_ts, test_ts = creat_time_series(
    num_features,
    num_act_labels,
    num_gen_labels,
    label_codes,
    trial_codes)
print("--> Shape of Training Time-Seires:", train_ts.shape)
print("--> Shape of Test Time-Series:", test_ts.shape)

--> Start...
--> Building Training and Test Datasets...
----> Data subjects information is imported.
--> Shape of Training Time-Seires: (621973, 17)
--> Shape of Test Time-Series: (145687, 17)


In [2]:
def time_series_to_section(dataset, num_act_labels, num_gen_labels, sliding_window_size, step_size_of_sliding_window, standardize = False, **options):
    """Cut a labelled time-series into fixed-length, label-consistent sections.

    Parameters
    ----------
    dataset : numpy.ndarray
        2D array whose rows are time points. The leading columns are the
        sensor features, followed by `num_act_labels` activity columns and
        `num_gen_labels` gender columns. The array is not modified.
    num_act_labels : int
        Number of activity label columns.
    num_gen_labels : int
        Number of gender label columns. Only the column at index
        -num_gen_labels is read as the gender label.
    sliding_window_size : int
        Number of consecutive time points in each section.
    step_size_of_sliding_window : int
        Distance, in time points, between the starts of consecutive windows.
    standardize : bool, optional
        When True, features are shifted by `mean` and divided by `std`.
    **options
        If any option is given, `mean` and `std` are taken from it (used to
        standardize test data with the training parameters). Otherwise they
        are computed per feature column from `dataset` itself.

    Returns
    -------
    secs_data : numpy.ndarray, shape (k, num_features, sliding_window_size)
    act_secs_labels : numpy.ndarray, shape (k, num_act_labels)
    gen_secs_labels : numpy.ndarray, shape (k,)
    mean, std
        The standardization parameters that were applied (0 and 1 when
        `standardize` is False).

    A window is kept only if the gender label and the activity label at its
    first and last time point agree, so sections never straddle two
    recordings with different labels.
    """
    num_label_columns = num_act_labels + num_gen_labels
    data = dataset[: , 0:-num_label_columns]
    act_labels = dataset[: , -num_label_columns:-(num_gen_labels)]
    gen_labels = dataset[: , -(num_gen_labels)]
    mean = 0
    std = 1

    if standardize:
        ## Standardize each sensor's data to have a zero mean and unity standard deviation.
        ## As usual, we normalize test dataset by training dataset's parameters
        if options:
            mean = options.get("mean")
            std = options.get("std")
            print("----> Test Data has been standardized")
        else:
            mean = data.mean(axis=0)
            std = data.std(axis=0)
            print("----> Training Data has been standardized:\n the mean is = ",str(mean.mean())," ; and the std is = ",str(std.mean()))

        ## `data` is a view into `dataset`; build a new array instead of using
        ## in-place operators so the caller's array is left untouched.
        data = (data - mean) / std
    else:
        print("----> Without Standardization.....")

    ## We want the Rows of matrices show each Feature and the Columns show time points.
    data = data.T

    size_features, size_data = data.shape
    ## A series shorter than one window yields zero sections rather than a
    ## negative buffer size.
    number_of_secs = max(0, round((size_data - sliding_window_size)/step_size_of_sliding_window))

    ##  Create a 3D matrix for Storing Snapshots
    secs_data = np.zeros((number_of_secs , size_features , sliding_window_size ))
    ## Width follows num_act_labels so any number of activities is supported
    act_secs_labels = np.zeros((number_of_secs, num_act_labels))
    gen_secs_labels = np.zeros(number_of_secs)

    ## NOTE: window starts run over range(0, size_data - window, step) and are
    ## capped at the rounded count above, so a trailing window can be left out.
    k = 0
    window_starts = range(0, size_data - sliding_window_size, step_size_of_sliding_window)
    for j, i in enumerate(window_starts):
        if j >= number_of_secs:
            break
        last = i + sliding_window_size - 1
        ## Skip windows whose two ends carry different labels
        if gen_labels[i] != gen_labels[last]:
            continue
        if not (act_labels[i] == act_labels[last]).all():
            continue
        secs_data[k] = data[0:size_features, i:i+sliding_window_size]
        act_secs_labels[k] = act_labels[i].astype(int)
        gen_secs_labels[k] = gen_labels[i].astype(int)
        k = k+1
    ## Drop the unused tail of the preallocated buffers
    secs_data = secs_data[0:k]
    act_secs_labels = act_secs_labels[0:k]
    gen_secs_labels = gen_secs_labels[0:k]

    return secs_data, act_secs_labels, gen_secs_labels, mean, std
##________________________________________________________________


## Length of each sliding window, in time points
## ( e.g. 100 means every section holds 100 consecutive observations of each sensor )
sliding_window_size = 50 # 50 samples = 1 second for the MotionSense dataset (50Hz sampling rate)
## Distance between the starts of two consecutive windows
## ( a smaller step yields more sections, at a higher computational cost )
step_size_of_sliding_window = 10
print("--> Sectioning Training and Test datasets: shape of each section will be: (",num_features,"x",sliding_window_size,")")

## Training sections: the standardization parameters are computed from the training data itself
train_data, act_train_labels, gen_train_labels, train_mean, train_std = time_series_to_section(
    train_ts.copy(),
    num_act_labels, num_gen_labels,
    sliding_window_size, step_size_of_sliding_window,
    standardize = True)

## Test sections: standardized with the training mean and std
test_data, act_test_labels, gen_test_labels, test_mean, test_std = time_series_to_section(
    test_ts.copy(),
    num_act_labels, num_gen_labels,
    sliding_window_size, step_size_of_sliding_window,
    standardize = True,
    mean = train_mean, std = train_std)

print("--> Shape of Training Sections:", train_data.shape)
print("--> Shape of Test Sections:", test_data.shape)

--> Sectioning Training and Test datasets: shape of each section will be: ( 12 x 50 )
----> Training Data has been standardized:
 the mean is =  -0.016953116747561597  ; and the std is =  0.8714768261722003
----> Test Data has been standardized
--> Shape of Training Sections: (61726, 12, 50)
--> Shape of Test Sections: (14094, 12, 50)


In [3]:
from keras.models import Sequential
from keras.models import Model 
from keras.layers import Input, Dense, Flatten, Reshape
from keras.layers import Convolution2D, MaxPooling2D, Dropout
from keras.utils import np_utils 
##______________________________
## Add a trailing channel axis so the sections are ready for Convolution2D.
## The ndim guard keeps this cell idempotent: running it a second time must not
## append yet another axis, which would break code that expects 4D input.
if train_data.ndim == 3:
    train_data = np.expand_dims(train_data,axis=3)
if test_data.ndim == 3:
    test_data = np.expand_dims(test_data,axis=3)
print("--> Shape of Training Sections:", train_data.shape)
print("--> Shape of Test Sections:", test_data.shape)


Using TensorFlow backend.


--> Shape of Training Sections: (61726, 12, 50, 1)
--> Shape of Test Sections: (14094, 12, 50, 1)


In [4]:
##***@@@ This Will Be the ESTIMATOR @@@***##
## Builds and trains the multi-task CNN (MTCNN): one shared convolutional trunk
## with two output heads, "ACT" (activity) and "GEN" (gender).
## Here we set up the parameters for MTCNN
## train_data is expected to be 4D here: (sections, height, width, channel)
num_train, height, width, channel = train_data.shape
metrics = ['acc']
## Activity Recognition head: one unit per activity label
act_last_layer_dim = num_act_labels
act_loss_func = "categorical_crossentropy"
act_activation_func = 'softmax'
## Gender Classification head: one unit per gender label
gen_last_layer_dim = num_gen_labels 
gen_loss_func = "binary_crossentropy"
gen_activation_func = 'sigmoid'
## Training Phase
batch_size = 64
num_of_epochs = 20
verbosity = 1
## MTCNN hyperparameters
## kernel_size_* / pool_size_* are widths along the time axis only (see the (1, k) shapes below)
kernel_size_1 = 5
kernel_size_2 = 3
pool_size_1 = 2
pool_size_2 = 3  
## conv_depth_* are the number of filters of each convolutional stage
conv_depth_1 = 50 
conv_depth_2 = 40 
conv_depth_3 = 20 
## drop_prob_1 is used after every convolutional stage, drop_prob_2 only after the hidden layer
drop_prob_1 = 0.2 
drop_prob_2 = 0.4 
hidden_size = 400 

## Note that: because each section of time-series is a matrix, we use Convolution2D.
## On the other side: because each row of the matrix corresponds to one feature of
##   the time-series, we use a (1,k) kernel so that the data points of each row are
##   convolved only with data points of that same row
inp = Input(shape=(height, width,1))     
## Stage 1: two convolutions, a Dense layer, pooling and dropout
conv_0 = Convolution2D(conv_depth_1, (1 , kernel_size_1), padding='valid', activation='relu')(inp)
conv_1 = Convolution2D(conv_depth_1, (1 , kernel_size_2), padding='same', activation='relu')(conv_0)
## NOTE(review): Dense is applied to the 4D convolution output; presumably it acts
##   on the last (filter) axis only -- confirm against the Keras Dense documentation
dense_1 = Dense(conv_depth_1, activation='relu')(conv_1)
pool_1 = MaxPooling2D(pool_size=(1, pool_size_1))(dense_1)
drop_1 = Dropout(drop_prob_1)(pool_1)

## Stage 2: one convolution, a Dense layer, pooling and dropout
conv_2 = Convolution2D(conv_depth_2, (1 , kernel_size_1), padding='valid', activation='relu')(drop_1)
dense_2 = Dense(conv_depth_2, activation='relu')(conv_2)
pool_2 = MaxPooling2D(pool_size=(1, pool_size_2))(dense_2)
drop_2 = Dropout(drop_prob_1)(pool_2)

## Stage 3: one convolution and dropout, no pooling
conv_3 = Convolution2D(conv_depth_3, (1 , kernel_size_2), padding='valid', activation='relu')(drop_2)
drop_3 = Dropout(drop_prob_1)(conv_3)

## Shared fully connected layer feeding both heads
flat = Flatten()(drop_3)
hidden = Dense(hidden_size, activation='relu')(flat)
drop_4 = Dropout(drop_prob_2)(hidden)

## Both task heads read the same shared representation (drop_4)
out1 = Dense(act_last_layer_dim, activation= act_activation_func, name = "ACT")(drop_4)
out2 = Dense(gen_last_layer_dim, activation= gen_activation_func, name = "GEN")(drop_4)

act_gen_model = Model(inputs=inp, outputs=[out1,out2]) 

## The losses are listed in the same order as the model outputs: [ACT, GEN]
act_gen_model.compile(loss=[act_loss_func, gen_loss_func], 
          optimizer='adam', 
          metrics=metrics)

## The targets are likewise listed in output order: [activity labels, gender labels]
history = act_gen_model.fit(train_data, [act_train_labels, gen_train_labels],                
              batch_size = batch_size,
              epochs = num_of_epochs,
              verbose = verbosity) 


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [5]:
## Evaluate the trained MTCNN on the test sections, targets in output order [ACT, GEN]
## NOTE(review): entries 3 and 4 of the result are read as the ACT and GEN accuracies;
##   presumably the list is [total loss, ACT loss, GEN loss, ACT acc, GEN acc] for this
##   two-output model with a single metric -- confirm against the installed Keras version
results_1 = act_gen_model.evaluate(test_data, [act_test_labels, gen_test_labels],
                                 verbose = verbosity)

print("--> Evaluation on Test Dataset:")
print("**** Accuracy for Activity Recognition task is: ", results_1[3])
print("**** Accuracy for Gender Classification task is: ", results_1[4])

--> Evaluation on Test Dataset:
**** Accuracy for Activity Recognition task is:  0.951113949172866
**** Accuracy for Gender Classification task is:  0.9523910883894977


In [6]:
## Persist the trained MTCNN in two files in the working directory:
## the architecture as JSON ...
act_gen_model_json = act_gen_model.to_json()
with open("act_gen_model_1_ms_t.json", "w") as json_file:
    json_file.write(act_gen_model_json)
## ... and the learned weights as HDF5
act_gen_model.save_weights("act_gen_weights_1_ms_t.h5")
print("--> Saved MTCNN and its weights to disk!")

--> Saved MTCNN and its weights to disk!


In [7]:
##***@@@ This Will Be the NEUTRALIZER @@@***##
import keras.backend as K
def gen_equ_loss_func(y_true, y_pred):
    """Mean absolute distance of the predictions from 0.5.

    The value is zero exactly when every entry of `y_pred` equals 0.5.
    `y_true` is accepted to match the (y_true, y_pred) loss signature but is
    never read.
    """
    return K.mean(K.abs(0.5 - y_pred))
##____________________________________

In [8]:
##***@@@ This Will Be the GUARDIAN @@@***##
## Here we set up the Autoencoder: a stack of Dense layers that maps a
## (height, width, 1) section to an output of the same shape.
ae_inp_size = height*width
ae_input = Input(shape=(height, width,1))
## Flatten each section into a vector of height*width values
## NOTE(review): input_shape is passed to Reshape although the layer is called
##   directly on a tensor; presumably redundant here -- confirm
x = Reshape((ae_inp_size,), input_shape=((height,width,1)))(ae_input)
x = Dense(ae_inp_size, activation='linear')(x)

## Encoder: the width is halved at every layer
encoded = Dense(ae_inp_size//2, activation='relu')(x)
encoded = Dense(ae_inp_size//4, activation='relu')(encoded)

## Narrowest layer of the network (one eighth of the input size)
y = Dense(ae_inp_size//8, activation='relu')(encoded)

## Decoder: mirrors the encoder widths
decoded = Dense(ae_inp_size//4, activation='relu')(y)
decoded = Dense(ae_inp_size//2, activation='relu')(decoded)

## Linear output layer, reshaped back to the (height, width, 1) section layout
z = Dense(ae_inp_size, activation='linear')(decoded)
z = Reshape((height,width,1), input_shape=(ae_inp_size,))(z)
ae_model = Model(ae_input, z)

In [9]:
##***@@@ This Will Be the Final GEN @@@***##
## Here we freeze the weights of the MTCNN layers and attach the output of the
## deep autoencoder to the input of the MTCNN to build the GEN neural network.
## The freeze is set before the combined model is compiled.
act_gen_model.trainable = False
## Chain: section -> autoencoder -> MTCNN -> [ACT output, GEN output]
dp = ae_model(ae_input)
dp = act_gen_model(dp)
dp_model = Model(inputs=ae_input, outputs=dp)

## ACT keeps its classification loss; GEN uses gen_equ_loss_func, which only
## measures how far the gender predictions are from 0.5
dp_model.compile(loss=[act_loss_func, gen_equ_loss_func], 
                 optimizer='adam',
                 metrics=metrics)

num_of_epochs = 20
## gen_train_labels is passed as the second target to match the two outputs,
## although gen_equ_loss_func never reads its y_true argument
dp_model.fit(train_data , [act_train_labels, gen_train_labels],
                epochs = num_of_epochs,
                batch_size = batch_size,
                )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f1fa83e10f0>

In [10]:
## Pass the test sections through the trained autoencoder, then score the
## transformed sections with the MTCNN against the true labels.
tr_test_data = ae_model.predict(test_data)
results_2 = act_gen_model.evaluate(tr_test_data, [act_test_labels, gen_test_labels])
## NOTE(review): entries 3 and 4 are read as the ACT and GEN accuracies; presumably
##   the list is [total loss, ACT loss, GEN loss, ACT acc, GEN acc] -- confirm
print("@@@@ Transformed Test ACT acc: ", results_2[3])
print("@@@@ Transformed Test GEN acc: ", results_2[4])

@@@@ Transformed Test ACT acc:  0.938910174516984
@@@@ Transformed Test GEN acc:  0.4957428693082022
