In [1]:
import pickle
from tensorflow.keras.models import load_model
from scipy.interpolate import interp1d
import numpy as np
import pandas as pd
import os
from joblib import Parallel, delayed

In [2]:
# Path to the saved Keras model. Despite the variable name this is a single
# .hdf5 file, not a directory; it is passed straight to load_model().
model_directory = './models/activity_estimator_both_study_magnitude_final_5_labels.hdf5'
# Loaded once here; a later cell calls model.predict() on the stacked windows.
model = load_model(model_directory)

In [3]:
# Load the pickled dataset used by every cell below.
# NOTE(review): pickle can execute arbitrary code while loading, so this file
# must come from a trusted source.
# The context manager closes the file handle as soon as loading finishes
# (the bare open() call previously left it to the garbage collector).
with open('./data/mORAL_dataset_for_python_upload_09072020/processed_data/all_data_right_wrist.p','rb') as data_file:
    all_data = pickle.load(data_file)

In [4]:
def interpolate_acl(a,window_size=20,fs_now=25,fs_new=20):
    """Linearly resample ``a`` along axis 0 onto ``window_size*fs_new`` points.

    The input rows are treated as evenly spaced over ``[0, window_size]``,
    whatever their number, and the output rows are evenly spaced over the
    same interval.

    Parameters
    ----------
    a : np.ndarray
        Samples to resample; interpolation runs along the first axis.
    window_size : int
        Length of the window (presumably seconds -- TODO confirm).
    fs_now : int
        Not used by the implementation; kept for interface compatibility.
    fs_new : int
        Number of output points per unit of ``window_size``.

    Returns
    -------
    np.ndarray
        Array with ``window_size*fs_new`` rows and the trailing shape of ``a``.
    """
    source_grid = np.linspace(0,window_size,a.shape[0])
    resample = interp1d(source_grid,a,axis=0,fill_value='extrapolate')
    n_target = window_size*fs_new
    target_grid = np.linspace(0,window_size,n_target)
    return resample(target_grid)

def get_magnitude_array(a):
    """Return the resampled Euclidean magnitude of one window.

    Rows are first ordered by column 0 (presumably a timestamp -- TODO
    confirm). Columns 1, 2 and 3 are then combined as
    ``sqrt(c1**2 + c2**2 + c3**2)`` and the resulting column vector is
    resampled with ``interpolate_acl`` using its default arguments.

    Parameters
    ----------
    a : np.ndarray
        2-D array with at least four columns.

    Returns
    -------
    np.ndarray
        Column vector of resampled magnitudes (400 rows with the defaults
        of ``interpolate_acl``).
    """
    ordered = a[a[:,0].argsort()]
    squared_sum = np.square(ordered[:,1])+np.square(ordered[:,2])+np.square(ordered[:,3])
    magnitude_column = np.sqrt(squared_sum).reshape(-1,1)
    return interpolate_acl(magnitude_column)

# Reduce every window to its magnitude signal, shaped (1, n_samples, 1) so the
# windows can be stacked along the first axis into one batch.
all_data['magnitude'] = all_data['data'].apply(
    lambda window: get_magnitude_array(window).reshape(1,-1,1)
)
all_data = all_data.sort_values('timestamp').reset_index(drop=True)

# Stack all windows and keep, for each one, the index of the largest model output.
X = np.concatenate(list(all_data['magnitude'].values))
prediction = model.predict(X).argmax(axis=1)
all_data['prediction'] = prediction

# Lookup from output index to activity name. An earlier version of this list,
# ['Stationery','Stairs','Exercise','Walking','Sports'], labelled index 1 as
# 'Stairs'; here index 1 is reported as 'Walking' as well, so only four
# distinct labels remain.
final_activity_list = ['Stationery','Walking','Exercise','Walking','Sports']
all_data['prediction'] = all_data['prediction'].apply(
    lambda label_index: final_activity_list[label_index]
)

In [11]:
# Rank users by their non-null 'timestamp' count (largest first) and keep all
# but the last two, i.e. the two users with the fewest rows are dropped.
# NOTE(review): this cell overwrites all_data, so re-running it removes two
# more users each time; restart the kernel before re-executing.
users = (
    all_data.groupby('user',as_index=False)
    .count()
    .sort_values('timestamp',ascending=False)
    .reset_index(drop=True)
    .iloc[:-2]['user']
    .values
)
all_data = all_data[all_data['user'].isin(users)]

In [13]:
def interpolate_all_data(a):
    """Resample the three value columns of one window.

    Rows are ordered by column 0 (presumably a timestamp -- TODO confirm),
    column 0 is dropped, and the remaining values are viewed as rows of three
    before being resampled by ``interpolate_acl`` with ``fs_new=25``.

    Parameters
    ----------
    a : np.ndarray
        2-D array whose first column is the sort key.

    Returns
    -------
    np.ndarray
        Array of shape ``(1, n, 3)``; ``n`` is 500 with the default
        ``window_size`` of ``interpolate_acl``.
    """
    ordered = a[a[:,0].argsort()]
    triaxial = ordered[:,1:].reshape(-1,3)
    resampled = interpolate_acl(triaxial,fs_new=25)
    return resampled.reshape(1,-1,3)


def get_data(df,split_type='train',train_split = 0.8):
    """Return the chronological train or test portion of ``df``.

    The frame is sorted by ``'timestamp'`` and re-indexed from 0, then cut at
    ``int(train_split * n_rows)``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``'timestamp'`` column; it is not modified.
    split_type : str
        ``'train'`` selects the rows before the cut; any other value selects
        the rows from the cut onwards.
    train_split : float
        Fraction of rows assigned to the training portion.

    Returns
    -------
    pd.DataFrame
        The selected rows, keeping the index of the sorted frame.
    """
    ordered = df.sort_values('timestamp').reset_index(drop=True)
    cut = int(train_split*ordered.shape[0])
    if split_type=='train':
        return ordered.iloc[:cut]
    return ordered.iloc[cut:]

def save_data_by_labels(df):
    """Write per-user chronological train/test splits for one activity to disk.

    The activity name is taken from the first ``'prediction'`` value of
    ``df``. A ``'final_data'`` column is added by applying
    ``interpolate_all_data`` to each entry of ``'data'``. Each user's rows
    are then split with ``get_data``, and the combined splits are pickled to
    ``<data_directory>/<activity>/train.p`` and ``test.p``.

    Reads the module-level ``data_directory``, which must be defined before
    this function is called.

    Parameters
    ----------
    df : pd.DataFrame
        Rows for a single activity, with ``'prediction'``, ``'data'``,
        ``'user'`` and ``'timestamp'`` columns. The frame is not modified.

    Returns
    -------
    tuple
        ``(activity, train_data.shape, test_data.shape)``.
    """
    activity = df['prediction'].values[0]
    activity_dir = os.path.join(data_directory,activity)
    # exist_ok replaces the separate isdir() check: no check-then-create race
    # and no error when the folder already exists from a previous run.
    os.makedirs(activity_dir,exist_ok=True)

    # assign() builds a new frame, so the caller's frame (typically a groupby
    # slice) is left untouched and pandas raises no SettingWithCopyWarning.
    df = df.assign(final_data=df['data'].apply(interpolate_all_data))

    # Group by user once; both splits are derived from the same per-user frames.
    user_frames = [df_user for _,df_user in df.groupby('user',as_index=False)]

    train_data = pd.concat([get_data(df_user,split_type='train') for df_user in user_frames]).sort_values('timestamp').reset_index(drop=True)
    train_path = os.path.join(activity_dir,'train.p')
    # Context managers make sure each pickle is flushed and its handle closed.
    with open(train_path,'wb') as train_file:
        pickle.dump(train_data,train_file)

    test_data = pd.concat([get_data(df_user,split_type='test') for df_user in user_frames]).sort_values('timestamp').reset_index(drop=True)
    test_path = os.path.join(activity_dir,'test.p')
    with open(test_path,'wb') as test_file:
        pickle.dump(test_data,test_file)

    print(activity,train_data.shape,test_data.shape)
    return activity,train_data.shape,test_data.shape

# Root folder for the per-activity output; save_data_by_labels reads this
# module-level name, so it has to be set before the jobs are started.
data_directory = './data/mORAL_dataset_for_python_upload_09072020/processed_data/'

# One job per predicted activity; n_jobs=-1 lets joblib use every available core.
# `done` collects the (activity, train shape, test shape) tuple from each job.
done = Parallel(n_jobs=-1,verbose=2)(
    delayed(save_data_by_labels)(activity_df)
    for _,activity_df in all_data.groupby('prediction',as_index=False)
)
    

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   4 | elapsed:   18.2s remaining:   18.2s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:  1.7min finished


[('Exercise', (244, 8), (71, 8)),
 ('Sports', (40782, 8), (10204, 8)),
 ('Stationery', (286619, 8), (71662, 8)),
 ('Walking', (15871, 8), (3976, 8))]