In [None]:
import joblib
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from tqdm import tqdm

# original data split

In [None]:
# Notebook configuration.
# Name of the label column passed to the dataset builder.
target_name = 'activity'

# CSV path stems; the loader appends '.csv' to each entry.
user_lst = [
    './data/user_06',
]

# Feature columns cut into windows. Nine-channel alternative kept for reference:
# var_lst9 = ['mAcc_x','mAcc_y','mAcc_z', 'mGyr_x', 'mGyr_y', 'mGyr_z', 'mMag_x', 'mMag_y', 'mMag_z']
var_lst = [
    'mAcc_x',
    'mAcc_y',
    'mAcc_z',
]

# make custom dataset function
def CustomDataset(user_lst, var_lst, target_name, start=150, window_size=80, stride=40):
    """Cut every recording segment into fixed-length sliding windows.

    Each entry of ``user_lst`` is a path stem; ``<stem>.csv`` is read with
    pandas. Rows are grouped by their ``'ts'`` value and every group is
    treated as one segment carrying a single label.

    Parameters
    ----------
    user_lst : iterable of str
        CSV path stems (without the ``.csv`` extension).
    var_lst : list of str
        Feature columns copied into each window.
    target_name : str
        Column holding the label; the first value of each segment is used
        (the segment is assumed to carry one label throughout).
    start : int, default 150
        Number of leading rows skipped in every segment before the first
        window (original note: "about 4s" -- sampling rate not shown here).
    window_size : int, default 80
        Rows per window.
    stride : int, default 40
        Step between window starts; the defaults give 50% overlap.

    Returns
    -------
    x : numpy.ndarray
        Shape ``(n_windows, window_size, len(var_lst))``.
    y : numpy.ndarray
        Shape ``(n_windows,)``, one label per window.

    Raises
    ------
    ValueError
        If ``start`` is negative or ``window_size`` / ``stride`` is not positive.
    """
    if start < 0 or window_size <= 0 or stride <= 0:
        raise ValueError('start must be >= 0 and window_size/stride must be > 0')

    whole_x = []
    whole_y = []

    for user in user_lst:
        data = pd.read_csv(user + '.csv')

        # One pass over the frame: groups come out in order of first
        # appearance (sort=False), and no per-segment re-filtering or integer
        # cast of 'ts' is needed. Rows with a missing 'ts' are skipped by
        # groupby's default behaviour.
        for _, segment in tqdm(data.groupby('ts', sort=False)):
            label = segment[target_name].iloc[0]
            values = segment[var_lst].to_numpy()

            # Last admissible window start is n_rows - window_size.
            last_begin = values.shape[0] - window_size
            for begin in range(start, last_begin + 1, stride):
                whole_x.append(values[begin:begin + window_size])
                whole_y.append(label)

    if whole_x:
        x = np.array(whole_x)
    else:
        # Keep the 3-D layout even when no segment is long enough.
        x = np.empty((0, window_size, len(var_lst)))
    y = np.array(whole_y)

    print('whole_X', x.shape)
    print('whole_Y', y.shape)

    return x, y

In [None]:
# Build the windowed feature array and the per-window labels.
x_list, y_list = CustomDataset(
    user_lst=user_lst,
    var_lst=var_lst,
    target_name=target_name,
)

In [None]:
# Show the dimensions of the windowed features, then of the labels.
print(np.shape(x_list))
print(np.shape(y_list))

## preprocessing

In [None]:
# One-hot encode the labels.
# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
# `sparse_output` and 1.4 removed the old name, so try the new spelling first
# and fall back for older installations. Either way the output is dense.
try:
    enco = OneHotEncoder(sparse_output=False)
except TypeError:  # scikit-learn < 1.2 does not know `sparse_output`
    enco = OneHotEncoder(sparse=False)

# The encoder expects a 2-D column, hence the added axis.
enco.fit(y_list[:,np.newaxis])
# NOTE: y_list is rebound to its one-hot form here; re-run the dataset cell
# before re-running this one.
y_list = enco.transform(y_list[:,np.newaxis])
print('y shape:',y_list.shape)

In [None]:
# Split
# Target ratio train : valid : test = 0.9 : 0.05 : 0.05.
# This call keeps 90% for training; the 10% hold-out is halved afterwards.
# NOTE(review): windows from one segment overlap, so a shuffled split can place
# overlapping windows on both sides -- confirm this is acceptable.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_list,
    y_list,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Halve the hold-out: one half stays as validation data, the other becomes test.
# x_valid / y_valid are rebound here, so re-running this cell alone halves again.
x_valid, x_test, y_valid, y_test = train_test_split(
    x_valid,
    y_valid,
    test_size=0.5,
    random_state=42,
)

In [None]:
# Dimensions of the training split.
print('x_train shape :', np.shape(x_train))
print('y_train shape :', np.shape(y_train))

In [None]:
# Dimensions of the validation split.
print('x_valid shape :', np.shape(x_valid))
print('y_valid shape :', np.shape(y_valid))

In [None]:
# Dimensions of the test split.
print('x_test shape :', np.shape(x_test))
print('y_test shape :', np.shape(y_test))

In [None]:
# Standardise each feature channel. The scaler works on 2-D input, so every
# split is flattened to (rows, channels), transformed, and reshaped back.
scaler = StandardScaler()

# Statistics come from the training split only; valid/test reuse them.
scaler.fit(x_train.reshape(-1, x_train.shape[-1]))

# The tuple of inputs is built before any name is rebound.
x_train, x_valid, x_test = (
    scaler.transform(split.reshape(-1, split.shape[-1])).reshape(split.shape)
    for split in (x_train, x_valid, x_test)
)

#### data check

In [None]:
# Eyeball the scaled training features followed by their one-hot labels.
print(x_train, y_train, sep='\n')

## save preprocessed data

In [None]:
# Write every split to ./data as .npy, features first and labels second.
for npy_path, split_array in (
    ("./data/x_train.npy", x_train),
    ("./data/x_valid.npy", x_valid),
    ("./data/x_test.npy", x_test),
    ("./data/y_train.npy", y_train),
    ("./data/y_valid.npy", y_valid),
    ("./data/y_test.npy", y_test),
):
    np.save(npy_path, split_array)

In [None]:
# Persist the fitted scaler and one-hot encoder next to the saved arrays.
# joblib is imported in the notebook's top import cell rather than here, so
# all dependencies are visible in one place.
joblib.dump(scaler,'./data/scaler.pkl')
joblib.dump(enco,'./data/onehot.pkl')

# Let's feed the data into the model!