## sisfall_dataset으로 훈련시킨 LSTM 모델

### 1. 종속성 라이브러리 불러오기

In [1]:
## 파이썬 데이터과학, 기본 라이브러리
import pandas as pd
import numpy as np
from scipy.signal import butter, lfilter, freqz

import time
import os
import math
import random
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
## 머신러닝, 딥러닝 모델 라이브러리
import tensorflow as tf
import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda
from tensorflow.keras.losses import Huber, binary_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [3]:
## 시각화 라이브러리
import matplotlib.pyplot as plt
# from tqdm.auto import tqdm # 진행률 프로세스바

### 2. 데이터셋 불러오기

In [4]:
pd.set_option('display.max_rows',100) # pandas 표를 출력할 때 최대 행 수 설정

In [5]:
## 데이터셋 디렉터리 구조, 경로

path = './SisFall_csv/'

person = ['SA01','SA02','SA03','SA04','SA05','SA06','SA07','SA08','SA09','SA10','SA11','SA12','SA13','SA14','SA15','SA16','SA17','SA18','SA19','SA20', 'SA21','SA22','SA23',
         'SE01', 'SE02', 'SE03', 'SE04', 'SE05', 'SE06', 'SE07', 'SE08', 'SE09', 'SE10', 'SE11', 'SE12', 'SE13', 'SE14', 'SE15']


In [6]:
dailies = ['D01', 'D02', 'D03', 'D04', 'D05', 'D06', 'D07', 'D08', 'D09', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'D16', 'D17', 'D18', 'D19']
falls = ['F01', 'F02', 'F03', 'F04', 'F05', 'F06', 'F07', 'F08', 'F09', 'F10', 'F11', 'F12', 'F13', 'F14', 'F15']

trials = ['R01', 'R02','R03','R04','R05']

In [7]:
## 데이터셋 불러오기 함수

def load_dataset(person):
    subjectList = []
    adl_list = []
    fall_list = []
    for man in person:
        a = pd.read_csv(path + f'{man}.csv')
        df = a[['ADXL_x', 'ADXL_y', 'ADXL_z', 'ITG_x', 'ITG_y', 'ITG_z', 'subject', 'activity', 'trial']]
        subjectList.append(df)
    
    # 데이터를 ADLs과 Falls로 나누기
    for s in subjectList:
        for d in dailies:
            tempdf = s[s['activity'] == d]
            adl_list.append(tempdf)

        for f in falls:
            tempdf = s[s['activity'] == f]
            fall_list.append(tempdf)
            
    return fall_list, adl_list

In [8]:
## 테스트용으로 시각화 출력해주는 함수

def printer(df, min_max=True, range=[0,3000], *args):
    idx = {}
    if min_max:
        x = df.drop(['activity','person'],axis=1).astype('float64')
        k = (x - x.min()) / (x.max() - x.min())
        for w in args:
            plt.plot(k[w],label=w)
            idx[w] = df[w][df[w] == df[w].max()].index.values
        plt.title(f'person:{df.person[0]}  ,  activity:{df.activity[0]}')
        plt.xlim(range)
        plt.legend()
        plt.show()
        for w in idx:
            print(f'{w} : {idx[w]}')
    else:
        for w in args:
            plt.plot(df[w],label=w)
        plt.title(df.activity[0])
        plt.xlim(range)
        plt.legend()
        plt.show()

### 3. 데이터 전처리

In [9]:
## 데이터 필터링 함수
order = 4 # 차수
fs = 200 # 샘플주파수
cutoff = 5.0 # 차단주파수

def butter_lowpass(cutoff, fs, order=5):
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a


def butter_lowpass_filter(data, cutoff, fs, order=5):
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = lfilter(b, a, data)
    return y

In [10]:
## 데이터 특성 추출 함수
def feature_extraction(df):
    df_list = []
    rol_val = 200
    for my_df in df:
        for trial in trials:
            trial_df = my_df[my_df['trial'] == trial]
            if len(trial_df) == 0:
                continue
            trial_df.reset_index(inplace=True)
            
            tempdf = pd.DataFrame()
            
            # 데이터 필터링 : butterworth lowpass filtering ( fx = 필터링된 가속도센서 x축 )
            # [특성1] vm2 : 필터링된 가속도센서 (x,y,z축)데이터들의 합, Sum vector magnitude
            tempdf['ax'], tempdf['ay'], tempdf['az'] = trial_df['ADXL_x'], trial_df['ADXL_y'], trial_df['ADXL_z']
            
            tempdf['fx'] = pd.Series(butter_lowpass_filter(tempdf['ax'], cutoff, fs, order))
            tempdf['fy'] = pd.Series(butter_lowpass_filter(tempdf['ay'], cutoff, fs, order))
            tempdf['fz'] = pd.Series(butter_lowpass_filter(tempdf['az'], cutoff, fs, order))
            tempdf['vm2'] =np.sqrt(tempdf['fx'] ** 2 + tempdf['fy'] ** 2 + tempdf['fz'] ** 2)

            
            # [특성2] diff_sum : 요소 간 차이의 합 ( bx = 필터링된 가속도센서 데이터에서 x축 요소들간의 차 )
            tempdf['bx'] = tempdf['fx'].diff()
            tempdf['by'] = tempdf['fy'].diff()
            tempdf['bz'] = tempdf['fz'].diff()
            tempdf['diff_sum'] = np.sqrt(tempdf['bx'] ** 2 + tempdf['by'] ** 2 + tempdf['bz'] ** 2)
        
            
            # [특성3] g_sum : 필터링된 자이로센서 (x,y,z축)데이터들의 합
            tempdf['gx'], tempdf['gy'], tempdf['gz'] = trial_df['ITG_x'], trial_df['ITG_y'], trial_df['ITG_z']
            
            tempdf['g_fx'] = pd.Series(butter_lowpass_filter(tempdf['gx'], cutoff, fs, order))
            tempdf['g_fy'] = pd.Series(butter_lowpass_filter(tempdf['gy'], cutoff, fs, order))
            tempdf['g_fz'] = pd.Series(butter_lowpass_filter(tempdf['gz'], cutoff, fs, order))
            tempdf['g_sum'] = np.sqrt(tempdf['g_fx'] ** 2 + tempdf['g_fy'] ** 2 + tempdf['g_fz'] ** 2)


            # [특성]
            tempdf['activity'] = trial_df['activity']
            tempdf['person'] = trial_df['subject']


            ## ==================================================================
            # 아래는 사용하지 않은 특성들 (sisfall논문에서 성능이 잘 나왔던 지표들의 특성들)
            # 롤링 표준편차 Rolling standard deviations
            tempdf['fx_std'] = tempdf['fx'].rolling(rol_val,min_periods = 1).std()
            tempdf['fy_std'] = tempdf['fy'].rolling(rol_val,min_periods = 1).std()
            tempdf['fz_std'] = tempdf['fz'].rolling(rol_val,min_periods = 1).std()
            tempdf['bx_std'] = tempdf['bx'].rolling(rol_val,min_periods = 1).std()
            tempdf['by_std'] = tempdf['by'].rolling(rol_val,min_periods = 1).std()
            tempdf['bz_std'] = tempdf['bz'].rolling(rol_val,min_periods = 1).std()
            tempdf['gx_std'] = tempdf['gx'].rolling(rol_val,min_periods = 1).std()
            tempdf['gy_std'] = tempdf['gy'].rolling(rol_val,min_periods = 1).std()
            tempdf['gz_std'] = tempdf['gz'].rolling(rol_val,min_periods = 1).std()

            #C8
            tempdf['horiz_std_mag9'] = np.sqrt(tempdf['fx_std'] ** 2 + tempdf['fz_std'] ** 2)
            #C2
            tempdf['horiz_vector_mag9'] = np.sqrt(tempdf['fx'] ** 2 + tempdf['fz'] ** 2)
            tempdf['std_mag9'] = np.sqrt(tempdf['fx_std'] ** 2 + tempdf['fy_std'] ** 2 + tempdf['fz_std'] ** 2)
            #C9
            tempdf['horiz_std_mag2'] = np.sqrt(tempdf['bx_std'] ** 2 + tempdf['bz_std'] ** 2)
            tempdf['gyro_horiz_std_mag'] = np.sqrt(tempdf['gx_std'] ** 2 + tempdf['gz_std'] ** 2)
            tempdf['gyro_vector_mag'] = np.sqrt(tempdf['gx'] ** 2 + tempdf['gy'] ** 2 + tempdf['gz'] ** 2)
            tempdf['gyro_horiz_mag'] = np.sqrt(tempdf['gx'] ** 2 + tempdf['gz'] ** 2)
            # C2
            tempdf['horiz_mag'] = np.sqrt(tempdf['fx'] ** 2 + tempdf['fz'] ** 2)
            ## ======================================================================
            
            # 특성으로 사용하지 않을 컬럼 지우기
            temp_list1 = tempdf['person']
            temp_list2 = tempdf['activity']
            
            tempdf = tempdf.drop(['ax', 'ay', 'az', 'fx', 'fy', 'fz', 'bx', 'by', 'bz', 'gx', 'gy', 'gz', 'g_fx', 'g_fy', 'g_fz', 
                         'fx_std', 'fy_std', 'fz_std', 'bx_std', 'by_std', 'bz_std', 'gx_std', 'gy_std', 'gz_std', 'person', 'activity'], axis=1)
            
            tempdf.insert(loc=0,column='person',value=temp_list1)
            tempdf.insert(loc=1,column='activity',value=temp_list2)
            
            df_list.append(tempdf.fillna(0))
            
    return df_list

In [11]:
## 데이터 라벨링

# 기준으로 삼을 특성, 특성들의 값이 차이가 많이나서 min_max 정규화, 어떤 특성들을 사용 할 것인지.
def labeling(fall_list, adl_list, standard, check_list):
    adl_df = []
    fall_df = []

    for df in fall_list:
        df = df.loc[:,check_list]
        k = df[standard].idxmax() # 최대값을 가지는 인덱스를 k로
        fall = df.loc[k - 160 : k] # vm2(기준으로 잡음)가 최고점을 찍은 순간의 0.8초 전까지를 fall로 라벨렝 (미리 fall을 예측해야하므로)
        fall['label'] = 1
        fall_df.append(fall)
    
    for df in adl_list:
        df = df.loc[120:,check_list] # ADL은 어쩌피 다 비슷하게 나올 것이므로, 데이터 크기가 커서 그냥 임의로 잘라줌
        df['label'] = 0
        adl_df.append(df)
        
    return fall_df, adl_df

In [12]:
## 데이터 슬라이딩 윈도잉

WINDOWSIZE = 60

def windowing(fall_df, adl_df, WINDOWSIZE):
    x = []
    y = []
    for df in fall_df:
        for i in range(len(df) - WINDOWSIZE):
            df_x = df.drop('label',axis=1).iloc[i : i+ WINDOWSIZE]
            df_y = df['label'].iloc[1]
            x.append(df_x)
            y.append(df_y)
    for df in adl_df:
        count = 0
        for i in range(120, len(df) - WINDOWSIZE, 20):
            count += 1
            df_x = df.drop('label',axis=1).iloc[i : i+ WINDOWSIZE]
            df_y = df['label'].iloc[1]
            x.append(df_x)
            y.append(df_y)
            if count > 120:
                break
    x = np.array(x)
    y = np.array(y)
    
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state = 0)
    return x_train, x_test, y_train, y_test

In [13]:
## 전처리 하는 부분

# 데이터 불러오기
fall_list, adl_list = load_dataset(person)

# 데이터 특성 추출 해서 각 리스트에 담기
fall_list = feature_extraction(fall_list)
adl_list = feature_extraction(adl_list)

# 어떤 특성을 사용할 것인지 checkList
checkList = [
    'vm2',
    'g_sum',
    'diff_sum'
]

# 데이터 라벨링
fall_df, adl_df = labeling(fall_list, adl_list, 'vm2', checkList)

# 데이터 슬라이딩 윈도잉으로 자르고, 훈련&테스트 데이터 분할
x_train, x_test, y_train, y_test = windowing(fall_df , adl_df, 60)

# 훈련데이터에서 검증데이터 분할
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state = 0)

In [None]:
## 특성 시각화
# 시각화한 특성들로 무슨 특성을 사용해야 할지 결정
count = 0

fig, ax = plt.subplots(6,5,figsize=(30,30))
for k in range(0, 0, 10):
    count2 = 0
    df = fall_list[k][checkList]
    df = (df - df.min()) / (df.max() - df.min())
    for i in checkList:

        ax[count,count2].plot(df[i], label=i)
        ax[count,count2].plot(df['vm2'] ,label = 'vm2')
        ax[count,count2].legend()
        count2 += 1
    count += 1

plt.show()

### 4. 모델 선택

In [14]:
## LSTM 모델

model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128,input_shape=[WINDOWSIZE, 3], activation='tanh', return_sequences=True),
    tf.keras.layers.LSTM(64,  return_sequences=True,dropout=0.2 ),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

In [None]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer='adam', metrics='accuracy')
early_stop = EarlyStopping(monitor='val_loss', patience=30)
filename = os.path.join(path + 'tmp_checkpoint.h5')
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

history = model.fit(x_train, y_train, 
                    epochs=100, 
                    batch_size=32,
                    validation_data=(x_valid, y_valid), 
                    callbacks=[early_stop, checkpoint])

Epoch 1/100
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'

Epoch 1: val_loss improved from inf to 0.17416, saving model to ./SisFall_csv\tmp_checkpoint.h5
Epoch 2/100
Epoch 2: va

#### 모델 불러오기 및 저장

In [None]:
## 모델 불러오기
# model = keras.models.load_model('LSTM_sisfall.h5')

## 모델 저장하기
# from tensorflow.python.keras.models import load_model
# model.save('LSTM_sisfall_v3_minmax_x.h5')

### 5. 평가 및 검증

In [None]:
# score = model.evaluate(x_test, y_test)
# print(score[1])
# print(score[0])
print('\n# Evaluate on test data')
results = model.evaluate(x_test, y_test, batch_size=128)
print('test loss, test acc:', results)