#**스마트폰 센서 데이터 기반 모션 분류**
# 단계3 : 단계별 모델링


## 0.미션4

* 단계별로 나눠서 모델링을 수행하고자 합니다.  
* 단계 구분 예시
    * 단계1 : 정적(0), 동적(1) 행동 분류 모델 생성
    * 단계2 : 세부 동작에 대한 분류모델 생성
        * 단계1 모델에서 0으로 예측 -> 정적 행동 3가지 분류 모델링
        * 단계1 모델에서 1으로 예측 -> 동적 행동 3가지 분류 모델링
* 모델 통합
    * 두 단계 모델을 통합하고, 새로운 데이터에 대해서 최종 예측결과와 성능평가가 나오도록 함수로 만들기
* 성능 비교
    * 기본 모델링의 성능과 비교
    * 성능 가이드
        * Accuracy : 0.97 ~ 0.99
* 파이프라인 구성
    * test 데이터를 입력하여, 전처리 및 예측결과가 나오도록 함수 구성

## 1.환경설정

* 세부 요구사항
    - 경로 설정 : 구글콜랩
        * 구글 드라이브 바로 밑에 project3 폴더를 만들고,
        * 데이터 파일을 복사해 넣습니다.
    - 기본적으로 필요한 라이브러리를 import 하도록 코드가 작성되어 있습니다.
        * 필요하다고 판단되는 라이브러리를 추가하세요.


### (1) 경로 설정

* 구글 드라이브 연결

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
path = '/content/drive/MyDrive/project3/'

### (2) 라이브러리 불러오기

* 라이브러리 로딩

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import *

from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, Input
from keras.backend import clear_session
from keras.optimizers import Adam

In [None]:
# 학습곡선 함수
def dl_history_plot(history):
    plt.figure(figsize=(10,6))
    plt.plot(history['loss'], label='train_err', marker = '.')
    plt.plot(history['val_loss'], label='val_err', marker = '.')

    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()
    plt.grid()
    plt.show()

### (3) 데이터 불러오기

* 주어진 데이터셋
    * data01_train.csv : 학습 및 검증용
    * data01_test.csv : 테스트용
    * feature.csv : feature 이름을 계층구조로 정리한 데이터

* 세부 요구사항
    * 칼럼 삭제 : data01_train.csv와 data01_test.csv 에서 'subject' 칼럼은 불필요하므로 삭제합니다.

#### 1) 데이터로딩

In [None]:
file1 = 'data01_train.csv'
file2 = 'data01_test.csv'
file3 = 'features.csv'

In [None]:
data = pd.read_csv(path + file1)
test = pd.read_csv(path + file2)
features = pd.read_csv(path + file3)

In [None]:
# 불필요한 칼럼 삭제
data.drop('subject', axis=1, inplace=True)
test.drop('subject', axis=1, inplace=True)

#### 2) 기본 정보 조회

In [None]:
#전체 데이터의 행,열 개수 확인
data.shape

(5881, 562)

In [None]:
#전체 데이터의 상위 5개 행 확인
data.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989,-0.962596,-0.96565,-0.929747,...,-0.487737,-0.816696,-0.042494,-0.044218,0.307873,0.07279,-0.60112,0.331298,0.165163,STANDING
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.99387,-0.987558,-0.937337,...,-0.23782,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING
2,0.278709,-0.014511,-0.108717,-0.99772,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.535287,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.004012,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.10968,-0.346372,0.584131,...,-0.157832,-0.563437,-0.044344,-0.845268,-0.97465,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS


In [None]:
#전체 데이터의 수치형 변수 분포 확인
data.describe()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-meanFreq(),fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)"
count,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,...,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0
mean,0.274811,-0.017799,-0.109396,-0.603138,-0.509815,-0.604058,-0.628151,-0.525944,-0.605374,-0.46549,...,0.126955,-0.305883,-0.623548,0.008524,-0.001185,0.00934,-0.007099,-0.491501,0.059299,-0.054594
std,0.067614,0.039422,0.058373,0.448807,0.501815,0.417319,0.424345,0.485115,0.413043,0.544995,...,0.249176,0.322808,0.310371,0.33973,0.447197,0.60819,0.476738,0.509069,0.29734,0.278479
min,-0.503823,-0.684893,-1.0,-1.0,-0.999844,-0.999667,-1.0,-0.999419,-1.0,-1.0,...,-0.965725,-0.979261,-0.999765,-0.97658,-1.0,-1.0,-1.0,-1.0,-1.0,-0.980143
25%,0.262919,-0.024877,-0.121051,-0.992774,-0.97768,-0.980127,-0.993602,-0.977865,-0.980112,-0.936067,...,-0.02161,-0.541969,-0.845985,-0.122361,-0.294369,-0.481718,-0.373345,-0.811397,-0.018203,-0.141555
50%,0.277154,-0.017221,-0.108781,-0.943933,-0.844575,-0.856352,-0.948501,-0.849266,-0.849896,-0.878729,...,0.133887,-0.342923,-0.712677,0.010278,0.005146,0.011448,-0.000847,-0.709441,0.182893,0.003951
75%,0.288526,-0.01092,-0.098163,-0.24213,-0.034499,-0.26269,-0.291138,-0.068857,-0.268539,-0.01369,...,0.288944,-0.127371,-0.501158,0.154985,0.28503,0.499857,0.356236,-0.51133,0.248435,0.111932
max,1.0,1.0,1.0,1.0,0.916238,1.0,1.0,0.967664,1.0,1.0,...,0.9467,0.989538,0.956845,1.0,1.0,0.998702,0.996078,0.977344,0.478157,1.0


In [None]:
#전체 데이터의 모든 변수 확인
data.columns

Index(['tBodyAcc-mean()-X', 'tBodyAcc-mean()-Y', 'tBodyAcc-mean()-Z',
       'tBodyAcc-std()-X', 'tBodyAcc-std()-Y', 'tBodyAcc-std()-Z',
       'tBodyAcc-mad()-X', 'tBodyAcc-mad()-Y', 'tBodyAcc-mad()-Z',
       'tBodyAcc-max()-X',
       ...
       'fBodyBodyGyroJerkMag-skewness()', 'fBodyBodyGyroJerkMag-kurtosis()',
       'angle(tBodyAccMean,gravity)', 'angle(tBodyAccJerkMean),gravityMean)',
       'angle(tBodyGyroMean,gravityMean)',
       'angle(tBodyGyroJerkMean,gravityMean)', 'angle(X,gravityMean)',
       'angle(Y,gravityMean)', 'angle(Z,gravityMean)', 'Activity'],
      dtype='object', length=562)

## 2.데이터 전처리

* 세부 요구사항
    - Label 추가 : 1단계 모델을 위한 레이블 추가
    - train : val = 8 : 2 혹은 7 : 3
    - random_state 옵션을 사용하여 다른 모델과 비교를 위해 성능이 재현되도록 합니다.

### (1) 1단계 모델링을 위한 레이블

In [None]:
static_activities = ['STANDING', 'SITTING', 'LAYING']
dynamic_activities = ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS']
data['is_dynamic'] = data['Activity'].apply(lambda x: 0 if x in static_activities else 1)
data.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,is_dynamic
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989,-0.962596,-0.96565,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.07279,-0.60112,0.331298,0.165163,STANDING,0
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.99387,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING,0
2,0.278709,-0.014511,-0.108717,-0.99772,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING,0
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING,1
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.10968,-0.346372,0.584131,...,-0.563437,-0.044344,-0.845268,-0.97465,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS,1


### (2) x, y 분리

In [None]:
target = 'is_dynamic'
target_2 = 'Activity'
data.drop(columns = target_2, inplace=True)
x = data.drop(columns = target)
y = data.loc[:, target]

### (3) 스케일링


* 세부 요구사항
    - 스케일링을 필요로 하는 알고리즘 사용을 위해서 코드 수행
    - min-max 방식 혹은 standard 방식 중 한가지 사용.

In [None]:
# 기본 데이터가 -1~1이면 scaling 안해도 된다.

In [None]:
# scaler = MinMaxScaler()
# x = scaler.fit_transform(x)

### (4) 데이터 분할
* train, val 분할

In [None]:
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size = .2, random_state = 20)

## **3.단계별 모델링**

### (1) 단계1

* 세부 요구사항
    * 적절한 단계로 구분한 후, 1단계를 분류하는 모델 생성
        * 예시 : 정적 행동(Laying, Sitting, Standing)과 동적 행동(동적 : Walking, Walking-Up, Walking-Down)을 구분하는 모델 생성.
    * 몇 가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

#### 1) 모델1

In [None]:
n = x_train.shape[1]
n

561

In [None]:
model1 = Sequential( [Input(shape = (n,)),
                     Dense(60, activation='relu'),
                     Dropout(0.3),
                     Dense(80, activation='relu'),
                     Dropout(0.4),
                     Dense(40, activation='relu'),
                     Dropout(0.1),
                     Dense(20, activation='relu'),
                     Dense(2, activation = 'softmax')] )

model1.compile(optimizer = Adam(learning_rate = 0.0002), loss = 'sparse_categorical_crossentropy')

model1.fit(x_train, y_train, epochs = 100, validation_split = 0.2)

pred1 = model1.predict(x_val)
pred1 = np.argmax(pred1, axis = 1)

print("Accuracy: ", accuracy_score(y_val, pred1))

Epoch 1/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 18ms/step - loss: 0.2857 - val_loss: 0.0035
Epoch 2/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0059 - val_loss: 0.0013
Epoch 3/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0017 - val_loss: 7.8346e-04
Epoch 4/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 8.8303e-04 - val_loss: 6.0663e-04
Epoch 5/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 4.7624e-04 - val_loss: 5.0179e-04
Epoch 6/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 3.5419e-04 - val_loss: 3.5761e-04
Epoch 7/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 3.6143e-04 - val_loss: 2.3979e-04
Epoch 8/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.5768e-04 - val_los

#### 2) 모델2

In [None]:
from keras.callbacks import EarlyStopping

es = EarlyStopping(monitor = 'val_loss', min_delta = 0.0001, patience = 5)

model2 = Sequential([Input(shape=(n,)),
                   Dense(1000, activation='relu'),
                   Dropout(0.3),
                   Dense(1000, activation='relu'),
                   Dropout(0.3),
                   Dense(500, activation='relu'),
                   Dropout(0.3),
                   Dense(100, activation='relu'),
                   Dropout(0.3),
                   Dense(1, activation='sigmoid')])

model2.compile(optimizer = Adam(learning_rate = 0.0001), loss = 'binary_crossentropy')

model2.fit(x_train, y_train, epochs = 100, validation_split = 0.2, callbacks = [es])

pred2 = model2.predict(x_val)
pred2 = np.where(pred2 >= 0.5, 1, 0)

print("Accuracy: ", accuracy_score(y_val, pred2))

Epoch 1/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - loss: 0.2078 - val_loss: 1.2002e-04
Epoch 2/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 52ms/step - loss: 0.0030 - val_loss: 2.5063e-05
Epoch 3/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 49ms/step - loss: 0.0030 - val_loss: 5.7354e-05
Epoch 4/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 85ms/step - loss: 0.0010 - val_loss: 2.1956e-04
Epoch 5/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 70ms/step - loss: 0.0025 - val_loss: 7.8268e-06
Epoch 6/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 66ms/step - loss: 0.0023 - val_loss: 3.9257e-06
Epoch 7/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 64ms/step - loss: 2.3435e-04 - val_loss: 9.0766e-06
Epoch 8/100
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 58ms/step - loss: 2.5220e-04 - v

### (2) 단계2

#### 1) 단계2-1 : 정적 동작 세부 분류

* 세부 요구사항
    * 정적 행동(Laying, Sitting, Standing)인 데이터 추출
    * Laying, Sitting, Standing 를 분류하는 모델을 생성
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

pred

0: Laying
1: Sitting
2: Standing

#### 모델 2-1-1

In [None]:
# 제일 위에 데이터 불러오고 밑에 코드들 순서대로 실행했습니다.

static_activities = ['STANDING', 'SITTING', 'LAYING']
dynamic_activities = ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS']
data['is_dynamic'] = data['Activity'].apply(lambda x: 0 if x in static_activities else 1)
data.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,is_dynamic
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989,-0.962596,-0.96565,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.07279,-0.60112,0.331298,0.165163,STANDING,0
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.99387,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING,0
2,0.278709,-0.014511,-0.108717,-0.99772,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING,0
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING,1
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.10968,-0.346372,0.584131,...,-0.563437,-0.044344,-0.845268,-0.97465,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS,1


In [None]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
data['Activity'] = encoder.fit_transform(data['Activity'])

In [None]:
df_0 = data.loc[data['Activity'].isin([0, 1, 2])]

# x, y 분리
target = 'Activity'
x0 = df_0.drop([target,'is_dynamic'], axis = 1)
y0 = df_0.loc[:, target]

# 데이터 분할
x_train0, x_val0, y_train0, y_val0 = train_test_split(x0, y0, test_size = .3, random_state = 20)

In [None]:
from keras.regularizers import l1, l2

clear_session()

n = x_train0.shape[1]

model3 = Sequential([Input(shape = (n, )),
                     Dense(512, activation = 'relu', kernel_regularizer = l1(0.001)),
                     Dense(256, activation = 'relu', kernel_regularizer = l1(0.001)),
                     Dense(128, activation = 'relu', kernel_regularizer = l1(0.001)),
                     Dense(64, activation = 'relu', kernel_regularizer = l1(0.001)),
                     Dense(32, activation = 'relu', kernel_regularizer = l1(0.001)),
                     Dense(3, activation = 'softmax')])


model3.compile(optimizer = Adam(learning_rate = 0.0005), loss = 'sparse_categorical_crossentropy')

model3.fit(x_train0, y_train0, epochs = 100, validation_split = 0.2)

pred3 = model3.predict(x_val0)
pred3 = np.argmax(pred3, axis = 1)

print("Accuracy: ", accuracy_score(y_val0, pred3))

Epoch 1/100
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 18.2846 - val_loss: 12.4745
Epoch 2/100
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 11.0780 - val_loss: 7.6799
Epoch 3/100
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 6.8609 - val_loss: 5.2956
Epoch 4/100
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 5.0351 - val_loss: 4.3351
Epoch 5/100
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 4.1573 - val_loss: 3.7553
Epoch 6/100
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 3.6077 - val_loss: 3.8186
Epoch 7/100
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 3.3498 - val_loss: 3.0093
Epoch 8/100
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 2.8990 - val_loss: 2.6923
Epoch 9/100
[1m57/57[0m [32m━━━━━━

#### 모델 2-2-2

In [None]:
clear_session()

n = x_train0.shape[1]

es = EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 3)

model4 = Sequential([Input(shape = (n, )),
                     Dense(1024, activation="relu"),
                     Dense(2048, activation="relu"),
                     Dropout(0.2),
                     Dense(512, activation="relu"),
                     Dense(256, activation="relu"),
                     Dense(64, activation="relu"),
                     Dense(16, activation="relu"),
                     Dense(3, activation = 'softmax')])


model4.compile(optimizer = Adam(learning_rate = 0.001), loss = 'sparse_categorical_crossentropy')

model4.fit(x_train0, y_train0, epochs = 50, validation_split = 0.2, callbacks = [es])

pred4 = model4.predict(x_val0)
pred4 = np.argmax(pred4, axis = 1)

print("Accuracy: ", accuracy_score(y_val0, pred4))

Epoch 1/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 132ms/step - loss: 0.9491 - val_loss: 0.5677
Epoch 2/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 69ms/step - loss: 0.4535 - val_loss: 0.3346
Epoch 3/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 82ms/step - loss: 0.2508 - val_loss: 0.2076
Epoch 4/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 85ms/step - loss: 0.1627 - val_loss: 0.2424
Epoch 5/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 72ms/step - loss: 0.1595 - val_loss: 0.1736
Epoch 6/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - loss: 0.2469 - val_loss: 0.1580
Epoch 7/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 93ms/step - loss: 0.1236 - val_loss: 0.3178
Epoch 8/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 67ms/step - loss: 0.1601 - val_loss: 0.1379
Epoch 9/50
[1m57/57[0m [32m━━━━━━━━━━━━━━━━

#### 2) 단계2-2 : 동적 동작 세부 분류

* 세부 요구사항
    * 동적 행동(Walking, Walking Upstairs, Walking Downstairs)인 데이터 추출
    * Walking, Walking Upstairs, Walking Downstairs 를 분류하는 모델을 생성
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

#### 모델 2-2-1

In [None]:
# 제일 위에 데이터 불러오고 밑에 코드들 순서대로 실행했습니다.

static_activities = ['STANDING', 'SITTING', 'LAYING']
dynamic_activities = ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS']
data['is_dynamic'] = data['Activity'].apply(lambda x: 0 if x in static_activities else 1)
data.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
data['Activity'] = encoder.fit_transform(data['Activity'])

In [None]:
df_1 = data.loc[data['Activity'].isin([3, 4, 5])]

# x, y 분리
target = 'Activity'
x1 = df_1.drop([target,'is_dynamic'], axis = 1)
y1 = df_1.loc[:, target] - 3

# 데이터 분할
x_train1, x_val1, y_train1, y_val1 = train_test_split(x1, y1, test_size = .3, random_state = 20)

In [None]:
class Modeling:
    def __init__(self, dense=[], dropout=[]):
        self.model = None
        self.dense = dense
        self.dropout = dropout

    def build_models(self, clear=True):
        if clear:
            clear_session()

        inp_layer = Input(shape=(x_train1.shape[1], )) #input도 바꿨습니다.
        out_layer = Dense(3, activation='softmax')

        model_layer = [inp_layer] # add input layer

        if len(self.dropout) != 0:
            for n, r in zip(self.dense, self.dropout): # hidden layer
                dense_ = Dense(n, activation='relu')
                dropout_ = Dropout(r)
                model_layer.append(dense_)
                model_layer.append(dropout_)
        else:
            for n in self.dense: # hidden layer
                dense_ = Dense(n, activation='relu')
                model_layer.append(dense_)

        model_layer.append(out_layer) # add output layer

        self.layers = model_layer

        model = Sequential(self.layers)
        self.model = model

    def compile_models(self, learning_rate):
        self.model.compile(
            optimizer=Adam(learning_rate),
            loss='sparse_categorical_crossentropy'
        )

    def run_models(self,
                   learning_rate=0.001, epochs=50, callbacks=[], time=1,log=True, return_preds=True):

        result = []
        f1_result = []
        history_logs = pd.DataFrame()

        for i in range(time):

            self.build_models()

            if i == 0:
                self.model.summary()

            self.compile_models(learning_rate)

            history = self.model.fit(x_train1, y_train1, # 학습값을 바꿨습니다.
                                        epochs=epochs,
                                        validation_split=0.2,
                                        verbose=0,
                                        callbacks=callbacks).history
            if log:
                history_logs[f'history_{i}'] = history

            pred = self.model.predict(x_val1) # 평가 데이터를 바꿨습니다.
            pred_label = pred.argmax(axis=1)

            result.append(accuracy_score(y_val1, pred_label))

            print(result[i])

        print("Accuracy: ", np.mean(result))

        if return_preds:
            return pred_label, result, history_logs
        else:
            return result, history_logs

In [None]:
LR = 0.001
EPOCHS = 50

model5 = Modeling([1024, 256, 64, 16])
y_pred, scores, history_logs = model5.run_models(LR, EPOCHS, time=1, log=True, return_preds=True)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
0.9874213836477987
Accuracy:  0.9874213836477987


#### 모델 2-2-2

In [None]:
LR = 0.001
EPOCHS = 200

model6 = Modeling([256, 128])
y_pred, scores, history_logs = model6.run_models(LR, EPOCHS, time=1, log=True, return_preds=True)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
0.9924528301886792
Accuracy:  0.9924528301886792


### (3) 분류 모델 파이프라인 구성


* 세부 요구사항
    * 두 단계 모델을 통합하고, 새로운 데이터(test)에 대해서 최종 예측결과와 성능평가가 나오도록 함수로 만들기
    * 데이터 파이프라인 구축 : test데이터가 로딩되어 전처리 과정을 거치고, 예측 및 성능 평가 수행

* 예시
![](https://github.com/DA4BAM/image/blob/main/pipeline%20function.png?raw=true)

#### 1) 함수 만들기

In [None]:
scaler_is_dynamic = MinMaxScaler()
sclaer_is_dynamic.fit_transform(x)

scaler_static = MinMaxScaler()
sclaer_static.fit_transform(x0)

scaler_dynamic = MinMaxScaler()
sclaer_dynamic.fit_transform(x1)

In [None]:
def get_test_performance(test_data,
                         model_is_dynamic = best_model_is_dynamic,
                         model_static = best_model_static,
                         model_dynamic = best_model_dynamic,
                         scaler_dynamic = scaler_dynamic,
                         scaler_static = scaler_static,
                         scaler_is_dynamic = scaler_is_dynamic,
                         encoder_dynamic = encoder_dynamic,
                         encoder_static = encoder_static):
  x_test, y_test = x_y_split(test_data, target="is_dynamic")
  x_test_is_dynamic = scaler_is_dynamic.transform(x_test)
  y_is_dynamic = model_is_dynamic.predict(x_test_is_dynamic)
  y_is_dynamic = y_is_dynamic.argmax(axis=1)

  test_data['is_dynamic'] = y_is_dynamic
  df_x_test = test_data.copy()
  # df_x_test = pd.DataFrame(x_test)

  # df_x_test["is_dynamic"] = y_is_dynamic

  x_static = df_x_test[df_x_test.is_dynamic == 0].drop(["Activity", "is_dynamic"], axis=1)
  y_static = df_x_test[df_x_test.is_dynamic == 0].loc[:, "Activity"]
  x_dynamic = df_x_test[df_x_test.is_dynamic == 1].drop(["Activity", "is_dynamic"], axis=1)
  y_dynamic = df_x_test[df_x_test.is_dynamic == 1].loc[:, "Activity"]

  x_static = scaler_static.transform(x_static)
  x_dynamic = scaler_dynamic.transform(x_dynamic)

  y_static_pred = model_static.predict(x_static)
  y_dynamic_pred = model_dynamic.predict(x_dynamic)

  y_static_result = y_static_pred.argmax(axis=1)
  y_dyanmic_result = y_dynamic_pred.argmax(axis=1)

  y_static_class = [encoder_static.classes_[x] for x in y_static_result]
  y_dynamic_class = [encoder_dynamic.classes_[x] for x in y_dyanmic_result]

  return accuracy_score(list(y_static) + list(y_dynamic), list(y_static_class)+list(y_dynamic_class))

#### 2) test 셋으로 예측하고 평가하기

In [2]:
acc_final = get_test_performance(test, model_is_dynamic=model4, model_dynamic=model3, model_static=model1)
acc_final

0.9768864717878993