# **스마트폰 센서 데이터 기반 모션 분류**
# 단계3 : 단계별 모델링


## 0.미션

단계별로 나눠서 모델링을 수행하고자 합니다.  

* 단계1 : 정적(0), 동적(1) 행동 분류 모델 생성
* 단계2 : 세부 동작에 대한 분류모델 생성
    * 단계1 모델에서 0으로 예측 -> 정적 행동 3가지 분류 모델링
    * 단계1 모델에서 1로 예측 -> 동적 행동 3가지 분류 모델링
* 모델 통합
    * 두 단계 모델을 통합하고, 새로운 데이터에 대해서 최종 예측결과와 성능평가가 나오도록 함수로 만들기
* 성능 비교
    * 기본 모델링의 성능과 비교
    * 모든 모델링은 [다양한 알고리즘 + 성능 튜닝]을 수행해야 합니다.


## 1.환경설정

### (1) 라이브러리 불러오기

* 세부 요구사항
    - 기본적으로 필요한 라이브러리를 import 하도록 코드가 작성되어 있습니다.
    - 필요하다고 판단되는 라이브러리를 추가하세요.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 필요하다고 판단되는 라이브러리를 추가하세요.

### (2) 데이터 불러오기

* 주어진 데이터셋
    * data01_train.csv : 학습 및 검증용

 <br/>  

* 세부 요구사항
    - data01_train.csv 를 불러와 'data' 이름으로 저장합니다.
        - data에서 변수 subject는 삭제합니다.
    - data01_test.csv 를 불러와 'new_data' 이름으로 저장합니다.


In [2]:
# Mount Google Drive at /content/drive; the CSV path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Read the training CSV from the mounted Drive folder, then preview the first five rows.
data = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/15 Mini Project/data01_train.csv',
)
data.head(n=5)

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989,-0.962596,-0.96565,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.07279,-0.60112,0.331298,0.165163,21,STANDING
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.99387,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,15,LAYING
2,0.278709,-0.014511,-0.108717,-0.99772,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,11,STANDING
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,17,WALKING
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.10968,-0.346372,0.584131,...,-0.563437,-0.044344,-0.845268,-0.97465,-0.887846,-0.705029,0.264952,0.137758,17,WALKING_DOWNSTAIRS


In [4]:
# Remove the `subject` column in place so it is not used as a feature.
data.drop(columns=['subject'], inplace=True)

In [5]:
# new_data = pd.read_csv('/content/drive/MyDrive/15 Mini Project/data01_test.csv')
# new_data.head()

## 2.데이터 전처리

* 세부 요구사항
    - Label 추가 : data 에 Activity_dynamic 를 추가합니다. Activity_dynamic은 과제1에서 is_dynamic과 동일한 값입니다.
    - x와 y1, y2로 분할하시오.
        * y1 : Activity
        * y2 : Activity_dynamic
    - train : val = 8 : 2 혹은 7 : 3
    - random_state 옵션을 사용하여 다른 모델과 비교를 위해 성능이 재현되도록 합니다.

In [6]:
# Binary label: 0 for the three static postures, 1 for every other activity.
data['Activity_dynamic'] = np.where(
    data['Activity'].isin(['STANDING', 'SITTING', 'LAYING']),
    0,
    1,
)
data.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989,-0.962596,-0.96565,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.07279,-0.60112,0.331298,0.165163,STANDING,0
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.99387,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING,0
2,0.278709,-0.014511,-0.108717,-0.99772,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING,0
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING,1
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.10968,-0.346372,0.584131,...,-0.563437,-0.044344,-0.845268,-0.97465,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS,1


In [7]:
# Targets: y1 is the activity name, y2 is the binary static/dynamic flag.
y1 = data['Activity']
y2 = data['Activity_dynamic']

# Features: every column that is not one of the two targets.
x = data.drop(columns=['Activity', 'Activity_dynamic'])

In [8]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.
from sklearn.model_selection import train_test_split

x_train, x_val, y2_train, y2_val = train_test_split(
    x,
    y2,
    test_size=0.2,
    random_state=2023,
)

## **3.단계별 모델링**

![](https://github.com/DA4BAM/image/blob/main/step%20by%20step.png?raw=true)

### (1) 단계1 : 정적/동적 행동 분류 모델

* 세부 요구사항
    * 정적 행동(Laying, Sitting, Standing)과 동적 행동(Walking, Walking-Up, Walking-Down)을 구분하는 모델 생성.
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

#### 1) 알고리즘1 : XGBoost

In [9]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, f1_score

In [10]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training labels, then replace them with their integer codes.
le = LabelEncoder().fit(y2_train)
y2_train = le.transform(y2_train)

In [11]:
# Model: XGBoost classifier with a fixed seed so the run is reproducible
model_xgb = XGBClassifier(random_state=2023)

# Train on the encoded static/dynamic labels
model_xgb.fit(x_train, y2_train)

In [12]:
# Predict on the validation split and map the encoded outputs back to the original labels.
y_pred_xgb = le.inverse_transform(model_xgb.predict(x_val))

# Per-class report followed by the macro-averaged F1 score.
print(classification_report(y2_val, y_pred_xgb))
print(f1_score(y2_val, y_pred_xgb, average='macro'))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       657
           1       1.00      1.00      1.00       520

    accuracy                           1.00      1177
   macro avg       1.00      1.00      1.00      1177
weighted avg       1.00      1.00      1.00      1177

1.0


#### 2) 알고리즘2 : Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, f1_score

In [14]:
# Min-max scaling: the per-column min and max come from the training split only
# and are reused to scale the validation split.
x_min, x_max = x_train.min(), x_train.max()
x_train_n = x_train.sub(x_min).div(x_max - x_min)
x_val_n = x_val.sub(x_min).div(x_max - x_min)

In [15]:
# 모델
model_lr = LogisticRegression(max_iter=5000)

# 학습
model_lr.fit(x_train, y2_train)

In [16]:
# 예측
y_pred_lr = model_lr.predict(x_val)

# 평가
print(classification_report(y2_val, y_pred_lr))
print(f1_score(y2_val, y_pred_lr, average='macro'))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       657
           1       1.00      1.00      1.00       520

    accuracy                           1.00      1177
   macro avg       1.00      1.00      1.00      1177
weighted avg       1.00      1.00      1.00      1177

0.9991388852917656


### (2) 단계2-1 : 정적 동작 세부 분류

* 세부 요구사항
    * 정적 행동(Laying, Sitting, Standing)인 데이터 추출
    * Laying, Sitting, Standing 를 분류하는 모델을 생성
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

In [17]:
# Keep only the rows flagged as static (Activity_dynamic == 0).
data_static = data[data['Activity_dynamic'].eq(0)]

In [18]:
# Preview the first rows of the static-only subset
data_static.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989,-0.962596,-0.96565,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.07279,-0.60112,0.331298,0.165163,STANDING,0
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.99387,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING,0
2,0.278709,-0.014511,-0.108717,-0.99772,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING,0
7,0.272026,-0.001329,-0.125491,-0.992068,-0.912985,-0.972451,-0.994752,-0.943141,-0.976428,-0.925446,...,-0.704995,-0.024442,0.076332,0.741277,0.729812,-0.817201,0.037746,0.136129,STANDING,0
8,0.284338,0.021956,-0.006925,-0.980153,-0.838394,-0.782357,-0.983683,-0.816199,-0.743923,-0.914011,...,-0.400197,0.021212,-0.009465,-0.282762,0.563343,-0.782072,0.242834,-0.025285,STANDING,0


In [19]:
# Target: the activity name of each static row.
y_st = data_static['Activity']

# Features: every column that is not a label.
x_st = data_static.drop(columns=['Activity', 'Activity_dynamic'])

In [20]:
# Hold out 20% of the static rows for validation, using the same seed as the other splits.
from sklearn.model_selection import train_test_split

x_st_train, x_st_val, y_st_train, y_st_val = train_test_split(
    x_st,
    y_st,
    test_size=0.2,
    random_state=2023,
)

In [21]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import LabelEncoder

# Fit a fresh encoder on the static activity names and replace the training labels with integer codes.
le = LabelEncoder().fit(y_st_train)
y_st_train = le.transform(y_st_train)

In [22]:
# Model: XGBoost classifier with a fixed seed.
# NOTE: this rebinds `model_xgb`, so the classifier previously stored under that name is no longer reachable.
model_xgb = XGBClassifier(random_state=2023)

# Train on the encoded static activity labels
model_xgb.fit(x_st_train, y_st_train)

In [23]:
# Predict the static validation rows and decode the integer outputs back to activity names.
y_pred_xgb = le.inverse_transform(model_xgb.predict(x_st_val))

# Per-class report followed by the macro-averaged F1 score.
print(classification_report(y_st_val, y_pred_xgb))
print(f1_score(y_st_val, y_pred_xgb, average='macro'))

              precision    recall  f1-score   support

      LAYING       1.00      1.00      1.00       208
     SITTING       0.99      0.98      0.98       211
    STANDING       0.98      0.99      0.98       228

    accuracy                           0.99       647
   macro avg       0.99      0.99      0.99       647
weighted avg       0.99      0.99      0.99       647

0.989351878321041


### (3) 단계2-2 : 동적 동작 세부 분류

* 세부 요구사항
    * 동적 행동(Walking, Walking Upstairs, Walking Downstairs)인 데이터 추출
    * Walking, Walking Upstairs, Walking Downstairs 를 분류하는 모델을 생성
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

In [24]:
# Keep only the rows flagged as dynamic (Activity_dynamic == 1).
data_dynamic = data[data['Activity_dynamic'].eq(1)]

In [25]:
# Preview the first rows of the dynamic-only subset
data_dynamic.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING,1
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.10968,-0.346372,0.584131,...,-0.563437,-0.044344,-0.845268,-0.97465,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS,1
5,0.330708,0.007561,-0.061371,-0.21576,0.101075,0.072949,-0.269857,0.06006,0.101298,-0.019263,...,-0.887024,-0.030645,-0.852091,-0.500195,0.306091,-0.552729,0.253885,0.291256,WALKING_UPSTAIRS,1
6,0.121465,-0.031902,-0.005196,-0.152198,-0.113104,-0.239423,-0.202401,-0.164698,-0.247099,0.114668,...,-0.775779,0.445206,-0.003487,-0.940185,0.041387,-0.886603,0.173338,-0.005627,WALKING,1
12,0.303885,0.002768,-0.038613,-0.168656,0.190336,-0.140473,-0.205134,0.101144,-0.120572,-0.000818,...,-0.329728,-0.04003,0.257252,0.076091,-0.123425,-0.752882,0.266729,0.045692,WALKING,1


In [26]:
# Target: the activity name of each dynamic row.
y_dy = data_dynamic['Activity']

# Features: every column that is not a label.
x_dy = data_dynamic.drop(columns=['Activity', 'Activity_dynamic'])

In [27]:
# Hold out 20% of the dynamic rows for validation, using the same seed as the other splits.
from sklearn.model_selection import train_test_split

x_dy_train, x_dy_val, y_dy_train, y_dy_val = train_test_split(
    x_dy,
    y_dy,
    test_size=0.2,
    random_state=2023,
)

In [28]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import LabelEncoder

# Fit a fresh encoder on the dynamic activity names and replace the training labels with integer codes.
le = LabelEncoder().fit(y_dy_train)
y_dy_train = le.transform(y_dy_train)

In [29]:
# Model: XGBoost classifier with a fixed seed.
# NOTE: this rebinds `model_xgb`, so the classifier previously stored under that name is no longer reachable.
model_xgb = XGBClassifier(random_state=2023)

# Train on the encoded dynamic activity labels
model_xgb.fit(x_dy_train, y_dy_train)

In [30]:
# Predict the dynamic validation rows and decode the integer outputs back to activity names.
y_pred_xgb = le.inverse_transform(model_xgb.predict(x_dy_val))

# Per-class report followed by the macro-averaged F1 score.
print(classification_report(y_dy_val, y_pred_xgb))
print(f1_score(y_dy_val, y_pred_xgb, average='macro'))

                    precision    recall  f1-score   support

           WALKING       1.00      0.98      0.99       181
WALKING_DOWNSTAIRS       0.97      0.99      0.98       164
  WALKING_UPSTAIRS       0.99      0.99      0.99       185

          accuracy                           0.99       530
         macro avg       0.99      0.99      0.99       530
      weighted avg       0.99      0.99      0.99       530

0.9866479052250069


### (4) 분류 모델 합치기


* 세부 요구사항
    * 두 단계 모델을 통합하고, 새로운 데이터(test)에 대해서 최종 예측결과와 성능평가가 나오도록 함수로 만들기
    * 데이터 파이프라인 구축 : test데이터가 로딩되어 전처리 과정을 거치고, 예측 및 성능 평가 수행

![](https://github.com/DA4BAM/image/blob/main/pipeline%20function.png?raw=true)

#### 1) 함수 만들기

In [None]:
from sklearn.model_selection import train_test_split

from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import classification_report, f1_score

In [None]:
def _fit_with_encoded_labels(model, features, labels):
    """Fit ``model`` on integer-encoded ``labels`` and return the fitted encoder."""
    encoder = LabelEncoder()
    model.fit(features, encoder.fit_transform(labels))
    return encoder


def _predict_original_labels(model, encoder, features):
    """Predict with ``model`` and map its integer outputs back to the original labels."""
    return encoder.inverse_transform(model.predict(features))


def model_test(model1, model2_1, model2_2, data):
    """Train and evaluate the two-stage activity classifier on a hold-out split.

    The rows of ``data`` are split 80/20 (``random_state=2023``). On the
    training part, ``model1`` learns static (0) vs. dynamic (1), ``model2_1``
    learns the activity among the truly static rows, and ``model2_2`` learns
    the activity among the truly dynamic rows. On the validation part, each
    row is routed by the stage-1 prediction to the matching stage-2 model, the
    two sets of predictions are merged, and the merged result is scored
    against the true ``Activity``.

    Unlike the previous version, this function uses no notebook globals, does
    not add columns to the caller's ``data``, and evaluates the merged
    prediction rather than a leftover global.

    Args:
        model1: Unfitted classifier for the static/dynamic stage. It must
            expose ``fit(X, y)`` and ``predict(X)`` and return the integer
            codes it was trained on.
        model2_1: Unfitted classifier for the static activities (same API).
        model2_2: Unfitted classifier for the dynamic activities (same API).
        data: DataFrame with the feature columns and an ``Activity`` column.
            ``Activity_dynamic`` and ``subject`` are ignored if present.

    Returns:
        pandas.Series named ``'predict'`` holding the final activity
        prediction for every validation row, indexed like those rows.

    Raises:
        KeyError: If ``data`` has no ``Activity`` column.
    """
    static_activities = ['STANDING', 'SITTING', 'LAYING']

    # Build features and target from the input without modifying it.
    # Label columns and the subject id must never reach the models as features.
    y1 = data['Activity']
    x = data.drop(columns=['Activity', 'Activity_dynamic', 'subject'], errors='ignore')

    # Train / validation split
    x_train, x_val, y1_train, y1_val = train_test_split(
        x, y1, test_size=0.2, random_state=2023
    )

    # Stage-1 label: 0 = static, 1 = dynamic (derived from the true activity).
    y2_train = np.where(y1_train.isin(static_activities), 0, 1)
    static_rows = y2_train == 0

    # Stage 1: static vs. dynamic
    encoder1 = _fit_with_encoded_labels(model1, x_train, y2_train)

    # Stage 2: each branch is trained only on the rows of its own group.
    encoder_st = _fit_with_encoded_labels(
        model2_1, x_train.loc[static_rows], y1_train.loc[static_rows]
    )
    encoder_dy = _fit_with_encoded_labels(
        model2_2, x_train.loc[~static_rows], y1_train.loc[~static_rows]
    )

    # Route every validation row by its stage-1 prediction.
    stage1_pred = _predict_original_labels(model1, encoder1, x_val)

    # Merge the two branches into one prediction vector aligned with x_val.
    y_pred = np.empty(len(x_val), dtype=object)
    branches = ((0, model2_1, encoder_st), (1, model2_2, encoder_dy))
    for flag, model, encoder in branches:
        routed = stage1_pred == flag
        if routed.any():  # a branch may receive no rows; predicting on nothing would fail
            y_pred[routed] = _predict_original_labels(model, encoder, x_val.loc[routed])

    # Evaluate the merged prediction against the true activity.
    print(classification_report(y1_val, y_pred))
    print(f1_score(y1_val, y_pred, average='macro'))

    return pd.Series(y_pred, index=x_val.index, name='predict')
