#**스마트폰 센서 데이터 기반 모션 분류**
# 단계3 : 단계별 모델링


## 0.미션

단계별로 나눠서 모델링을 수행하고자 합니다.  

* 단계1 : 정적(0), 동적(1) 행동 분류 모델 생성
* 단계2 : 세부 동작에 대한 분류모델 생성
    * 단계1 모델에서 0으로 예측 -> 정적 행동 3가지 분류 모델링
    * 단계1 모델에서 1으로 예측 -> 동적 행동 3가지 분류 모델링 
* 모델 통합
    * 두 단계 모델을 통합하고, 새로운 데이터에 대해서 최종 예측결과와 성능평가가 나오도록 함수로 만들기
* 성능 비교
    * 기본 모델링의 성능과 비교
    * 모든 모델링은 [다양한 알고리즘 + 성능 튜닝]을 수행해야 합니다.


## 1.환경설정

### (1) 라이브러리 불러오기

* 세부 요구사항
    - 기본적으로 필요한 라이브러리를 import 하도록 코드가 작성되어 있습니다.
    - 필요하다고 판단되는 라이브러리를 추가하세요.

In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 필요하다고 판단되는 라이브러리를 추가하세요.
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import xgboost as xgb
from sklearn.metrics import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

### (2) 데이터 불러오기

* 주어진 데이터셋
    * data01_train.csv : 학습 및 검증용

 <br/>  

* 세부 요구사항
    - data01_train.csv 를 불러와 'data' 이름으로 저장합니다.
        - data에서 변수 subject는 삭제합니다.
    - data01_test.csv 를 불러와 'new_data' 이름으로 저장합니다.


In [63]:
#data 데이터 불러오기
data_path = "./data01_train.csv"
data = pd.read_csv(data_path)
data.drop('subject', axis=1, inplace=True)
data.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989,-0.962596,-0.96565,-0.929747,...,-0.487737,-0.816696,-0.042494,-0.044218,0.307873,0.07279,-0.60112,0.331298,0.165163,STANDING
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.99387,-0.987558,-0.937337,...,-0.23782,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING
2,0.278709,-0.014511,-0.108717,-0.99772,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.535287,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.004012,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.10968,-0.346372,0.584131,...,-0.157832,-0.563437,-0.044344,-0.845268,-0.97465,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS


In [64]:
#new_data 데이터 불러오기
data_path = "./data01_test.csv"
new_data = pd.read_csv(data_path)
new_data.drop('subject', axis=1, inplace=True)
new_data.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity
0,0.284379,-0.021981,-0.116683,-0.99249,-0.97964,-0.963321,-0.992563,-0.977304,-0.958142,-0.93885,...,-0.509523,-0.850065,-0.018043,0.092304,0.07422,-0.714534,-0.671943,-0.018351,-0.185733,SITTING
1,0.27744,-0.028086,-0.118412,-0.99662,-0.927676,-0.972294,-0.997346,-0.931405,-0.971788,-0.939837,...,-0.210792,-0.613367,-0.022456,-0.155414,0.247498,-0.112257,-0.826816,0.184489,-0.068699,STANDING
2,0.305833,-0.041023,-0.087303,0.00688,0.1828,-0.237984,0.005642,0.028616,-0.236474,0.016311,...,0.579587,0.394388,-0.362616,0.171069,0.576349,-0.688314,-0.743234,0.272186,0.053101,WALKING
3,0.276053,-0.016487,-0.108381,-0.995379,-0.983978,-0.975854,-0.995877,-0.98528,-0.974907,-0.941425,...,-0.566291,-0.841455,0.289548,0.079801,-0.020033,0.291898,-0.639435,-0.111998,-0.123298,SITTING
4,0.271998,0.016904,-0.078856,-0.973468,-0.702462,-0.86945,-0.97981,-0.711601,-0.856807,-0.92076,...,0.447577,0.214219,0.010111,0.114179,-0.830776,-0.325098,-0.840817,0.116237,-0.096615,STANDING


## 2.데이터 전처리

* 세부 요구사항
    - Label 추가 : data 에 Activity_dynamic 를 추가합니다. Activity_dynamic은 과제1에서 is_dynamic과 동일한 값입니다.
    - x와 y1, y2로 분할하시오.
        * y1 : Activity
        * y2 : Activity_dynamic
    - train : val = 8 : 2 혹은 7 : 3
    - random_state 옵션을 사용하여 다른 모델과 비교를 위해 성능이 재현되도록 합니다.

In [65]:
# Activity 숫자로 변환
data['Activity_dynamic'] = data['Activity'].map({'WALKING':1,
                                                 'WALKING_UPSTAIRS':1,
                                                 'WALKING_DOWNSTAIRS':1,
                                                 'LAYING':0,
                                                 'STANDING':0,
                                                 'SITTING':0})
data

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989000,-0.962596,-0.965650,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.072790,-0.601120,0.331298,0.165163,STANDING,0
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.993870,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING,0
2,0.278709,-0.014511,-0.108717,-0.997720,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING,0
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING,1
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.109680,-0.346372,0.584131,...,-0.563437,-0.044344,-0.845268,-0.974650,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5876,0.277194,-0.012389,-0.131974,-0.994046,-0.940578,-0.917337,-0.994261,-0.932830,-0.908088,-0.936219,...,-0.690363,-0.034888,-0.261437,-0.391477,-0.877612,-0.912365,0.114009,0.080146,SITTING,0
5877,0.191568,0.013328,-0.105174,-0.126969,-0.121729,-0.327480,-0.192523,-0.109923,-0.295286,0.078644,...,-0.879215,0.721718,0.623151,0.866858,-0.445660,-0.690278,0.303194,-0.044188,WALKING_UPSTAIRS,1
5878,0.267981,-0.018348,-0.107440,-0.991303,-0.989881,-0.990313,-0.992386,-0.988852,-0.991237,-0.936099,...,-0.886851,0.060173,0.228739,0.684400,-0.216665,0.620363,-0.437247,-0.571840,LAYING,0
5879,0.212787,-0.048130,-0.121001,-0.041373,0.052449,-0.585361,-0.100714,0.023353,-0.554707,0.219814,...,-0.053556,0.260880,0.551742,-0.943773,-0.862899,-0.718009,0.292856,0.024920,WALKING_UPSTAIRS,1


In [66]:
# 데이터 분할을 위한 전처리
x = data.drop(labels=['Activity'], axis = 1)
x = x.drop(labels=['Activity_dynamic'], axis = 1)
y1 = data.loc[:, 'Activity']
y2 = data.loc[:, 'Activity_dynamic']

In [67]:
#MIN-MAX 스케일링
scaler = MinMaxScaler()

# 데이터 스케일링 후 데이터 프라임으로 생성
x = pd.DataFrame(scaler.fit_transform(x), columns=x.columns)

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


In [68]:
x_train, x_val, y_train, y_val = train_test_split(x, y2, test_size = .3, random_state=42)

## **3.단계별 모델링**

![](https://github.com/DA4BAM/image/blob/main/step%20by%20step.png?raw=true)

### (1) 단계1 : 정적/동적 행동 분류 모델

* 세부 요구사항
    * 정적 행동(Laying, Sitting, Standing)과 동적 행동(동적 : Walking, Walking-Up, Walking-Down)을 구분하는 모델 생성.
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

# 1) 알고리즘1 : 랜덤 포레스트

In [69]:
#생성
model1 = RandomForestClassifier()

#학습
model1.fit(x_train, y_train)
pred = model1.predict(x_val)

#평가
print('accuracy :',accuracy_score(y_val, pred))
print('='*60)
print(confusion_matrix(y_val, pred))
print('='*60)
print(classification_report(y_val, pred))

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


accuracy : 1.0
[[957   0]
 [  0 808]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       957
           1       1.00      1.00      1.00       808

    accuracy                           1.00      1765
   macro avg       1.00      1.00      1.00      1765
weighted avg       1.00      1.00      1.00      1765



  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


# 2) 알고리즘2 : XGboost

In [70]:
# XGBoost 모델 생성
model = xgb.XGBClassifier()

# 모델 학습
model.fit(x_train, y_train)

# 예측
pred = model.predict(x_val)

# 성능 평가
print('Accuracy:', accuracy_score(y_val, pred))
print('=' * 60)
print('Confusion Matrix:')
print(confusion_matrix(y_val, pred))
print('=' * 60)
print('Classification Report:')
print(classification_report(y_val, pred))

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(data):


Accuracy: 0.9994334277620397
Confusion Matrix:
[[957   0]
 [  1 807]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       957
           1       1.00      1.00      1.00       808

    accuracy                           1.00      1765
   macro avg       1.00      1.00      1.00      1765
weighted avg       1.00      1.00      1.00      1765



  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


### (2) 단계2-1 : 정적 동작 세부 분류

* 세부 요구사항
    * 정적 행동(Laying, Sitting, Standing)인 데이터 추출
    * Laying, Sitting, Standing 를 분류하는 모델을 생성
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

In [71]:
s_data = data[data['Activity_dynamic'] == 0]
s_data

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989000,-0.962596,-0.965650,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.072790,-0.601120,0.331298,0.165163,STANDING,0
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.993870,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING,0
2,0.278709,-0.014511,-0.108717,-0.997720,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING,0
7,0.272026,-0.001329,-0.125491,-0.992068,-0.912985,-0.972451,-0.994752,-0.943141,-0.976428,-0.925446,...,-0.704995,-0.024442,0.076332,0.741277,0.729812,-0.817201,0.037746,0.136129,STANDING,0
8,0.284338,0.021956,-0.006925,-0.980153,-0.838394,-0.782357,-0.983683,-0.816199,-0.743923,-0.914011,...,-0.400197,0.021212,-0.009465,-0.282762,0.563343,-0.782072,0.242834,-0.025285,STANDING,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5874,0.257476,-0.413865,0.017374,-0.919666,0.075259,-0.630716,-0.937986,0.176796,-0.613510,-0.866296,...,-0.091605,-0.000793,0.267189,-0.186202,0.098099,0.792970,-0.034020,-0.928148,LAYING,0
5875,0.277378,-0.013298,-0.104322,-0.996596,-0.987491,-0.973345,-0.996372,-0.987746,-0.973512,-0.942156,...,-0.956239,0.122320,0.136275,-0.708377,-0.507788,-0.818263,0.222620,0.035430,STANDING,0
5876,0.277194,-0.012389,-0.131974,-0.994046,-0.940578,-0.917337,-0.994261,-0.932830,-0.908088,-0.936219,...,-0.690363,-0.034888,-0.261437,-0.391477,-0.877612,-0.912365,0.114009,0.080146,SITTING,0
5878,0.267981,-0.018348,-0.107440,-0.991303,-0.989881,-0.990313,-0.992386,-0.988852,-0.991237,-0.936099,...,-0.886851,0.060173,0.228739,0.684400,-0.216665,0.620363,-0.437247,-0.571840,LAYING,0


In [72]:
# x , y 분할
x = s_data.drop(labels=['Activity_dynamic'], axis = 1)
x = x.drop(labels=['Activity'], axis = 1)
y1 = s_data.loc[:, 'Activity']

In [73]:
# 데이터 스케일링 후 데이터 프라임으로 생성
x = pd.DataFrame(scaler.fit_transform(x), columns=x.columns)

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


In [74]:
#'LAYING' = 0 , 'SITTING' = 1, 'STANDING' =2 , 인코딩
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y1 = label_encoder.fit_transform(y1)
y1

array([2, 0, 2, ..., 1, 0, 1])

In [75]:
# 데이터 분할
x_train, x_val, y_train, y_val = train_test_split(x, y1, test_size = .3, random_state=42)

In [76]:
# XGBoost 모델 생성
model2_1 = xgb.XGBClassifier()

# 모델 학습
model2_1.fit(x_train, y_train)

# 예측
pred = model2_1.predict(x_val)

# 성능 평가
print('Accuracy:', accuracy_score(y_val, pred))
print('=' * 60)
print('Confusion Matrix:')
print(confusion_matrix(y_val, pred))
print('=' * 60)
print('Classification Report:')
print(classification_report(y_val, pred))

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


Accuracy: 0.9855818743563337
Confusion Matrix:
[[338   0   0]
 [  0 303   7]
 [  0   7 316]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       338
           1       0.98      0.98      0.98       310
           2       0.98      0.98      0.98       323

    accuracy                           0.99       971
   macro avg       0.99      0.99      0.99       971
weighted avg       0.99      0.99      0.99       971



  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


### (3) 단계2-2 : 동적 동작 세부 분류

* 세부 요구사항
    * 동동적 행동(Walking, Walking Upstairs, Walking Downstairs)인 데이터 추출
    * Walking, Walking Upstairs, Walking Downstairs 를 분류하는 모델을 생성
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

In [77]:
# 동적 동작 분류
d_data = data[data['Activity_dynamic'] == 1]
d_data

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING,1
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.109680,-0.346372,0.584131,...,-0.563437,-0.044344,-0.845268,-0.974650,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS,1
5,0.330708,0.007561,-0.061371,-0.215760,0.101075,0.072949,-0.269857,0.060060,0.101298,-0.019263,...,-0.887024,-0.030645,-0.852091,-0.500195,0.306091,-0.552729,0.253885,0.291256,WALKING_UPSTAIRS,1
6,0.121465,-0.031902,-0.005196,-0.152198,-0.113104,-0.239423,-0.202401,-0.164698,-0.247099,0.114668,...,-0.775779,0.445206,-0.003487,-0.940185,0.041387,-0.886603,0.173338,-0.005627,WALKING,1
12,0.303885,0.002768,-0.038613,-0.168656,0.190336,-0.140473,-0.205134,0.101144,-0.120572,-0.000818,...,-0.329728,-0.040030,0.257252,0.076091,-0.123425,-0.752882,0.266729,0.045692,WALKING,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5868,0.277709,-0.053919,-0.098746,-0.589970,-0.076626,-0.500837,-0.605474,-0.142798,-0.506696,-0.533485,...,-0.268237,-0.076922,0.706620,-0.954969,-0.324779,-0.691663,0.153974,-0.196833,WALKING,1
5869,0.252496,-0.009773,-0.118293,-0.155168,-0.244513,-0.044524,-0.244367,-0.229509,-0.042199,0.291682,...,-0.752980,0.619246,-0.684483,0.924623,-0.353041,-0.775518,0.253218,0.013216,WALKING_UPSTAIRS,1
5873,0.264961,-0.034719,-0.088140,-0.368158,-0.074514,-0.229606,-0.394387,-0.082437,-0.243525,-0.261087,...,-0.046157,0.037517,-0.903692,0.614933,-0.610918,-0.601604,0.335996,0.156518,WALKING,1
5877,0.191568,0.013328,-0.105174,-0.126969,-0.121729,-0.327480,-0.192523,-0.109923,-0.295286,0.078644,...,-0.879215,0.721718,0.623151,0.866858,-0.445660,-0.690278,0.303194,-0.044188,WALKING_UPSTAIRS,1


In [78]:
# 동적 데이터로 x , y 분할
x = d_data.drop(labels=['Activity_dynamic'], axis = 1)
x = x.drop(labels=['Activity'], axis = 1)
y1 = d_data.loc[:, 'Activity']
y1

3                  WALKING
4       WALKING_DOWNSTAIRS
5         WALKING_UPSTAIRS
6                  WALKING
12                 WALKING
               ...        
5868               WALKING
5869      WALKING_UPSTAIRS
5873               WALKING
5877      WALKING_UPSTAIRS
5879      WALKING_UPSTAIRS
Name: Activity, Length: 2647, dtype: object

In [79]:
# 데이터 스케일링 후 데이터 프라임으로 생성
x = pd.DataFrame(scaler.fit_transform(x), columns=x.columns)

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


In [80]:
from sklearn.preprocessing import LabelEncoder


#'LAYING' = 0 , 'SITTING' = 1, 'STANDING' =2
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y1)
y_encoded

array([0, 1, 2, ..., 0, 2, 2])

In [81]:
# train , test 분할
x_train, x_val, y_train, y_val = train_test_split(x, y_encoded, test_size = .1, random_state=42)

In [82]:
# XGBoost 모델 생성 
model2_2 = xgb.XGBClassifier()

# 모델 학습
model2_2.fit(x_train, y_train)

# 예측
pred = model2_2.predict(x_val)

# 성능 평가
print('Accuracy:', accuracy_score(y_val, pred))
print('=' * 60)
print('Confusion Matrix:')
print(confusion_matrix(y_val, pred))
print('=' * 60)
print('Classification Report:')
print(classification_report(y_val, pred))

  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


Accuracy: 0.9962264150943396
Confusion Matrix:
[[93  0  0]
 [ 0 77  1]
 [ 0  0 94]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        93
           1       1.00      0.99      0.99        78
           2       0.99      1.00      0.99        94

    accuracy                           1.00       265
   macro avg       1.00      1.00      1.00       265
weighted avg       1.00      1.00      1.00       265



  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


### (4) 분류 모델 합치기


* 세부 요구사항
    * 두 단계 모델을 통합하고, 새로운 데이터(test)에 대해서 최종 예측결과와 성능평가가 나오도록 함수로 만들기
    * 데이터 파이프라인 구축 : test데이터가 로딩되어 전처리 과정을 거치고, 예측 및 성능 평가 수행

![](https://github.com/DA4BAM/image/blob/main/pipeline%20function.png?raw=true)

#### 1) 함수 만들기

In [83]:
new_data['Activity_dynamic'] = new_data['Activity'].map({'WALKING':1,
                                                 'WALKING_UPSTAIRS':1,
                                                 'WALKING_DOWNSTAIRS':1,
                                                 'LAYING':0,
                                                 'STANDING':0,
                                                 'SITTING':0})
new_data

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
0,0.284379,-0.021981,-0.116683,-0.992490,-0.979640,-0.963321,-0.992563,-0.977304,-0.958142,-0.938850,...,-0.850065,-0.018043,0.092304,0.074220,-0.714534,-0.671943,-0.018351,-0.185733,SITTING,0
1,0.277440,-0.028086,-0.118412,-0.996620,-0.927676,-0.972294,-0.997346,-0.931405,-0.971788,-0.939837,...,-0.613367,-0.022456,-0.155414,0.247498,-0.112257,-0.826816,0.184489,-0.068699,STANDING,0
2,0.305833,-0.041023,-0.087303,0.006880,0.182800,-0.237984,0.005642,0.028616,-0.236474,0.016311,...,0.394388,-0.362616,0.171069,0.576349,-0.688314,-0.743234,0.272186,0.053101,WALKING,1
3,0.276053,-0.016487,-0.108381,-0.995379,-0.983978,-0.975854,-0.995877,-0.985280,-0.974907,-0.941425,...,-0.841455,0.289548,0.079801,-0.020033,0.291898,-0.639435,-0.111998,-0.123298,SITTING,0
4,0.271998,0.016904,-0.078856,-0.973468,-0.702462,-0.869450,-0.979810,-0.711601,-0.856807,-0.920760,...,0.214219,0.010111,0.114179,-0.830776,-0.325098,-0.840817,0.116237,-0.096615,STANDING,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1466,0.278725,-0.015262,-0.106398,-0.993625,-0.982845,-0.974745,-0.993963,-0.981100,-0.974596,-0.939303,...,-0.874066,-0.054788,0.712975,0.300318,-0.319188,-0.857336,0.120184,0.119276,SITTING,0
1467,0.275803,-0.019257,-0.109078,-0.998614,-0.991621,-0.987403,-0.998813,-0.991503,-0.986802,-0.945442,...,-0.721050,0.076333,-0.021599,-0.277268,0.754011,-0.764185,0.212111,0.138595,STANDING,0
1468,0.240402,0.006361,-0.121377,-0.045805,0.189930,0.332664,-0.114706,0.157771,0.195271,0.210139,...,-0.615554,0.330378,-0.667635,0.806563,-0.850113,-0.639564,0.185363,0.260201,WALKING_DOWNSTAIRS,1
1469,0.135873,-0.020675,-0.116644,-0.960526,-0.955134,-0.985818,-0.963115,-0.971338,-0.988261,-0.946289,...,-0.422383,-0.048474,0.236761,-0.186581,0.396648,0.790877,-0.474618,-0.505953,LAYING,0


In [84]:
test_x = new_data.drop(labels=['Activity'], axis = 1)
test_x = test_x.drop(labels=['Activity_dynamic'], axis = 1)
test_y1 = new_data.loc[:, 'Activity_dynamic']
test_y2 = new_data.loc[:, 'Activity']

In [85]:
# 데이터 스케일링 후 데이터 프라임으로 생성
test_x = pd.DataFrame(scaler.fit_transform(test_x), columns=test_x.columns)
test_x

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-meanFreq(),fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)"
0,0.787233,0.719350,0.313492,0.003805,0.011351,0.018992,0.003745,0.014164,0.023396,0.005578,...,0.674778,0.250887,0.076553,0.479277,0.546207,0.537519,0.130624,0.163769,0.684186,0.408612
1,0.782980,0.714860,0.311696,0.001631,0.040504,0.014345,0.001293,0.042806,0.015746,0.005020,...,0.448081,0.405152,0.201865,0.476977,0.421248,0.624612,0.439708,0.086308,0.826273,0.467341
2,0.800383,0.705344,0.344023,0.529889,0.663505,0.394559,0.515481,0.641899,0.427936,0.545235,...,0.789220,0.813307,0.735386,0.299658,0.585939,0.789898,0.144080,0.128112,0.887704,0.528462
3,0.782130,0.723391,0.322120,0.002284,0.008918,0.012502,0.002046,0.009186,0.013998,0.004124,...,0.718961,0.221571,0.081111,0.639618,0.539900,0.490146,0.647117,0.180028,0.618587,0.439942
4,0.779645,0.747950,0.352802,0.013818,0.166854,0.067596,0.010283,0.179973,0.080201,0.015799,...,0.217794,0.745136,0.640002,0.493953,0.557242,0.082653,0.330480,0.079306,0.778463,0.453332
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1466,0.783768,0.724292,0.324180,0.003207,0.009553,0.013077,0.003027,0.011794,0.014173,0.005322,...,0.626776,0.211094,0.063847,0.460123,0.859299,0.651160,0.333513,0.071044,0.781228,0.561669
1467,0.781977,0.721353,0.321395,0.000581,0.004630,0.006523,0.000541,0.005302,0.007330,0.001854,...,0.765950,0.280622,0.144856,0.528474,0.488750,0.360856,0.884270,0.117634,0.845622,0.571364
1468,0.760279,0.740196,0.308614,0.502154,0.667505,0.690032,0.453783,0.722497,0.669957,0.654746,...,0.638348,0.403836,0.200707,0.660902,0.162862,0.905607,0.061046,0.179964,0.826886,0.632388
1469,0.696210,0.720310,0.313533,0.020631,0.025099,0.007343,0.018841,0.017887,0.006512,0.001375,...,0.429768,0.551498,0.302974,0.463414,0.619077,0.406437,0.700874,0.895406,0.364575,0.247920


In [86]:
pred = model1.predict(test_x)

#평가
print('accuracy :',accuracy_score(test_y1, pred))
print('='*60)
print(confusion_matrix(test_y1, pred))
print('='*60)
print(classification_report(test_y1, pred))

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


accuracy : 1.0
[[833   0]
 [  0 638]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       833
           1       1.00      1.00      1.00       638

    accuracy                           1.00      1471
   macro avg       1.00      1.00      1.00      1471
weighted avg       1.00      1.00      1.00      1471



In [87]:
# 0으로 예측된 데이터 추출
x_val_0 = test_x[pred == 0]
y_val_0 = test_y1[pred == 0]

# 1로 예측된 데이터 추출
x_val_1 = test_x[pred == 1]
y_val_1 = test_y1[pred == 1]
x_val_0

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-meanFreq(),fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)"
0,0.787233,0.719350,0.313492,0.003805,0.011351,0.018992,0.003745,0.014164,0.023396,0.005578,...,0.674778,0.250887,0.076553,0.479277,0.546207,0.537519,0.130624,0.163769,0.684186,0.408612
1,0.782980,0.714860,0.311696,0.001631,0.040504,0.014345,0.001293,0.042806,0.015746,0.005020,...,0.448081,0.405152,0.201865,0.476977,0.421248,0.624612,0.439708,0.086308,0.826273,0.467341
3,0.782130,0.723391,0.322120,0.002284,0.008918,0.012502,0.002046,0.009186,0.013998,0.004124,...,0.718961,0.221571,0.081111,0.639618,0.539900,0.490146,0.647117,0.180028,0.618587,0.439942
4,0.779645,0.747950,0.352802,0.013818,0.166854,0.067596,0.010283,0.179973,0.080201,0.015799,...,0.217794,0.745136,0.640002,0.493953,0.557242,0.082653,0.330480,0.079306,0.778463,0.453332
6,0.887348,0.887840,0.312716,0.082483,0.504074,0.231053,0.077298,0.592889,0.250781,0.134716,...,0.382421,0.359655,0.211152,0.461288,0.587399,0.392404,0.363915,0.102239,0.860772,0.489408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1464,0.785449,0.722505,0.318603,0.001276,0.003282,0.001774,0.001163,0.003683,0.001863,0.004132,...,0.713779,0.368853,0.238707,0.522525,0.499875,0.532784,0.509399,0.381130,0.531816,0.314962
1465,0.780721,0.725586,0.328065,0.015071,0.034718,0.030295,0.010609,0.039642,0.033081,0.029389,...,0.288509,0.339304,0.121473,0.466546,0.616622,0.531795,0.558808,0.202565,0.700919,0.372298
1466,0.783768,0.724292,0.324180,0.003207,0.009553,0.013077,0.003027,0.011794,0.014173,0.005322,...,0.626776,0.211094,0.063847,0.460123,0.859299,0.651160,0.333513,0.071044,0.781228,0.561669
1467,0.781977,0.721353,0.321395,0.000581,0.004630,0.006523,0.000541,0.005302,0.007330,0.001854,...,0.765950,0.280622,0.144856,0.528474,0.488750,0.360856,0.884270,0.117634,0.845622,0.571364


In [88]:
# 동적 인덱스 맞춤
y0_index = y_val_0.index.tolist()
desired_row = new_data.iloc[y0_index]
feature_1_column = desired_row['Activity']
y_val_0 = feature_1_column
y_val_0

0        SITTING
1       STANDING
3        SITTING
4       STANDING
6       STANDING
          ...   
1464     SITTING
1465     SITTING
1466     SITTING
1467    STANDING
1469      LAYING
Name: Activity, Length: 833, dtype: object

In [89]:
# 정적 인덱스 맞춤
y1_index = y_val_1.index.tolist()
desired_row = new_data.iloc[y1_index]
feature_1_column = desired_row['Activity']
y_val_1 = feature_1_column
y_val_1

2                  WALKING
5                  WALKING
9         WALKING_UPSTAIRS
16        WALKING_UPSTAIRS
17                 WALKING
               ...        
1459      WALKING_UPSTAIRS
1462    WALKING_DOWNSTAIRS
1463    WALKING_DOWNSTAIRS
1468    WALKING_DOWNSTAIRS
1470    WALKING_DOWNSTAIRS
Name: Activity, Length: 638, dtype: object

In [90]:
from sklearn.preprocessing import LabelEncoder

# LabelEncoder 초기화
label_encoder = LabelEncoder()

# y_val_0 열을 인코딩
y_val_0_encoded = label_encoder.fit_transform(y_val_0)


In [91]:
pred2_1 = model2_1.predict(x_val_0)

#평가
print('accuracy :',accuracy_score(y_val_0_encoded, pred2_1))
print('='*60)
print(confusion_matrix(y_val_0_encoded, pred2_1))
print('='*60)
print(classification_report(y_val_0_encoded, pred2_1))

accuracy : 0.9699879951980792
[[292   0   0]
 [  0 237  17]
 [  0   8 279]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       292
           1       0.97      0.93      0.95       254
           2       0.94      0.97      0.96       287

    accuracy                           0.97       833
   macro avg       0.97      0.97      0.97       833
weighted avg       0.97      0.97      0.97       833



  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


In [96]:
from sklearn.preprocessing import LabelEncoder

# LabelEncoder 초기화
label_encoder = LabelEncoder()

# y_val_0 열을 인코딩
y_val_1_encoded = label_encoder.fit_transform(y_val_1)


In [97]:
pred2_2 = model2_2.predict(x_val_1)

#평가
print('accuracy :',accuracy_score(y_val_1_encoded, pred2_2))
print('='*60)
print(confusion_matrix(y_val_1_encoded, pred2_2))
print('='*60)
print(classification_report(y_val_1_encoded, pred2_2))

accuracy : 0.799373040752351
[[129  43  56]
 [  0 193   2]
 [  0  27 188]]
              precision    recall  f1-score   support

           0       1.00      0.57      0.72       228
           1       0.73      0.99      0.84       195
           2       0.76      0.87      0.82       215

    accuracy                           0.80       638
   macro avg       0.83      0.81      0.79       638
weighted avg       0.84      0.80      0.79       638



  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


In [94]:
# 1,2,3,4,5,6으로 예측된 데이터 추출
x0 = x_val_0[pred2_1 == 0]
x1 = x_val_0[pred2_1 == 1]
x2 = x_val_0[pred2_1 == 2]
x3 = x_val_1[pred2_2 == 0]
x4 = x_val_1[pred2_2 == 1]
x5 = x_val_1[pred2_2 == 2]

# 각각의 데이터 배열을 DataFrame으로 변환합니다.
x0_df = pd.DataFrame(x0)
x1_df = pd.DataFrame(x1)
x2_df = pd.DataFrame(x2)
x3_df = pd.DataFrame(x3)
x4_df = pd.DataFrame(x4)
x5_df = pd.DataFrame(x5)

# 'final_predictions' 열을 생성하고 각 데이터 배열에 해당하는 예측값(0, 1, 2)을 할당합니다.
x0_df['final_predictions'] = 'LAYING' 
x1_df['final_predictions'] = 'SITTING'
x2_df['final_predictions'] = 'STANDING'
x3_df['final_predictions'] = 'WALKING'
x4_df['final_predictions'] = 'WALKING_DOWNSTAIRS'
x5_df['final_predictions'] = 'WALKING_UPSTAIRS'


# DataFrame을 합칩니다.
combined_data_df = pd.concat([x0_df, x1_df, x2_df, x3_df, x4_df, x5_df])
result = combined_data_df.sort_index()
final_predictions = result['final_predictions']
final_predictions

0                  SITTING
1                 STANDING
2         WALKING_UPSTAIRS
3                  SITTING
4                 STANDING
               ...        
1466               SITTING
1467              STANDING
1468    WALKING_DOWNSTAIRS
1469                LAYING
1470    WALKING_DOWNSTAIRS
Name: final_predictions, Length: 1471, dtype: object

In [95]:
# 마지막으로 파이널과 , test_y2 비교하여 모델 평가
print('accuracy :',accuracy_score(final_predictions, test_y2))
print('='*60)
print(confusion_matrix(final_predictions, test_y2))
print('='*60)
print(classification_report(final_predictions, test_y2))

accuracy : 0.8959891230455472
[[292   0   0   0   0   0]
 [  0 237   8   0   0   0]
 [  0  17 279   0   0   0]
 [  0   0   0 129   0   0]
 [  0   0   0  43 193  27]
 [  0   0   0  56   2 188]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       292
           SITTING       0.93      0.97      0.95       245
          STANDING       0.97      0.94      0.96       296
           WALKING       0.57      1.00      0.72       129
WALKING_DOWNSTAIRS       0.99      0.73      0.84       263
  WALKING_UPSTAIRS       0.87      0.76      0.82       246

          accuracy                           0.90      1471
         macro avg       0.89      0.90      0.88      1471
      weighted avg       0.92      0.90      0.90      1471

