# 미니 프로젝트 : 스마트폰 센서 데이터 기반 모션 분류
> 스마트폰 센서를 통해 사람 행동을 분류하는 모델 개발

<img src = "https://www.snubh.org/upload/ce3/namoimage/images/000073/202111_03_04.png"/>

# **1. 환경설정**

In [None]:
# Mount Google Drive inside the Colab runtime; the dataset path used below
# lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive/


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')
import os



---



# **2. 데이터 전처리**

* 가변수화, 데이터 분할, NaN 확인 및 조치, 스케일링 등 필요한 전처리를 수행한다.


## (1) 데이터 불러오기
* 데이터셋
    * data01_train.csv : 학습 및 검증용
    * data01_test.csv : 테스트용
    * features.csv : feature 이름을 계층구조로 정리한 데이터  
    * feature_importance.pkl : 이전 단계에서 분석한 중요 변수


In [None]:
# Dataset directory (relative path, resolved against the current working directory)
path = 'drive/MyDrive/AIVLE/Mini_project/'

# Read the three CSV files in one pass:
#   data     - rows used for training and validation
#   test     - held-out test rows
#   features - table describing the feature names
data, test, features = (
    pd.read_csv(path + file_name)
    for file_name in ('data01_train.csv', 'data01_test.csv', 'features.csv')
)
# important_features = pd.read_csv(path + 'important_features.csv')

## (2) 기본 정보 조회
- Meta 정보 분석

In [None]:
# Report the (rows, columns) shape of every loaded frame.
for label, frame in (
    ('Train Dataset :', data),
    ('Test Dataset :', test),
    ('Features Dataset :', features),
):
    print(label, frame.shape)

Train Dataset : (5881, 563)
Test Dataset : (1471, 563)
Features Dataset : (561, 4)


In [None]:
# Remove the 'subject' column, which is not used as a model input,
# from both the train and the test frame (modified in place).
for frame in (data, test):
    frame.drop(columns='subject', inplace=True)

In [None]:
# Preview the first five rows of the training data.
data.head(n=5)

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989,-0.962596,-0.96565,-0.929747,...,-0.487737,-0.816696,-0.042494,-0.044218,0.307873,0.07279,-0.60112,0.331298,0.165163,STANDING
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.99387,-0.987558,-0.937337,...,-0.23782,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,LAYING
2,0.278709,-0.014511,-0.108717,-0.99772,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.535287,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,STANDING
3,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.004012,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,WALKING
4,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.10968,-0.346372,0.584131,...,-0.157832,-0.563437,-0.044344,-0.845268,-0.97465,-0.887846,-0.705029,0.264952,0.137758,WALKING_DOWNSTAIRS


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-meanFreq(),fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)"
count,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,...,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0,5881.0
mean,0.274811,-0.017799,-0.109396,-0.603138,-0.509815,-0.604058,-0.628151,-0.525944,-0.605374,-0.46549,...,0.126955,-0.305883,-0.623548,0.008524,-0.001185,0.00934,-0.007099,-0.491501,0.059299,-0.054594
std,0.067614,0.039422,0.058373,0.448807,0.501815,0.417319,0.424345,0.485115,0.413043,0.544995,...,0.249176,0.322808,0.310371,0.33973,0.447197,0.60819,0.476738,0.509069,0.29734,0.278479
min,-0.503823,-0.684893,-1.0,-1.0,-0.999844,-0.999667,-1.0,-0.999419,-1.0,-1.0,...,-0.965725,-0.979261,-0.999765,-0.97658,-1.0,-1.0,-1.0,-1.0,-1.0,-0.980143
25%,0.262919,-0.024877,-0.121051,-0.992774,-0.97768,-0.980127,-0.993602,-0.977865,-0.980112,-0.936067,...,-0.02161,-0.541969,-0.845985,-0.122361,-0.294369,-0.481718,-0.373345,-0.811397,-0.018203,-0.141555
50%,0.277154,-0.017221,-0.108781,-0.943933,-0.844575,-0.856352,-0.948501,-0.849266,-0.849896,-0.878729,...,0.133887,-0.342923,-0.712677,0.010278,0.005146,0.011448,-0.000847,-0.709441,0.182893,0.003951
75%,0.288526,-0.01092,-0.098163,-0.24213,-0.034499,-0.26269,-0.291138,-0.068857,-0.268539,-0.01369,...,0.288944,-0.127371,-0.501158,0.154985,0.28503,0.499857,0.356236,-0.51133,0.248435,0.111932
max,1.0,1.0,1.0,1.0,0.916238,1.0,1.0,0.967664,1.0,1.0,...,0.9467,0.989538,0.956845,1.0,1.0,0.998702,0.996078,0.977344,0.478157,1.0


In [None]:
# Print the index range, column count, dtypes and memory usage of the training frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5881 entries, 0 to 5880
Columns: 562 entries, tBodyAcc-mean()-X to Activity
dtypes: float64(561), object(1)
memory usage: 25.2+ MB


In [None]:
# Relative frequency of each activity label (class balance check).
data.loc[:, 'Activity'].value_counts(normalize=True)

LAYING                0.189594
STANDING              0.184833
SITTING               0.175480
WALKING               0.169699
WALKING_UPSTAIRS      0.145894
WALKING_DOWNSTAIRS    0.134501
Name: Activity, dtype: float64

In [None]:
# Number of missing values per column of the training data, smallest count first.
missing_per_column = data.isna().sum()
missing_per_column.sort_values()

fBodyBodyGyroMag-entropy()     0
fBodyBodyGyroMag-maxInds       0
fBodyBodyGyroMag-meanFreq()    0
fBodyBodyGyroMag-skewness()    0
fBodyBodyGyroMag-kurtosis()    0
                              ..
angle(X,gravityMean)           0
angle(Y,gravityMean)           0
angle(Z,gravityMean)           0
Activity                       0
tBodyAcc-mean()-X              0
Length: 562, dtype: int64

In [None]:
# Number of missing values per column of the feature-description table, smallest count first.
feature_table_missing = features.isna().sum()
feature_table_missing.sort_values()

sensor            0
agg               0
feature_name      0
axis            125
dtype: int64

## (3) x, y 데이터 분할
- Features와 Target 분류

In [None]:
# Name of the label column
target = 'Activity'

# Train/validation pool: every column except the label is a feature
X, y = data.drop(columns=target), data[target]

# Held-out test data, separated the same way
X_test, y_test = test.drop(columns=target), test[target]

## (4) train, validation 데이터 분할

In [None]:
# Hold out 30% of the labelled rows as a validation set. `stratify=y` keeps the
# class proportions of y in both parts. The fixed `random_state` makes the split
# reproducible, so every model below is trained and validated on the same rows
# from run to run.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=1
)

## **(5) 스케일링(필요시)**


### Standard

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the features. The scaling statistics are learned from the
# training split only and then applied unchanged to the validation split.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_valid_s = scaler.transform(X_valid)

### MinMax

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max normalize the features. The ranges are learned from the training
# split only and then applied to the validation split.
# Note: this re-binds `scaler`, replacing the StandardScaler created earlier.
scaler = MinMaxScaler()
X_train_m = scaler.fit_transform(X_train)
X_valid_m = scaler.transform(X_valid)

## (6) PCA 차원 축소

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Reduce the standardized features to 50 principal components.
# The projection is learned from the training split only.
pca = PCA(n_components = 50)
pca.fit(X_train_s)

X_train_pca = pca.transform(X_train_s)
X_valid_pca = pca.transform(X_valid_s)

# The PCA was fitted on standardized data, so the test set must be standardized
# with the training statistics before it is projected. `scaler` cannot be reused
# here because it holds the MinMaxScaler at this point, so a StandardScaler is
# re-fitted on X_train (this yields the same statistics used for X_train_s).
X_test_pca = pca.transform(StandardScaler().fit(X_train).transform(X_test))

# **3. 머신러닝 모델링**



* 세부 요구사항
    - 최소 4개 이상의 알고리즘을 적용하여 모델링을 수행한다.
    - 각 알고리즘별로 전체 변수로 모델링, 상위 N개 변수를 선택하여 모델링을 수행하고 성능 비교를 한다.
    - (옵션) 알고리즘 중 1~2개에 대해서, 변수 중요도 상위 N개를 선정하여 모델링하고 타 모델과 성능을 비교.
        * 상위 N개를 선택하는 방법은, 변수를 하나씩 늘려가며 모델링 및 성능 검증을 수행하여 적절한 지점을 찾는 것이다.

## **(1) Logistic regression**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import *

#### 표준화 데이터

- Validation Data 성능

In [None]:
# Logistic regression with default settings, trained on the standardized
# training split and scored on the standardized validation split.
lr_model = LogisticRegression().fit(X_train_s, y_train)
y_pred = lr_model.predict(X_valid_s)

# Confusion matrix first, then the per-class precision/recall/f1 report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[335   0   0   0   0   0]
 [  0 293  16   0   0   1]
 [  0  20 306   0   0   0]
 [  0   0   0 300   0   0]
 [  0   0   0   1 235   1]
 [  0   0   0   1   0 256]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.94      0.95      0.94       310
          STANDING       0.95      0.94      0.94       326
           WALKING       0.99      1.00      1.00       300
WALKING_DOWNSTAIRS       1.00      0.99      1.00       237
  WALKING_UPSTAIRS       0.99      1.00      0.99       257

          accuracy                           0.98      1765
         macro avg       0.98      0.98      0.98      1765
      weighted avg       0.98      0.98      0.98      1765



- Test Data 성능

In [None]:
# The model was trained on standardized features, so the test set has to pass
# through the same transformation. Predicting on raw X_test feeds the model
# values on a different scale and collapses the accuracy. The scaler is fitted
# on X_train only, so no test information leaks into the statistics.
X_test_s = StandardScaler().fit(X_train).transform(X_test)
y_pred = lr_model.predict(X_test_s)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 39 249   4   0   0   0]
 [  0 247   7   0   0   0]
 [  0  48 239   0   0   0]
 [  0   0 226   2   0   0]
 [  0   1 190   0   4   0]
 [  0  12 200   0   0   3]]
                    precision    recall  f1-score   support

            LAYING       1.00      0.13      0.24       292
           SITTING       0.44      0.97      0.61       254
          STANDING       0.28      0.83      0.41       287
           WALKING       1.00      0.01      0.02       228
WALKING_DOWNSTAIRS       1.00      0.02      0.04       195
  WALKING_UPSTAIRS       1.00      0.01      0.03       215

          accuracy                           0.36      1471
         macro avg       0.79      0.33      0.22      1471
      weighted avg       0.76      0.36      0.24      1471



#### 정규화 데이터

- Validation Data 성능

In [None]:
# Logistic regression with default settings, trained on the min-max
# normalized training split and scored on the normalized validation split.
lr_model = LogisticRegression().fit(X_train_m, y_train)
y_pred = lr_model.predict(X_valid_m)

# Confusion matrix first, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[335   0   0   0   0   0]
 [  0 289  20   0   0   1]
 [  0  19 307   0   0   0]
 [  0   0   0 300   0   0]
 [  0   0   0   1 236   0]
 [  0   0   0   1   0 256]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.94      0.93      0.94       310
          STANDING       0.94      0.94      0.94       326
           WALKING       0.99      1.00      1.00       300
WALKING_DOWNSTAIRS       1.00      1.00      1.00       237
  WALKING_UPSTAIRS       1.00      1.00      1.00       257

          accuracy                           0.98      1765
         macro avg       0.98      0.98      0.98      1765
      weighted avg       0.98      0.98      0.98      1765



- Test Data 성능

In [None]:
# The model was trained on min-max normalized features, so the test set must be
# normalized with the ranges learned from X_train before predicting. Raw X_test
# values are on a different scale than the model expects.
X_test_m = MinMaxScaler().fit(X_train).transform(X_test)
y_pred = lr_model.predict(X_test_m)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[  0 276  16   0   0   0]
 [  0 244  10   0   0   0]
 [  0  20 267   0   0   0]
 [  0  15 210   3   0   0]
 [  0   5 151   0  30   9]
 [  0  42 129   0   0  44]]
                    precision    recall  f1-score   support

            LAYING       0.00      0.00      0.00       292
           SITTING       0.41      0.96      0.57       254
          STANDING       0.34      0.93      0.50       287
           WALKING       1.00      0.01      0.03       228
WALKING_DOWNSTAIRS       1.00      0.15      0.27       195
  WALKING_UPSTAIRS       0.83      0.20      0.33       215

          accuracy                           0.40      1471
         macro avg       0.60      0.38      0.28      1471
      weighted avg       0.55      0.40      0.28      1471



#### PCA 데이터

- Validation Data 성능

In [None]:
# Logistic regression with default settings on the 50 PCA components,
# scored on the PCA-projected validation split.
lr_model = LogisticRegression().fit(X_train_pca, y_train)
y_pred = lr_model.predict(X_valid_pca)

# Confusion matrix first, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[334   1   0   0   0   0]
 [  0 278  30   1   0   1]
 [  0  38 288   0   0   0]
 [  0   0   0 297   2   1]
 [  0   0   0   2 232   3]
 [  0   0   0   3   0 254]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.88      0.90      0.89       310
          STANDING       0.91      0.88      0.89       326
           WALKING       0.98      0.99      0.99       300
WALKING_DOWNSTAIRS       0.99      0.98      0.99       237
  WALKING_UPSTAIRS       0.98      0.99      0.98       257

          accuracy                           0.95      1765
         macro avg       0.96      0.96      0.96      1765
      weighted avg       0.95      0.95      0.95      1765



- Test Data 성능

In [None]:
# Score the PCA-based logistic regression on the PCA-projected test set.
y_pred = lr_model.predict(X_test_pca)

for metric in (confusion_matrix, classification_report):
    print(metric(y_test, y_pred))

[[  0 290   2   0   0   0]
 [  0  76 178   0   0   0]
 [  0   0 287   0   0   0]
 [  0   0 228   0   0   0]
 [  0   0 188   0   7   0]
 [  0   1 214   0   0   0]]
                    precision    recall  f1-score   support

            LAYING       0.00      0.00      0.00       292
           SITTING       0.21      0.30      0.24       254
          STANDING       0.26      1.00      0.41       287
           WALKING       0.00      0.00      0.00       228
WALKING_DOWNSTAIRS       1.00      0.04      0.07       195
  WALKING_UPSTAIRS       0.00      0.00      0.00       215

          accuracy                           0.25      1471
         macro avg       0.24      0.22      0.12      1471
      weighted avg       0.22      0.25      0.13      1471



## **(2) KNN**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

#### 표준화 데이터

In [None]:
# k-nearest-neighbours classifier with default settings on standardized features.
knn_model = KNeighborsClassifier().fit(X_train_s, y_train)
y_pred = knn_model.predict(X_valid_s)

# Validation results: confusion matrix, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[332   3   0   0   0   0]
 [  1 244  64   0   0   1]
 [  0  18 308   0   0   0]
 [  0   0   0 298   1   1]
 [  0   0   0   5 228   4]
 [  0   0   0   4   1 252]]
                    precision    recall  f1-score   support

            LAYING       1.00      0.99      0.99       335
           SITTING       0.92      0.79      0.85       310
          STANDING       0.83      0.94      0.88       326
           WALKING       0.97      0.99      0.98       300
WALKING_DOWNSTAIRS       0.99      0.96      0.98       237
  WALKING_UPSTAIRS       0.98      0.98      0.98       257

          accuracy                           0.94      1765
         macro avg       0.95      0.94      0.94      1765
      weighted avg       0.94      0.94      0.94      1765



#### 정규화 데이터

In [None]:
# k-nearest-neighbours classifier with default settings on min-max normalized features.
knn_model = KNeighborsClassifier().fit(X_train_m, y_train)
y_pred = knn_model.predict(X_valid_m)

# Validation results: confusion matrix, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[333   2   0   0   0   0]
 [  1 266  42   0   0   1]
 [  0  20 306   0   0   0]
 [  0   0   0 300   0   0]
 [  0   0   0   5 228   4]
 [  0   0   0   4   0 253]]
                    precision    recall  f1-score   support

            LAYING       1.00      0.99      1.00       335
           SITTING       0.92      0.86      0.89       310
          STANDING       0.88      0.94      0.91       326
           WALKING       0.97      1.00      0.99       300
WALKING_DOWNSTAIRS       1.00      0.96      0.98       237
  WALKING_UPSTAIRS       0.98      0.98      0.98       257

          accuracy                           0.96      1765
         macro avg       0.96      0.96      0.96      1765
      weighted avg       0.96      0.96      0.96      1765



## **(3) DecisionTree**

In [None]:
from sklearn.tree import DecisionTreeClassifier

#### 표준화 데이터

In [None]:
# Depth-limited decision tree (max depth 5, fixed seed) on standardized features.
dt_model = DecisionTreeClassifier(random_state=1, max_depth=5).fit(X_train_s, y_train)
y_pred = dt_model.predict(X_valid_s)

# Validation results: confusion matrix, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[335   0   0   0   0   0]
 [  0 280  29   0   0   1]
 [  0  26 297   3   0   0]
 [  0   0   0 277  17   6]
 [  0   0   0  20 211   6]
 [  0   0   0  25  23 209]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.92      0.90      0.91       310
          STANDING       0.91      0.91      0.91       326
           WALKING       0.85      0.92      0.89       300
WALKING_DOWNSTAIRS       0.84      0.89      0.86       237
  WALKING_UPSTAIRS       0.94      0.81      0.87       257

          accuracy                           0.91      1765
         macro avg       0.91      0.91      0.91      1765
      weighted avg       0.91      0.91      0.91      1765



#### 정규화 데이터

In [None]:
# Depth-limited decision tree (max depth 5, fixed seed) on min-max normalized features.
dt_model = DecisionTreeClassifier(random_state=1, max_depth=5).fit(X_train_m, y_train)
y_pred = dt_model.predict(X_valid_m)

# Validation results: confusion matrix, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[335   0   0   0   0   0]
 [  0 280  29   0   0   1]
 [  0  26 297   3   0   0]
 [  0   0   0 277  17   6]
 [  0   0   0  20 211   6]
 [  0   0   0  25  23 209]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.92      0.90      0.91       310
          STANDING       0.91      0.91      0.91       326
           WALKING       0.85      0.92      0.89       300
WALKING_DOWNSTAIRS       0.84      0.89      0.86       237
  WALKING_UPSTAIRS       0.94      0.81      0.87       257

          accuracy                           0.91      1765
         macro avg       0.91      0.91      0.91      1765
      weighted avg       0.91      0.91      0.91      1765



## **(4) Random Forest**

In [None]:
from sklearn.ensemble import RandomForestClassifier

#### 표준화 데이터

In [None]:
# Random forest of 100 trees, each limited to depth 5, with a fixed seed,
# trained on standardized features.
rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)
rf_model = rf_model.fit(X_train_s, y_train)
y_pred = rf_model.predict(X_valid_s)

# Validation results: confusion matrix, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[335   0   0   0   0   0]
 [  0 285  24   0   0   1]
 [  0  23 303   0   0   0]
 [  0   0   0 286   8   6]
 [  0   0   0  23 200  14]
 [  0   0   0   5   5 247]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.93      0.92      0.92       310
          STANDING       0.93      0.93      0.93       326
           WALKING       0.91      0.95      0.93       300
WALKING_DOWNSTAIRS       0.94      0.84      0.89       237
  WALKING_UPSTAIRS       0.92      0.96      0.94       257

          accuracy                           0.94      1765
         macro avg       0.94      0.93      0.94      1765
      weighted avg       0.94      0.94      0.94      1765



#### 정규화 데이터

In [None]:
# Random forest of 100 trees, each limited to depth 5, with a fixed seed,
# trained on min-max normalized features.
rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)
rf_model = rf_model.fit(X_train_m, y_train)
y_pred = rf_model.predict(X_valid_m)

# Validation results: confusion matrix, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[335   0   0   0   0   0]
 [  0 285  24   0   0   1]
 [  0  27 299   0   0   0]
 [  0   0   0 287   8   5]
 [  0   0   0  23 202  12]
 [  0   0   0   4   4 249]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.91      0.92      0.92       310
          STANDING       0.93      0.92      0.92       326
           WALKING       0.91      0.96      0.93       300
WALKING_DOWNSTAIRS       0.94      0.85      0.90       237
  WALKING_UPSTAIRS       0.93      0.97      0.95       257

          accuracy                           0.94      1765
         macro avg       0.94      0.94      0.94      1765
      weighted avg       0.94      0.94      0.94      1765



## **(5) SVC**

In [None]:
from sklearn.svm import SVC

#### 표준화 데이터

In [None]:
# Support vector classifier with default settings on standardized features.
svc_model = SVC().fit(X_train_s, y_train)
y_pred = svc_model.predict(X_valid_s)

# Validation results: confusion matrix, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[334   0   0   0   1   0]
 [  0 294  15   0   0   1]
 [  0  25 301   0   0   0]
 [  0   0   0 298   2   0]
 [  0   0   0   0 236   1]
 [  0   0   0   1   1 255]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.92      0.95      0.93       310
          STANDING       0.95      0.92      0.94       326
           WALKING       1.00      0.99      0.99       300
WALKING_DOWNSTAIRS       0.98      1.00      0.99       237
  WALKING_UPSTAIRS       0.99      0.99      0.99       257

          accuracy                           0.97      1765
         macro avg       0.97      0.98      0.97      1765
      weighted avg       0.97      0.97      0.97      1765



#### 정규화 데이터

In [None]:
# Support vector classifier with default settings on min-max normalized features.
svc_model = SVC().fit(X_train_m, y_train)
y_pred = svc_model.predict(X_valid_m)

# Validation results: confusion matrix, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(y_valid, y_pred))

[[335   0   0   0   0   0]
 [  1 288  20   0   0   1]
 [  0  28 298   0   0   0]
 [  0   0   0 300   0   0]
 [  0   0   0   2 233   2]
 [  0   0   0   2   0 255]]
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       335
           SITTING       0.91      0.93      0.92       310
          STANDING       0.94      0.91      0.93       326
           WALKING       0.99      1.00      0.99       300
WALKING_DOWNSTAIRS       1.00      0.98      0.99       237
  WALKING_UPSTAIRS       0.99      0.99      0.99       257

          accuracy                           0.97      1765
         macro avg       0.97      0.97      0.97      1765
      weighted avg       0.97      0.97      0.97      1765



## 딥러닝 (LSTM)

In [None]:
# Dataset directory
path = 'drive/MyDrive/AIVLE/Mini_project/'

# Re-read the CSV files from disk; the frames used by the earlier sections
# are replaced by these fresh copies.
data, test, features = map(
    pd.read_csv,
    (path + 'data01_train.csv', path + 'data01_test.csv', path + 'features.csv'),
)
# important_features = pd.read_csv(path + 'important_features.csv')

# Remove the 'subject' column, which is not used as a model input
for frame in (data, test):
    frame.drop(columns='subject', inplace=True)

# Name of the label column
target = 'Activity'

# Features / label for the train+validation pool
X, y = data.drop(columns=target), data[target]

# Features / label for the held-out test set
X_test, y_test = test.drop(columns=target), test[target]

In [None]:
# Imported here because this cell runs before the cell holding the keras
# imports; without them LabelEncoder / to_categorical raise NameError.
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Encode the string class labels as integers. The mapping is learned from the
# training labels and reused for the test labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y)
y_test_encoded = label_encoder.transform(y_test)

# One-hot encode the integer labels. num_classes is passed explicitly so both
# matrices have the same width even if a class is absent from one of them.
num_label_classes = len(label_encoder.classes_)
y_train_one_hot = to_categorical(y_train_encoded, num_classes=num_label_classes)
y_test_one_hot = to_categorical(y_test_encoded, num_classes=num_label_classes)

# Use the first 80% of the rows for training and the remaining 20% for validation.
# The cut is computed from X itself: the previous code used len(X_train), which
# is either undefined in a fresh run or a leftover of the earlier 70/30 split.
validation_index = int(len(X) * 0.8)
X_train, X_valid = X.iloc[:validation_index], X.iloc[validation_index:]
y_train, y_valid = y_train_one_hot[:validation_index], y_train_one_hot[validation_index:]

In [None]:
import keras
from keras import layers
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.preprocessing import LabelEncoder

num_features = X_train.shape[-1]
# Width of the one-hot target matrix = number of activity classes.
# Assumes y_train / y_valid are the one-hot arrays produced by the previous cell.
num_classes = y_train.shape[-1]

# An LSTM layer expects 3-D input (samples, timesteps, features). Each row of
# the tabular data is presented as a sequence with a single timestep; feeding
# the 2-D frame directly raises "expected ndim=3, found ndim=2".
X_train_seq, X_valid_seq, X_test_seq = (
    np.asarray(part, dtype='float32').reshape(-1, 1, num_features)
    for part in (X_train, X_valid, X_test)
)

# # Build model (dense baseline, kept for reference)
# inputs = keras.Input(shape=(num_features,))
# x = layers.Dense(64, activation='relu')(inputs)
# x = layers.Dropout(0.5)(x)
# outputs = layers.Dense(6, activation='softmax')(x)

# model = keras.Model(inputs, outputs)

# Two stacked LSTM layers followed by a softmax classifier.
# - The second LSTM returns only its last output, so the model emits one
#   prediction per sample, matching the (samples, num_classes) one-hot targets.
# - The output layer has one unit per class and a softmax activation, as
#   required by the categorical cross-entropy loss.
model = keras.models.Sequential([
    keras.layers.LSTM(20, return_sequences=True, input_shape=(None, num_features)),
    keras.layers.LSTM(20),
    keras.layers.Dense(num_classes, activation='softmax'),
])


# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Early stopping: stop after 5 epochs without val_loss improvement and
# restore the best weights seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model; the best checkpoint is written to dense_model.h5
history = model.fit(X_train_seq, y_train,
                    epochs=200,
                    validation_data=(X_valid_seq, y_valid),
                    callbacks=[ModelCheckpoint("dense_model.h5", save_best_only=True), early_stopping])

# Load the best saved model
loaded_model = keras.models.load_model("dense_model.h5")

# Evaluate on the test data (index 1 of evaluate() is the accuracy metric)
print("테스트 Accuracy:", loaded_model.evaluate(X_test_seq, y_test_one_hot)[1])


Epoch 1/200


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_4' (type Sequential).
    
    Input 0 of layer "lstm_24" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 561)
    
    Call arguments received by layer 'sequential_4' (type Sequential):
      • inputs=tf.Tensor(shape=(None, 561), dtype=float64)
      • training=True
      • mask=None


In [None]:
# Predict class indices for the test set.
# A recurrent model declares a 3-D input (samples, timesteps, features); in that
# case each 2-D row is presented as a single-timestep sequence. A dense model
# (2-D input) receives the data unchanged.
# NOTE(review): assumes the recurrent model takes all features in one timestep - confirm.
test_inputs = np.asarray(X_test, dtype='float32')
if len(model.input_shape) == 3 and test_inputs.ndim == 2:
    test_inputs = test_inputs.reshape(len(test_inputs), 1, -1)

temp = model.predict(test_inputs)
# Take the highest-scoring class along the last axis so this works for both
# (samples, classes) and (samples, 1, classes) outputs, then flatten to 1-D.
temp = temp.argmax(axis=-1).ravel()
print(temp)

[1 2 3 ... 4 0 4]


In [None]:
# Display the test labels (presumably to compare them with the predicted class indices).
y_test

0                  SITTING
1                 STANDING
2                  WALKING
3                  SITTING
4                 STANDING
               ...        
1466               SITTING
1467              STANDING
1468    WALKING_DOWNSTAIRS
1469                LAYING
1470    WALKING_DOWNSTAIRS
Name: Activity, Length: 1471, dtype: object

In [None]:
# Display the one-hot encoded test labels.
y_test_one_hot

array([[0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       ...,
       [0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0.]], dtype=float32)

In [None]:
# Sequence model that reads variable-length sequences with one value per timestep:
# two stacked LSTM layers (20 units each, both returning the full sequence)
# followed by a 10-unit dense layer applied at every timestep.
# NOTE(review): the head has 10 units and no softmax, while the activity labels
# have 6 classes - confirm the intended output size before training this model.
model = keras.models.Sequential()
model.add(keras.layers.LSTM(20, return_sequences=True, input_shape=(None, 1)))
model.add(keras.layers.LSTM(20, return_sequences=True))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(10)))