In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
try:
    # Public location of all_estimators in current scikit-learn releases.
    from sklearn.utils import all_estimators
except ImportError:
    # Legacy location, kept as a fallback for old scikit-learn installs where
    # the helper was only exposed through the (since removed) testing module.
    from sklearn.utils.testing import all_estimators
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap
from matplotlib.colors import rgb2hex
import os
import pydot
from PIL import Image
import requests, zipfile, io
import time
import warnings
# Suppress all warnings from this point on (applies to every later cell).
warnings.filterwarnings('ignore')
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
# Initialise plotly's offline notebook mode so iplot() can render in cells.
init_notebook_mode(connected=True)

# Render matplotlib text with the 'Malgun Gothic' font family.
# NOTE(review): presumably chosen so Korean labels display correctly; confirm
# the font is installed on the machine running this notebook.
plt.rcParams['font.family'] = 'Malgun Gothic'
# Disable matplotlib's unicode-minus setting for axis tick labels.
plt.rcParams['axes.unicode_minus']=False

%matplotlib inline

In [19]:
# Location of the sensor CSV. The default is the original absolute path; set the
# SENSOR_DATA_PATH environment variable to run the notebook on another machine
# without editing this cell.
DATA_PATH = os.environ.get(
    "SENSOR_DATA_PATH", "k:/sensor_report/data/data_ver3/data.csv"
)

# Read the CSV, then turn the unnamed first column (which pandas labels
# 'Unnamed: 0') into the frame's index under the name 'index'.
data = (
    pd.read_csv(DATA_PATH, encoding='utf-8')
    .rename(columns={'Unnamed: 0': 'index'})
    .set_index('index')
)

In [20]:
# Preview the first three rows to sanity-check the load.
data.head(n=3)

Unnamed: 0_level_0,Time1,Time2,l1,l2,l3,l4,l5,l6,l7,r1,...,R6,R7,LSUM,RSUM,ASUM,Iabel_act,label_load,label_sub,label_weight,label_all
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,21:41:08.4754657,21:41:08.4754657,1.0,1.0,1.0,75.0,90.0,80.0,87.0,84.0,...,1.190588,1.708235,4.335294,11.310588,15.645882,1.0,0kg,A,83kg,1_0kg_A_83kg
1,21:41:08.4754657,21:41:08.5067156,1.0,1.0,1.0,74.0,90.0,80.0,87.0,84.0,...,1.177647,1.708235,4.322353,11.297647,15.62,1.0,0kg,A,83kg,1_0kg_A_83kg
2,21:41:08.4754657,21:41:08.5223356,1.0,1.0,1.0,74.0,90.0,80.0,88.0,84.0,...,1.177647,1.708235,4.335294,11.297647,15.632941,1.0,0kg,A,83kg,1_0kg_A_83kg


In [21]:
# List the column labels of the loaded frame.
# Note: one label column is spelled 'Iabel_act' (leading uppercase "I"), unlike
# the other 'label_*' columns, so it must be referenced with that exact spelling.
data.columns

Index(['Time1', 'Time2', 'l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'r1', 'r2',
       'r3', 'r4', 'r5', 'r6', 'r7', 'L1', 'L2', 'L3', 'L4', 'L5', 'L6', 'L7',
       'R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7', 'LSUM', 'RSUM', 'ASUM',
       'Iabel_act', 'label_load', 'label_sub', 'label_weight', 'label_all'],
      dtype='object')

In [22]:
# Count missing values per column.
data.isna().sum()

Time1           0
Time2           0
l1              0
l2              0
l3              0
l4              0
l5              0
l6              0
l7              0
r1              0
r2              0
r3              0
r4              0
r5              0
r6              0
r7              0
L1              0
L2              0
L3              0
L4              0
L5              0
L6              0
L7              0
R1              0
R2              0
R3              0
R4              0
R5              0
R6              0
R7              0
LSUM            0
RSUM            0
ASUM            0
Iabel_act       0
label_load      0
label_sub       0
label_weight    0
label_all       0
dtype: int64

In [23]:
# Classification target: the 'label_load' column.
y = data["label_load"]

In [24]:
# Feature matrix: the numbered columns l1-l7, r1-r7, L1-L7, R1-R7 (in that
# order) followed by the three sum columns. The time columns and all label
# columns are left out.
x = data[
    [prefix + str(number) for prefix in ("l", "r", "L", "R") for number in range(1, 8)]
    + ["LSUM", "RSUM", "ASUM"]
]

In [None]:
# Hold out 20% of the rows for testing; the remaining 80% form the training set
# (train_size is the complement of test_size, so it is not passed explicitly).
# A fixed random_state makes the shuffled split identical on every run, so the
# accuracy figures printed by the model cells are reproducible and comparable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, shuffle=True, random_state=0
)
# To experiment on a subset only (every 10th row, i.e. 10%), slice with data[::10].

In [29]:
from sklearn.ensemble import RandomForestClassifier

# Time the whole fit + predict + score cycle of a random forest (seed 0).
start_time = time.time()
rf_clf = RandomForestClassifier(random_state=0).fit(x_train, y_train)
pred = rf_clf.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=pred)

# Report hold-out accuracy and the elapsed wall-clock time in seconds.
print('랜덤포레스트 정확도 : {0:.4f}'.format(accuracy))
print("RandomForestClassifier 수행시간 : {0:1.4f}초".format(time.time()-start_time))

랜덤포레스트 정확도 : 0.9685
RandomForestClassifier 수행시간 : 89.7510초


In [31]:
# Import XGBClassifier, the scikit-learn wrapper class of XGBoost.
from xgboost import XGBClassifier
start_time = time.time()

# The target holds string classes (e.g. '0kg'). Current XGBoost releases only
# accept integer class ids 0..n_classes-1, so the labels are encoded for
# training and the predictions decoded back before scoring.
# NOTE: xgb_clf.predict() therefore returns encoded ids; map them back with
# xgb_label_encoder.inverse_transform() wherever the original labels are needed.
xgb_label_encoder = LabelEncoder().fit(y_train)

xgb_clf = XGBClassifier(random_state=0)
xgb_clf.fit(x_train, xgb_label_encoder.transform(y_train))
pred = xgb_label_encoder.inverse_transform(xgb_clf.predict(x_test))
accuracy = accuracy_score(y_test, pred)

# Report hold-out accuracy and the elapsed wall-clock time in seconds.
print('XGBClassifier 정확도:{0:.4f}'.format(accuracy))
print("XGBClassifier 수행시간 : {0:1.4f}초".format(time.time()-start_time))
# To inspect the default hyperparameters: print(xgb_clf.get_params())

XGBClassifier 정확도:0.6590
XGBClassifier 수행시간 : 1079.1134초


In [32]:
from lightgbm import LGBMClassifier

# Time the whole fit + predict + score cycle of a LightGBM classifier (seed 0).
start_time = time.time()
lgbm_clf = LGBMClassifier(random_state=0)
lgbm_clf.fit(X=x_train, y=y_train)
pred = lgbm_clf.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=pred)

# Report hold-out accuracy and the elapsed wall-clock time in seconds.
print('LGBMClassifier 정확도:{0:.4f}'.format(accuracy))
print("LGBMClassifier 수행시간 : {0:1.4f}초".format(time.time()-start_time))
# To inspect the default hyperparameters: print(lgbm_clf.get_params())

LGBMClassifier 정확도:0.8007
LGBMClassifier 수행시간 : 65.0921초


In [33]:
from sklearn.neural_network import MLPClassifier
start_time = time.time()

# Seeded like the other models in this notebook: MLP weight initialisation and
# batch shuffling are random, so without random_state the accuracy changes on
# every run.
# NOTE(review): the features are fed in unscaled; MLPs are sensitive to feature
# scale, so consider standardising x_train/x_test (StandardScaler is already
# imported) and comparing the result.
MLP_clf = MLPClassifier(random_state=0)
MLP_clf.fit(x_train, y_train)
pred = MLP_clf.predict(x_test)
accuracy = accuracy_score(y_test, pred)

# Report hold-out accuracy and the elapsed wall-clock time in seconds.
print('MLPClassifier 정확도:{0:.4f}'.format(accuracy))
print("MLPClassifier 수행시간 : {0:1.4f}초".format(time.time()-start_time))

MLPClassifier 정확도:0.7122
MLPClassifier 수행시간 : 665.4444초


In [34]:
from sklearn.neighbors import KNeighborsClassifier

# Time the whole fit + predict + score cycle of a 5-nearest-neighbours model.
start_time = time.time()
# Remaining settings are left as: weights='uniform', algorithm='auto', metric='minkowski'
KNN_clf = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)
pred = KNN_clf.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=pred)

# Report hold-out accuracy and the elapsed wall-clock time in seconds.
print('KNeighborsClassifier 정확도:{0:.4f}'.format(accuracy))
print("KNeighborsClassifier 수행시간 : {0:1.4f}초".format(time.time()-start_time))

KNeighborsClassifier 정확도:0.9368
KNeighborsClassifier 수행시간 : 442.5188초


n_neighbors : int, optional (default = 5)<br>
  Number of neighbors to use by default for kneighbors queries.<br>
  kNN with k=1 in most cases leads to over-fitting.<br>

metric : string or callable, default ‘minkowski’<br>
  'manhattan': 맨해튼 거리 측정 방법 사용<br>
  'euclidean': 유클리드 거리 측정 방법 사용<br>
  'minkowski': 민코프스키 거리 측정 방법 사용 (기본값 p=2에서는 유클리드 거리와 동일)<br>

weights : str or callable, optional (default = ‘uniform’)<br>
  'uniform': 모든 이웃에 동일한 가중치 부여 (거리에 따른 가중치 없음)<br>
  'distance': 거리의 역수로 가중치 부여 (가까운 이웃일수록 영향이 큼)<br>

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Record the start time so the GBM run can be timed.
start_time = time.time()

# Set up and train the GBM model (seed 0), then score it on the hold-out set.
gb_clf = GradientBoostingClassifier(random_state=0).fit(x_train, y_train)
gb_pred = gb_clf.predict(x_test)
gb_accuracy = accuracy_score(y_true=y_test, y_pred=gb_pred)

# Report hold-out accuracy and the elapsed wall-clock time in seconds.
print('GradientBoostingClassifier 정확도:{0:.4f}'.format(gb_accuracy))
print("GradientBoostingClassifier 수행시간 : {0:1.4f}초".format(time.time()-start_time))
# To inspect the default hyperparameters: print(gb_clf.get_params())

In [None]:
from sklearn.svm import SVC
start_time = time.time()

# RBF-kernel support vector classifier with class_weight='balanced'.
# NOTE(review): this cell has no recorded output; kernel SVMs scale poorly with
# sample count, so expect a long run on the full training set — consider
# fitting on a subset first.
svm_clf = SVC(kernel='rbf', class_weight='balanced', random_state=0)
svm_clf.fit(x_train, y_train)
pred = svm_clf.predict(x_test)
accuracy = accuracy_score(y_test, pred)

# Report hold-out accuracy and the elapsed wall-clock time in seconds.
# (Label spelling corrected from "Classidier" to "Classifier".)
print('Support Vector Classifier 정확도:{0:.4f}'.format(accuracy))
print("SVClassifier 수행시간 : {0:1.4f}초".format(time.time()-start_time))