In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [17]:
# Load the raw measurements. header=None keeps the first CSV row as data, so
# pandas labels the six columns 0..5 until they are renamed further down.
df = pd.read_csv(
    '../data/dataSet.csv',
    header=None,
)
df

Unnamed: 0,0,1,2,3,4,5
0,2.602562,2.563770,37.098059,185.083765,M,Bad
1,3.057403,5.110418,74.813240,190.168347,M,Bad
2,2.571436,2.770216,60.483945,192.010416,M,Bad
3,3.703299,6.709837,47.163448,191.049732,M,Bad
4,4.334830,3.814075,66.392621,184.219977,M,Bad
...,...,...,...,...,...,...
9568,0.334082,7.507347,38.002269,218.586367,M,Turtle
9569,3.083660,10.988651,47.903601,177.496479,M,Turtle
9570,3.083660,10.353320,44.800357,175.513532,M,Turtle
9571,2.763857,11.976132,42.757102,179.479804,M,Turtle


In [18]:
# Replace the positional labels with descriptive names: four float
# measurements, `gender`, and `class` (used as the prediction target below).
df.columns = [
    'shoulder_angle_lst',
    'ear_angle_lst',
    'turtle_dis_lst',
    'face_dis_lst',
    'gender',
    'class',
]
df

Unnamed: 0,shoulder_angle_lst,ear_angle_lst,turtle_dis_lst,face_dis_lst,gender,class
0,2.602562,2.563770,37.098059,185.083765,M,Bad
1,3.057403,5.110418,74.813240,190.168347,M,Bad
2,2.571436,2.770216,60.483945,192.010416,M,Bad
3,3.703299,6.709837,47.163448,191.049732,M,Bad
4,4.334830,3.814075,66.392621,184.219977,M,Bad
...,...,...,...,...,...,...
9568,0.334082,7.507347,38.002269,218.586367,M,Turtle
9569,3.083660,10.988651,47.903601,177.496479,M,Turtle
9570,3.083660,10.353320,44.800357,175.513532,M,Turtle
9571,2.763857,11.976132,42.757102,179.479804,M,Turtle


In [19]:
# Check per-column dtypes and non-null counts before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9573 entries, 0 to 9572
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   shoulder_angle_lst  9573 non-null   float64
 1   ear_angle_lst       9573 non-null   float64
 2   turtle_dis_lst      9573 non-null   float64
 3   face_dis_lst        9573 non-null   float64
 4   gender              9573 non-null   object 
 5   class               9573 non-null   object 
dtypes: float64(4), object(2)
memory usage: 448.9+ KB


In [20]:
# Features: every column except the target, selected by name so the split
# does not depend on `class` happening to be the last column.
X = df.drop(columns='class')
# Target: the label column.
y = df['class']

# info() prints its summary and returns None, so the two calls are issued as
# separate statements; wrapping them in a tuple only added a stray
# `(None, None)` cell result.
X.info()
y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9573 entries, 0 to 9572
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   shoulder_angle_lst  9573 non-null   float64
 1   ear_angle_lst       9573 non-null   float64
 2   turtle_dis_lst      9573 non-null   float64
 3   face_dis_lst        9573 non-null   float64
 4   gender              9573 non-null   object 
dtypes: float64(4), object(1)
memory usage: 374.1+ KB
<class 'pandas.core.series.Series'>
RangeIndex: 9573 entries, 0 to 9572
Series name: class
Non-Null Count  Dtype 
--------------  ----- 
9573 non-null   object
dtypes: object(1)
memory usage: 74.9+ KB


(None, None)

In [21]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; random_state pins the shuffle so
# the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
)

# Show the shape of each split as a 4-tuple.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((6701, 5), (2872, 5), (6701,), (2872,))

In [22]:
# One-hot encoding of the categorical feature(s) (here: `gender`).
X_train = pd.get_dummies(X_train)

# Encode the test split, then align it to the training columns. Encoding the
# two splits independently yields different column sets whenever a category
# is absent from one of them; reindexing guarantees the test frame has exactly
# the columns (and order) the models are fitted on. Indicator columns missing
# from the test split are filled with 0, and columns unseen in training are
# dropped.
X_test = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

In [23]:
# Re-check the split shapes now that the features have been one-hot encoded.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((6701, 6), (2872, 6), (6701,), (2872,))

### 스케일링, 모델링

In [24]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import *
import pickle

In [11]:
# Candidate classifiers, each preceded by a StandardScaler.
# NOTE: a later cell rebinds `pipelines` to MinMaxScaler variants; on a
# top-to-bottom run only that later definition reaches the fitting loop.
# random_state is fixed (same seed as the train/test split) so repeated runs
# produce the same fitted models and metrics.
pipelines = {
    'rf' : make_pipeline(StandardScaler(), RandomForestClassifier(random_state=123)),
    'sgd' : make_pipeline(StandardScaler(), SGDClassifier(random_state=123)),
    'ada' : make_pipeline(StandardScaler(), AdaBoostClassifier(random_state=123))
}

In [25]:
# Candidate classifiers, each preceded by a MinMaxScaler. This assignment
# replaces any earlier `pipelines` dict, so these are the pipelines that get
# fitted and evaluated below.
# random_state is fixed (same seed as the train/test split) so the reported
# metrics and the pickled model can be reproduced on a re-run.
pipelines = {
    'rf' : make_pipeline(MinMaxScaler(), RandomForestClassifier(random_state=123)),
    'sgd' : make_pipeline(MinMaxScaler(), SGDClassifier(random_state=123)),
    'ada' : make_pipeline(MinMaxScaler(), AdaBoostClassifier(random_state=123))
}

In [26]:
# Fit every pipeline on the training split and keep the result of each
# fit() call under the pipeline's short name.
fit_model = {
    label: candidate.fit(X_train, y_train)
    for label, candidate in pipelines.items()
}

In [27]:
# Predict the held-out labels with each fitted pipeline, in rf / sgd / ada order.
rf_pred, sgd_pred, ada_pred = (
    fit_model[key].predict(X_test) for key in ('rf', 'sgd', 'ada')
)

In [28]:
# Random forest report (cell originally labelled "Standard").
# NOTE(review): rf_pred comes from whichever `pipelines` definition was fitted
# last; in file order that is the MinMaxScaler variant, so this cell repeats
# the MinMax report rather than showing StandardScaler results.
print(
    'RandomForest\n',
    classification_report(y_test, rf_pred),
    '\n',
    accuracy_score(y_test, rf_pred),
)

RandomForest
               precision    recall  f1-score   support

         Bad       0.95      0.89      0.92       973
        Good       0.95      0.97      0.96       649
      Turtle       0.92      0.96      0.94      1250

    accuracy                           0.94      2872
   macro avg       0.94      0.94      0.94      2872
weighted avg       0.94      0.94      0.94      2872
 
 0.9373259052924791


In [29]:
# MinMax: random forest per-class report followed by overall accuracy,
# computed from rf_pred on the test split.
print(
    'RandomForest\n',
    classification_report(y_test, rf_pred),
    '\n',
    accuracy_score(y_test, rf_pred),
)

RandomForest
               precision    recall  f1-score   support

         Bad       0.95      0.89      0.92       973
        Good       0.95      0.97      0.96       649
      Turtle       0.92      0.96      0.94      1250

    accuracy                           0.94      2872
   macro avg       0.94      0.94      0.94      2872
weighted avg       0.94      0.94      0.94      2872
 
 0.9373259052924791


In [30]:
# SGD report (cell originally labelled "Standard").
# NOTE(review): sgd_pred comes from whichever `pipelines` definition was
# fitted last; in file order that is the MinMaxScaler variant, so this cell
# repeats the MinMax report rather than showing StandardScaler results.
print(
    'SGD\n',
    classification_report(y_test, sgd_pred),
    '\n',
    accuracy_score(y_test, sgd_pred),
)

SGD
               precision    recall  f1-score   support

         Bad       0.86      0.67      0.76       973
        Good       0.74      0.90      0.81       649
      Turtle       0.81      0.86      0.83      1250

    accuracy                           0.80      2872
   macro avg       0.80      0.81      0.80      2872
weighted avg       0.81      0.80      0.80      2872
 
 0.8046657381615598


In [31]:
# MinMax: SGD per-class report followed by overall accuracy, computed from
# sgd_pred on the test split.
print(
    'SGD\n',
    classification_report(y_test, sgd_pred),
    '\n',
    accuracy_score(y_test, sgd_pred),
)

SGD
               precision    recall  f1-score   support

         Bad       0.86      0.67      0.76       973
        Good       0.74      0.90      0.81       649
      Turtle       0.81      0.86      0.83      1250

    accuracy                           0.80      2872
   macro avg       0.80      0.81      0.80      2872
weighted avg       0.81      0.80      0.80      2872
 
 0.8046657381615598


In [16]:
# AdaBoost report (cell originally labelled "Standard").
# NOTE(review): in file order ada_pred is produced by the MinMaxScaler
# pipeline, so re-running this cell will not show StandardScaler results.
# The saved output also reports a different test-set size than the
# neighbouring cells, so it appears to come from an earlier run; re-execute
# to refresh it.
print(
    'Ada\n',
    classification_report(y_test, ada_pred),
    '\n',
    accuracy_score(y_test, ada_pred),
)

Ada
               precision    recall  f1-score   support

         Bad       0.86      0.79      0.82       934
        Good       0.89      0.93      0.91       656
      Turtle       0.83      0.87      0.85      1118

    accuracy                           0.86      2708
   macro avg       0.86      0.86      0.86      2708
weighted avg       0.86      0.86      0.85      2708
 
 0.8552437223042836


In [32]:
# MinMax: AdaBoost per-class report followed by overall accuracy, computed
# from ada_pred on the test split.
print(
    'Ada\n',
    classification_report(y_test, ada_pred),
    '\n',
    accuracy_score(y_test, ada_pred),
)

Ada
               precision    recall  f1-score   support

         Bad       0.87      0.78      0.83       973
        Good       0.87      0.96      0.91       649
      Turtle       0.85      0.88      0.87      1250

    accuracy                           0.86      2872
   macro avg       0.87      0.87      0.87      2872
weighted avg       0.86      0.86      0.86      2872
 
 0.8638579387186629


In [33]:
# Persist the fitted 'rf' pipeline to disk with pickle.
# Only unpickle this file from a trusted location: pickle.load can execute
# arbitrary code.
with open('../models/rf_model2.pkl', 'wb') as model_file:
    pickle.dump(fit_model['rf'], model_file)


In [34]:
# Inspect the encoded test features that were fed to predict().
X_test

Unnamed: 0,shoulder_angle_lst,ear_angle_lst,turtle_dis_lst,face_dis_lst,gender_F,gender_M
4606,0.186025,5.924582,52.392581,174.232603,False,True
8567,8.056045,0.415180,6.019999,194.092761,False,True
7707,9.385518,23.875281,63.599873,128.413395,False,True
4385,21.399810,53.914927,62.255002,119.230868,False,True
4459,2.304778,3.094058,63.555356,169.189243,False,True
...,...,...,...,...,...,...
8925,3.652223,0.485546,8.705688,178.717654,False,True
1074,6.340192,8.185036,71.117903,221.415898,False,True
1085,5.004660,6.473956,73.205719,212.042449,False,True
4895,0.886861,2.675427,90.419508,167.047897,False,True


In [35]:
# Inspect the labels predicted by the 'rf' pipeline for the test split.
rf_pred

array(['Turtle', 'Turtle', 'Bad', ..., 'Good', 'Good', 'Turtle'],
      dtype=object)