In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
import seaborn as sns
import platform

# Reset seaborn settings back to the defaults
sns.reset_defaults()

# Font setup: choose a font family according to the host operating system
if platform.system() == 'Darwin':
    matplotlib.rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    # Read the family name out of the font file so it can be handed to rc()
    path = 'c:/Windows/Fonts/malgun.ttf'
    font_name = font_manager.FontProperties(fname=path).get_name()
    matplotlib.rc('font', family=font_name)
else:
    # No font is configured for any other OS; only a notice is printed
    print('Check your OS System')

# Minus-sign handling on plots: turn off the Unicode minus glyph
# (presumably because the fonts selected above may lack it -- TODO confirm)
plt.rcParams['axes.unicode_minus'] = False

In [15]:
# Load the data: fetch the wine dataset bundled with scikit-learn and keep
# the returned dataset object in `wine` for the cells below.
from sklearn.datasets import load_wine

wine = load_wine()

In [7]:
# Show which entries the loaded dataset object provides
wine.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])

In [17]:
# Separate the independent variables (features) from the dependent variable (labels)
data, target = wine.data, wine.target

In [18]:
# Split into training data and test data (fixed seed so the split is repeatable)
from sklearn.model_selection import train_test_split

(train_input, test_input,
 train_target, test_target) = train_test_split(data, target, random_state=42)

In [19]:
# Check the split: inputs and targets of each subset should have matching row counts
print(np.shape(train_input), np.shape(train_target))
print(np.shape(test_input), np.shape(test_target))

(133, 13) (133,)
(45, 13) (45,)


In [23]:
# 교차검증 패키지, 클래스 불러오기
from sklearn.model_selection import cross_validate

## 1. 랜덤포레스트

In [29]:
from sklearn.ensemble import RandomForestClassifier

# Random forest classifier with a fixed seed and parallel execution requested
rf = RandomForestClassifier(random_state=12, n_jobs=-1)

# Cross-validate on the training split only, keeping the per-fold training
# scores alongside the held-out fold scores.
scores = cross_validate(
    estimator=rf,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Note: 'test_score' is the score on the held-out cross-validation folds of the
# training split, not the score on test_input.
print('<랜덤포레스트>   훈련데이터 정확도 :', round(np.mean(scores['train_score']), 4))
print('<랜덤포레스트> 테스트데이터 정확도 :', round(np.mean(scores['test_score']), 4))

<랜덤포레스트>   훈련데이터 정확도 : 1.0
<랜덤포레스트> 테스트데이터 정확도 : 0.9701


In [38]:
# Inspect feature importances: fit the forest on the whole training split,
# then print a header line followed by the importance array.
rf.fit(X=train_input, y=train_target)
print('<특성 중요도>', rf.feature_importances_, sep='\n')

<특성 중요도>
[0.1028361  0.01964014 0.0154432  0.03086936 0.028848   0.08150926
 0.17782879 0.00761759 0.02277321 0.1801928  0.06908495 0.12620187
 0.13715473]


In [61]:
# Keep the feature names and the fitted forest's importances side by side
# so they can be combined into a table.
feature, feature_importances = wine.feature_names, rf.feature_importances_

In [76]:
# Pair each feature name with its importance in a two-column table.
# Building the frame in a single constructor call avoids first creating an
# empty placeholder frame and then filling its columns one at a time.
wine_df = pd.DataFrame({
    'feature': feature,
    'feature_importances': feature_importances,
})

In [81]:
# Rank the features from most to least important and renumber the rows from 0
wine_df = (
    wine_df
    .sort_values('feature_importances', ascending=False)
    .reset_index(drop=True)
)
wine_df

Unnamed: 0,feature,feature_importances
0,color_intensity,0.180193
1,flavanoids,0.177829
2,proline,0.137155
3,od280/od315_of_diluted_wines,0.126202
4,alcohol,0.102836
5,total_phenols,0.081509
6,hue,0.069085
7,alcalinity_of_ash,0.030869
8,magnesium,0.028848
9,proanthocyanins,0.022773


In [84]:
# Final check: score the fitted forest on the held-out test split
print('<랜덤포레스트> 최종테스트 정확도 :', rf.score(X=test_input, y=test_target))

<랜덤포레스트> 최종테스트 정확도 : 1.0


## 2. 엑스트라 트리

In [86]:
from sklearn.ensemble import ExtraTreesClassifier

# Extra-trees classifier with a fixed seed and parallel execution requested
et = ExtraTreesClassifier(random_state=12, n_jobs=-1)

# Cross-validate on the training split only, keeping the per-fold training
# scores alongside the held-out fold scores.
scores = cross_validate(
    estimator=et,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Note: 'test_score' is the score on the held-out cross-validation folds of the
# training split, not the score on test_input.
print('<엑스트라트리>   훈련데이터 정확도 :', round(np.mean(scores['train_score']), 4))
print('<엑스트라트리> 테스트데이터 정확도 :', round(np.mean(scores['test_score']), 4))

<엑스트라트리>   훈련데이터 정확도 : 1.0
<엑스트라트리> 테스트데이터 정확도 : 0.9701
