In [1]:
!pip install xgboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## pycaret을 활용한 자동화

### 학습 목표
- 와인 품질 데이터 셋애 대해 pycaret을 활용하여 최적의 모델 찾기

### 목차
- 01. 라이브러리 설치 및 불러오기
- 02. 모델 가져오기 및 시각화
- 03. 모델 스태킹

### pycaret 순서
- 01 setup()
- 02 모델 비교- 좋은 모델 선택 - compare_model()
- 03 좋은 모델 매개변수 튜닝 - tune_models()
- 04 모델 저장 및 평가
- 05 모델 불러오기 및 시각화 



### 01 라이브러리 설치 및 불어오기

In [None]:
# !pip install xgboost
# !pip install lightgbm
# !pip install catboost
# !pip install pycaret

In [7]:
import xgboost, lightgbm, catboost

from pycaret.datasets import get_data
from pycaret.classification import *

# 와인 품질 데이터 셋
wine_data = get_data('wine')

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,type
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red


In [8]:
# 분류 모델 설정 및 학습
clf = setup(wine_data, target='quality', fold=3)
clf

Unnamed: 0,Description,Value
0,Session id,1620
1,Target,quality
2,Target type,Multiclass
3,Target mapping,"3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6"
4,Original data shape,"(6497, 13)"
5,Transformed data shape,"(6497, 13)"
6,Transformed train set shape,"(4547, 13)"
7,Transformed test set shape,"(1950, 13)"
8,Ordinal features,1
9,Numeric features,11


<pycaret.classification.oop.ClassificationExperiment at 0x7fc35c0f1d50>

In [9]:
%%time

# 모델 비교 및 평가
best_model = compare_models()
print("최적의 모델 : ", best_model)

# 최적 모델 튜닝
tuned_model = tune_model(best_model)
tuned_model

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.6387,0.8211,0.6387,0.6449,0.6223,0.4318,0.4383,1.4767
rf,Random Forest Classifier,0.631,0.8126,0.631,0.6369,0.6134,0.4186,0.4251,0.8533
lightgbm,Light Gradient Boosting Machine,0.6246,0.7955,0.6246,0.6179,0.6118,0.4175,0.4206,1.7167
xgboost,Extreme Gradient Boosting,0.6206,0.7937,0.6206,0.6098,0.6086,0.4124,0.4151,1.89
catboost,CatBoost Classifier,0.6193,0.7955,0.6193,0.613,0.6067,0.4091,0.4121,12.0733
gbc,Gradient Boosting Classifier,0.5788,0.7598,0.5788,0.5685,0.5622,0.3386,0.344,5.0467
lda,Linear Discriminant Analysis,0.5366,0.7172,0.5366,0.5161,0.5136,0.2649,0.271,0.1933
ridge,Ridge Classifier,0.5335,0.0,0.5335,0.4734,0.4673,0.227,0.2449,0.0867
dt,Decision Tree Classifier,0.5329,0.6525,0.5329,0.5359,0.534,0.3075,0.3076,0.1267
lr,Logistic Regression,0.5241,0.7054,0.5241,0.4781,0.4838,0.2269,0.2372,2.1867


Processing:   0%|          | 0/69 [00:00<?, ?it/s]

최적의 모델 :  ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='sqrt',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_samples_leaf=1,
                     min_samples_split=2, min_weight_fraction_leaf=0.0,
                     n_estimators=100, n_jobs=-1, oob_score=False,
                     random_state=1620, verbose=0, warm_start=False)


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5574,0.7331,0.5574,0.5346,0.5101,0.2689,0.291
1,0.5534,0.7364,0.5534,0.5541,0.4996,0.2574,0.2821
2,0.5597,0.7485,0.5597,0.5304,0.5105,0.2739,0.2946
Mean,0.5569,0.7393,0.5569,0.5397,0.5067,0.2667,0.2892
Std,0.0026,0.0066,0.0026,0.0103,0.005,0.0069,0.0053


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
CPU times: user 6.38 s, sys: 529 ms, total: 6.91 s
Wall time: 1min 42s


In [10]:
# 모델 예측
pred = predict_model(tuned_model)
pred

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Extra Trees Classifier,0.6774,0.8674,0.6774,0.6831,0.6646,0.4946,0.5006


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,type,quality,prediction_label,prediction_score
5274,5.6,0.25,0.19,2.4,0.049,42.0,166.0,0.99200,3.25,0.43,10.4,white,3,6,0.66
2933,9.0,0.24,0.50,1.2,0.048,26.0,107.0,0.99180,3.21,0.34,12.4,white,3,6,0.76
3497,7.2,0.31,0.41,8.6,0.053,15.0,89.0,0.99760,3.29,0.64,9.9,white,3,5,0.52
8,7.8,0.58,0.02,2.0,0.073,9.0,18.0,0.99680,3.36,0.57,9.5,red,4,5,0.49
2505,8.3,0.27,0.45,1.3,0.048,8.0,72.0,0.99440,3.08,0.61,10.3,white,1,4,0.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2407,6.0,0.39,0.17,12.0,0.046,65.0,246.0,0.99760,3.15,0.38,9.0,white,3,5,0.70
4512,7.7,0.18,0.30,1.2,0.046,49.0,199.0,0.99413,3.03,0.38,9.3,white,2,6,0.48
3109,8.0,0.18,0.49,1.8,0.061,10.0,145.0,0.99420,3.23,0.48,10.0,white,2,5,0.46
3441,8.2,0.18,0.31,11.8,0.039,96.0,249.0,0.99760,3.07,0.52,9.5,white,3,6,0.65


In [11]:
# 모델 저장
save_model(tuned_model, 'wine_pycaret_model')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=FastMemory(location=/tmp/joblib),
          steps=[('label_encoding',
                  TransformerWrapperWithInverse(exclude=None, include=None,
                                                transformer=LabelEncoder())),
                 ('numerical_imputer',
                  TransformerWrapper(exclude=None,
                                     include=['fixed acidity',
                                              'volatile acidity', 'citric acid',
                                              'residual sugar', 'chlorides',
                                              'free sulfur dioxide',
                                              'total sulfur dioxide', 'density',
                                              'pH'...
                  ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0,
                                       class_weight=None, criterion='gini',
                                       max_depth=None, max_features='sqrt',
                     

In [12]:
# 저장된 모델 불러오기
loaded_model = load_model('wine_pycaret_model')

Transformation Pipeline and Model Successfully Loaded


In [13]:
# 평가
evaluate_model(loaded_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…