In [None]:
!pip install pycaret[full]==3.0.4  # 書籍執筆時点のバージョンをインストール


# 最新版をインストール場合はこちら
# !pip install pycaret[full]

In [None]:
# Import the package itself so that its version attribute can be inspected.
import pycaret

# Bare last expression: the installed version is shown as the cell output.
pycaret.__version__

# compare_models()

In [None]:
# Dataset loader plus the classification API (star import, as in every cell).
from pycaret.datasets import get_data
from pycaret.classification import *

# Fetch PyCaret's 'diabetes' sample dataset.
diabetes = get_data('diabetes')

# Set up the experiment: 'Class variable' is the label column and the
# session_id is pinned to the value used throughout this notebook.
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Compare the available models with default settings and keep the result.
best = compare_models()

In [None]:
# Re-run the comparison on the experiment set up in the previous cell,
# this time sorting the results by F1 (sort='F1').
best = compare_models(sort='F1')

In [None]:
# Print the estimator object that compare_models returned.
print(best)

## 対象モデルの指定

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Restrict the comparison to the listed model IDs only.
best = compare_models(
    include=['lr', 'dt', 'lightgbm'],
)

## モデルの除外

In [None]:
# Compare models while leaving out the listed model IDs (exclude=...).
best = compare_models(exclude=['lr', 'dt', 'lightgbm'])

## スコア閾値の設定

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Compare models with the probability threshold set to 0.25.
best = compare_models(
    probability_threshold=0.25,
)

## クロスバリデーションのOFF

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Compare models with cross-validation switched off.
best = compare_models(
    cross_validation=False,
)

# create_model()

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Train a single model: 'lr' is the logistic regression ID.
lr = create_model('lr')

## foldの指定

In [None]:
# Train logistic regression again, this time with the fold count set to 5.
lr = create_model('lr', fold=5)

## 個別のパラメータを設定

In [None]:
# Print the estimator as trained so far, before a custom parameter is passed.
print(lr)

In [None]:
# Pass an estimator-specific setting (C=0.5) as an extra keyword argument,
# then print the result so it can be compared with the previous output.
lr = create_model('lr', C=0.5)
print(lr)

## モデルの確認

In [None]:
# Show what models() returns for the current experiment; the result is
# displayed as the cell output (presumably the table of available model IDs).
models()

## 学習結果をデータフレームで取り出す pull

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *


# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(data=diabetes, target='Class variable', session_id=1234)

# Train a decision tree; its scoring grid is what pull() picks up below.
dt = create_model('dt')

# Retrieve the scoring grid as a DataFrame.
dt_results = pull()

# Leave the DataFrame as the cell's last expression so Jupyter renders it as
# a rich table instead of the plain-text dump that print() would produce.
dt_results

# パラメータチューニング

## tune_model

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Baseline decision tree that the following cells tune.
dt = create_model('dt')

In [None]:
# Tune the decision tree trained in the previous cell, using tune_model's
# default settings.
tuned_dt = tune_model(dt)

In [None]:
# Untuned decision tree, as returned by create_model.
print(dt)

# Tuned decision tree, as returned by tune_model, for comparison.
print(tuned_dt)

In [None]:
# Tune again with the number of search iterations set to 20.
tuned_dt = tune_model(dt, n_iter=20)

## custom grid

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Baseline decision tree to be tuned.
dt = create_model('dt')

# Search space: candidate values 2 through 6 for min_samples_leaf.
params = {'min_samples_leaf': list(range(2, 7))}

# Tune over the custom grid instead of the default search space.
tuned_dt = tune_model(
    dt,
    custom_grid=params,
)

## choose_better

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Baseline decision tree to be tuned.
dt = create_model('dt')

# Search space: candidate values 2 through 6 for min_samples_leaf.
params = {'min_samples_leaf': list(range(2, 7))}

# Tune over the custom grid with choose_better enabled, then print whichever
# estimator tune_model handed back.
better_dt = tune_model(
    dt,
    custom_grid=params,
    choose_better=True,
)
print(better_dt)

# アンサンブル学習

## ensemble_model

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Base learner: a single decision tree.
dt = create_model('dt')

# Wrap the base learner in an ensemble using ensemble_model's defaults.
ensemble_dt = ensemble_model(dt)

In [None]:
# Print the estimator object that ensemble_model returned.
print(ensemble_dt)

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Base learner: a single decision tree.
dt = create_model('dt')

# Build the ensemble with the number of estimators set to 20.
ensemble_dt = ensemble_model(
    dt,
    n_estimators=20,
)

# Print the resulting ensemble estimator.
print(ensemble_dt)

## ブースティング

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Base learner: a single decision tree.
dt = create_model('dt')

# Build the ensemble with method='Boosting' rather than the default method.
boosting_dt = ensemble_model(
    dt,
    method='Boosting',
)

# Print the resulting boosted estimator.
print(boosting_dt)

## blending model

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Base learners to blend, trained one after another.
lr = create_model('lr')  # logistic regression
dt = create_model('dt')  # decision tree
rf = create_model('rf')  # random forest

# Combine the three trained models into one blended estimator.
blender = blend_models(
    [lr, dt, rf],
)

In [None]:
# Inspect the class of the object that blend_models returned.
type(blender)

In [None]:
# Print the blended estimator to see how it is composed.
print(blender)

In [None]:
# Blend the same three models again, supplying one weight per estimator.
blender = blend_models([lr, dt, rf], weights=[0.3, 0.4, 0.3])

In [None]:
# Feed the three models selected by compare_models(n_select=3) straight
# into blend_models.
blender = blend_models(compare_models(n_select=3))

## stack model

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Base learners to stack, trained one after another.
lr = create_model('lr')    # logistic regression
dt = create_model('dt')    # decision tree
knn = create_model('knn')  # k-nearest neighbours

# Stack the three trained models using stack_models' default meta-model.
stacker = stack_models(
    [lr, dt, knn],
)

In [None]:
# Feed the three models selected by compare_models(n_select=3) straight
# into stack_models.
stacker = stack_models(compare_models(n_select=3))

## meta model指定

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Base learners to stack, trained one after another.
lr = create_model('lr')    # logistic regression
dt = create_model('dt')    # decision tree
knn = create_model('knn')  # k-nearest neighbours

# Separately trained LightGBM model that will serve as the meta-model.
lightgbm = create_model('lightgbm')

# Stack the base learners, passing the LightGBM model as meta_model.
stacker = stack_models(
    [lr, dt, knn],
    meta_model=lightgbm,
)

In [None]:
# Print the stacked estimator, including the meta-model passed above.
print(stacker)

# optimize threshold

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *

# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
clf1 = setup(
    data=diabetes,
    target='Class variable',
    session_id=1234,
)

# Model whose threshold will be optimised: k-nearest neighbours.
knn = create_model('knn')

# Run optimize_threshold on the trained model with default settings.
optimized_knn = optimize_threshold(knn)

# calibrate_model

In [None]:
# Dataset loader plus the classification API.
from pycaret.datasets import get_data
from pycaret.classification import *


# Reload the 'diabetes' sample data and start a fresh experiment.
diabetes = get_data('diabetes')
# session_id is pinned to the same value as every other setup() call in this
# notebook; without it the results and calibration plots below change from
# run to run.
clf1 = setup(data=diabetes, target='Class variable', session_id=1234)

# Train the decision tree that will be calibrated.
dt = create_model('dt')

# Calibrate the trained model; the uncalibrated `dt` is kept so the two
# calibration plots in the following cells can be compared.
calibrated_dt = calibrate_model(dt)

In [None]:
# Calibration plot for the original, uncalibrated decision tree.
plot_model(dt, plot='calibration')

In [None]:
# Calibration plot for the calibrated model, to compare with the plot above.
plot_model(calibrated_dt, plot='calibration')