# PyCaret のインストール

In [None]:
!pip install pycaret[full]==3.0.4  # 書籍執筆時点のバージョンをインストール


# 最新版をインストール場合はこちら
# !pip install pycaret[full]

# 回帰における評価指標

## PyCaretで使用できる評価指標一覧

In [None]:
import pycaret
# Last expression of the cell: evaluates to the installed PyCaret version string.
pycaret.__version__

In [None]:
from pycaret.regression import *
from pycaret.datasets import get_data


# Fetch the "insurance" dataset and initialise a regression experiment that
# predicts the `charges` column, with the session id fixed at 1234.
data = get_data("insurance")
reg = setup(
    data=data,
    target="charges",
    session_id=1234,
)

# Last expression of the cell: the metrics registered for this experiment.
get_metrics()

## RMSE/MSEの実践

In [None]:
from sklearn.metrics import mean_squared_error
import numpy as np


# Toy ground-truth values and the corresponding predictions.
y_true, y_pred = (
    np.array([5, 7, 20, 12]),
    np.array([3, 10, 14, 12]),
)

# Mean squared error of the predictions.
mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
print("MSE: ", mse)

# RMSE is the square root of the MSE computed above.
rmse = np.sqrt(mse)
print("RMSE: ", rmse)

## MAE の実践

In [None]:
from sklearn.metrics import mean_absolute_error
import numpy as np


# Toy ground-truth values and the corresponding predictions.
y_true, y_pred = (
    np.array([5, 7, 20, 12]),
    np.array([3, 10, 14, 12]),
)

# Mean absolute error between the two arrays.
mae = mean_absolute_error(y_true=y_true, y_pred=y_pred)
print("MAE: ", mae)

## RMSLEの実践

In [None]:
from sklearn.metrics import mean_squared_log_error
import numpy as np


# Toy ground-truth values and the corresponding predictions.
y_true, y_pred = (
    np.array([5, 7, 20, 12]),
    np.array([3, 10, 14, 12]),
)

# RMSLE: square root of the mean squared logarithmic error.
rmsle = np.sqrt(
    mean_squared_log_error(y_true=y_true, y_pred=y_pred)
)
print("RMSLE: ", rmsle)

## MAPE

In [None]:
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np


# Toy ground-truth values and the corresponding predictions.
y_true, y_pred = (
    np.array([5, 7, 20, 12]),
    np.array([3, 10, 14, 12]),
)

# Mean absolute percentage error between the two arrays.
mape = mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred)
print("MAPE: ", mape)

## R^2

In [None]:
from sklearn.metrics import r2_score
import numpy as np


# Toy ground-truth values and the corresponding predictions.
y_true, y_pred = (
    np.array([5, 7, 20, 12]),
    np.array([3, 10, 14, 12]),
)

# Coefficient of determination (R^2) of the predictions.
r2 = r2_score(y_true=y_true, y_pred=y_pred)
print("R^2: ", r2)

# 分類における評価指標

In [None]:
from pycaret.classification import *
from pycaret.datasets import get_data


# Fetch the "diabetes" dataset and initialise a classification experiment on
# the `Class variable` column, with the session id fixed at 1234.
data = get_data("diabetes")
cls = setup(
    data=data,
    target="Class variable",
    session_id=1234,
)

# Last expression of the cell: the metrics registered for this experiment.
get_metrics()

## 混同行列

In [None]:
from sklearn.metrics import confusion_matrix


# Toy binary labels: actual classes and the predicted classes.
y_true, y_pred = (
    [1, 0, 1, 1, 0, 1, 0, 0, 1, 1],
    [1, 0, 0, 1, 0, 1, 1, 0, 1, 0],
)

# Unpack the 2x2 matrix row by row: row 0 holds (TN, FP) and row 1 holds
# (FN, TP), i.e. the same tn, fp, fn, tp order as the flattened matrix.
(tn, fp), (fn, tp) = confusion_matrix(y_true, y_pred)
print("TP: ", tp)
print("FP: ", fp)
print("FN: ", fn)
print("TN: ", tn)

## ACC

In [None]:
from sklearn.metrics import accuracy_score


# Toy binary labels: actual classes and the predicted classes.
y_true, y_pred = (
    [1, 0, 1, 1, 0, 1, 0, 0, 1, 1],
    [1, 0, 0, 1, 0, 1, 1, 0, 1, 0],
)

# Accuracy of the predictions.
print("ACC: ", accuracy_score(y_true=y_true, y_pred=y_pred))

## Recall

In [None]:
from sklearn.metrics import recall_score


# Toy binary labels: actual classes and the predicted classes.
y_true, y_pred = (
    [1, 0, 1, 1, 0, 1, 0, 0, 1, 1],
    [1, 0, 0, 1, 0, 1, 1, 0, 1, 0],
)

# Recall of the predictions.
print("Recall: ", recall_score(y_true=y_true, y_pred=y_pred))

## Precision

In [None]:
from sklearn.metrics import precision_score


# Toy binary labels: actual classes and the predicted classes.
y_true, y_pred = (
    [1, 0, 1, 1, 0, 1, 0, 0, 1, 1],
    [1, 0, 0, 1, 0, 1, 1, 0, 1, 0],
)

# Precision of the predictions.
print("Precision: ", precision_score(y_true=y_true, y_pred=y_pred))

## F1

In [None]:
from sklearn.metrics import f1_score


# Toy binary labels: actual classes and the predicted classes.
y_true, y_pred = (
    [1, 0, 1, 1, 0, 1, 0, 0, 1, 1],
    [1, 0, 0, 1, 0, 1, 1, 0, 1, 0],
)

# F1 score of the predictions.
print("F1: ", f1_score(y_true=y_true, y_pred=y_pred))

## Kappa

In [None]:
from sklearn.metrics import cohen_kappa_score


# Toy binary labels: actual classes and the predicted classes.
y_true, y_pred = (
    [1, 0, 1, 1, 0, 1, 0, 0, 1, 1],
    [1, 0, 0, 1, 0, 1, 1, 0, 1, 0],
)

# Cohen's kappa between the two label sequences.
print(
    "Kappa: ",
    cohen_kappa_score(y_true, y_pred),
)

## MCC

In [None]:
from sklearn.metrics import matthews_corrcoef


# Toy binary labels: actual classes and the predicted classes.
y_true, y_pred = (
    [1, 0, 1, 1, 0, 1, 0, 0, 1, 1],
    [1, 0, 0, 1, 0, 1, 1, 0, 1, 0],
)

# Matthews correlation coefficient of the predictions.
print("MCC: ", matthews_corrcoef(y_true=y_true, y_pred=y_pred))

# metrics を追加

In [None]:
from pycaret.classification import *
from pycaret.datasets import get_data
from sklearn.metrics import log_loss


# Fetch the "diabetes" dataset and start a classification experiment on the
# `Class variable` column with the session id fixed at 123.
diabetes = get_data('diabetes')
setup(diabetes, target='Class variable', session_id=123)

# Register scikit-learn's log_loss as a custom metric. Log loss is defined on
# predicted probabilities, so it is scored from `pred_proba`; the default
# target ('pred') would feed it hard class labels. Lower values are better.
# NOTE(review): estimators without predict_proba may get a placeholder score
# for this metric — confirm against the PyCaret documentation.
add_metric(
    'logloss',
    'Log Loss',
    log_loss,
    target='pred_proba',
    greater_is_better=False,
)

# Last expression of the cell: the metric table, now including 'logloss'.
get_metrics()

In [None]:
from pycaret.classification import *
from pycaret.datasets import get_data
from sklearn.metrics import log_loss


# Fetch the "diabetes" dataset and start a classification experiment on the
# `Class variable` column with the session id fixed at 123.
diabetes = get_data('diabetes')
setup(diabetes, target='Class variable', session_id=123)

# Register scikit-learn's log_loss as a custom metric. Log loss is defined on
# predicted probabilities, so it is scored from `pred_proba`; the default
# target ('pred') would feed it hard class labels. Lower values are better.
# NOTE(review): estimators without predict_proba may get a placeholder score
# for this metric — confirm against the PyCaret documentation.
add_metric(
    'logloss',
    'Log Loss',
    log_loss,
    target='pred_proba',
    greater_is_better=False,
)

# Last expression of the cell: compare models with the custom metric registered.
compare_models()

# 評価の可視化


In [None]:
import pycaret
# Last expression of the cell: evaluates to the installed PyCaret version string.
pycaret.__version__

# plot_model

## 回帰

### モデル作成

In [None]:
from pycaret.datasets import get_data
from pycaret.regression import *


# Fetch the "insurance" dataset and initialise a regression experiment that
# predicts the `charges` column, with the session id fixed at 1234.
data = get_data("insurance")
s = setup(
    data=data,
    target="charges",
    session_id=1234,
)

# Keep whatever compare_models() returns; the plotting cells refer to it as `best`.
best = compare_models()

In [None]:
# Print the estimator that compare_models() returned as `best`.
print(best)

In [None]:
# Train the 'lr' estimator separately; the following cells plot it next to `best`.
lr = create_model('lr')

### 残差プロット

In [None]:
# Residuals plot for `best`, the model returned by compare_models().
plot_model(best, plot='residuals')

In [None]:
# Residuals plot for the separately trained 'lr' model.
plot_model(lr, plot='residuals')

### 予測誤差プロット

In [None]:
# Prediction-error plot for `best`.
plot_model(best, plot='error')

In [None]:
# Prediction-error plot for the 'lr' model.
plot_model(lr, plot='error')

### Cook's distance

In [None]:
# Cook's distance plot for `best`.
plot_model(best, plot='cooks')

In [None]:
# Cook's distance plot for the 'lr' model.
plot_model(lr, plot='cooks')

### 学習曲線

In [None]:
# Learning-curve plot for `best`.
plot_model(best, plot='learning')

In [None]:
# Learning-curve plot for the 'lr' model.
plot_model(lr, plot='learning')

### 特徴重要度グラフ

In [None]:
# Feature-importance plot for `best`.
plot_model(best, plot='feature')

In [None]:
# Feature-importance plot for the 'lr' model.
plot_model(lr, plot='feature')

### パイプライン

In [None]:
# Pipeline plot for `best`.
plot_model(best, plot='pipeline')

In [None]:
# Pipeline plot for the 'lr' model.
plot_model(lr, plot='pipeline')

### パラメータ

In [None]:
# Parameter view for `best`.
plot_model(best, plot='parameter')

In [None]:
# Parameter view for the 'lr' model.
plot_model(lr, plot='parameter')

## 分類

### モデル作成

In [None]:
from pycaret.datasets import get_data
from pycaret.classification import *


# Fetch the "diabetes" dataset and initialise a classification experiment on
# the `Class variable` column, with the session id fixed at 1234.
data = get_data("diabetes")
clf = setup(
    data=data,
    target="Class variable",
    session_id=1234,
)

# Restrict the comparison to three candidates and keep the result as `best`.
best = compare_models(include=["rf", "knn", "lr"])

# Train the 'dt' estimator separately; the plotting cells show it next to `best`.
dt = create_model("dt")

In [None]:
# Print the estimator compare_models() returned from the 'rf', 'knn', 'lr' candidates.
print(best)

### 混同行列

In [None]:
# Confusion-matrix plot for `best`.
plot_model(best, plot='confusion_matrix')

In [None]:
# Confusion-matrix plot for the 'dt' model.
plot_model(dt, plot='confusion_matrix')

### AUC

In [None]:
# AUC plot for `best`.
plot_model(best, plot='auc')

In [None]:
# AUC plot for the 'dt' model.
plot_model(dt, plot='auc')

### 閾値

In [None]:
# Threshold plot for `best`.
plot_model(best, plot='threshold')

In [None]:
# Threshold plot for the 'dt' model.
plot_model(dt, plot='threshold')

### パラメータ


In [None]:
# Parameter view for `best`.
plot_model(best, plot='parameter')

In [None]:
# Parameter view for the 'dt' model.
plot_model(dt, plot='parameter')

### Precision Recall Curve

In [None]:
# Precision-recall curve for `best`.
plot_model(best, plot='pr')

In [None]:
# Precision-recall curve for the 'dt' model.
plot_model(dt, plot='pr')

### class_report

In [None]:
# Classification-report plot for `best`.
plot_model(best, plot='class_report')

In [None]:
# Classification-report plot for the 'dt' model.
plot_model(dt, plot='class_report')

## クラスタリング

### モデル作成

In [None]:
from pycaret.datasets import get_data
from pycaret.clustering import *


# Fetch the "iris" dataset and initialise a clustering experiment that leaves
# the `species` column out of the features; the session id is fixed at 1234.
data = get_data("iris")
s = setup(
    data=data,
    session_id=1234,
    ignore_features=["species"],
)

# Fit a k-means model asking for four clusters.
kmeans = create_model("kmeans", num_clusters=4)

### PCAプロット(2次元)

In [None]:
# 'cluster' plot of the k-means model.
plot_model(kmeans, plot='cluster')

### t-SNE(3次元)

In [None]:
# 'tsne' plot of the k-means model.
plot_model(kmeans, plot='tsne')

In [None]:
# Same 'cluster' plot with save=True.
# NOTE(review): presumably this writes the figure to a file — confirm against the PyCaret docs.
plot_model(kmeans, plot='cluster', save=True)

# evaluate_model

In [None]:
from pycaret.datasets import get_data
from pycaret.regression import *


# Fetch the "insurance" dataset and initialise a regression experiment that
# predicts the `charges` column, with the session id fixed at 1234.
data = get_data("insurance")
s = setup(
    data=data,
    target="charges",
    session_id=1234,
)

# Train the 'lr' estimator and hand it to evaluate_model.
lr = create_model("lr")
evaluate_model(lr)

In [None]:
from pycaret.datasets import get_data
from pycaret.classification import *


# Fetch the "diabetes" dataset and initialise a classification experiment on
# the `Class variable` column, with the session id fixed at 1234.
data = get_data("diabetes")
clf = setup(
    data=data,
    target="Class variable",
    session_id=1234,
)

# Train the 'dt' estimator and hand it to evaluate_model.
dt = create_model("dt")
evaluate_model(dt)

In [None]:
from pycaret.datasets import get_data
from pycaret.clustering import *


# Fetch the "iris" dataset and initialise a clustering experiment that leaves
# the `species` column out of the features; the session id is fixed at 1234.
data = get_data("iris")
s = setup(
    data=data,
    session_id=1234,
    ignore_features=["species"],
)

# Fit a four-cluster k-means model and hand it to evaluate_model.
kmeans = create_model("kmeans", num_clusters=4)
evaluate_model(kmeans)

# dashboard

In [None]:
from pycaret.datasets import get_data
from pycaret.regression import *


# Fetch the "insurance" dataset and initialise a regression experiment that
# predicts the `charges` column, with the session id fixed at 1234.
insurance = get_data("insurance")
reg1 = setup(
    data=insurance,
    target="charges",
    session_id=1234,
)

# Train the 'lr' estimator and pass it to dashboard().
lr = create_model("lr")
dashboard(lr)

# get_leaderboard

In [None]:
# Last expression of the cell: the leaderboard for the current experiment.
get_leaderboard()