# Ceteris Paribus cheatsheet

### Prepare dataset

In [1]:
import pandas as pd
from ceteris_paribus.datasets import DATASETS_DIR
import os

# Load the insurance dataset shipped with ceteris_paribus and keep only the
# three explanatory columns plus the target column.
df = pd.read_csv(os.path.join(DATASETS_DIR, 'insurance.csv'))
df = df.loc[:, ['age', 'bmi', 'children', 'charges']]

# Every column except the target ('charges') is an explanatory variable.
var_names = [column for column in df.columns if column != 'charges']

# Plain arrays: x holds the explanatory variables, y holds the target.
x = df[var_names].values
y = df['charges'].values

### Prepare regression models

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn import ensemble, svm

def linear_regression_model():
    """Fit a ``LinearRegression`` on the module-level ``x`` and ``y``.

    Returns:
        A ``(model, data, labels, variable_names)`` tuple: the fitted model
        followed by the module-level ``x``, ``y`` and ``var_names``.
    """
    # scikit-learn's ``fit`` returns the estimator itself, so it can be chained.
    fitted_regressor = LinearRegression().fit(x, y)
    return fitted_regressor, x, y, var_names


def gradient_boosting_model():
    """Fit a ``GradientBoostingRegressor`` on the module-level ``x`` and ``y``.

    The regressor uses 1000 estimators and a fixed ``random_state`` of 42.

    Returns:
        A ``(model, data, labels, variable_names)`` tuple: the fitted model
        followed by the module-level ``x``, ``y`` and ``var_names``.
    """
    booster = ensemble.GradientBoostingRegressor(
        n_estimators=1000,
        random_state=42,
    )
    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned directly.
    return booster.fit(x, y), x, y, var_names


def supported_vector_machines_model():
    """Fit an ``svm.SVR`` on the module-level ``x`` and ``y``.

    The regressor is configured with ``C=0.01``, ``gamma='scale'`` and a
    polynomial kernel.

    Returns:
        A ``(model, data, labels, variable_names)`` tuple: the fitted model
        followed by the module-level ``x``, ``y`` and ``var_names``.
    """
    regressor = svm.SVR(
        C=0.01,
        gamma='scale',
        kernel='poly',
    )
    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned directly.
    return regressor.fit(x, y), x, y, var_names

### Calculate single profile variables

In [3]:
from ceteris_paribus.explainer import explain
from ceteris_paribus.plots.plots import plot_notebook
from ceteris_paribus.profiles import individual_variable_profile

# Fit the gradient boosting model and wrap it in an explainer.
gb_model, data, labels, variable_names = gradient_boosting_model()
explainer_gb = explain(gb_model, variable_names, data, y)

# Profile for the first observation; only the "bmi" variable is plotted.
cp_1 = individual_variable_profile(explainer_gb, x[0], y[0])
plot_notebook(
    cp_1,
    selected_variables=["bmi"],
    print_observations=False,
)

 You can add label using method set_label


### Local fit plots

In [4]:
# Only the fitted SVM is kept from the first call; the other tuple items are
# discarded into ``_``.
svm_model, _, _, _ = supported_vector_machines_model()
linear_model, data, labels, variable_names = linear_regression_model()

# One explicitly labelled explainer per model.
explainer_linear = explain(
    linear_model, variable_names, data, y,
    label='linear_model',
)
explainer_svm = explain(
    svm_model, variable_names, data, y,
    label='svm_model',
)

In [5]:
from ceteris_paribus.select_data import select_neighbours

# Select neighbours of observation 10 (n=10) together with their labels.
neighbours_x, neighbours_y = select_neighbours(x, x[10], y=y, n=10)

# Profiles of the gradient boosting explainer for the selected neighbours.
cp_2 = individual_variable_profile(explainer_gb, neighbours_x, neighbours_y)

# Plot the "age" profiles with residuals drawn in red and no title.
plot_notebook(
    cp_2,
    show_residuals=True,
    selected_variables=["age"],
    print_observations=False,
    color_residuals='red',
    plot_title='',
)

### Aggregate profiles

In [6]:
# Plot the "age" profiles of cp_2 with the "mean" aggregate profile overlaid.
plot_notebook(
    cp_2,
    aggregate_profiles="mean",
    selected_variables=["age"],
    color_pdps='black',
    size_pdps=6,
    alpha_pdps=0.7,
    print_observations=False,
    plot_title='',
)

### Many variables

In [7]:
# Plot the cp_1 profiles for three variables at once.
plot_notebook(
    cp_1,
    selected_variables=["bmi", "age", "children"],
    print_observations=False,
    plot_title='',
    width=900,
)

### Many models

In [8]:
# Profiles of the SVM and linear explainers for the first observation.
cp_svm = individual_variable_profile(explainer_svm, x[0], y[0])
cp_linear = individual_variable_profile(explainer_linear, x[0], y[0])

# Plot the profiles of all three models together.
plot_notebook(
    cp_1, cp_svm, cp_linear,
    print_observations=False,
    plot_title='',
    width=850,
    size=3,
    alpha=0.7,
)

### Color by feature

In [9]:
# Plot the cp_2 profiles coloured by the "age" feature.
plot_notebook(
    cp_2,
    color="age",
    plot_title='',
    width=900,
    size=3,
)

### Prepare classification example

In [10]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
# Iris dataset used by the classification examples below.
iris = load_iris()


def random_forest_classifier():
    """Fit a random forest classifier on the module-level ``iris`` dataset.

    The forest uses 100 estimators and a fixed ``random_state`` of 42.

    Returns:
        A ``(model, data, labels, variable_names)`` tuple: the fitted model
        followed by ``iris['data']``, ``iris['target']`` and
        ``iris['feature_names']``.
    """
    # Use the class imported in this cell rather than reaching through the
    # ``ensemble`` module, which is only in scope because of an earlier cell.
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

    rf_model.fit(iris['data'], iris['target'])

    return rf_model, iris['data'], iris['target'], iris['feature_names']

In [11]:
rf_model, iris_x, iris_y, iris_var_names = random_forest_classifier()


def _class_probability(class_index):
    """Return a predict function yielding one column of ``predict_proba``.

    ``rf_model`` is looked up when the returned function is called, not when
    it is created.
    """
    return lambda X: rf_model.predict_proba(X)[:, class_index]


# One explainer per class; each is labelled with the matching target name.
explainer_rf1 = explain(
    rf_model, iris_var_names, iris_x, iris_y,
    predict_function=_class_probability(0),
    label=iris.target_names[0],
)
explainer_rf2 = explain(
    rf_model, iris_var_names, iris_x, iris_y,
    predict_function=_class_probability(1),
    label=iris.target_names[1],
)
explainer_rf3 = explain(
    rf_model, iris_var_names, iris_x, iris_y,
    predict_function=_class_probability(2),
    label=iris.target_names[2],
)

# Profiles of each per-class explainer for the first iris observation.
cp_rf1 = individual_variable_profile(explainer_rf1, iris_x[0], iris_y[0])
cp_rf2 = individual_variable_profile(explainer_rf2, iris_x[0], iris_y[0])
cp_rf3 = individual_variable_profile(explainer_rf3, iris_x[0], iris_y[0])

### Multiclass profiles

In [14]:
# Plot the three per-class profiles for the selected iris variables.
plot_notebook(
    cp_rf1, cp_rf2, cp_rf3,
    selected_variables=[
        'petal length (cm)',
        'petal width (cm)',
        'sepal length (cm)',
    ],
    plot_title='',
    print_observations=False,
    width=800,
    height=300,
    size=4,
    alpha=0.9,
)