In [1]:
try:
    import evidently
except:
    !pip install git+https://github.com/evidentlyai/evidently.git
    
import pandas as pd
import numpy as np

from sklearn import datasets, ensemble, model_selection

from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.metric_preset import DataQualityPreset
from evidently.metric_preset import RegressionPreset
from evidently.metric_preset import ClassificationPreset
from evidently.metric_preset import TargetDriftPreset

  from .autonotebook import tqdm as notebook_tqdm


<b>Prepare Datasets</b>

In [2]:
#Dataset for Data Quality and Integrity
adult_data = datasets.fetch_openml(name='adult', version=2, as_frame='auto')
adult = adult_data.frame

adult_ref = adult[~adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]
adult_cur = adult[adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]

adult_cur.iloc[:2000, 3:5] = np.nan

  warn(


In [3]:
#Dataset for regression
housing_data = datasets.fetch_california_housing(as_frame='auto')
housing = housing_data.frame

housing.rename(columns={'MedHouseVal': 'target'}, inplace=True)
housing['prediction'] = housing_data['target'].values + np.random.normal(0, 3, housing.shape[0])

housing_ref = housing.sample(n=5000, replace=False)
housing_cur = housing.sample(n=5000, replace=False)

In [4]:
#Dataset for Binary Probabilistic Classifcation
bcancer_data = datasets.load_breast_cancer(as_frame='auto')
bcancer = bcancer_data.frame

bcancer_ref = bcancer.sample(n=300, replace=False)
bcancer_cur = bcancer.sample(n=200, replace=False)

bcancer_label_ref = bcancer_ref.copy(deep=True)
bcancer_label_cur = bcancer_cur.copy(deep=True)

model = ensemble.RandomForestClassifier(random_state=1, n_estimators=10)
model.fit(bcancer_ref[bcancer_data.feature_names.tolist()], bcancer_ref.target)

bcancer_ref['prediction'] = model.predict_proba(bcancer_ref[bcancer_data.feature_names.tolist()])[:, 1]
bcancer_cur['prediction'] = model.predict_proba(bcancer_cur[bcancer_data.feature_names.tolist()])[:, 1]

bcancer_label_ref['prediction'] = model.predict(bcancer_label_ref[bcancer_data.feature_names.tolist()])
bcancer_label_cur['prediction'] = model.predict(bcancer_label_cur[bcancer_data.feature_names.tolist()])

In [5]:
#Dataset for multiclass classifcation
iris_data = datasets.load_iris(as_frame='auto')
iris = iris_data.frame

iris_ref = iris.sample(n=150, replace=False)
iris_cur = iris.sample(n=150, replace=False)

model = ensemble.RandomForestClassifier(random_state=1, n_estimators=3)
model.fit(iris_ref[iris_data.feature_names], iris_ref.target)

iris_ref['prediction'] = model.predict(iris_ref[iris_data.feature_names])
iris_cur['prediction'] = model.predict(iris_cur[iris_data.feature_names])

<b>How to run Reports (ex. Dashboard)?</b>

In [6]:
data_drift_report = Report(metrics=[
    DataDriftPreset(num_stattest='ks', cat_stattest='psi', num_stattest_threshold=0.2, cat_stattest_threshold=0.2),
])

data_drift_report.run(reference_data=adult_ref, current_data=adult_cur)
data_drift_report

In [7]:
data_drift_report.save_html('data_drift_report.html')

<b>How to create a json Report (ex. Profile)?</b>

In [9]:
data_drift_report.json()
data_drift_report.save_json('data_drift_report.json')

<b>How to get a python object with Report's main data?<b>

In [10]:
data_drift_report.as_dict()
print(data_drift_report)

<evidently.report.report.Report object at 0x0000022558628610>


<b>What Reports are avaliable?</b>

In [11]:
data_quality_report = Report(metrics=[
    DataQualityPreset(),
])

data_quality_report.run(reference_data=adult_ref, current_data=adult_cur)
data_quality_report

In [12]:
regression_performance_report = Report(metrics=[
    RegressionPreset(),
])

regression_performance_report.run(reference_data=housing_ref.sort_index(), current_data=housing_cur.sort_index())
regression_performance_report

In [13]:
classification_performance_report = Report(metrics=[
    ClassificationPreset(probas_threshold=0.7),
])

classification_performance_report.run(reference_data=bcancer_ref, current_data=bcancer_cur)

classification_performance_report

In [14]:
num_target_drift_report = Report(metrics=[
    TargetDriftPreset(num_stattest='ks', cat_stattest='psi'),
])

num_target_drift_report.run(reference_data=housing_ref, current_data=housing_cur)
num_target_drift_report

In [15]:
multiclass_cat_target_drift_report = Report(metrics=[
    TargetDriftPreset(num_stattest='ks', cat_stattest='psi'),
])

multiclass_cat_target_drift_report.run(reference_data=iris_ref, current_data=iris_cur)
multiclass_cat_target_drift_report

In [16]:
binary_cat_target_drift_report = Report(metrics=[
    TargetDriftPreset(num_stattest='ks', cat_stattest='psi'),
])

binary_cat_target_drift_report.run(reference_data=bcancer_label_ref, current_data=bcancer_label_cur)
binary_cat_target_drift_report

In [17]:
prob_binary_cat_target_drift_report = Report(metrics=[
    TargetDriftPreset(num_stattest='ks', cat_stattest='psi'),
])

prob_binary_cat_target_drift_report.run(reference_data=bcancer_ref, current_data=bcancer_cur)
prob_binary_cat_target_drift_report

In [18]:
prob_binary_cat_target_drift_report.save_html('prob_binary_cat_target_drift_report.html')