# Getting Started Tutorial

To install Evidently using the pip package manager, run:

```$ pip install evidently```


If you want to see reports inside a Jupyter notebook, you also need to install the Jupyter nbextension. After installing Evidently, run the following two commands in the terminal from the Evidently directory.

To install the Jupyter nbextension, run:

```$ jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently```

To enable it, run:

```$ jupyter nbextension enable evidently --py --sys-prefix```

That's it!

In [1]:
try:
    import evidently
except:
    !npm install -g yarn
    !pip install git+https://github.com/evidentlyai/evidently.git

In [2]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset
from evidently.tests import *

In [3]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Load Data

In [4]:
# Fetch the California housing dataset as a pandas-backed bunch and keep a
# handle on its combined frame.
data = fetch_california_housing(as_frame=True)
housing_data = data["frame"]

In [5]:
# Rename the label column to 'target' without mutating the frame held by the
# loader result, so this cell can be re-run safely.
housing_data = housing_data.rename(columns={'MedHouseVal': 'target'})

# Synthetic 'prediction' column: the true target plus Gaussian noise
# (mean 0, std 5). A seeded generator makes Restart & Run All reproduce the
# same values every time.
rng = np.random.default_rng(42)
housing_data['prediction'] = housing_data['target'].values + rng.normal(0, 5, housing_data.shape[0])

In [6]:
# Draw two samples of 5000 rows each, without replacement, from the same frame.
# The draws are independent of each other, so the two sets may share rows.
# Distinct fixed seeds keep the samples different yet reproducible across runs.
reference = housing_data.sample(n=5000, replace=False, random_state=0)
current = housing_data.sample(n=5000, replace=False, random_state=1)

## Report

In [7]:
# Build a report from the data-drift preset, run it on the reference and
# current samples, and display it as the cell output.
report = Report(metrics=[DataDriftPreset()])
report.run(current_data=current, reference_data=reference)
report

In [8]:
# Report made of individual metrics, all configured for the 'AveRooms' column.
report = Report(
    metrics=[
        ColumnSummaryMetric(column_name='AveRooms'),
        ColumnQuantileMetric(column_name='AveRooms', quantile=0.25),
        ColumnDriftMetric(column_name='AveRooms'),
    ],
)
report.run(current_data=current, reference_data=reference)
report

In [9]:
# Use the generator helper to request the same quantile metric (quantile=0.25)
# for an explicit list of columns instead of spelling each metric out.
report = Report(
    metrics=[
        generate_column_metrics(
            ColumnQuantileMetric,
            columns=['AveRooms', 'AveBedrms'],
            parameters=dict(quantile=0.25),
        ),
    ],
)
report.run(current_data=current, reference_data=reference)
report

In [10]:
# Combine an individual metric, generated per-column metrics and a preset in
# one report.
# NOTE(review): columns='num' presumably selects every numerical column -
# confirm against the Evidently documentation.
report = Report(
    metrics=[
        ColumnSummaryMetric(column_name='AveRooms'),
        generate_column_metrics(
            ColumnQuantileMetric,
            columns='num',
            parameters=dict(quantile=0.25),
        ),
        DataDriftPreset(),
    ],
)
report.run(current_data=current, reference_data=reference)
report

In [11]:
# Show the report results as a Python dictionary (top-level key: 'metrics').
report.as_dict()

{'metrics': [{'metric': 'ColumnSummaryMetric',
   'result': {'column_name': 'AveRooms',
    'column_type': 'num',
    'reference_characteristics': {'number_of_rows': 5000,
     'count': 5000,
     'mean': 5.48,
     'std': 3.4,
     'min': 0.89,
     'p25': 4.48,
     'p50': 5.23,
     'p75': 6.07,
     'max': 141.91,
     'unique': 4884,
     'unique_percentage': 97.68,
     'missing': 0,
     'missing_percentage': 0.0,
     'infinite_count': 0,
     'infinite_percentage': 0.0,
     'most_common': 5.0,
     'most_common_percentage': 0.16},
    'current_characteristics': {'number_of_rows': 5000,
     'count': 5000,
     'mean': 5.42,
     'std': 2.08,
     'min': 0.89,
     'p25': 4.44,
     'p50': 5.24,
     'p75': 6.06,
     'max': 52.69,
     'unique': 4892,
     'unique_percentage': 97.84,
     'missing': 0,
     'missing_percentage': 0.0,
     'infinite_count': 0,
     'infinite_percentage': 0.0,
     'most_common': 6.0,
     'most_common_percentage': 0.12}}},
  {'metric': 'Column

In [12]:
# Show the report results as a JSON string; the payload also carries
# 'version' and 'timestamp' fields alongside 'metrics'.
report.json()

'{"version": "0.2.0", "timestamp": "2022-12-07 21:47:13.606053", "metrics": [{"metric": "ColumnSummaryMetric", "result": {"column_name": "AveRooms", "column_type": "num", "reference_characteristics": {"number_of_rows": 5000, "count": 5000, "mean": 5.48, "std": 3.4, "min": 0.89, "p25": 4.48, "p50": 5.23, "p75": 6.07, "max": 141.91, "unique": 4884, "unique_percentage": 97.68, "missing": 0, "missing_percentage": 0.0, "infinite_count": 0, "infinite_percentage": 0.0, "most_common": 5.0, "most_common_percentage": 0.16}, "current_characteristics": {"number_of_rows": 5000, "count": 5000, "mean": 5.42, "std": 2.08, "min": 0.89, "p25": 4.44, "p50": 5.24, "p75": 6.06, "max": 52.69, "unique": 4892, "unique_percentage": 97.84, "missing": 0, "missing_percentage": 0.0, "infinite_count": 0, "infinite_percentage": 0.0, "most_common": 6.0, "most_common_percentage": 0.12}}}, {"metric": "ColumnQuantileMetric", "result": {"column_name": "AveBedrms", "quantile": 0.25, "current": 1.0058867078798632, "referen

In [13]:
# Save the report as HTML; the relative path resolves against the current
# working directory.
report.save_html('report.html')

In [14]:
# Save the report as JSON; the relative path resolves against the current
# working directory.
report.save_json('report.json')

## Test Suite 

In [15]:
# Test suite assembled from individual dataset-level tests, each left at its
# default configuration (no arguments are passed).
tests = TestSuite(
    tests=[
        TestNumberOfColumnsWithMissingValues(),
        TestNumberOfRowsWithMissingValues(),
        TestNumberOfConstantColumns(),
        TestNumberOfDuplicatedRows(),
        TestNumberOfDuplicatedColumns(),
        TestColumnsType(),
        TestNumberOfDriftedColumns(),
    ],
)
tests.run(current_data=current, reference_data=reference)
tests

In [16]:
# Test suite built from a single preset, run on the reference and current
# samples and displayed as the cell output.
suite = TestSuite(tests=[NoTargetPerformanceTestPreset()])
suite.run(current_data=current, reference_data=reference)
suite

In [17]:
# Mix column-level tests with a preset that is given an explicit column list.
suite = TestSuite(
    tests=[
        TestColumnDrift('Population'),
        TestMeanInNSigmas('HouseAge'),
        NoTargetPerformanceTestPreset(columns=['AveRooms', 'AveBedrms', 'AveOccup']),
    ],
)
suite.run(current_data=current, reference_data=reference)
suite

In [18]:
# Mix column-level tests with tests produced by the generator helper.
# NOTE(review): columns='num' presumably applies the test to every numerical
# column - confirm against the Evidently documentation.
suite = TestSuite(
    tests=[
        TestColumnDrift('Population'),
        TestShareOfOutRangeValues('Population'),
        generate_column_tests(TestMeanInNSigmas, columns='num'),
    ],
)
suite.run(current_data=current, reference_data=reference)
suite

In [19]:
# Show the test results as a Python dictionary (top-level key: 'tests'); each
# entry carries the test's name, description, status, group and parameters.
suite.as_dict()

{'tests': [{'name': 'Drift per Column',
   'description': 'The drift score for the feature **Population** is 0.026. The drift detection method is Wasserstein distance (normed). The drift detection threshold is 0.1.',
   'status': 'SUCCESS',
   'group': 'data_drift',
   'parameters': {'features': {'Population': {'stattest_name': 'Wasserstein distance (normed)',
      'score': 0.026,
      'stattest_threshold': 0.1,
      'data_drift': False}}}},
  {'name': 'Share of Out-of-Range Values',
   'description': 'The share of values out of range in the column **Population** is 0 (0 out of 5000).  The test threshold is eq=0 ± 1e-12.',
   'status': 'SUCCESS',
   'group': 'data_quality',
   'parameters': {'condition': {'eq': 0 ± 1e-12},
    'left': None,
    'right': None,
    'share_not_in_range': 0.0}},
  {'name': 'Mean Value Stability',
   'description': 'The mean value of the column **AveBedrms** is 1.09. The expected range is from -0.31 to 2.53',
   'status': 'SUCCESS',
   'group': 'data_qua

In [20]:
# Show the test results as a JSON string; the payload also carries 'version'
# and 'timestamp' fields alongside 'tests'.
suite.json()

'{"version": "0.2.0", "timestamp": "2022-12-07 21:47:18.679998", "tests": [{"name": "Drift per Column", "description": "The drift score for the feature **Population** is 0.026. The drift detection method is Wasserstein distance (normed). The drift detection threshold is 0.1.", "status": "SUCCESS", "group": "data_drift", "parameters": {"features": {"Population": {"stattest_name": "Wasserstein distance (normed)", "score": 0.026, "stattest_threshold": 0.1, "data_drift": false}}}}, {"name": "Share of Out-of-Range Values", "description": "The share of values out of range in the column **Population** is 0 (0 out of 5000).  The test threshold is eq=0 \\u00b1 1e-12.", "status": "SUCCESS", "group": "data_quality", "parameters": {"condition": {"eq": {"value": 0, "relative": 1e-06, "absolute": 1e-12}}, "left": null, "right": null, "share_not_in_range": 0.0}}, {"name": "Mean Value Stability", "description": "The mean value of the column **AveBedrms** is 1.09. The expected range is from -0.31 to 2.

In [21]:
# Save the test suite results as HTML; the relative path resolves against the
# current working directory.
suite.save_html('test_suite.html')

In [22]:
# Save the test suite results as JSON; the relative path resolves against the
# current working directory.
suite.save_json('test_suite.json')