In [1]:
try:
    import evidently
except:
    !pip install git+https://github.com/evidentlyai/evidently.git

In [2]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset, RegressionTestPreset
from evidently.tests import *

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

 ## Load the Data

In [4]:
# Fetch the California housing dataset with pandas containers enabled and keep
# its full DataFrame (the `frame` entry of the returned bunch) for later cells.
data = fetch_california_housing(as_frame=True)
housing_data = data["frame"]

In [5]:
# Seed a dedicated generator so the synthetic predictions are identical on
# every Restart & Run All instead of changing with each execution.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Rename without `inplace=True`: rebinding to the returned frame leaves the
# object loaded by the previous cell untouched, so this cell can be re-run safely.
housing_data = housing_data.rename(columns={'MedHouseVal': 'target'})

# Simulated model output: the true target plus Gaussian noise (mean 0, std 5),
# one draw per row.
housing_data['prediction'] = housing_data['target'].to_numpy() + rng.normal(0, 5, housing_data.shape[0])

In [6]:
# Draw two 5,000-row samples (each without replacement) to serve as the
# "reference" and "current" datasets. Fixed seeds make the split reproducible
# across kernel restarts; the seeds differ so the two samples are not identical.
reference = housing_data.sample(n=5000, replace=False, random_state=0)
current = housing_data.sample(n=5000, replace=False, random_state=1)

In [7]:
# Preview the first rows of the reference sample. A bare last expression uses
# the notebook's rich DataFrame display instead of wrapped plain text.
reference.head()

       MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
975    5.0677      52.0  6.300813   1.089431       315.0  2.560976     37.66   
8087   5.3096      47.0  5.588095   1.040476      1219.0  2.902381     33.81   
7137   3.9500      40.0  5.517572   0.987220       932.0  2.977636     34.02   
12260  2.0135      21.0  4.530303   1.046970      2108.0  3.193939     33.79   
11672  4.4821      35.0  5.987931   0.970690      2091.0  3.605172     33.84   

       Longitude  target  prediction  
975      -121.87   2.333   -2.805718  
8087     -118.20   2.099   -1.119292  
7137     -118.11   2.102   -5.160134  
12260    -116.96   0.830   -3.620336  
11672    -118.02   2.141    9.559527  


In [9]:
# Preview the first rows of the current sample. A bare last expression uses
# the notebook's rich DataFrame display instead of wrapped plain text.
current.head()

       MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
3899   4.5648      36.0  4.864469   1.051282       823.0  3.014652     34.21   
13273  6.1480      17.0  7.133588   1.187023       831.0  3.171756     34.11   
9743   4.7969      15.0  6.639394   1.084848      1150.0  3.484848     36.77   
3449   4.2330      34.0  4.994361   0.968045      1948.0  3.661654     34.32   
15807  3.9079      52.0  4.817352   1.036530       816.0  1.863014     37.76   

       Longitude  target  prediction  
3899     -118.52   1.937    7.690746  
13273    -117.62   2.439    5.565380  
9743     -121.65   2.275   -0.757069  
3449     -118.43   1.574   -3.131689  
15807    -122.44   3.700   -2.800074  


 ## Report

In [10]:
# Build a report from the DataDriftPreset alone and evaluate it on the
# reference/current pair.
report = Report(metrics=[DataDriftPreset()])
report.run(reference_data=reference, current_data=current)

# Last expression of the cell: displays the report.
report

In [11]:
# Three metrics computed on the "AveRooms" column: ColumnSummaryMetric,
# ColumnQuantileMetric at quantile 0.25, and ColumnDriftMetric.
ave_rooms_metrics = [
    ColumnSummaryMetric(column_name="AveRooms"),
    ColumnQuantileMetric(column_name="AveRooms", quantile=0.25),
    ColumnDriftMetric(column_name="AveRooms"),
]
report = Report(metrics=ave_rooms_metrics)
report.run(reference_data=reference, current_data=current)

# Last expression of the cell: displays the report.
report

In [12]:
# Use generate_column_metrics to apply ColumnQuantileMetric (quantile 0.25)
# to an explicit list of columns rather than writing one metric per column.
quantile_metrics = generate_column_metrics(
    ColumnQuantileMetric,
    parameters={"quantile": 0.25},
    columns=["AveRooms", "AveBedrms"],
)
report = Report(metrics=[quantile_metrics])
report.run(reference_data=reference, current_data=current)

# Last expression of the cell: displays the report.
report

In [13]:
# Combine an individual metric, a generated group of column metrics, and a
# preset in one report.
# NOTE(review): columns="num" presumably selects every numerical column;
# confirm against the Evidently documentation for the installed version.
combined_metrics = [
    ColumnSummaryMetric(column_name="AveRooms"),
    generate_column_metrics(
        ColumnQuantileMetric,
        parameters={"quantile": 0.25},
        columns="num",
    ),
    DataDriftPreset(),
]
report = Report(metrics=combined_metrics)
report.run(reference_data=reference, current_data=current)

# Last expression of the cell: displays the report.
report

In [14]:
# Write the most recently built report to an HTML file in the
# working directory.
report.save_html("report.html")

In [15]:
# Other export formats of the same report. The return values of as_dict() and
# json() are not stored or displayed here; only save_json() writes a file.
report.as_dict()
report.json()
report.save_json("report.json")

 ## Test Suite

In [16]:
# A suite assembled from individual tests, each constructed with its default
# arguments, then run on the reference/current pair.
dataset_checks = [
    TestNumberOfColumnsWithMissingValues(),
    TestNumberOfRowsWithMissingValues(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedRows(),
    TestNumberOfDuplicatedColumns(),
    TestColumnsType(),
    TestNumberOfDriftedColumns(),
]
tests = TestSuite(tests=dataset_checks)
tests.run(reference_data=reference, current_data=current)

# Last expression of the cell: displays the test suite results.
tests

In [17]:
# A suite consisting only of the NoTargetPerformanceTestPreset, run on the
# reference/current pair.
suite = TestSuite(tests=[NoTargetPerformanceTestPreset()])
suite.run(reference_data=reference, current_data=current)

# Last expression of the cell: displays the test suite results.
suite

In [18]:
# Mix column-specific tests with a preset that is given an explicit list of
# three columns.
column_checks = [
    TestColumnDrift("Population"),
    TestMeanInNSigmas("HouseAge"),
    NoTargetPerformanceTestPreset(columns=["AveRooms", "AveBedrms", "AveOccup"]),
]
suite = TestSuite(tests=column_checks)
suite.run(reference_data=reference, current_data=current)

# Last expression of the cell: displays the test suite results.
suite

In [19]:
# The largest suite in the notebook: dataset-level tests, two tests on the
# "Population" column, and two presets, in the same order as listed below.
dataset_level_tests = [
    TestNumberOfColumnsWithMissingValues(),
    TestNumberOfRowsWithMissingValues(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedRows(),
    TestNumberOfDuplicatedColumns(),
    TestColumnsType(),
    TestNumberOfDriftedColumns(),
]
population_tests = [
    TestColumnDrift("Population"),
    TestShareOfOutRangeValues("Population"),
]
preset_tests = [
    DataStabilityTestPreset(),
    RegressionTestPreset(),
]

suite = TestSuite(tests=dataset_level_tests + population_tests + preset_tests)
suite.run(reference_data=reference, current_data=current)

# Last expression of the cell: displays the test suite results.
suite

In [20]:
# Export the last test suite. The return values of as_dict() and json() are
# not stored or displayed here; save_html() and save_json() write files to the
# working directory.
suite.as_dict()
suite.json()
suite.save_html("test_suite.html")
suite.save_json("test_suite.json")