In [15]:
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import ColumnSummaryMetric, ColumnQuantileMetric, ColumnDriftMetric, DatasetDriftMetric, DataDriftTable
from evidently.test_suite import TestSuite
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset, RegressionTestPreset
from evidently.ui.workspace import Workspace
from evidently.ui.workspace import WorkspaceBase, RemoteWorkspace
from evidently.ui.dashboards import DashboardPanelCounter, ReportFilter, PanelValue, PlotType, CounterAgg

import datetime

In [12]:
# Open the Evidently workspace stored under this path.
ws = Workspace("getting started evidently")

# Look the project up by name rather than by a hard-coded UUID: the UUID is
# generated at creation time, so it only exists in the workspace where the
# notebook was first run. Create the project only when no match is found, so
# re-running this cell does not pile up duplicate projects.
PROJECT_NAME = "data drift customization"
matching_projects = ws.search_project(PROJECT_NAME)
project = matching_projects[0] if matching_projects else ws.create_project(PROJECT_NAME)

# Display the project so its id and dashboard config are visible in the output.
project

Project(id=UUID('0193d372-d204-7304-9a4e-636bf5c5dc45'), name='data drift customization', description=None, dashboard=DashboardConfig(name='data drift customization', panels=[], tabs=[], tab_id_to_panel_ids={}), team_id=None, org_id=None, date_from=None, date_to=None, created_at=datetime.datetime(2024, 12, 17, 12, 38, 57, 732830))

In [19]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Step 1: Create a synthetic classification dataset (fixed seed for reproducibility)
X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=10, random_state=42)
data = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(X.shape[1])])
data['target'] = y

# Split into train and test sets (25% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    data.drop('target', axis=1), data['target'], test_size=0.25, random_state=42
)

# Step 2: Train a model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on both splits: test predictions feed the "current" dataset,
# train predictions feed the "reference" dataset.
test_predictions = model.predict(X_test)
train_predictions = model.predict(X_train)

# Calculate accuracy on the held-out test split.
# (Uses `test_predictions`; the previously referenced `predictions` name was
# never defined and raised NameError on a fresh kernel.)
accuracy = accuracy_score(y_test, test_predictions)
print(f'Model Accuracy: {accuracy}')

# Step 3: Use Evidently to analyze model performance and data drift
# Combine predictions with the test set for Evidently analysis
test_data_with_predictions = X_test.copy()
test_data_with_predictions['target'] = y_test
test_data_with_predictions['prediction'] = test_predictions

# Generate reference (training) and current (test) datasets as Pandas dataframes.
# Both frames end up with the same columns: features, 'target', 'prediction'.
reference_data = pd.concat([X_train, y_train], axis=1)
reference_data["prediction"] = train_predictions
current_data = test_data_with_predictions

Model Accuracy: 0.856


In [3]:
# Inspect the reference (training) frame built above.
# NOTE(review): the saved output below ends at the 'target' column, so it appears
# to predate the cell that adds 'prediction' -- re-run to refresh it.
reference_data

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,target
82,0.985400,1.782906,-1.102515,-1.585846,-0.452062,-1.177226,2.303572,-0.199773,1.519616,-1.666003,...,-1.070134,-0.096279,-0.247340,0.468540,-0.062764,1.526743,1.843870,0.670050,-0.825183,0
991,0.135859,-0.447961,-0.313845,-0.052505,0.683565,-0.767067,1.039595,-0.576499,1.375729,0.098406,...,-0.233581,-0.712826,-1.214806,-0.603838,-0.118237,0.436611,1.094532,-0.230586,-0.366928,1
789,0.489178,0.295225,0.381886,-1.179777,-0.698644,-0.436515,-1.202069,0.616323,-1.519416,-0.025890,...,0.295864,0.755035,-0.679445,1.071680,0.127422,-0.530940,-1.238467,-0.239556,0.424840,1
894,2.935658,1.784775,0.659333,-1.073328,1.052441,-0.513901,-1.007995,-0.380870,-0.000635,1.547709,...,0.708378,-0.602395,0.013450,-1.198951,-0.059110,-0.911103,-0.554181,-0.928792,0.366354,1
398,0.975201,1.110911,-0.046991,0.369264,0.437550,-0.029173,0.314066,-0.300760,0.595207,0.251059,...,-0.005654,-0.389590,-1.500802,0.139320,-0.059125,0.066200,0.398967,1.335570,-0.109425,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,-0.317761,0.249451,1.015735,0.116579,1.009762,-0.147340,-1.841883,-0.195499,-0.711112,1.953148,...,1.037797,-0.411941,0.399228,0.963925,-0.015489,-1.405108,-1.282655,-0.275121,0.663796,1
270,-0.550587,-1.137033,-0.610611,0.509689,-1.366894,0.491446,0.748150,0.603648,-0.454848,-1.709867,...,-0.690339,0.888860,-0.821815,-0.229081,0.103210,0.842808,0.238153,0.137662,-0.275527,0
860,1.897767,-1.372859,0.206854,-0.947893,-1.863607,1.194763,-1.352985,1.283971,-2.547540,-1.061106,...,0.030353,1.662245,0.351480,0.894647,0.252545,-0.291258,-1.712425,0.046935,0.471533,0
435,-0.281845,-1.979019,0.265890,-0.636876,-0.784114,-0.977939,-0.977625,0.619556,-1.403555,-0.227896,...,0.179960,0.776894,2.026817,-0.612423,0.125503,-0.370405,-1.071060,-0.830070,0.344184,1


In [24]:
# Tell Evidently which columns hold the ground truth and the model output.
column_mapping = ColumnMapping(
    target="target",          # ground-truth column
    prediction="prediction",  # model output column
)

# Data drift + data quality presets, stamped with the current time.
drift_and_quality_metrics = [DataDriftPreset(), DataQualityPreset()]
report = Report(metrics=drift_and_quality_metrics, timestamp=datetime.datetime.now())
report.run(
    reference_data=reference_data,
    current_data=current_data,
    column_mapping=column_mapping,
)

# Store the computed report under the project in the workspace.
ws.add_report(project_id=project.id, report=report)


In [None]:
# Color coding: render the drift table with one of Evidently's predefined color schemes.
from evidently.options import BERLIN_AUTUMN_COLOR_OPTIONS

# Bound to its own name so the earlier `report` object (read again by the
# regression cell further down) is not overwritten by this styling demo.
color_report = Report(
    metrics=[DataDriftTable()],
    timestamp=datetime.datetime.now(),
    options=[BERLIN_AUTUMN_COLOR_OPTIONS],
)

# Use the frames built earlier in the notebook; the previously referenced
# `reference` / `current` names were never defined and raised NameError.
color_report.run(reference_data=reference_data, current_data=current_data, column_mapping=None)
color_report.show(mode="inline")

In [None]:
#report on selected columns



In [None]:
# Change the drift detection algorithm: pick the statistical test and its
# threshold separately for numerical ('ks') and categorical ('psi') columns.
custom_drift_preset = DataDriftPreset(
    num_stattest='ks',
    cat_stattest='psi',
    num_stattest_threshold=0.2,
    cat_stattest_threshold=0.2,
)
data_drift_report = Report(metrics=[custom_drift_preset])

# Compare current against reference using the column mapping defined earlier.
data_drift_report.run(
    reference_data=reference_data,
    current_data=current_data,
    column_mapping=column_mapping,
)
data_drift_report.show(mode='inline')



In [None]:
#custom drift detection method




In [None]:
#data set level metrics



In [None]:
#column level metrics



In [None]:
# Set metric parameters
# To set a custom condition for dataset drift (the share of drifting columns in the dataset)
# in the relevant Metrics or Presets, pass `drift_share`:
# NOTE(review): this bare expression only constructs the metric; it is not passed to a
# Report in this cell, so nothing is computed here.
DatasetDriftMetric(drift_share=0.10)

In [27]:
# Regression metrics
# NOTE(review): the model trained above is a LogisticRegression classifier, so
# these regression metrics are computed on class labels -- confirm this is the
# intended demo rather than a classification preset.

# Reuse the numeric feature list from the column mapping of the earlier `report`.
numeric_columns = report.get_column_mapping().numerical_features
column_mapping = ColumnMapping(
    target="target",                     # ground-truth column
    prediction="prediction",             # model output column
    numerical_features=numeric_columns,  # numeric columns
)

# The preset bundles a predefined set of regression metrics.
regression_performance_report = Report(metrics=[RegressionPreset()])

# Run the report on the reference and current datasets, then store it in the workspace.
regression_performance_report.run(
    reference_data=reference_data,
    current_data=current_data,
    column_mapping=column_mapping,
)
ws.add_report(project_id=project.id, report=regression_performance_report)

