In [13]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split

# Load and Split Data

In [2]:
# Fetch seaborn's "penguins" example dataset as a DataFrame.
df = sns.load_dataset(name="penguins")

In [3]:
# Discard every row that has at least one missing value (the dropna
# defaults, spelled out), then print a structural summary of what remains.
df = df.dropna(axis=0, how="any")
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 333 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            333 non-null    object 
 1   island             333 non-null    object 
 2   bill_length_mm     333 non-null    float64
 3   bill_depth_mm      333 non-null    float64
 4   flipper_length_mm  333 non-null    float64
 5   body_mass_g        333 non-null    float64
 6   sex                333 non-null    object 
dtypes: float64(4), object(3)
memory usage: 20.8+ KB


In [5]:
# Split the cleaned frame in two: two thirds go to df_ml, the remainder to
# df_prod. Stratifying on the species column keeps the class proportions
# similar in both parts; the fixed seed makes the shuffle repeatable.
df_ml, df_prod = train_test_split(
    df,
    train_size=2/3,
    random_state=42,
    shuffle=True,
    stratify=df["species"],
)

In [6]:
# Sanity check: row counts of the two splits, in (df_ml, df_prod) order.
tuple(map(len, (df_ml, df_prod)))

(222, 111)

# Option 1: Test

In [14]:
from evidently.test_suite import TestSuite
from evidently.test_preset import DataStabilityTestPreset

In [17]:
# Assemble a test suite containing only the data-stability preset.
data_stability = TestSuite(tests=[DataStabilityTestPreset()])

# Evaluate df_prod (current) against df_ml (reference). No explicit column
# mapping is supplied.
data_stability.run(
    reference_data=df_ml,
    current_data=df_prod,
    column_mapping=None,
)

# Export the suite's results to an HTML file in the working directory.
data_stability.save_html("test.html")

# Option 2: Report

In [15]:
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset

In [18]:
# Assemble a report containing only the data-drift preset.
data_drift_report = Report(metrics=[
    DataDriftPreset()
])

# Evaluate df_prod (current) against df_ml (reference). No explicit column
# mapping is supplied.
data_drift_report.run(
    current_data=df_prod,
    reference_data=df_ml,
    column_mapping=None
)

# Use save_html so the file really contains the rendered HTML report.
# Report.save() writes a JSON snapshot instead, which would leave
# "report.html" unreadable in a browser.
data_drift_report.save_html("report.html")