# Data Drift Analysis with Evidently

## Import

In [1]:
import pandas as pd

from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset
from evidently import ColumnMapping

## Get the Data

In [9]:
# Load the two datasets that are compared in the drift report.
train_df = pd.read_csv('Data/train.csv')
test_df = pd.read_csv('Data/test.csv')

# Remove the label column from the training frame only; no such drop is
# applied to the test frame.
# NOTE(review): presumably the test file has no TARGET column -- confirm.
train_df = train_df.drop(columns='TARGET')

# Work on a sample of at most SAMPLE_SIZE rows per frame.
# - A fixed seed makes the sampled rows (and therefore the report) identical
#   from one run to the next.
# - Capping n at the frame length avoids the ValueError that sample() raises
#   when asked for more rows than exist (replace=False is the default).
SAMPLE_SIZE = 10_000
RANDOM_STATE = 42
train = train_df.sample(n=min(SAMPLE_SIZE, len(train_df)), random_state=RANDOM_STATE)
test = test_df.sample(n=min(SAMPLE_SIZE, len(test_df)), random_state=RANDOM_STATE)

## Prepare the data drift report

In [10]:
# Split the reference columns by dtype: float64 columns are declared numerical,
# every remaining column except the identifier is declared categorical.
# NOTE(review): integer-typed columns therefore land in the categorical list --
# confirm this is intended for count-like or day-offset integer features.
float_columns = train.select_dtypes(include='float64').columns.to_list()
other_columns = (
    train.drop('SK_ID_CURR', axis=1)
    .select_dtypes(exclude='float64')
    .columns.to_list()
)

# Describe the column roles to Evidently in a single constructor call.
mapping = ColumnMapping(
    id='SK_ID_CURR',
    numerical_features=float_columns,
    categorical_features=other_columns,
)

# Build the report with the data drift preset as its only metric.
report = Report(metrics=[DataDriftPreset()])

# The training sample is the reference, the test sample is the current data.
report.run(reference_data=train, current_data=test, column_mapping=mapping)

# Kept as the last expression of the cell so the notebook renders the result.
report.show(mode='inline')

In [11]:
# Write the report to an HTML file; the path is relative to the current
# working directory.
report_path = 'data_drift_report.html'
report.save_html(report_path)