In [1]:
# Import libraries
import os
import pandas as pd
import matplotlib.pyplot as plt 

In [2]:
# Load the reference dataset and the production batch that will be compared against it.
reference_df = pd.read_csv("../data/processed/reference.csv")
production_df = pd.read_csv("../data/processed/production_batch_01.csv")

# The comparison below relies on both frames having the same columns in the same
# order. Only the column labels are checked here; row counts are free to differ,
# so the failure message talks about columns rather than shape.
assert list(reference_df.columns) == list(production_df.columns), (
    "DataFrames must have the same columns, in the same order, for drift comparison. "
    f"Only in reference: {sorted(set(reference_df.columns) - set(production_df.columns))}; "
    f"only in production: {sorted(set(production_df.columns) - set(reference_df.columns))}."
)

In [3]:
# Encode the target column with the same mapping in both frames.
#
# Two safeguards on top of a plain `.map`:
#   * Re-running this cell is a no-op. Mapping an already-encoded 0/1 column
#     through {"No": 0, "Yes": 1} would turn every value into NaN.
#   * Labels that are not in the mapping raise instead of silently becoming NaN.

Target_col = "Churn"
churn_encoding = {"No": 0, "Yes": 1}

for _frame_name, _frame in (("reference", reference_df), ("production", production_df)):
    _raw_target = _frame[Target_col]

    # Already encoded (e.g. the cell was executed twice): leave the column alone.
    if _raw_target.dropna().isin(list(churn_encoding.values())).all():
        continue

    _encoded_target = _raw_target.map(churn_encoding)

    # Values that were present before mapping but are NaN afterwards had no
    # entry in `churn_encoding`.
    _unmapped = _raw_target[_raw_target.notna() & _encoded_target.isna()].unique()
    if len(_unmapped) > 0:
        raise ValueError(
            f"Unexpected values in {_frame_name} column '{Target_col}': {list(_unmapped)}"
        )

    _frame[Target_col] = _encoded_target

 

In [4]:
# Numerical features are listed by hand; every other column of the reference
# frame, apart from the target, is treated as categorical.
# NOTE(review): this sweep also picks up identifier-like columns (the printed
# list includes 'customerID') — confirm that is intended downstream.
numerical_cols = ["tenure", "MonthlyCharges", "TotalCharges"]
categorical_cols = [
    name
    for name in reference_df.columns
    if name != Target_col and name not in numerical_cols
]

print("Numerical Columns:", numerical_cols)
print("Categorical Columns:", categorical_cols)

Numerical Columns: ['tenure', 'MonthlyCharges', 'TotalCharges']
Categorical Columns: ['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod']


In [5]:
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.metrics import DatasetSummaryMetric, DatasetMissingValuesMetric
from evidently.pipeline.column_mapping import ColumnMapping
 

In [6]:
# Tell Evidently which role each column plays.
#
# `categorical_cols` is built as "everything that is not numerical or the
# target", so it contains the 'customerID' identifier. An identifier is not a
# feature: declare it as the `id` column and keep it out of the categorical
# features so it is not drift-tested as if it were one.
ID_col = "customerID"

column_mapping = ColumnMapping(
    target=Target_col,
    # Only declare the id column when it is actually present in the data.
    id=ID_col if ID_col in reference_df.columns else None,
    numerical_features=numerical_cols,
    categorical_features=[col for col in categorical_cols if col != ID_col],
)

In [7]:
# Data Quality Report
# Runs the dataset summary and missing-values metrics on the reference and
# production frames, then writes the result as a standalone HTML file.

data_quality_report = Report(
    metrics=[
        DatasetSummaryMetric(),
        DatasetMissingValuesMetric()
    ]
)
data_quality_report.run(
    reference_data=reference_df,
    current_data=production_df,
    column_mapping=column_mapping
)

# save
# Create the output directory first so that a fresh checkout (where
# ../artifacts/reports may not exist yet) does not fail at write time.
data_quality_report_path = "../artifacts/reports/data_quality_report.html"
os.makedirs(os.path.dirname(data_quality_report_path), exist_ok=True)
data_quality_report.save_html(data_quality_report_path)

In [8]:
# Data Drift Report
# Runs Evidently's data-drift preset, comparing the production frame against
# the reference frame, then writes the result as a standalone HTML file.
drift_report = Report(
    metrics=[DataDriftPreset()]
)

drift_report.run(
    reference_data=reference_df,
    current_data=production_df,
    column_mapping=column_mapping
)

# Create the output directory first so that a fresh checkout (where
# ../artifacts/reports may not exist yet) does not fail at write time.
drift_report_path = "../artifacts/reports/drift_report.html"
os.makedirs(os.path.dirname(drift_report_path), exist_ok=True)
drift_report.save_html(drift_report_path)
