# In [None]:
import pandas as pd
from scipy.stats import ks_2samp

def detect_drift(reference_df, current_df, threshold=0.05):
    """Flag numeric columns whose distribution differs between two frames.

    A two-sample Kolmogorov-Smirnov test is run on every column that is
    present in both frames and has a numeric dtype in both. Missing values
    are dropped from each side before testing.

    Args:
        reference_df: DataFrame holding the baseline sample.
        current_df: DataFrame holding the sample to compare against it.
        threshold: Significance level; a p-value strictly below it is
            reported as drift.

    Returns:
        A dict mapping column name to ``{"p_value": float, "drift": bool}``,
        ordered as the columns appear in ``reference_df``. A column with no
        non-missing values on either side is reported with a NaN p-value
        and ``drift=False`` because the test cannot be run on it.
    """
    current_columns = set(current_df.columns)
    drift_report = {}

    # Walk the reference columns (not a set intersection) so the report
    # order is deterministic across runs.
    for col in reference_df.columns:
        if col not in current_columns:
            continue

        reference_col = reference_df[col]
        current_col = current_df[col]
        if not (
            pd.api.types.is_numeric_dtype(reference_col)
            and pd.api.types.is_numeric_dtype(current_col)
        ):
            continue

        reference_sample = reference_col.dropna()
        current_sample = current_col.dropna()
        if reference_sample.empty or current_sample.empty:
            # ks_2samp cannot test an empty sample (depending on the SciPy
            # version it raises or yields NaN), so report "untestable"
            # explicitly instead of aborting the whole report.
            drift_report[col] = {"p_value": float("nan"), "drift": False}
            continue

        _, p_value = ks_2samp(reference_sample, current_sample)
        # Store plain Python scalars so the report serialises cleanly and
        # identity checks such as `is True` behave as expected.
        p_value = float(p_value)
        drift_report[col] = {
            "p_value": p_value,
            "drift": p_value < threshold,
        }

    return drift_report

def main():
    """Compare the reference and current CSV files and print a drift report.

    Reads ``data/reference.csv`` and ``data/current.csv``, runs
    ``detect_drift`` on them with its default threshold, and prints one
    line per reported feature with its status and p-value.
    """
    baseline = pd.read_csv("data/reference.csv")
    latest = pd.read_csv("data/current.csv")

    findings = detect_drift(baseline, latest)

    # Header, then one line per tested feature.
    print("\nData Drift Report (Kolmogorov-Smirnov Test):")
    print("=============================================")
    for feature in findings:
        result = findings[feature]
        if result['drift']:
            status = "Drift Detected"
        else:
            status = "No Drift"
        print(f"{feature}: {status} (p-value = {result['p_value']:.4f})")

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()