In [None]:
import pandas as pd
from scipy.stats import ks_2samp

# Read the baseline ("reference") dataset first, then the newly observed
# ("current") one, each from a CSV file in the working directory.
reference_df, current_df = (
    pd.read_csv(csv_path) for csv_path in ('reference.csv', 'current.csv')
)

def detect_data_drift(reference_df, current_df, threshold=0.05):
    """Flag numeric columns whose distribution differs between two datasets.

    Runs a two-sample Kolmogorov-Smirnov test on every ``int64``/``float64``
    column of ``reference_df`` that is also present in ``current_df``.
    Missing values are dropped from both samples before testing.

    Args:
        reference_df: Baseline DataFrame; its dtypes decide which columns
            are tested.
        current_df: DataFrame compared against the baseline.
        threshold: Significance level; a p-value strictly below it is
            reported as drift.

    Returns:
        A dict mapping each tested column name to
        ``{'p_value': float, 'drift_detected': bool}``. When either sample
        has no non-missing values the test cannot be run, so ``p_value`` is
        NaN and ``drift_detected`` is ``False`` for that column.
    """
    drift_results = {}

    numeric_columns = reference_df.select_dtypes(include=['int64', 'float64']).columns

    for column in numeric_columns:
        if column not in current_df.columns:
            continue

        reference_sample = reference_df[column].dropna()
        current_sample = current_df[column].dropna()

        if reference_sample.empty or current_sample.empty:
            # ks_2samp cannot test an empty sample (SciPy versions that
            # validate this raise ValueError), and a single all-missing
            # column must not abort the check for every other column.
            p_value = float('nan')
        else:
            p_value = float(ks_2samp(reference_sample, current_sample).pvalue)

        drift_results[column] = {
            'p_value': p_value,
            # A NaN p-value compares False, so an untestable column is
            # never reported as drifted. bool() avoids leaking numpy.bool_.
            'drift_detected': bool(p_value < threshold),
        }

    return drift_results

# Execute the drift check on the two loaded datasets
results = detect_data_drift(reference_df, current_df)

# Report the p-value and the drift verdict for every tested column
for column, result in results.items():
    print(
        f"Column: {column}",
        f"  p-value: {result['p_value']:.5f}",
        f"  Drift detected: {result['drift_detected']}",
        sep="\n",
    )