In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ks_2samp

# Step 2: Build synthetic reference and current samples
np.random.seed(42)  # fixed seed so every run draws the same numbers

# Reference sample: each feature is drawn from its own normal distribution.
# The comprehension draws feature_1 first, then feature_2.
ref_data = pd.DataFrame({
    name: np.random.normal(loc=mean, scale=std, size=1000)
    for name, mean, std in (('feature_1', 50, 10), ('feature_2', 100, 20))
})

# Current sample: feature_1's mean moves from 50 to 60 (drift), while
# feature_2 keeps the reference parameters (no drift).
current_data = pd.DataFrame({
    name: np.random.normal(loc=mean, scale=std, size=1000)
    for name, mean, std in (('feature_1', 60, 10), ('feature_2', 100, 20))
})

# Step 3: Visual comparison of the two samples
def plot_feature_drift(feature):
    """Overlay the reference and current density estimates for one feature.

    Args:
        feature: Column name looked up in both the module-level ``ref_data``
            and ``current_data`` frames.

    Opens a new 10x5 figure, draws one filled KDE curve per dataset and
    displays the figure; nothing is returned.
    """
    plt.figure(figsize=(10, 5))
    # Reference is drawn first so it keeps the first colour of the cycle.
    for label, dataset in (('Reference', ref_data), ('Current', current_data)):
        sns.kdeplot(dataset[feature], label=label, fill=True)
    plt.title(f'Distribution Comparison for {feature}')
    plt.legend()
    plt.show()

# Draw the comparison plot for each feature, feature_1 first
for _feature_name in ('feature_1', 'feature_2'):
    plot_feature_drift(_feature_name)

# Step 4: Statistical test (Kolmogorov-Smirnov Test) for drift detection
def detect_drift(feature, alpha=0.05, *, reference=None, current=None):
    """Run a two-sample Kolmogorov-Smirnov test on a single feature.

    Args:
        feature: Column name present in both datasets.
        alpha: Significance level. Drift is reported when the test's p-value
            is strictly below this value. Must lie strictly between 0 and 1.
        reference: DataFrame holding the baseline sample. When omitted, the
            module-level ``ref_data`` is used.
        current: DataFrame holding the sample to compare against the
            baseline. When omitted, the module-level ``current_data`` is used.

    Returns:
        A dict with the keys ``feature``, ``ks_statistic`` and ``p_value``
        (both rounded to 4 decimals for display) and ``drift_detected``
        (a plain ``bool``).

    Raises:
        ValueError: If ``alpha`` is not strictly between 0 and 1.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")

    # Fall back to the notebook's global frames so existing one-argument
    # calls keep working unchanged.
    if reference is None:
        reference = ref_data
    if current is None:
        current = current_data

    # Missing values are dropped before testing so they cannot distort or
    # poison the statistic.
    ref_sample = reference[feature].dropna()
    cur_sample = current[feature].dropna()

    stat, p_value = ks_2samp(ref_sample, cur_sample)

    # The decision uses the unrounded p-value; rounding below is purely for
    # readable output. bool() turns numpy.bool_ into a plain Python bool.
    drift_detected = bool(p_value < alpha)
    return {
        'feature': feature,
        'ks_statistic': round(stat, 4),
        'p_value': round(p_value, 4),
        'drift_detected': drift_detected
    }

# Step 5: Run the drift check on every monitored feature
features = ['feature_1', 'feature_2']
drift_results = list(map(detect_drift, features))  # one result dict per feature
drift_df = pd.DataFrame(drift_results)

# Step 6: Print the heading and the results table, one per line
print("Data Drift Detection Summary:", drift_df, sep="\n")