### Detect Data Drift
**Description**: Data drift occurs when the statistical properties of your data change over time. Learn to detect data drift using visualizations (histograms and boxplots) together with a two-sample Kolmogorov–Smirnov test.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def detect_data_drift(reference_data, current_data, feature_name, *,
                      significance_level=0.05, bins=30):
    """Plot and test one feature for drift between two datasets.

    Draws overlaid histograms and side-by-side boxplots of the feature in
    both datasets, runs a two-sample Kolmogorov-Smirnov test
    (``scipy.stats.ks_2samp``), and prints the statistic, the p-value and a
    verdict.

    Args:
        reference_data: Container indexable by ``feature_name`` (e.g. a
            pandas DataFrame) holding the baseline values.
        current_data: Container indexable by ``feature_name`` holding the
            values to compare against the baseline.
        feature_name: Key/column to compare in both containers.
        significance_level: Drift is reported when the KS p-value is
            strictly below this threshold. Defaults to 0.05.
        bins: Number of histogram bins shared by both distributions.
            Defaults to 30.

    Returns:
        A ``(ks_statistic, p_value)`` tuple of floats.

    Raises:
        ValueError: If either dataset has no non-missing values for the
            feature.
    """
    # Drop missing values up front: NaNs would otherwise break the histogram
    # range computation or leak into the KS result and yield a NaN p-value,
    # which would be silently reported as "no drift".
    reference_feature = pd.Series(reference_data[feature_name]).dropna().to_numpy()
    current_feature = pd.Series(current_data[feature_name]).dropna().to_numpy()
    if reference_feature.size == 0 or current_feature.size == 0:
        raise ValueError(
            f"Feature {feature_name!r} has no non-missing values in the "
            "reference or current data"
        )

    # Use one set of bin edges for both histograms so the overlaid bars are
    # directly comparable (independent binning gives different bar widths).
    bin_edges = np.histogram_bin_edges(
        np.concatenate([reference_feature, current_feature]), bins=bins
    )

    fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 6))

    hist_ax.hist(reference_feature, bins=bin_edges, alpha=0.5, label='Reference')
    hist_ax.hist(current_feature, bins=bin_edges, alpha=0.5, label='Current')
    hist_ax.set_title(f'Distribution of {feature_name}')
    hist_ax.legend()

    # Label the boxes via the tick labels rather than boxplot(labels=...),
    # which is deprecated in recent matplotlib releases; boxes are drawn at
    # positions 1..N by default.
    box_ax.boxplot([reference_feature, current_feature])
    box_ax.set_xticks([1, 2])
    box_ax.set_xticklabels(['Reference', 'Current'])
    box_ax.set_title(f'Boxplot of {feature_name}')

    fig.tight_layout()
    plt.show()

    ks_statistic, p_value = stats.ks_2samp(reference_feature, current_feature)
    print(f"KS Statistic: {ks_statistic}, p-value: {p_value}")
    if p_value < significance_level:
        print("Warning: Significant drift detected!")
    else:
        print("No significant drift detected.")
    return float(ks_statistic), float(p_value)
# Demo on synthetic data: the "current" sample is drawn from a normal
# distribution with a shifted mean and a larger spread than the reference.
np.random.seed(42)  # fixed seed so the example output is reproducible
reference_data = pd.DataFrame(
    {'feature': np.random.normal(loc=0, scale=1, size=1000)}
)
current_data = pd.DataFrame(
    {'feature': np.random.normal(loc=0.5, scale=1.2, size=1000)}
)
detect_data_drift(reference_data, current_data, feature_name='feature')
