### Detect Data Drift
**Description**: Data drift occurs when the statistical properties of your data change over time. Learn to detect data drift using visualizations and statistical tests.

In [None]:
# Write your code from here

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.metrics import mutual_info_score
from scipy.spatial import distance
def _clean_feature(data, feature_name):
    """Return the named feature as a 1-D float array with missing values dropped."""
    return pd.Series(data[feature_name]).dropna().to_numpy(dtype=float)


def _binned_divergences(reference_values, current_values, bins):
    """Return (Jensen-Shannon distance, mutual information) from shared-bin histograms."""
    # Both samples must be binned on the SAME edges, otherwise the two
    # histograms are not comparable as probability vectors.
    edges = np.histogram_bin_edges(
        np.concatenate([reference_values, current_values]), bins=bins
    )
    ref_counts, _ = np.histogram(reference_values, bins=edges)
    cur_counts, _ = np.histogram(current_values, bins=edges)

    # jensenshannon() normalises the count vectors to probabilities itself.
    js_dist = float(distance.jensenshannon(ref_counts, cur_counts))
    if np.isnan(js_dist):
        # NOTE(review): with non-empty histograms a NaN here should only be a
        # rounding-level negative divergence for (near-)identical inputs;
        # treated as zero distance.
        js_dist = 0.0

    # Mutual information between "which dataset a sample came from" and
    # "which bin it fell in": I = H(dataset) + H(bin) - H(dataset, bin).
    # It is 0 when the binned distributions match and grows with drift.
    joint = np.vstack([ref_counts, cur_counts])
    mi_score = (
        stats.entropy(joint.sum(axis=1))
        + stats.entropy(joint.sum(axis=0))
        - stats.entropy(joint.ravel())
    )
    # Clamp away tiny negative values caused by floating-point rounding.
    return js_dist, max(float(mi_score), 0.0)


def _plot_drift_diagnostics(reference_values, current_values, feature_name):
    """Draw the 2x2 diagnostic figure: KDE overlay, boxplots and two Q-Q plots."""
    fig, axes = plt.subplots(2, 2, figsize=(18, 12))
    kde_ax, box_ax, ref_qq_ax, cur_qq_ax = axes.ravel()

    # `fill=True` replaces the deprecated `shade=True` keyword.
    sns.kdeplot(reference_values, label='Reference', fill=True, ax=kde_ax)
    sns.kdeplot(current_values, label='Current', fill=True, ax=kde_ax)
    kde_ax.set_title(f'KDE Plot of {feature_name}')
    kde_ax.set_xlabel(feature_name)
    kde_ax.legend()

    # Tick labels are set explicitly instead of through boxplot's `labels=`
    # keyword, which newer matplotlib releases deprecate.
    box_ax.boxplot([reference_values, current_values])
    box_ax.set_xticks([1, 2])
    box_ax.set_xticklabels(['Reference', 'Current'])
    box_ax.set_title('Boxplot Comparison')

    stats.probplot(reference_values, dist="norm", plot=ref_qq_ax)
    ref_qq_ax.set_title('Reference Data Q-Q Plot')
    stats.probplot(current_values, dist="norm", plot=cur_qq_ax)
    cur_qq_ax.set_title('Current Data Q-Q Plot')

    fig.tight_layout()
    plt.show()


def detect_data_drift(reference_data, current_data, feature_name, *,
                      bins=10, alpha=0.05, js_threshold=0.1, show_plots=True):
    """Compare one numeric feature across two datasets and report drift.

    Missing values are dropped from both samples, the samples may have
    different lengths, and the following are computed and printed:

    * two-sample Kolmogorov-Smirnov test,
    * Welch's t-test (does not assume equal variances),
    * mutual information between dataset membership and histogram bin,
    * Jensen-Shannon distance between the two histograms.

    The last two are histogram based and are computed on bin edges shared by
    both samples; they therefore depend on ``bins``.

    Args:
        reference_data: DataFrame (or other mapping of column name to
            values) holding the baseline sample.
        current_data: Same kind of object holding the sample to check.
        feature_name: Column/key of the numeric feature to compare.
        bins: Bin specification forwarded to ``numpy.histogram_bin_edges``.
        alpha: Significance level applied to the KS and t-test p-values.
        js_threshold: Jensen-Shannon distance above which drift is flagged.
        show_plots: When true, draw the KDE / boxplot / Q-Q diagnostic figure.

    Returns:
        dict with keys ``ks_stat``, ``ks_p``, ``t_stat``, ``t_p``,
        ``mi_score``, ``js_dist`` (floats) and ``drift_detected`` (bool).

    Raises:
        KeyError: If ``feature_name`` is missing from either input
            (propagated from the column lookup).
        ValueError: If either sample has no non-missing values.
    """
    reference_feature = _clean_feature(reference_data, feature_name)
    current_feature = _clean_feature(current_data, feature_name)
    if reference_feature.size == 0 or current_feature.size == 0:
        raise ValueError(
            f"Feature {feature_name!r} has no non-missing values in one of the datasets"
        )

    if show_plots:
        _plot_drift_diagnostics(reference_feature, current_feature, feature_name)

    ks_stat, ks_p = stats.ks_2samp(reference_feature, current_feature)
    # Welch's variant: drifted data frequently changes variance as well as mean.
    t_stat, t_p = stats.ttest_ind(reference_feature, current_feature, equal_var=False)
    js_dist, mi_score = _binned_divergences(reference_feature, current_feature, bins)

    print("Statistical Tests Results:")
    print(f"KS Test: Statistic={ks_stat:.4f}, p-value={ks_p:.4f}")
    print(f"T-Test: Statistic={t_stat:.4f}, p-value={t_p:.4f}")
    print(f"Mutual Information Score: {mi_score:.4f}")
    print(f"Jensen-Shannon Distance: {js_dist:.4f}")

    drift_detected = False
    if ks_p < alpha:
        print(f"KS Test: Significant drift detected (p < {alpha})")
        drift_detected = True
    if t_p < alpha:
        print(f"T-Test: Significant mean difference detected (p < {alpha})")
        drift_detected = True
    if js_dist > js_threshold:
        print(f"JS Distance: Significant distribution change (JS > {js_threshold})")
        drift_detected = True

    if not drift_detected:
        print("No significant drift detected across all metrics")

    return {
        "ks_stat": float(ks_stat),
        "ks_p": float(ks_p),
        "t_stat": float(t_stat),
        "t_p": float(t_p),
        "mi_score": mi_score,
        "js_dist": js_dist,
        "drift_detected": drift_detected,
    }
# Demo: draw a synthetic baseline sample and a shifted, wider "current"
# sample, then run the drift report on them.
np.random.seed(42)  # fixed seed so the demo output is reproducible
ref_data = pd.DataFrame(
    {'feature': np.random.normal(loc=0, scale=1, size=1000)}
)
curr_data = pd.DataFrame(
    {'feature': np.random.normal(loc=0.5, scale=1.2, size=1000)}
)
detect_data_drift(
    reference_data=ref_data,
    current_data=curr_data,
    feature_name='feature',
)