### Data Drift: Detection Using Statistical Tests
**Question**: Simulate two datasets with a shift in distribution, and apply the
Kolmogorov-Smirnov test to determine if data drift has occurred.

In [None]:
import numpy as np
from scipy.stats import ks_2samp
import matplotlib.pyplot as plt

# Step 1: Simulate two datasets
# Seed NumPy's global generator so both samples are reproducible.
np.random.seed(42)

# Draw the samples in order from the seeded global generator:
#   data1 - baseline sample ~ Normal(0, 1)
#   data2 - drifted sample  ~ Normal(0.5, 1.5) (shift in mean and std)
# The generator is consumed left to right during unpacking, so data1 is
# drawn before data2.
data1, data2 = (
    np.random.normal(loc=mu, scale=sigma, size=1000)
    for mu, sigma in ((0, 1), (0.5, 1.5))
)

# Step 2: Visualize both distributions
# Overlay the two histograms on the same axes; the semi-transparent bars
# (alpha=0.5) keep the overlapping region of both samples visible.
for sample, legend_label in (
    (data1, 'Dataset 1 (Baseline)'),
    (data2, 'Dataset 2 (Drifted)'),
):
    plt.hist(sample, bins=30, alpha=0.5, label=legend_label)

plt.title("Distribution Comparison")
plt.legend()
plt.show()

# Step 3: Apply Kolmogorov-Smirnov test
# ks_2samp compares the two samples and returns the KS statistic together
# with the p-value that is checked against the significance level below.
ks_stat, p_value = ks_2samp(data1, data2)

print(f"KS Statistic: {ks_stat:.4f}")
# Use general format rather than fixed-point: with ".4f" a very small
# p-value is rounded to "0.0000", which reads as an exact zero. ".4g" keeps
# four significant digits and switches to scientific notation when needed.
print(f"P-value: {p_value:.4g}")

# Step 4: Interpret the result
# Significance level: a p-value below this threshold is reported as drift.
alpha = 0.05
print(
    "Result: Reject the null hypothesis - Distributions are different (Data drift detected)"
    if p_value < alpha
    else "Result: Fail to reject the null hypothesis - No evidence of data drift"
)