### Detect Data Drift using Kolmogorov-Smirnov Test
**Description**: Use the two-sample Kolmogorov-Smirnov test to detect data drift between a reference dataset and a new dataset.

In [None]:
# write your code from here
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ks_2samp

# 1. Build the two samples to compare: a reference batch and a newer batch
np.random.seed(0)  # fixed seed so every run draws the same samples

n_samples = 1000

# Baseline sample drawn from N(mean=100, std=15)
reference_data = np.random.normal(100, 15, n_samples)

# Incoming sample drawn from N(mean=110, std=20): both the mean and the
# spread differ from the baseline, so there is drift for the test to find
new_data = np.random.normal(110, 20, n_samples)

# 2. Visualize the distributions
# Both histograms share one set of bin edges computed from the pooled samples.
# Passing bins=30 to each hist() call separately would give every dataset its
# own edges and bar widths, so the overlaid bars would not line up and could
# not be compared bin by bin.
shared_bins = np.histogram_bin_edges(
    np.concatenate((reference_data, new_data)), bins=30
)

plt.figure(figsize=(10, 6))
# density=True normalizes each histogram to unit area, so the two samples are
# comparable on the same y-axis; alpha < 1 keeps the overlap visible.
plt.hist(reference_data, bins=shared_bins, alpha=0.6, label='Reference Data', density=True)
plt.hist(new_data, bins=shared_bins, alpha=0.6, label='New Data', density=True)
plt.title('Distribution Comparison')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.grid(True)
plt.show()

# 3. Perform Kolmogorov-Smirnov test
# ks_2samp runs the two-sample KS test and returns the test statistic together
# with the p-value for the null hypothesis that both samples come from the
# same distribution.
ks_statistic, p_value = ks_2samp(reference_data, new_data)

print(f"Kolmogorov-Smirnov Test Statistic: {ks_statistic:.4f}")
# General format with 4 significant digits: a fixed ".4f" would print any
# p-value below 0.00005 as "0.0000" and hide how strong the evidence is.
print(f"P-value: {p_value:.4g}")

# 4. Interpret results
alpha = 0.05  # significance level the p-value is compared against

# Drift is reported only when the p-value falls strictly below alpha.
drift_detected = p_value < alpha
verdict = (
    "❌ Data drift detected: distributions are significantly different."
    if drift_detected
    else "✅ No significant data drift detected: distributions are similar."
)
print(verdict)
