In [5]:
# Install required libraries
!pip install alibi-detect pandas pyarrow pyngrok ace_tools

Collecting ace_tools
  Downloading ace_tools-0.0-py3-none-any.whl.metadata (300 bytes)
Downloading ace_tools-0.0-py3-none-any.whl (1.1 kB)
Installing collected packages: ace_tools
Successfully installed ace_tools-0.0


In [12]:
import pandas as pd
from alibi_detect.cd import TabularDrift
from pyngrok import ngrok

# Base URL of the raw dataset files in the GitHub repository
github_base_url = "https://raw.github.com/Guardian99/ISB-CT1_GR03/main/datasets/"

# Load the reference (train) and production datasets straight from GitHub
train_data = pd.read_parquet(f"{github_base_url}train.parquet")
prod_data = pd.read_parquet(f"{github_base_url}prod.parquet")

# Select the same columns, in the same order, from both frames so that
# column i refers to the same feature in both arrays handed to the detector.
common_cols = train_data.columns.intersection(prod_data.columns)
if common_cols.empty:
    # Fail here with a clear message instead of passing zero-width arrays on.
    raise ValueError(
        "train.parquet and prod.parquet share no columns; "
        "drift detection needs at least one common feature"
    )
train_data = train_data[common_cols]
prod_data = prod_data[common_cols]

# Convert to NumPy arrays for drift detection
train_data_np = train_data.to_numpy()
prod_data_np = prod_data.to_numpy()

# Build the drift detector with the training data as the reference sample.
# p_val=0.05 is the significance level.
# NOTE(review): no categories_per_feature is passed; per the alibi-detect docs
# this presumably means every column is tested as numerical - confirm that is
# intended for columns such as Education, City and Gender.
drift_detector = TabularDrift(x_ref=train_data_np, p_val=0.05)

# Score the production data against the reference sample
drift_results = drift_detector.predict(prod_data_np)

# Pull the fields used for reporting out of the prediction payload
drift_data = drift_results["data"]
feature_drift = drift_data["is_drift"]  # overall drift flag for the whole batch, despite the name
p_values = drift_data["p_val"]          # p-values, one per feature
# Threshold reported by the detector. The recorded run shows 0.005556, which is
# presumably 0.05 corrected across the 9 features - TODO confirm.
thresholds = drift_data["threshold"]

# Build the per-feature results table
results_df = pd.DataFrame({
    "Feature": train_data.columns,
    "P-Value": p_values,
    "Threshold": thresholds,
})

# Flag a feature when its p-value falls below the threshold reported by the
# detector (the value shown in the "Threshold" column) rather than a separately
# hard-coded 0.05, so the flag always agrees with the threshold printed beside it.
results_df["Drift Detected"] = (
    (results_df["P-Value"] < results_df["Threshold"]).map({True: "Yes", False: "No"})
)

# Report the overall drift status returned by the detector
overall_drift_status = "Yes" if feature_drift else "No"
print(f"Overall Drift Detected: {overall_drift_status}")

# Display the results in a tabular format
print(results_df)


Overall Drift Detected: No
                     Feature   P-Value  Threshold Drift Detected
0                  Education  0.998159   0.005556             No
1                JoiningYear  0.170186   0.005556             No
2                       City  0.992152   0.005556             No
3                PaymentTier  1.000000   0.005556             No
4                        Age  0.255043   0.005556             No
5                     Gender  0.775182   0.005556             No
6                EverBenched  0.999620   0.005556             No
7  ExperienceInCurrentDomain  1.000000   0.005556             No
8                 LeaveOrNot  1.000000   0.005556             No


