In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset

# Step 1: Load the reference (training) and current (production) datasets.
# Both CSV files must contain a "target" column; it is split off below.
train_data, prod_data = (
    pd.read_csv(csv_path) for csv_path in ("train.csv", "production.csv")
)

# Step 2: Separate the feature columns (X_*) from the label column (y_*)
X_train, y_train = train_data.drop(columns=["target"]), train_data["target"]
X_prod, y_prod = prod_data.drop(columns=["target"]), prod_data["target"]

# Step 3: Train a model
# A fixed random_state makes the forest reproducible, so differences in the
# accuracies printed below come from the data rather than from random seeding.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 4: Evaluate model on both datasets
# NOTE: train accuracy is measured on the same rows the model was fit on, so it
# is an optimistic reference point, not an estimate of generalisation.
train_pred = model.predict(X_train)
# Select production features by the training column names so that a production
# file with reordered or extra columns is still scored against the features
# the model was fit on. A missing training column raises KeyError here.
prod_pred = model.predict(X_prod[X_train.columns])

print("Train Accuracy:", accuracy_score(y_train, train_pred))
print("Production Accuracy:", accuracy_score(y_prod, prod_pred))

# Step 5: Check for data drift
# Build an Evidently report with the data-drift preset, using the training
# features as the reference dataset and the production features as the
# current dataset. Only the feature frames are passed, so the target column
# is not part of this drift check.
drift_report = Report(metrics=[DataDriftPreset()])
drift_report.run(reference_data=X_train, current_data=X_prod)
# Render the report inside the notebook output cell.
# NOTE(review): "dashboard" does not appear to be an accepted `mode` value for
# Report.show -- confirm against the installed Evidently version. To view the
# report in a browser, writing it out with drift_report.save_html(...) and
# opening the file is presumably the supported route.
drift_report.show(mode="inline")