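# In [None]:
# Install the dependencies before running this script (from a shell, or as
# `%pip install ...` inside a notebook cell):
#   pip install pandas scikit-learn evidently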
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, DataQualityPreset

# Step 1: Read both datasets from CSV files in the working directory.
#   train.csv      -> training / reference data
#   production.csv -> current / production data
train_data = pd.read_csv("train.csv")
prod_data = pd.read_csv("production.csv")

# Step 2: Split each frame into its features (every column except "target")
# and its label series (the "target" column). `drop` returns a new frame, so
# train_data / prod_data themselves keep the "target" column.
X_train, y_train = train_data.drop("target", axis=1), train_data["target"]
X_prod, y_prod = prod_data.drop("target", axis=1), prod_data["target"]

# Step 3: Fit a random-forest classifier on the training features and labels.
# The seed is pinned so repeated runs build the same forest; without it the
# accuracy figures below change from run to run, and that noise can be
# mistaken for a real train-vs-production difference.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 4: Score the fitted model on both datasets.
# NOTE: train_acc is computed on the same rows the model was fitted on, so it
# is an optimistic figure rather than a held-out estimate.
train_acc = accuracy_score(y_train, model.predict(X_train))
prod_acc = accuracy_score(y_prod, model.predict(X_prod))

# Report both accuracies rounded to two decimal places.
print(f"Training Accuracy: {train_acc:.2f}")
print(f"Production Accuracy: {prod_acc:.2f}")

# Step 5: Build an Evidently report combining the data-drift and data-quality
# metric presets, then compare the production features against the training
# features (the "target" column is not part of either frame passed in).
metric_presets = [DataDriftPreset(), DataQualityPreset()]
report = Report(metrics=metric_presets)

# Training features act as the reference; production features are "current".
report.run(
    reference_data=X_train,
    current_data=X_prod,
)

# Write the rendered report to an HTML file in the working directory.
report.save_html("data_drift_and_quality_report.html")

print("Report saved as data_drift_and_quality_report.html")