# 📊 Evidently AI Monitoring Dashboard
### Road Accident Risk Prediction - Model Monitoring

This notebook provides comprehensive model monitoring using Evidently AI:
- **Data Drift Detection**: Track changes in feature distributions
- **Model Performance**: Monitor prediction quality over time
- **Target Drift**: Detect shifts in target variable
- **Interactive Visualizations**: Explore detailed drift reports

## 1️⃣ Setup & Imports

In [6]:
# Import required libraries
import pandas as pd
import numpy as np
import pickle
import warnings
warnings.filterwarnings('ignore')

# Evidently imports (version 0.7.15)
from evidently import Report
from evidently.presets import DataDriftPreset
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

## 2️⃣ Load Data & Model

In [7]:
# Load reference data (training set)
reference_data = pd.read_csv('../artifacts/data_transformation/train.csv')
print(f"📊 Reference Data Shape: {reference_data.shape}")

# Load current data (test set)
current_data = pd.read_csv('../artifacts/data_transformation/test.csv')
print(f"📊 Current Data Shape: {current_data.shape}")

# Load trained model
with open('../artifacts/model_trainer/model.pkl', 'rb') as f:
    model = pickle.load(f)
print(f"✅ Model Loaded: {type(model).__name__}")

# Display sample data
print("\n📋 Reference Data Sample:")
reference_data.head()

📊 Reference Data Shape: (8000, 23)
📊 Current Data Shape: (2000, 23)
✅ Model Loaded: GradientBoostingRegressor

📋 Reference Data Sample:


Unnamed: 0,road_type,num_lanes,curvature,speed_limit,lighting,weather,road_signs_present,public_road,time_of_day,holiday,...,curvature_speed,lanes_curvature,high_speed,high_curvature,few_lanes,no_signs,holiday_risk,speed_category,curvature_category,accident_risk
0,1.250168,-0.438733,1.659484,-1.330971,-1.209673,0.045897,-1.012579,0.984618,1.222311,-0.998751,...,0.061086,0.736842,-0.752647,1.021481,0.993769,1.012579,-0.998751,-0.352398,-1.118407,0.37
1,-1.199941,1.357052,-1.069337,-0.696892,0.039344,-1.194564,-1.012579,0.984618,1.222311,1.001251,...,-1.015093,-0.452685,-0.752647,-0.978971,-1.00627,1.012579,1.001251,-0.352398,0.073925,0.06
2,1.250168,1.357052,-1.40122,-0.062813,1.288361,1.286358,-1.012579,-1.015622,1.222311,-0.998751,...,-1.1488,-0.849194,-0.752647,-0.978971,-1.00627,1.012579,-0.998751,1.027525,0.073925,0.25
3,0.025114,-0.438733,-0.147438,0.888304,-1.209673,0.045897,-1.012579,0.984618,0.001526,1.001251,...,0.289366,-0.342544,1.328644,-0.978971,0.993769,1.012579,1.001251,1.027525,1.266256,0.45
4,0.025114,1.357052,-0.884957,1.522383,0.039344,1.286358,-1.012579,0.984618,-1.219259,-0.998751,...,-0.330252,-0.232402,1.328644,-0.978971,-1.00627,1.012579,-0.998751,-1.73232,0.073925,0.42


## 3️⃣ Configure Column Mapping

In [8]:
# Identify target column
target = 'accident_risk'

# Separate features and target
X_ref = reference_data.drop(target, axis=1)
y_ref = reference_data[target]

X_cur = current_data.drop(target, axis=1)
y_cur = current_data[target]

# Make predictions
reference_data['prediction'] = model.predict(X_ref)
current_data['prediction'] = model.predict(X_cur)

print(f"✅ Added predictions to datasets")
print(f"📊 Reference predictions: {reference_data['prediction'].describe()}")
print(f"📊 Current predictions: {current_data['prediction'].describe()}")

✅ Added predictions to datasets
📊 Reference predictions: count    8000.000000
mean        0.353014
std         0.157303
min         0.014402
25%         0.242476
50%         0.341281
75%         0.454247
max         0.882884
Name: prediction, dtype: float64
📊 Current predictions: count    2000.000000
mean        0.347007
std         0.154588
min         0.020027
25%         0.240471
50%         0.330815
75%         0.446711
max         0.860328
Name: prediction, dtype: float64


In [10]:
# Identify numerical and categorical features
numerical_features = X_ref.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X_ref.select_dtypes(include=['object', 'category']).columns.tolist()

print(f"🔢 Numerical Features ({len(numerical_features)}): {numerical_features[:5]}...")
print(f"📝 Categorical Features ({len(categorical_features)}): {categorical_features}")

🔢 Numerical Features (22): ['road_type', 'num_lanes', 'curvature', 'speed_limit', 'lighting']...
📝 Categorical Features (0): []


## 4️⃣ Data Drift Report
### Detect changes in feature distributions between reference and current data

In [12]:
# Generate data drift report
data_drift_report = Report(metrics=[DataDriftPreset()])

# Run report (returns Snapshot object)
drift_snapshot = data_drift_report.run(
    current_data=current_data,
    reference_data=reference_data
)

In [None]:
# Display the interactive report in notebook
from IPython.display import HTML

# Get HTML string and display (as_iframe=False for inline display)
html_str = drift_snapshot.get_html_str(as_iframe=False)
display(HTML(html_str))

## 5️⃣ Save Report to HTML File

In [15]:
# Save drift report to HTML file
drift_html = '../artifacts/monitoring/data_drift_report.html'
drift_snapshot.save_html(drift_html)

print(f"✅ Report saved to: {drift_html}")

✅ Report saved to: ../artifacts/monitoring/data_drift_report.html


## 7️⃣ Feature-Level Drift Analysis
### Examine drift for individual features

In [18]:
# Calculate basic drift statistics manually
print("\n📈 Feature Drift Analysis (Mean % Change):\n")

# Calculate mean changes for numerical features
ref_stats = reference_data[numerical_features].describe()
cur_stats = current_data[numerical_features].describe()

drift_analysis = []
for col in numerical_features:
    ref_mean = reference_data[col].mean()
    cur_mean = current_data[col].mean()
    
    if ref_mean != 0:
        pct_change = abs((cur_mean - ref_mean) / ref_mean * 100)
    else:
        pct_change = 0
    
    drift_analysis.append({
        'Feature': col,
        'Ref Mean': ref_mean,
        'Cur Mean': cur_mean,
        '% Change': pct_change,
        'Status': '🔴' if pct_change > 10 else '🟡' if pct_change > 5 else '🟢'
    })

drift_df = pd.DataFrame(drift_analysis).sort_values('% Change', ascending=False)
display(drift_df.head(15))


📈 Feature Drift Analysis (Mean % Change):



Unnamed: 0,Feature,Ref Mean,Cur Mean,% Change,Status
10,school_season,-1.332268e-18,0.01225,9.194953e+17,🔴
5,weather,6.217249e-18,-0.018607,2.992789e+17,🔴
0,road_type,-3.463896e-17,0.04839,1.396972e+17,🔴
15,high_speed,5.861978000000001e-17,-0.055414,9.453188e+16,🔴
16,high_curvature,3.552714e-18,0.003251,9.150003e+16,🔴
3,speed_limit,-8.437695000000001e-17,-0.043791,5.189931e+16,🔴
21,curvature_category,7.105427e-18,0.003577,5.034172e+16,🔴
11,num_reported_accidents,-8.881784000000001e-17,0.0328,3.692913e+16,🔴
12,lanes_speed,-8.526513e-17,-0.030559,3.583998e+16,🔴
7,public_road,8.748557e-17,-0.024503,2.800798e+16,🔴


## 8️⃣ Model Performance Analysis
### Calculate and compare model metrics

In [19]:
# Calculate model performance metrics
print("🎯 MODEL PERFORMANCE COMPARISON\n")
print("="*70)

# Reference data metrics
ref_mae = mean_absolute_error(reference_data[target], reference_data['prediction'])
ref_rmse = np.sqrt(mean_squared_error(reference_data[target], reference_data['prediction']))
ref_r2 = r2_score(reference_data[target], reference_data['prediction'])

# Current data metrics
cur_mae = mean_absolute_error(current_data[target], current_data['prediction'])
cur_rmse = np.sqrt(mean_squared_error(current_data[target], current_data['prediction']))
cur_r2 = r2_score(current_data[target], current_data['prediction'])

# Create comparison dataframe
performance_df = pd.DataFrame({
    'Metric': ['MAE', 'RMSE', 'R² Score'],
    'Reference (Train)': [ref_mae, ref_rmse, ref_r2],
    'Current (Test)': [cur_mae, cur_rmse, cur_r2]
})

performance_df['Change %'] = (
    (performance_df['Current (Test)'] - performance_df['Reference (Train)']) / 
    performance_df['Reference (Train)'] * 100
)

display(performance_df)

print("\n" + "="*70)
print("💡 Interpretation:")
if abs(performance_df.loc[2, 'Change %']) < 5:
    print("  ✅ Model performance is consistent across datasets")
else:
    print("  ⚠️  Significant performance difference detected")

🎯 MODEL PERFORMANCE COMPARISON



Unnamed: 0,Metric,Reference (Train),Current (Test),Change %
0,MAE,0.039693,0.045789,15.355733
1,RMSE,0.050566,0.05865,15.987666
2,R² Score,0.908351,0.876007,-3.560722



💡 Interpretation:
  ✅ Model performance is consistent across datasets


---
## 📚 Additional Resources

- **Evidently Documentation**: https://docs.evidentlyai.com/
- **Dashboard Guide**: https://docs.evidentlyai.com/docs/platform/dashboard_overview
- **GitHub Repository**: https://github.com/evidentlyai/evidently

---