Complete Guide to Effortless ML Monitoring with Evidently.ai

In [None]:
!pip install evidently

Integrate Evidently with Grafana Dashboard

In [None]:
# Evidently can be integrated with a Grafana dashboard; a PostgreSQL database
# is used to store the metric results.
#
# Docker Compose file declaring all the services this setup depends on.

version: '3.7'

networks:
  back-tier:
  front-tier:

volumes:
  grafana_data: {}

services:
  # PostgreSQL server, published on host port 5432 (back-tier network only).
  db:
    image: postgres
    environment:
      POSTGRES_PASSWORD: example
    networks:
      - back-tier
    ports:
      - "5432:5432"
    restart: always

  # Adminer container, published on host port 8080 and attached to both networks.
  adminer:
    image: adminer
    networks:
      - back-tier
      - front-tier
    ports:
      - "8080:8080"
    restart: always

  # Grafana 8.5.21 running as user 472, published on host port 3000.
  # The two provisioning files are mounted read-only; ./dashboards is mounted as-is.
  grafana:
    image: grafana/grafana:8.5.21
    user: "472"
    networks:
      - back-tier
      - front-tier
    ports:
      - "3000:3000"
    restart: always
    volumes:
      - ./config/grafana_datasources.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro
      - ./config/grafana_dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:ro
      - ./dashboards:/opt/grafana/dashboards

In [None]:
# Step 1: Import Necessary Libraries
import datetime
import logging
import time

import pandas as pd
import psycopg
from evidently.metric_preset import DataQualityPreset
from evidently.metrics import ColumnDriftMetric, DatasetDriftMetric
from evidently.report import Report
from evidently.test_preset import DataQualityTestPreset
from sklearn import datasets

In [None]:
# Step 2: Configure Logging Settings
# Root logger: INFO and above, rendered as "<time> [<LEVEL>]: <message>"
LOG_FORMAT = "%(asctime)s [%(levelname)s]: %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

In [None]:
# Step 3: Define SQL Statement to Create a Table for Storing Drift Metrics
# SQL script that drops any existing drift_metrics table and recreates it with
# a timestamp column plus the two float metric columns. The closing triple
# quote is required: without it the string literal never terminates.
create_table_statement = """
drop table if exists drift_metrics;
create table drift_metrics(
	timestamp timestamp,
	target_drift float,
	share_drifted_columns float
)
"""

In [None]:
# Step 4: Read Dataset

# Load DelayedFlights.csv (path is relative to the working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer="DelayedFlights.csv")

In [None]:
# Step 5: Define Reference and Production Simulation Data
# Row positions that delimit the two samples taken from df
REFERENCE_START, REFERENCE_STOP = 5000, 5500
PRODUCTION_START = 7000

# Rows 5000..5499 are the reference sample; everything from row 7000 on
# stands in for production traffic and is consumed in batches of 50 rows.
mini_batch_size = 50
reference_data = df[REFERENCE_START:REFERENCE_STOP]
prod_simulation_data = df[PRODUCTION_START:]

In [None]:
# Step 6: Prepare Database for Storing Drift Metrics
# Function to prepare database for storing drift metrics
def prep_db():
    """Ensure the 'test' database exists and (re)create the drift_metrics table.

    Opens an autocommit connection to the local PostgreSQL server, creates the
    'test' database when pg_database has no entry for it, then, while that
    first connection is still open, connects to 'test' and executes
    create_table_statement.
    """
    server_dsn = "host=localhost port=5432 user=postgres password=example"
    test_db_dsn = "host=localhost port=5432 dbname=test user=postgres password=example"

    with psycopg.connect(server_dsn, autocommit=True) as server_conn:
        lookup = server_conn.execute("SELECT 1 FROM pg_database WHERE datname='test'")
        database_missing = not lookup.fetchall()
        if database_missing:
            server_conn.execute("create database test;")

        # Second connection targets the 'test' database itself
        with psycopg.connect(test_db_dsn) as test_conn:
            test_conn.execute(create_table_statement)

In [None]:
# Step 7: Calculate Drift Metrics and Store them in PostgreSQL

# Function to calculate drift metrics and store them in PostgreSQL
def calulate_metrics_postgresql(curr, i, target_column="ArrDelay"):
    """Compute drift metrics for mini-batch ``i`` and insert them into drift_metrics.

    The report is built from ``DatasetDriftMetric`` and ``ColumnDriftMetric``
    because the values read back below ('share_of_drifted_columns' and
    'drift_score') are produced by those two metrics, in that order.

    Args:
        curr: Open database cursor; only its ``execute`` method is used.
        i: Zero-based index of the mini-batch taken from prod_simulation_data.
        target_column: Column whose drift score is stored as ``target_drift``.
            NOTE(review): the "ArrDelay" default assumes that is the target
            column of DelayedFlights.csv -- confirm, or pass the right name.
    """
    # Rows [i * mini_batch_size, (i + 1) * mini_batch_size) of the production sample
    batch_start = i * mini_batch_size
    current_batch = prod_simulation_data[batch_start : batch_start + mini_batch_size]

    # Metric order matters: the result list is indexed by position below
    report = Report(metrics=[
        DatasetDriftMetric(),
        ColumnDriftMetric(column_name=target_column),
    ])

    # Compare the current mini-batch against the reference sample
    report.run(reference_data=reference_data, current_data=current_batch)
    result = report.as_dict()

    # metrics[0] -> DatasetDriftMetric, metrics[1] -> ColumnDriftMetric
    share_drifted_columns = result['metrics'][0]['result']['share_of_drifted_columns']
    target_drift = result['metrics'][1]['result']['drift_score']

    # Insert one row per mini-batch, stamped with the current local time
    curr.execute(
        "insert into drift_metrics(timestamp, target_drift, share_drifted_columns) values (%s, %s, %s)",
        (datetime.datetime.now(), target_drift, share_drifted_columns)
    )

In [None]:
# Step 8: Perform Batch Monitoring and Backfill Drift Metrics into PostgreSQL
# Function to perform batch monitoring and backfill drift metrics into PostgreSQL
def batch_monitoring_backfill(num_batches=50, pause_seconds=3):
    """Prepare the database, then compute and store metrics batch by batch.

    Calls prep_db() first, opens one autocommit connection to the 'test'
    database and, for each batch index, hands a fresh cursor to
    calulate_metrics_postgresql(). After every batch a progress line is
    logged and the loop sleeps.

    Args:
        num_batches: Number of consecutive mini-batches to process (indices
            0 .. num_batches - 1). Defaults to 50, the previously fixed value.
        pause_seconds: Seconds to sleep after each batch. Defaults to 3,
            the previously fixed value.
    """
    # Prepare the database
    prep_db()
    # Connect to the 'test' database and iterate over mini-batches of data
    with psycopg.connect("host=localhost port=5432 dbname=test user=postgres password=example", autocommit=True) as conn:
        for batch_index in range(num_batches):
            # A new cursor per batch; it is closed as soon as the insert is done
            with conn.cursor() as curr:
                calulate_metrics_postgresql(curr, batch_index)
            # Log progress and wait before processing the next mini-batch
            logging.info("data sent")
            time.sleep(pause_seconds)

In [None]:
# Step 9: Execute the Project
# Entry point: start the backfill only when this code runs as the main module
# (the guard keeps it from running when the file is imported instead).
if __name__ == '__main__':
    batch_monitoring_backfill()

In [None]:
# Build and start the services from the compose file ("up" is a subcommand of
# "docker compose", not part of its name), then run the monitoring script.
# "up" stays attached to the terminal, so run the script from a second one.

docker compose up --build
python grafana.py