# Evidently Report

In [2]:
from evidently import Report, ColumnMapping
from evidently.metrics import ColumnDriftMetric, DatasetDriftMetric, DatasetMissingValuesMetric

import requests
import pandas as pd
import mlflow
from mlflow import MlflowClient

ModuleNotFoundError: No module named 'evidently'

In [None]:

def get_latest_version(model_name):
    """Return the latest version of a registered MLflow model in stage "None".

    Sends a POST request to the MLflow REST endpoint
    ``registered-models/get-latest-versions`` on the ``experiment-tracking``
    server, asking only for versions whose stage is ``"None"``.

    Args:
        model_name: Name of the registered model to look up.

    Returns:
        The ``version`` field of the last entry in the ``model_versions``
        list of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within 30 seconds.
        IndexError: If the response contains no model versions.
    """
    response = requests.post(
        'http://experiment-tracking:5000/api/2.0/mlflow/registered-models/get-latest-versions',
        json={
            'name': model_name,
            'stages': ["None"]
        },
        # Without a timeout, requests waits indefinitely on a dead server.
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of parsing an error body as if it
    # were a successful result.
    response.raise_for_status()

    latest_versions = response.json().get('model_versions', [])
    if not latest_versions:
        # Same exception type the bare index lookup would raise, but with a
        # message that says what is actually missing.
        raise IndexError(
            f"No versions in stage 'None' found for registered model {model_name!r}"
        )
    return latest_versions[-1]['version']

def get_dataframe(run_id, month):
    """Load the batch report CSV for one model run and month.

    The file is read from ``batch-data/report/students/<run_id>_<month>.csv``,
    relative to the current working directory. Adjust the path if the batch
    output is stored elsewhere.

    Args:
        run_id: Run identifier used as the file name prefix.
        month: Month label used as the file name suffix (e.g. ``"april"``).

    Returns:
        The CSV contents as a pandas DataFrame.
    """
    return pd.read_csv(f"batch-data/report/students/{run_id}_{month}.csv")



In [None]:


# Build a data-drift report comparing two monthly batches of predictions
# made by the latest registered version of the model.

# Point both the module-level MLflow API and an explicit client at the
# tracking server.
mlflow.set_tracking_uri("http://experiment-tracking:5000")
# NOTE(review): `client` is not used in the rest of this cell — confirm
# whether a later cell needs it.
client = MlflowClient("http://experiment-tracking:5000")
model_name = "rf-math-pass-predictor"
latest_version = get_latest_version(model_name)
model_latest = mlflow.pyfunc.load_model(f"models:/{model_name}/{latest_version}")

# Assume the batch pipeline produces reports for different months.
# Use the correct run_id of the model (or read it from a batch file).
run_id = model_latest.metadata.run_id

# Load the batch prediction results for the months to compare.
df_april = get_dataframe(run_id, "april")
df_june  = get_dataframe(run_id, "june")

# Adjust the feature lists to match your dataset.
num_features = []  # For example: []
cat_features = ["gender", "race/ethnicity", "parental level of education", "lunch", "test preparation course"]

# Tell Evidently which columns hold the target, the prediction and the
# numerical/categorical features.
column_mapping = ColumnMapping(
    target="pass_math",
    prediction="pass_math_pred",
    numerical_features=num_features,
    categorical_features=cat_features
)

# Metrics included in the report: drift of the prediction column, drift
# over the dataset, and missing values.
report = Report(
    metrics=[
        ColumnDriftMetric(column_name='pass_math_pred'),
        DatasetDriftMetric(),
        DatasetMissingValuesMetric()
    ]
)

# April is the reference batch; June is the current batch compared against it.
report.run(reference_data=df_april, current_data=df_june, column_mapping=column_mapping)
report.show()