In [1]:
%cd ../

d:\personal_work\mlops-zoomcamp-final-project


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


# Load Model

In [2]:
from constants import SEED, MODEL_DIR, TEST_SIZE, MONITORING_ARTIFACT_DIR
from src.prepare_dataset import read_dataset, prepare_data, split_data
from src.model import Model
import numpy as np
import pandas as pd
import os
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metrics import ColumnDriftMetric, DatasetDriftMetric

# Create the monitoring artifact directory up front; exist_ok=True keeps this
# cell safe to re-run when the directory already exists.
os.makedirs(MONITORING_ARTIFACT_DIR, exist_ok=True)

In [3]:
# Restore the trained model and reuse the feature / target configuration it
# carries, so the data below is prepared with the model's own column names.
model = Model.from_model_dir(MODEL_DIR)
numeric_cols, target = model.numeric_cols, model.target

# Read the dataset and split it using the shared TEST_SIZE and SEED constants.
df = read_dataset()
X, y = prepare_data(df, numeric_cols=numeric_cols, target=target)
X_train, X_val, y_train, y_val = split_data(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Sanity check: display the shape of each split.
tuple(split.shape for split in (X_train, y_train, X_val, y_val))

((50104, 3), (50104,), (12526, 3), (12526,))

In [4]:
# save reference data
def get_data_df(x, columns=None, predictor=None):
    """Build a monitoring frame: the feature columns plus a ``prediction`` column.

    Parameters
    ----------
    x : array-like or DataFrame
        Feature data; handed directly to ``pd.DataFrame``.
    columns : sequence of str, optional
        Feature column names for the frame. Defaults to the notebook-level
        ``numeric_cols`` so existing one-argument calls behave as before.
    predictor : object exposing ``predict(frame)``, optional
        Model used to score the features. Defaults to the notebook-level
        ``model``.

    Returns
    -------
    pd.DataFrame
        A new frame containing ``columns`` followed by ``prediction``.
    """
    # Fall back to the notebook globals only when the caller did not pass
    # explicit values; this keeps the original call signature working while
    # letting the function be used (and tested) without hidden kernel state.
    if columns is None:
        columns = numeric_cols
    if predictor is None:
        predictor = model

    features = pd.DataFrame(x, columns=columns)
    # Score the feature-only frame, then attach the result as a new column
    # without mutating `features` in place.
    return features.assign(prediction=predictor.predict(features))

# The training split, scored by the model, is the reference frame; it is
# written to the monitoring artifact directory as reference.csv.
reference_df = get_data_df(X_train)
reference_path = MONITORING_ARTIFACT_DIR / "reference.csv"
reference_df.to_csv(reference_path, header=True, index=False)
reference_df.head()

Unnamed: 0,Humidity[%],Temperature[C],eCO2[ppm],prediction
0,57.52,14.39,400.0,1
1,42.16,28.56,400.0,0
2,48.61,21.896,400.0,1
3,44.72,28.12,412.0,1
4,49.29,-6.377,400.0,1


In [5]:
# Score the validation split the same way as the reference data; this frame is
# passed as `current_data` when the drift report is run.
current_df = get_data_df(X_val)
current_df.head()

Unnamed: 0,Humidity[%],Temperature[C],eCO2[ppm],prediction
0,49.88,-5.203,400.0,1
1,50.01,20.99,400.0,1
2,55.61,-5.234,400.0,1
3,52.5,0.992,701.0,1
4,54.14,12.97,406.0,1


In [6]:
# Declare the column roles for Evidently: the frames built above carry no
# target column, the model output is in 'prediction', and the model's numeric
# feature columns are the numerical features.
column_mapping = ColumnMapping(
    target=None,
    prediction='prediction',
    numerical_features=numeric_cols,
)

In [20]:
# One column-drift metric per monitored feature, followed by the dataset-level
# drift metric. The order is significant: later cells read
# result["metrics"] by position (0, 1, 2 for the columns, 3 for the dataset).
drift_columns = ("Temperature[C]", "Humidity[%]", "eCO2[ppm]")
report = Report(
    metrics=[
        *(ColumnDriftMetric(column_name=name) for name in drift_columns),
        DatasetDriftMetric(),
    ]
)

In [21]:
# Compare the validation-split frame (current) against the training-split
# frame (reference), using the column roles declared in `column_mapping`.
report.run(reference_data=reference_df, current_data=current_df, column_mapping=column_mapping)

In [1]:
# Optional: uncomment to render the full report inline in the notebook.
# report.show(mode="inline")

In [23]:
# Export the computed report as a dict so individual metric results can be
# read by key in the cells below.
result = report.as_dict()

In [17]:
# Temperature column drift score. Index 0 matches the first entry of the
# Report's metrics list: ColumnDriftMetric(column_name="Temperature[C]").
result["metrics"][0]['result']['drift_score']

0.008870631843781385

In [26]:
# Humidity column drift score. Index 1 matches the second entry of the
# Report's metrics list: ColumnDriftMetric(column_name="Humidity[%]").
result["metrics"][1]['result']['drift_score']

0.014511600725613015

In [27]:
# eCO2 column drift score. Index 2 matches the third entry of the Report's
# metrics list: ColumnDriftMetric(column_name="eCO2[ppm]").
result["metrics"][2]['result']['drift_score']

0.006778503350536544

In [28]:
# Number of drifted columns, read from the dataset-level result. Index 3
# matches the last entry of the Report's metrics list: DatasetDriftMetric().
result["metrics"][3]['result']['number_of_drifted_columns']

0