In [1]:
%cd ../

d:\personal_work\mlops-zoomcamp-final-project


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


# Load Model

In [2]:
from constants import SEED, MODEL_DIR, TEST_SIZE, MONITORING_ARTIFACT_DIR
from src.prepare_dataset import read_dataset, prepare_data, split_data
from src.model import Model
import numpy as np
import pandas as pd
import os
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metrics import ColumnDriftMetric, DatasetDriftMetric

os.makedirs(MONITORING_ARTIFACT_DIR, exist_ok=True)

In [3]:
model = Model.from_model_dir(MODEL_DIR)
numeric_cols = model.numeric_cols
target = model.target

df = read_dataset()
X, y = prepare_data(df, numeric_cols=numeric_cols, target=target)
X_train, X_val, y_train, y_val = split_data(X, y, test_size=TEST_SIZE, random_state=SEED)

X_train.shape, y_train.shape, X_val.shape, y_val.shape

((50104, 2), (50104,), (12526, 2), (12526,))

In [4]:
# save reference data
def get_data_df(x):
    df = pd.DataFrame(x, columns=numeric_cols)
    df['prediction'] = model.predict(df)
    return df

reference_df = get_data_df(X_train)
reference_df.to_csv(MONITORING_ARTIFACT_DIR / "reference.csv", index=False, header=True)
reference_df.head()

Unnamed: 0,Humidity[%],Temperature[C],prediction
0,57.52,14.39,1
1,42.16,28.56,1
2,48.61,21.896,1
3,44.72,28.12,1
4,49.29,-6.377,1


In [5]:
current_df = get_data_df(X_val)
current_df.head()

Unnamed: 0,Humidity[%],Temperature[C],prediction
0,49.88,-5.203,1
1,50.01,20.99,1
2,55.61,-5.234,1
3,52.5,0.992,1
4,54.14,12.97,1


In [6]:
column_mapping = ColumnMapping(
    target=None,
    prediction='prediction',
    numerical_features=numeric_cols,
)

In [7]:
report = Report(
    metrics=[
        ColumnDriftMetric(column_name="Humidity[%]"),
        ColumnDriftMetric(column_name="Temperature[C]"),
        # ColumnDriftMetric(column_name="eCO2[ppm]"),
        DatasetDriftMetric(),
    ]
)

In [8]:
report.run(reference_data=reference_df, current_data=current_df.iloc[:1], column_mapping=column_mapping)

In [9]:
# report.show(mode="inline")

In [10]:
result = report.as_dict()

In [12]:
result["metrics"][0]

{'metric': 'ColumnDriftMetric',
 'result': {'column_name': 'Humidity[%]',
  'column_type': 'num',
  'stattest_name': 'Wasserstein distance (normed)',
  'stattest_threshold': 0.1,
  'drift_score': 0.5774265336348993,
  'drift_detected': True,
  'current': {'small_distribution': {'x': [49.38,
     49.480000000000004,
     49.580000000000005,
     49.68,
     49.78,
     49.88,
     49.980000000000004,
     50.080000000000005,
     50.18,
     50.28,
     50.38],
    'y': [0.0, 0.0, 0.0, 0.0, 0.0, 9.999999999999858, 0.0, 0.0, 0.0, 0.0]}},
  'reference': {'small_distribution': {'x': [10.77,
     17.213,
     23.656,
     30.099,
     36.542,
     42.985,
     49.428,
     55.87100000000001,
     62.31400000000001,
     68.757,
     75.2],
    'y': [0.004934637397639883,
     0.0025277238647044235,
     0.0015984136203277964,
     0.0009200171419328595,
     0.008760297903656997,
     0.050960276100799604,
     0.0744098712672363,
     0.009466573689383233,
     0.001474505587744247,
     0

In [14]:
result["metrics"][2]

{'metric': 'DatasetDriftMetric',
 'result': {'drift_share': 0.5,
  'number_of_columns': 3,
  'number_of_drifted_columns': 3,
  'share_of_drifted_columns': 1.0,
  'dataset_drift': True}}

In [11]:
# temperature column drift
result["metrics"][0]['result']['drift_score']

0.5774265336348993

In [42]:
# humidity column drift
result["metrics"][1]['result']['drift_score']

0.5774265336348993

In [43]:
# eCO2 column drift
result["metrics"][2]['result']['drift_score']

0.14145425030440242

In [44]:
# number of drifted columns
result["metrics"][3]['result']['number_of_drifted_columns']

4

In [45]:
result["metrics"][3]['result']

{'drift_share': 0.5,
 'number_of_columns': 4,
 'number_of_drifted_columns': 4,
 'share_of_drifted_columns': 1.0,
 'dataset_drift': True}

In [13]:
df.columns

Index(['Unnamed: 0', 'UTC', 'Temperature[C]', 'Humidity[%]', 'TVOC[ppb]',
       'eCO2[ppm]', 'Raw H2', 'Raw Ethanol', 'Pressure[hPa]', 'PM1.0', 'PM2.5',
       'NC0.5', 'NC1.0', 'NC2.5', 'CNT', 'Fire Alarm'],
      dtype='object')