In [1]:
import pandas as pd 
from dotenv import load_dotenv
import os
import numpy as np

import mlflow
from datetime import date

from evidently.metrics import DatasetDriftMetric, DataDriftTable, ColumnDriftMetric, RegressionQualityMetric
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.report import Report
from evidently.ui.dashboards import CounterAgg, DashboardPanelCounter, DashboardPanelPlot, PanelValue, PlotType, ReportFilter
from evidently.ui.workspace import Workspace


  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()


In [2]:
# Get environment variables from the project's .env file
load_dotenv(dotenv_path="../.env")
DATAPATH = os.getenv("DATAPATH")
# NOTE(review): MLFLOW_TRACKING_URI is not referenced again in the visible cells;
# presumably mlflow picks it up from the process environment -- confirm.
MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI")
EVIDENTLY_WORKSPACE = os.getenv("EVIDENTLY_WORKSPACE")

# Fail fast on settings that later cells cannot work without: DATAPATH is
# interpolated into file paths (None would silently become the literal "None/...")
# and EVIDENTLY_WORKSPACE is passed straight to os.path.exists().
_missing_settings = [
    _name
    for _name, _value in (
        ("DATAPATH", DATAPATH),
        ("EVIDENTLY_WORKSPACE", EVIDENTLY_WORKSPACE),
    )
    if not _value
]
if _missing_settings:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + " (expected in ../.env or the process environment)"
    )

# Set other variables
# Read the date once so both registry names always carry the same day, even if
# the cell happens to run across midnight.
_run_date = date.today()
model_name = f"best-model-{_run_date}"
preprocessor_name = f"preprocessor-{_run_date}"
stage = "Production"

project_name = "Algorithmic Trading"


In [44]:
# Load reference data
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling; only
# point DATAPATH at pickle files this project produced itself.
reference_path = f"{DATAPATH}/BEL_20_reference.pkl"  # Path to the reference dataset
latest_path = f"{DATAPATH}/BEL_20.pkl"  # Path to the most recent dataset

if not os.path.exists(reference_path):
    # First run: no reference snapshot yet, so freeze the latest data as the reference.
    ref_data = pd.read_pickle(latest_path)
    ref_data.to_pickle(reference_path)
else:
    ref_data = pd.read_pickle(reference_path)

# Load latest model
preprocessor = mlflow.sklearn.load_model(model_uri=f"models:/{preprocessor_name}/{stage}")
model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{stage}")

# Load latest data
data = pd.read_pickle(latest_path)


def _add_target_and_prediction(frame, preprocessor, model):
    """Return a copy of ``frame`` with the columns Evidently needs.

    Steps, in order:
      1. copy the ``close_growth`` column into a new ``target`` column;
      2. drop every row that contains a missing value;
      3. add a ``prediction`` column holding
         ``model.predict(preprocessor.transform(...))`` for the remaining rows.

    The input frame is left untouched; a new DataFrame is returned.

    Parameters
    ----------
    frame : DataFrame containing a ``close_growth`` column.
    preprocessor : object exposing ``transform(frame)``.
    model : object exposing ``predict(features)``.
    """
    prepared = frame.assign(target=frame["close_growth"]).dropna()
    # The preprocessor is handed the frame *including* the new "target" column;
    # presumably it selects its own feature columns -- TODO confirm.
    return prepared.assign(prediction=model.predict(preprocessor.transform(prepared)))


# Make target and prediction columns for Evidently to work
ref_data = _add_target_and_prediction(ref_data, preprocessor, model)  # reference data
data = _add_target_and_prediction(data, preprocessor, model)  # current (latest) data


X does not have valid feature names, but LinearRegression was fitted with feature names


X does not have valid feature names, but LinearRegression was fitted with feature names



In [64]:
# Open workspace
# Open the workspace directory if it already exists, otherwise create it.
if os.path.exists(EVIDENTLY_WORKSPACE):
    ws = Workspace(EVIDENTLY_WORKSPACE)
else:
    ws = Workspace.create(EVIDENTLY_WORKSPACE)

# Evidently's get_project() looks a project up by its id, not by its display
# name, so locate the project by name. This also covers a workspace directory
# that exists but does not contain the project yet.
matching_projects = ws.search_project(project_name)

if matching_projects:
    # Reuse the existing project (first match if several share the name).
    project = matching_projects[0]
else:
    # Project does not exist yet: create it and define its dashboard panels once.
    project = ws.create_project(project_name)

    # Title panel (counter with no aggregation).
    project.dashboard.add_panel(
        DashboardPanelCounter(
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            agg=CounterAgg.NONE,
            title="Stock Price Growth Rate Prediction",
        )
    )

    # Share of drifted columns over time; field_path now comes from the same
    # metric that metric_id names.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Dataset Drift",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_id="DatasetDriftMetric",
                    field_path=DatasetDriftMetric.fields.share_of_drifted_columns,
                    legend="Drift Share",
                )
            ],
            plot_type=PlotType.LINE,
        )
    )

    # NOTE(review): no column is specified for this metric, so the panel may pick
    # up every ColumnDriftMetric in a report (not only the target's) -- confirm.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Target Drift",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_id="ColumnDriftMetric",
                    field_path=ColumnDriftMetric.fields.drift_score,
                    legend="Drift Score",
                )
            ],
            plot_type=PlotType.LINE,
        )
    )

    # Mean absolute percentage error on the current data.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="MAPE",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_id="RegressionQualityMetric",
                    field_path=RegressionQualityMetric.fields.current.mean_abs_perc_error,
                    legend="MAPE",
                ),
            ],
            plot_type=PlotType.LINE,
            size=2,
        )
    )

    # Persist the new project and its dashboard configuration.
    project.save()

In [66]:
# Add report to project
# Presets evaluated on every run: data drift, data quality, target drift and
# regression performance.
report_metrics = [
    DataDriftPreset(),
    DataQualityPreset(),
    TargetDriftPreset(),
    RegressionPreset(),
]
report = Report(metrics=report_metrics)

# Both frames are handed over with a fresh 0..n-1 index (the old index is dropped).
reference_frame = ref_data.reset_index(drop=True)
current_frame = data.reset_index(drop=True)
report.run(reference_data=reference_frame, current_data=current_frame)

# Store the computed report under the project in the workspace.
ws.add_report(project.id, report)


ks_2samp: Exact calculation unsuccessful. Switching to method=asymp.


ks_2samp: Exact calculation unsuccessful. Switching to method=asymp.


ks_2samp: Exact calculation unsuccessful. Switching to method=asymp.


ks_2samp: Exact calculation unsuccessful. Switching to method=asymp.


ks_2samp: Exact calculation unsuccessful. Switching to method=asymp.


R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.



In [69]:
# Display the id of the project the report was added to (the value passed to ws.add_report).
project.id

UUID('f2df9ec7-408d-4a61-99b9-4289a6ee106f')