# Determine the performance of the `Production` model

In [None]:
from src.environment import init

# init() returns two values, bound here as notebook-level names.
# NOTE(review): presumably the MLflow experiment id and a tracking client —
# confirm against src.environment.
experiment_id, client = init()

In [None]:
from src.data import load_data

# Path of the housing CSV handed to load_data as `csv_url`.
DATA_SET_FILENAME: str = "datasets/housing.csv"

# load_data returns a pair that is unpacked into X and y; later cells read the
# "median_house_value" column from y.
# NOTE(review): presumably X holds the feature columns and y the truth column —
# confirm against src.data.
X, y = load_data(
    csv_url=DATA_SET_FILENAME,
    truth_col_name="median_house_value",
)

In [None]:
import warnings
import mlflow

# Suppress every warning category from here on so the notebook output stays
# uncluttered.
warnings.filterwarnings("ignore")

# Model URI to evaluate. The active choice addresses the registered model by
# stage; the commented alternatives below show the other supported forms.

# By model stage
new_model = "models:/demo_california_housing_prices/Production"

# By model version
# new_model = "models:/demo_california_housing_prices/<<version>>"
# new_model = "models:/demo_california_housing_prices/3"

# By run id
# new_model = "runs:/<<MLFLOW RUN ID>>/model"


# Load the model behind the URI as a generic PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(
    model_uri=new_model,
    suppress_warnings=True,
)

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

# Predict with the loaded model and keep the result in a one-column frame named
# after the truth column, rounded to whole integers.
y_pred: pd.DataFrame = pd.DataFrame(loaded_model.predict(X), columns=["median_house_value"])
y_pred["median_house_value"] = y_pred["median_house_value"].round().astype(dtype=int)

# Generate our bucket labels: round(-5) snaps every value to the nearest
# 100,000 so truth and prediction can be compared as discrete classes.
y_1 = y["median_house_value"].round(-5)
y_pred_1 = y_pred["median_house_value"].round(-5)

# Every bucket seen in either series. The report lists rows in the order the
# labels are supplied, so sort them ascending; otherwise the rows appear in
# first-seen order, which makes the report hard to read.
labels = pd.concat([y_1, y_pred_1]).drop_duplicates().sort_values()

print(classification_report(y_true=y_1, y_pred=y_pred_1, labels=labels))

In [None]:
import pandas as pd
from ae5_tools import demand_env_var
from sklearn.metrics import classification_report

from src.rest import predict

# The URL of the self-hosted model endpoint is read from the
# SELF_HOSTED_MODEL_ENDPOINT environment variable.
# NOTE(review): demand_env_var presumably fails when the variable is unset —
# confirm against ae5_tools.
endpoint_url: str = demand_env_var(name="SELF_HOSTED_MODEL_ENDPOINT")

# Score the same X through the REST helper, with auth switched off.
y_pred_api: pd.DataFrame = predict(
    endpoint_url=endpoint_url,
    data_x=X,
    auth=False,
)

In [None]:
# Generate our bucket labels: round(-5) snaps every value to the nearest
# 100,000 so truth and API prediction can be compared as discrete classes.
y_2 = y["median_house_value"].round(-5)
y_pred_2 = y_pred_api["predictions"].round(-5)

# Every bucket seen in either series. The report lists rows in the order the
# labels are supplied, so sort them ascending; otherwise the rows appear in
# first-seen order, which makes the report hard to read.
labels = pd.concat([y_2, y_pred_2]).drop_duplicates().sort_values()

print(classification_report(y_true=y_2, y_pred=y_pred_2, labels=labels))