In [1]:
import gradio as gr
import pandas as pd
import mlflow
import numpy as np
import mlflow.pyfunc
import xgboost as xgb
import mlflow.xgboost
import pickle
import matplotlib.pyplot as plt
import plotly.express as px
from model_functions import *
import html
import plotly.graph_objs as go
import plotly.io as pio

# Point the MLflow client at the tracking server this notebook talks to
# (expects a server listening on localhost, port 5000). Every later
# MlflowClient / load_model call in this notebook goes through this URI.
mlflow.set_tracking_uri("http://localhost:5000")


  def hasna(x: np.ndarray) -> bool:


In [2]:
def _read_split(path):
    """Read one data split from an Excel file and drop its ``Unnamed: 0`` column.

    The dropped column is presumably the index that was written along with the
    data when the split was exported -- confirm against the export code.
    Raises ``KeyError`` if the column is absent, exactly like a plain ``drop``.
    """
    return pd.read_excel(path).drop("Unnamed: 0", axis=1)


def get_stats_of_model_from_mlflow(
    model_name="wue-rent-feature-set-app", stage="production", progress=gr.Progress()
):
    """Summarise the registered model and compare it against the rent baseline.

    Loads the model ``models:/{model_name}/{stage}`` from MLflow, reads the
    metrics logged on its run, evaluates ``apply_benchmark_rent`` on the
    validation and test splits stored under ``data/``, and builds a grouped
    bar chart (MAE / RMSE, model vs. baseline).

    Args:
        model_name: Name of the registered MLflow model.
        stage: Registry stage to load the model from.
        progress: Gradio progress tracker. It is declared as a default
            argument because that is how Gradio injects the tracker into
            event handlers.

    Returns:
        A tuple ``(html_string, plot_update)``: an HTML fragment with basic run
        information and metrics, and a ``gr.update`` that sets the plot value
        and makes the plot component visible.

    Raises:
        KeyError: If the run lacks one of the metrics ``mae``, ``mse``, ``r2``,
            ``mae_test``, ``mse_test``, ``r2_test`` or the ``mlflow.runName`` tag.
    """
    # Local import: `time` is not among the notebook's explicit imports, so do
    # not rely on the star import from model_functions to provide it.
    import time

    progress(0.05, desc="Connect to MLFlow")
    time.sleep(0.3)
    client = mlflow.tracking.MlflowClient()
    progress(0.10, desc="Connect to MLFlow")

    progress(0.15, desc="Load latest productive model from MLFlow...")
    model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{stage}")

    progress(0.65, desc="Extract metrics for overview...")
    time.sleep(0.2)
    run_id = model.metadata.run_id
    run = client.get_run(run_id)
    logged = run.data.metrics

    mae = round(logged["mae"], 2)
    mse = round(logged["mse"], 2)
    # Take the root of the raw MSE; rooting the rounded value would turn any
    # MSE below 0.005 into an RMSE of exactly 0.
    rmse = round(np.sqrt(logged["mse"]), 2)
    r2 = round(logged["r2"], 2)

    mae_test = round(logged["mae_test"], 2)
    mse_test = round(logged["mse_test"], 2)
    rmse_test = round(np.sqrt(logged["mse_test"]), 2)
    r2_test = round(logged["r2_test"], 2)

    progress(0.70, desc="Extract metrics for overview...")
    time.sleep(0.2)
    run_name = run.data.tags["mlflow.runName"]

    # Calculate a current benchmark on the same splits for comparison.
    progress(0.75, desc="Calculate current benchmark for comparison")
    X_val = _read_split("data/X_val.xlsx")
    y_val = _read_split("data/y_val.xlsx")
    X_test = _read_split("data/X_test.xlsx")
    y_test = _read_split("data/y_test.xlsx")

    progress(0.80, desc="Calculate current benchmark for comparison")
    # apply_benchmark_rent is unpacked as (mae, mse, r2, rmse); only MAE and
    # RMSE are shown in the chart.
    baseline_mae, _, _, baseline_rmse = apply_benchmark_rent(X_val, y_val)
    baseline_mae_test, _, _, baseline_rmse_test = apply_benchmark_rent(X_test, y_test)

    # The series labels are shared between the data frame and the colour map:
    # plotly matches colours by exact label, so they must not drift apart.
    current_label = "Current used model"
    baseline_label = "Baseline"

    df_metrics = pd.DataFrame(
        {
            "model_name": [current_label, baseline_label],
            "mae_val": [mae, baseline_mae],
            "rmse_val": [rmse, baseline_rmse],
            "mae_test": [mae_test, baseline_mae_test],
            "rmse_test": [rmse_test, baseline_rmse_test],
        }
    )
    df_metrics_melted = df_metrics.melt(
        id_vars="model_name", var_name="metric", value_name="value"
    )

    progress(0.85, desc="Generate plot")
    plot = px.bar(
        df_metrics_melted,
        x="metric",
        y="value",
        title="Current Model vs Benchmark (MAE, RMSE)",
        color="model_name",
        barmode="group",
        color_discrete_map={
            current_label: "blue",
            baseline_label: "grey",
        },
    )

    # Names and ids come from the MLflow registry / run tags; escape them
    # before embedding them in markup, as done for the EDA link in the UI.
    safe_model = html.escape(str(model_name))
    safe_stage = html.escape(str(stage))
    safe_run_name = html.escape(str(run_name))
    safe_run_id = html.escape(str(run_id))

    html_string = f"""
    <h2>Basic Information:</h2>
    <ul><li>Model: {safe_model}</li><li>Stage: {safe_stage}</li><li>Run Name: {safe_run_name}</li><li>MLFlow Run id: {safe_run_id}</li></ul>
    <h2>Metrics:</h2>
    <ul><li>MAE Val: {mae}</li><li>MSE Val: {mse}</li><li>R2 Val: {r2}</li></ul>
    <ul><li>MAE Test: {mae_test}</li><li>MSE Test: {mse_test}</li><li>R2 Test: {r2_test}</li></ul>
    """
    return html_string, gr.update(value=plot, visible=True)

In [3]:
# Admin UI: one tab with (1) an overview of the model currently registered in
# MLflow and (2) controls to scrape new data and retrain selected models.
with gr.Blocks() as demo:
    with gr.Tab(label="Admin"):
        gr.Markdown("## Overview")
        link = "https://michaelseitz98.github.io/enterprise-ai-project/eda-wue-rent-all.html"
        description = "Explorative Data Analysis (EDA) for flats to rent in Würzburg"
        # The font size has to live inside a style attribute; a bare
        # `font-size: ...;"` after href produces a malformed anchor tag.
        html_code = (
            "Explorative Data Analysis of data foundation: "
            f'<a href="{html.escape(link)}" style="font-size: 16px;">'
            f"{html.escape(description)}</a>"
        )
        gr.HTML(html_code)

        with gr.Row():
            data_overview = gr.HTML("")
            # Hidden until the stats callback returns an update that shows it.
            plot_overview = gr.Plot(visible=False)

        btn = gr.Button("Get stats of productive model")
        btn.click(
            get_stats_of_model_from_mlflow, outputs=[data_overview, plot_overview]
        )
        gr.HTML("<hr>")
        gr.Markdown("## Retrain models with new data")
        gr.Markdown("Select the models you want to retrain.")

        # The checkbox variables carry a `_box` suffix so they do not rebind
        # the `xgb` alias of the xgboost module imported at the top.
        with gr.Row():
            xgb_box = gr.Checkbox(label="xgb")
            rf_box = gr.Checkbox(label="rf")
            linear_box = gr.Checkbox(label="linear", value=True)
            lasso_box = gr.Checkbox(label="lasso")
            ridge_box = gr.Checkbox(label="ridge")
            elasticnet_box = gr.Checkbox(label="elasticnet")
            baseline_box = gr.Checkbox(label="baseline-rent", value=True)

        # No model list is built here: the objects above are UI components,
        # not booleans. Their checked state only exists when the click handler
        # runs, where Gradio passes the current values as arguments.

        limit = gr.Slider(
            label="Amount of scraped pages of Immowelt", minimum=1, maximum=10, step=1
        )
        nachtraining = gr.Button("Scrape new data, retrain and evaluate models")
        gr.HTML("<hr>")

        df_results = gr.HTML("")
        output_plot = gr.Plot(visible=False)

        # Input values are passed positionally, so this order presumably has to
        # match the parameter order of gradio_retrain_with_added_data (defined
        # outside this notebook) -- verify before reordering.
        nachtraining.click(
            fn=gradio_retrain_with_added_data,
            inputs=[
                xgb_box,
                ridge_box,
                rf_box,
                elasticnet_box,
                linear_box,
                lasso_box,
                baseline_box,
                limit,
            ],
            outputs=[df_results, output_plot],
        )

# Queueing is enabled before launch; the progress tracker used by the stats
# callback relies on it. debug=True keeps the cell attached to the server.
demo.queue(concurrency_count=10).launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.
