In [1]:
import gradio as gr
import pandas as pd
import mlflow
import numpy as np
import mlflow.pyfunc
import xgboost as xgb
import mlflow.xgboost
import pickle
from gradio_functions import *
import matplotlib.pyplot as plt
import plotly.express as px
from model_functions import *
import html
import plotly.graph_objs as go
import plotly.io as pio

mlflow.set_tracking_uri("http://localhost:5000")


  def hasna(x: np.ndarray) -> bool:


In [2]:
def get_stats_of_model_from_mlflow(
    model_name="wue-rent-feature-set-app", stage="production", progress=gr.Progress()
):
    """Summarise the registered MLflow model and compare it with the rent baseline.

    Loads ``models:/{model_name}/{stage}`` from the MLflow registry, reads the
    metrics logged on the run that produced it, evaluates the rent benchmark
    on the stored validation and test splits, and builds an HTML summary plus
    a grouped bar chart (MAE / RMSE, model vs. baseline).

    Args:
        model_name: Name of the registered model in MLflow.
        stage: Registry stage used in the model URI.
        progress: Gradio progress tracker used to report the steps below.

    Returns:
        A tuple ``(html_string, plot_update)``: the HTML summary and a
        ``gr.update`` that sets the plotly figure and makes the plot visible.

    Raises:
        KeyError: If the run lacks one of the metrics ``mae``, ``mse``, ``r2``,
            ``mae_test``, ``mse_test``, ``r2_test`` or the ``mlflow.runName`` tag.
    """
    # Imported locally so the function does not depend on `time` leaking in
    # through one of the notebook's star imports.
    import time

    def _read_split(path):
        """Read one split from Excel and drop its ``Unnamed: 0`` column."""
        # "Unnamed: 0" is presumably the index written when the split was exported.
        return pd.read_excel(path).drop("Unnamed: 0", axis=1)

    progress(0.05, desc="Connect to MLFlow")
    time.sleep(0.3)  # short pause, presumably so the step stays visible in the UI
    client = mlflow.tracking.MlflowClient()
    progress(0.10, desc="Connect to MLFlow")

    progress(0.15, desc="Load latest productive model from MLFlow...")
    model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{stage}")

    progress(0.65, desc="Extract metrics for overview...")
    time.sleep(0.2)
    run_id = model.metadata.run_id
    run = client.get_run(run_id)
    logged = run.data.metrics

    mae = round(logged["mae"], 2)
    mse = round(logged["mse"], 2)
    r2 = round(logged["r2"], 2)
    mae_test = round(logged["mae_test"], 2)
    mse_test = round(logged["mse_test"], 2)
    r2_test = round(logged["r2_test"], 2)
    # Take the root of the unrounded MSE so rounding is applied only once.
    rmse = round(np.sqrt(logged["mse"]), 2)
    rmse_test = round(np.sqrt(logged["mse_test"]), 2)

    progress(0.70, desc="Extract metrics for overview...")
    time.sleep(0.2)
    run_name = run.data.tags["mlflow.runName"]

    # Calculate a current benchmark on the stored validation and test splits.
    progress(0.75, desc="Calculate current benchmark for comparison")
    X_val = _read_split("data/X_val.xlsx")
    y_val = _read_split("data/y_val.xlsx")
    X_test = _read_split("data/X_test.xlsx")
    y_test = _read_split("data/y_test.xlsx")

    progress(0.80, desc="Calculate current benchmark for comparison")
    # apply_benchmark_rent yields (mae, mse, r2, rmse); only MAE and RMSE are charted.
    baseline_mae, _, _, baseline_rmse = apply_benchmark_rent(X_val, y_val)
    baseline_mae_test, _, _, baseline_rmse_test = apply_benchmark_rent(X_test, y_test)

    progress(0.85, desc="Generate plot")
    current_label = "Current used model"
    baseline_label = "Baseline"
    df_metrics = pd.DataFrame(
        {
            "model_name": [current_label, baseline_label],
            "mae_val": [mae, baseline_mae],
            "rmse_val": [rmse, baseline_rmse],
            "mae_test": [mae_test, baseline_mae_test],
            "rmse_test": [rmse_test, baseline_rmse_test],
        }
    )
    df_metrics_melted = df_metrics.melt(
        id_vars="model_name", var_name="metric", value_name="value"
    )

    plot = px.bar(
        df_metrics_melted,
        x="metric",
        y="value",
        title="Current Model vs Benchmark",
        color="model_name",
        barmode="group",
        # Keys must equal the values of the `color` column, otherwise plotly
        # silently falls back to its default colour sequence.
        color_discrete_map={
            current_label: "blue",
            baseline_label: "grey",
        },
    )

    # Escape free-text values before embedding them in markup.
    safe_model = html.escape(str(model_name))
    safe_stage = html.escape(str(stage))
    safe_run_name = html.escape(str(run_name))
    safe_run_id = html.escape(str(run_id))

    html_string = f"""
    <h2>Basic Information:</h2>
    <ul><li>Model: {safe_model}</li><li>Stage: {safe_stage}</li><li>Run Name: {safe_run_name}</li><li>MLFlow Run id: {safe_run_id}</li></ul>
    <h2>Metrics:</h2>
    <ul><li>MAE Val: {mae}</li><li>MSE Val: {mse}</li><li>R2 Val: {r2}</li></ul>
    <ul><li>MAE Test: {mae_test}</li><li>MSE Test: {mse_test}</li><li>R2 Test: {r2_test}</li></ul>
    """
    return html_string, gr.update(value=plot, visible=True)

In [3]:
# Admin UI: shows stats of the productive model and triggers retraining.
with gr.Blocks() as demo:
    with gr.Tab(label="Admin"):
        gr.Markdown("## Overview")
        link = "https://michaelseitz98.github.io/enterprise-ai-project/eda-wue-rent-all.html"
        description = "Explorative Data Analysis (EDA) for flats to rent in Würzburg"
        # Inline CSS has to live inside a `style` attribute to be valid markup.
        html_code = (
            "Explorative Data Analysis of data foundation: "
            f'<a href="{html.escape(link)}" style="font-size: 16px;">'
            f"{html.escape(description)}</a>"
        )
        gr.HTML(html_code)

        with gr.Row():
            data_overview = gr.HTML("")
            # Hidden until the stats handler returns a figure for it.
            plot_overview = gr.Plot(visible=False)

        btn = gr.Button("Get stats of productive model")
        btn.click(
            get_stats_of_model_from_mlflow, outputs=[data_overview, plot_overview]
        )
        gr.HTML("<hr>")
        gr.Markdown("## Retrain models with new data")
        gr.Markdown("Select the models you want to retrain.")

        # The checkbox states reach the handler at click time via `inputs`;
        # the component objects themselves carry no selection at build time.
        with gr.Row():
            # Named `xgb_checkbox` so the `xgboost as xgb` module alias from
            # the import cell is not overwritten in the notebook namespace.
            xgb_checkbox = gr.Checkbox(label="xgb")
            rf = gr.Checkbox(label="rf")
            linear = gr.Checkbox(label="linear", value=True)
            lasso = gr.Checkbox(label="lasso")
            ridge = gr.Checkbox(label="ridge")
            elasticnet = gr.Checkbox(label="elasticnet")
            baseline = gr.Checkbox(label="baseline-rent", value=True)

        limit = gr.Slider(
            label="Amount of scraped pages of Immowelt", minimum=1, maximum=10, step=1
        )
        nachtraining = gr.Button("Scrape new data, retrain and evaluate models")
        gr.HTML("<hr>")

        df_results = gr.HTML("")
        # NOTE(review): the recorded run logs a JSON-encoding error right after
        # a plotly Figure is produced. If the handler returns that Figure, this
        # component should be gr.Plot - confirm against gradio_functions.
        output_plot = gr.HTML("")

        # NOTE(review): the order of `inputs` must match the positional
        # parameters of gradio_retrain_with_added_data - verify in gradio_functions.
        nachtraining.click(
            fn=gradio_retrain_with_added_data,
            inputs=[xgb_checkbox, ridge, rf, elasticnet, linear, lasso, baseline, limit],
            outputs=[df_results, output_plot],
        )

demo.queue(concurrency_count=10).launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


https://www.immowelt.de/liste/wuerzburg/wohnungen/mieten?d=true&r=10&sd=DESC&sf=RELEVANCE&sp=1
started
Retraining data successfully scraped.
Retraining data successfully written to excel under data/retrain_train_data.xslx
Done with raw preprocessing.
old shape of train_recent (108, 47)
Index(['Object_price', 'LivingSpace', 'Rooms', 'ConstructionYear', 'ZipCode',
       'EstateType', 'DistributionType', 'abstellraum', 'altbau_(bis_1945)',
       'balkon', 'barriefrei', 'dachgeschoss', 'dusche', 'einbaukueche',
       'elektro', 'etagenheizung', 'fenster', 'fern', 'ferne', 'fliesen',
       'frei', 'fussbodenheizung', 'gaestewc', 'garten', 'gartennutzung',
       'gas', 'gepflegt', 'kelleranteil', 'kunststofffenster', 'luftwp',
       'neubau', 'offene_kueche', 'parkett', 'pellet', 'personenaufzug',
       'reinigung', 'renoviert', 'rollstuhlgerecht', 'speisekammer',
       'stellplatz', 'teilweise_moebliert', 'teppich', 'terrasse',
       'tiefgarage', 'wanne', 'wg_geeignet', 'zentralhe

2023/07/17 04:24:21 INFO mlflow.tracking.fluent: Experiment with name 'retraining_2023-07-17_04-24' does not exist. Creating a new experiment.


XGB------
train(121, 46)
val:(27, 46)
y_train:(121,)
y_val:(27, 1)
[0]	validation_0-rmse:974.41924	validation_0-mae:845.35651
[1]	validation_0-rmse:742.92345	validation_0-mae:616.63767
[2]	validation_0-rmse:592.83183	validation_0-mae:473.71177
[3]	validation_0-rmse:487.42019	validation_0-mae:372.20308
[4]	validation_0-rmse:408.12288	validation_0-mae:299.44443
[5]	validation_0-rmse:360.06418	validation_0-mae:274.04487
[6]	validation_0-rmse:332.68252	validation_0-mae:260.07832
[7]	validation_0-rmse:318.78724	validation_0-mae:251.46591
[8]	validation_0-rmse:312.82863	validation_0-mae:244.36140
[9]	validation_0-rmse:301.29382	validation_0-mae:231.61418
[10]	validation_0-rmse:299.21947	validation_0-mae:230.68253
[11]	validation_0-rmse:296.74795	validation_0-mae:227.54988
[12]	validation_0-rmse:298.85917	validation_0-mae:227.79979
[13]	validation_0-rmse:299.06953	validation_0-mae:225.03601
[14]	validation_0-rmse:298.83213	validation_0-mae:224.25303
[15]	validation_0-rmse:300.90010	validation



Training xgb model done...
---EVALUATION AND LOGGING TO MLFLOW------ xgb


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


BASELINE-RENT------
Extrcated rental price per square meter via scraper: 11.21
Average rental price per sqm: 11.21
Baseline Mae: 285.5564
Baseline Mae: 285.5564
Baseline MSE: 140135.41872988813
Baseline R2 Score: 0.5802888162379594
Baseline RMSE: 374.3466558283754
Training baseline-rent model done...
---EVALUATION AND LOGGING TO MLFLOW------ baseline-rent


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


Done with retraining:             model     mae        mse    r2  mae_test  mse_test  r2_test  \
0            xgb  219.34   90055.77  0.73    124.04  34550.16     0.88   
1  baseline-rent  285.56  140135.42  0.58    237.83  92491.55     0.69   

   mae_train  mse_train  r2_train  
0       19.0     684.76       1.0  
1        NaN        NaN       NaN  
Save results to excel
Done with saving results to excel
Done with plotting:  Figure({
    'data': [{'alignmentgroup': 'True',
              'hovertemplate': 'model=%{x}<br>mae=%{marker.color}<extra></extra>',
              'legendgroup': '',
              'marker': {'color': array([219.34, 285.56]), 'coloraxis': 'coloraxis', 'pattern': {'shape': ''}},
              'name': '',
              'offsetgroup': '',
              'orientation': 'v',
              'showlegend': False,
              'textposition': 'auto',
              'type': 'bar',
              'x': array(['xgb', 'baseline-rent'], dtype=object),
              'xaxis': 'x',
   

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "c:\Users\michi\Anaconda3\envs\enterpriseai2\lib\site-packages\fastapi\encoders.py", line 137, in jsonable_encoder
    data = dict(obj)
TypeError: 'property' object is not iterable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\michi\Anaconda3\envs\enterpriseai2\lib\site-packages\fastapi\encoders.py", line 141, in jsonable_encoder
    data = vars(obj)
TypeError: vars() argument must have __dict__ attribute

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\michi\Anaconda3\envs\enterpriseai2\lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 372, in run_asgi
    result = await app(self.scope, self.receive, self.send)
  File "c:\Users\michi\Anaconda3\envs\enterpriseai2\lib\site-packages\uvicorn\middleware\proxy_headers.py", line 75, in __call__
    

Done convertion to html:  <table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>model</th>
      <th>mae</th>
      <th>mse</th>
      <th>r2</th>
      <th>mae_test</th>
      <th>mse_test</th>
      <th>r2_test</th>
      <th>mae_train</th>
      <th>mse_train</th>
      <th>r2_train</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>xgb</td>
      <td>219.34</td>
      <td>90055.77</td>
      <td>0.73</td>
      <td>124.04</td>
      <td>34550.16</td>
      <td>0.88</td>
      <td>19.0</td>
      <td>684.76</td>
      <td>1.0</td>
    </tr>
    <tr>
      <th>1</th>
      <td>baseline-rent</td>
      <td>285.56</td>
      <td>140135.42</td>
      <td>0.58</td>
      <td>237.83</td>
      <td>92491.55</td>
      <td>0.69</td>
      <td>NaN</td>
      <td>NaN</td>
      <td>NaN</td>
    </tr>
  </tbody>
</table>
Done adding headline:  <h2>Result of retraining</h2><table border="1" class="dataframe">
  <thead>


2023/07/17 04:30:42 INFO mlflow.tracking.fluent: Experiment with name 'retraining_2023-07-17_04-30' does not exist. Creating a new experiment.


XGB------
train(121, 46)
val:(27, 46)
y_train:(121,)
y_val:(27, 1)
[0]	validation_0-rmse:974.41924	validation_0-mae:845.35651
[1]	validation_0-rmse:742.92345	validation_0-mae:616.63767
[2]	validation_0-rmse:592.83183	validation_0-mae:473.71177
[3]	validation_0-rmse:487.42019	validation_0-mae:372.20308
[4]	validation_0-rmse:408.12288	validation_0-mae:299.44443
[5]	validation_0-rmse:360.06418	validation_0-mae:274.04487
[6]	validation_0-rmse:332.68252	validation_0-mae:260.07832
[7]	validation_0-rmse:318.78724	validation_0-mae:251.46591
[8]	validation_0-rmse:312.82863	validation_0-mae:244.36140
[9]	validation_0-rmse:301.29382	validation_0-mae:231.61418
[10]	validation_0-rmse:299.21947	validation_0-mae:230.68253
[11]	validation_0-rmse:296.74795	validation_0-mae:227.54988
[12]	validation_0-rmse:298.85917	validation_0-mae:227.79979
[13]	validation_0-rmse:299.06953	validation_0-mae:225.03601
[14]	validation_0-rmse:298.83213	validation_0-mae:224.25303
[15]	validation_0-rmse:300.90010	validation



Training xgb model done...
---EVALUATION AND LOGGING TO MLFLOW------ xgb



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Done with retraining:    model     mae       mse    r2  mae_test  mse_test  r2_test  mae_train  \
0   xgb  219.34  90055.77  0.73    124.04  34550.16     0.88       19.0   

   mse_train  r2_train  
0     684.76       1.0  
Save results to excel
Done with saving results to excel
Done with plotting:  Figure({
    'data': [{'alignmentgroup': 'True',
              'hovertemplate': 'model=%{x}<br>mae=%{marker.color}<extra></extra>',
              'legendgroup': '',
              'marker': {'color': array([219.34]), 'coloraxis': 'coloraxis', 'pattern': {'shape': ''}},
              'name': '',
              'offsetgroup': '',
              'orientation': 'v',
              'showlegend': False,
              'textposition': 'auto',
              'type': 'bar',
              'x': array(['xgb'], dtype=object),
              'xaxis': 'x',
              'y': array([219.34]),
              'yaxis': 'y'}],
    'layout': {'barmode': 'relative',
               'coloraxis': {'colorbar': {'title': {'t

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "c:\Users\michi\Anaconda3\envs\enterpriseai2\lib\site-packages\fastapi\encoders.py", line 137, in jsonable_encoder
    data = dict(obj)
TypeError: 'property' object is not iterable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\michi\Anaconda3\envs\enterpriseai2\lib\site-packages\fastapi\encoders.py", line 141, in jsonable_encoder
    data = vars(obj)
TypeError: vars() argument must have __dict__ attribute

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\michi\Anaconda3\envs\enterpriseai2\lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 372, in run_asgi
    result = await app(self.scope, self.receive, self.send)
  File "c:\Users\michi\Anaconda3\envs\enterpriseai2\lib\site-packages\uvicorn\middleware\proxy_headers.py", line 75, in __call__
    

Done convertion to html:  <table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>model</th>
      <th>mae</th>
      <th>mse</th>
      <th>r2</th>
      <th>mae_test</th>
      <th>mse_test</th>
      <th>r2_test</th>
      <th>mae_train</th>
      <th>mse_train</th>
      <th>r2_train</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>xgb</td>
      <td>219.34</td>
      <td>90055.77</td>
      <td>0.73</td>
      <td>124.04</td>
      <td>34550.16</td>
      <td>0.88</td>
      <td>19.0</td>
      <td>684.76</td>
      <td>1.0</td>
    </tr>
  </tbody>
</table>
Done adding headline:  <h2>Result of retraining</h2><table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>model</th>
      <th>mae</th>
      <th>mse</th>
      <th>r2</th>
      <th>mae_test</th>
      <th>mse_test</th>
      <th>r2_test</th>
      <th>mae_train</th>
      <th>mse_train</th>
     