In [1]:
import gradio as gr
import pandas as pd
import mlflow
import numpy as np
import mlflow.pyfunc
import xgboost as xgb
import mlflow.xgboost
import pickle
import matplotlib.pyplot as plt
import plotly.express as px
from model_functions import *
import html
import plotly.graph_objs as go
import plotly.io as pio

# Point the MLflow client at the tracking server on localhost:5000.
# NOTE(review): the URI is hard-coded, so the notebook presumably expects an
# MLflow server to be running locally on that port — confirm before running.
mlflow.set_tracking_uri("http://localhost:5000")


In [None]:
# Admin UI: shows statistics of the productive model and lets an admin scrape
# new data, retrain the selected model types and compare the results.
with gr.Blocks() as demo:
    with gr.Tab(label="Admin"):
        # --- Section 1: overview of the data foundation and productive model ---
        gr.Markdown("## Overview 💹")
        link = "https://michaelseitz98.github.io/enterprise-ai-project/eda-wue-rent-all.html"
        description = "Explorative Data Analysis (EDA) for flats to rent in Würzburg"
        # The font size has to live inside a `style` attribute; previously it was
        # emitted as stray text inside the tag, producing malformed HTML.
        html_code = (
            "Explorative Data Analysis of data foundation: "
            f'<a href="{html.escape(link)}" style="font-size: 16px;">'
            f"{html.escape(description)}</a>"
        )
        gr.HTML(html_code)

        with gr.Row():
            data_overview = gr.HTML("")
            # Created hidden; NOTE(review): presumably the callback makes it
            # visible through its return value — confirm in model_functions.
            plot_overview = gr.Plot(visible=False)

        btn = gr.Button("Get stats of productive model")
        btn.click(
            get_stats_of_model_from_mlflow, outputs=[data_overview, plot_overview]
        )

        # --- Section 2: retraining with freshly scraped data ---
        gr.HTML("<hr>")
        gr.Markdown("## Retrain models with new data 🔁")
        gr.Markdown("Select the models you want to retrain.")

        # The checkbox variables carry a `_box` suffix so they do not rebind
        # `xgb`, which the import cell binds to the xgboost module.
        with gr.Row():
            xgb_box = gr.Checkbox(label="xgb")
            rf_box = gr.Checkbox(label="rf")
            linear_box = gr.Checkbox(label="linear", value=True)
            lasso_box = gr.Checkbox(label="lasso")
            ridge_box = gr.Checkbox(label="ridge")
            elasticnet_box = gr.Checkbox(label="elasticnet")
            baseline_box = gr.Checkbox(label="baseline-rent", value=True)

        # The former `model_list` construction was removed: it truth-tested the
        # component objects while the layout was being built, so it never
        # reflected what the user ticked, and the list was not passed anywhere.
        # The live checkbox values reach the callback through `inputs` below.

        limit = gr.Slider(
            label="Amount of scraped pages of Immowelt", minimum=1, maximum=10, step=1
        )
        nachtraining = gr.Button("Scrape new data, retrain and evaluate models")
        gr.HTML("<hr>")

        df_results = gr.HTML("")
        output_plot = gr.Plot(visible=False)

        # NOTE(review): the order of `inputs` must match the positional
        # parameters of gradio_retrain_with_added_data (defined in
        # model_functions, not visible here) — verify when changing either side.
        nachtraining.click(
            fn=gradio_retrain_with_added_data,
            inputs=[
                xgb_box,
                ridge_box,
                rf_box,
                elasticnet_box,
                linear_box,
                lasso_box,
                baseline_box,
                limit,
            ],
            outputs=[df_results, output_plot],
        )

# Enable the request queue, then start the server on a fixed local port.
demo.queue(concurrency_count=10).launch(debug=True, server_port=7878)

Running on local URL:  http://127.0.0.1:7878

To create a public link, set `share=True` in `launch()`.


Trying to unpickle estimator DecisionTreeRegressor from version 1.2.2 when using version 1.2.1. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Trying to unpickle estimator RandomForestRegressor from version 1.2.2 when using version 1.2.1. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Extrcated rental price per square meter via scraper: 11.21
Average rental price per sqm: 11.21
Baseline Mae: 285.5564
Baseline R2: 0.5802888162379594
Baseline MSE: 140135.41872988813
Extrcated rental price per square meter via scraper: 11.21
Average rental price per sqm: 11.21
Baseline Mae: 237.83265
Baseline R2: 0.6862148619059796
Baseline MSE: 92491.55243031321
           model_name   mae_val    rmse_val   mae_test   rmse_test
0  Current used model  194.3800  291.790000  145.56000  193.340000
1            Baseline  285.5564  374.346656  237.83265  304.124238
https://www.immowelt.de/liste/wuerzburg/wohnungen/mieten?d=true&r=10&sd=DESC&sf=RELEVANCE&sp=1
started
Retraining data successfully scraped.
Retraining data successfully written to excel under data/retrain_train_data.xslx
Done with raw preprocessing.
old shape of train_recent (108, 47)
Index(['Object_price', 'LivingSpace', 'Rooms', 'ConstructionYear', 'ZipCode',
       'EstateType', 'DistributionType', 'abstellraum', 'altbau_(bis

2023/07/18 12:20:37 INFO mlflow.tracking.fluent: Experiment with name 'retraining_2023-07-18_12-20' does not exist. Creating a new experiment.


XGB------
train(122, 46)
val:(27, 46)
y_train:(122,)
y_val:(27, 1)
[0]	validation_0-rmse:974.13175	validation_0-mae:845.07005
[1]	validation_0-rmse:742.23967	validation_0-mae:616.06518
[2]	validation_0-rmse:592.08262	validation_0-mae:473.14175
[3]	validation_0-rmse:486.97535	validation_0-mae:371.80555
[4]	validation_0-rmse:407.62699	validation_0-mae:299.03856
[5]	validation_0-rmse:359.57764	validation_0-mae:273.65164
[6]	validation_0-rmse:332.34477	validation_0-mae:258.61453
[7]	validation_0-rmse:316.73433	validation_0-mae:248.52826
[8]	validation_0-rmse:311.00892	validation_0-mae:243.21563
[9]	validation_0-rmse:307.14198	validation_0-mae:239.74456
[10]	validation_0-rmse:305.50105	validation_0-mae:236.66515
[11]	validation_0-rmse:301.23373	validation_0-mae:232.35972
[12]	validation_0-rmse:301.10979	validation_0-mae:230.20236
[13]	validation_0-rmse:301.87738	validation_0-mae:228.23814
[14]	validation_0-rmse:304.67461	validation_0-mae:229.11170
[15]	validation_0-rmse:305.53290	validation



Training xgb model done...
---EVALUATION AND LOGGING TO MLFLOW------ xgb



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

Traceback (most recent call last):
  File "c:\Users\FelixNeubauer\anaconda3\envs\py38\lib\site-packages\gradio\routes.py", line 437, in run_predict
    output = await app.get_blocks().process_api(
  File "c:\Users\FelixNeubauer\anaconda3\envs\py38\lib\site-packages\gradio\blocks.py", line 1352, in process_api
    result = await self.call_function(
  File "c:\Users\FelixNeubauer\anaconda3\envs\py38\lib\site-packages\gradio\blocks.py", line 1077, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "c:\Users\FelixNeubauer\anaconda3\envs\py38\lib\site-packages\anyio\to_thread.py", line 28, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(func, *args, cancellable=cancellable,
  File "c:\Users\FelixNeubauer\anaconda3\envs\py38\lib\site-packages\anyio\_backends\_asyncio.py", line 818, in run_sync_in_worker_thread
    return await fut

: 

: 

: 