In [138]:
import kfp

from typing import NamedTuple
import kfp.components as comp
from kfp import compiler, dsl
from kfp import dsl
from kfp.components import InputPath, OutputPath
from kubernetes.client.models import V1EnvVar

In [139]:
def GetThresholds(output_thresholds_path: OutputPath(str)):
    """Write the static anomaly thresholds to a JSON file.

    The document maps ``measurement name -> asset name -> [min, max]``.

    Args:
        output_thresholds_path: Path of the file the JSON document is
            written to.
    """
    # Only the standard library is needed here; the previously imported
    # ``requests`` package was never used by this component.
    import json

    thresholds = {
        "heat_meter":{
            "building1": [0,3],
            "building2": [0,4],
            "datacenter1": [3,5],
            "psnc_garden": [3,5],
            "hvac": [3,5],
            "office":[3,5],
            "flat1": [3,5],
            "eagle": [3,5],
            "altair": [3,5]
        },
        "electricity_meter": {
            "building1": [0,6],
            "building2": [0,6],
            "datacenter1": [3,5],
            "psnc_garden": [3,5],
            "hvac": [3,5],
            "office": [3,5],
            "flat1": [3,5],
            "eagle": [3,5],
            "altair": [3,5]
        }
    }

    with open(output_thresholds_path, "w") as file:
        json.dump(thresholds, file)
    


In [140]:
def GetData(measurement_name: str, min_date: str, max_date: str,url_pilot : str,type_measurement :str,key_measurement : str,
            filter_vars:list , filter_cases: list, output_data_forecast: OutputPath(str)):
    """Download one measurement from the pilot API, filter it and save it as JSON.

    The data is requested one day at a time between ``min_date`` and
    ``max_date``, flattened into a table, restricted to the rows whose ``type``
    equals ``type_measurement`` and then narrowed by every
    ``filter_vars[i]`` / ``filter_cases[i]`` pair. The surviving rows are
    written to ``output_data_forecast`` as a JSON object with the keys
    ``value``, ``time_registered`` and ``asset_name``.

    Args:
        measurement_name: Measurement requested from the API.
        min_date: Start of the period, in any format ``maya.when`` accepts.
        max_date: End of the period, in any format ``maya.when`` accepts.
        url_pilot: Base URL of the pilot API (``/data`` is appended).
        type_measurement: Value the ``type`` column must have to be kept.
        key_measurement: Field holding the value when a record has no
            ``energy`` field.
        filter_vars: Column names to filter on.
        filter_cases: For each entry of ``filter_vars``, the accepted values.
        output_data_forecast: Path of the JSON file that is written.

    Raises:
        ValueError: If a date cannot be parsed, or if no rows are left after
            filtering (a Discord notification is sent first in that case).
    """

    import requests # To REQUIREMENTS
    import json
    import pandas as pd # To REQUIREMENTS
    import maya # To REQUIREMENTS
    from tqdm import tqdm
    from icecream import ic
    from discord_webhook import DiscordWebhook
    from retry import retry # TO REQUIREMENTS

    def GetRequest(url, headers = None, payload = None):
        """GET ``url``; return the decoded JSON body, or a dict with the
        status code and raw text when the body is not valid JSON."""
        response = requests.request("GET", url, headers = headers or {}, data = payload or {})
        try:
            return response.json()
        except ValueError:
            # JSON decoding errors raised by ``response.json()`` are ValueErrors.
            return {
                "status_code": response.status_code,
                "text": response.text
            }

    def ParseDate(text, label):
        """Parse a date with maya, reporting which bound was malformed."""
        try:
            return maya.when(text).datetime()
        except Exception as error:
            # The original built this exception but never raised it, which
            # surfaced later as an unrelated NameError.
            raise ValueError("Please introduce correct time format for {label}".format(label = label)) from error

    def DownloadAssetsData(measurement_name, url_pilot, min_date = "yesterday", max_date = "tomorrow"):
        """Fetch the raw records day by day and return them as one list."""
        datelist = pd.date_range(ParseDate(min_date, "MIN_DATE"), ParseDate(max_date, "MAX_DATE"))

        data_ = []
        # Each request covers the window between two consecutive days.
        for from_obj, to_obj in tqdm(list(zip(datelist[:-1], datelist[1:]))):
            from_ = from_obj.strftime("%Y-%m-%d 00:00:00")
            to_ = to_obj.strftime("%Y-%m-%d 00:00:00")

            url = url_pilot + "/data?measurements={measurement_name}&from={from_}&to={to_}"\
                .format(measurement_name = measurement_name, from_ = from_, to_= to_)
            info_ = GetRequest(url)
            if isinstance(info_, list):
                data_.extend(info_)
            elif isinstance(info_, dict):
                # A dict means the API answered with an error payload.
                print("Error")
                print(from_)
                print(to_)
        return data_

    def DataFrameAssests(list_data, name_field):
        """Flatten the raw records into a DataFrame, one row per record."""
        # (output column, tag name) pairs that default to "None" when absent.
        optional_tags = (
            ("measurement_type", "measurement_type"),
            ("direction", "direction"),
            ("domain", "domain"),
            ("id_sensor", "sensor_id"),
            ("interpolation", "interpolation_method"),
        )

        dicts = []
        for data in list_data:
            fields = data["fields"]
            tags = data["tags"]
            name_value = "energy" if "energy" in fields else name_field
            try:
                value = float(fields[name_value])
            except (TypeError, ValueError):
                # Skip readings whose value is null or not numeric.
                continue

            dict_ = {
                "asset_name": tags["asset_name"],
                "value": value,
                "ds": fields["time"]
            }

            # The type tag exists under two spellings; prefer ``type_data``.
            if "type_data" in tags:
                dict_["type"] = tags["type_data"]
            else:
                dict_["type"] = tags.get("typeData", "None")

            for column, tag in optional_tags:
                dict_[column] = tags.get(tag, "None")

            dicts.append(dict_)
        return pd.DataFrame(dicts)

    @retry(tries= 3)
    def DownloadAndProcess(measurement_name, url_pilot, min_date, max_date, key_measurement):
        """Download and flatten the data, retrying up to three times on error."""
        list_ = DownloadAssetsData(measurement_name, url_pilot,min_date = min_date, max_date = max_date)
        return DataFrameAssests(list_, key_measurement)

    def FilterCases(var_value, filter_cases):
        """Tell whether ``var_value`` is one of the accepted ``filter_cases``."""
        return var_value in filter_cases

    data = DownloadAndProcess(measurement_name, url_pilot, min_date, max_date, key_measurement)
    ic(data.shape[0])

    # A download without records yields a frame with no columns at all; skip
    # the filters so the "no data" notification below is still reached.
    if not data.empty:
        data = data[data["type"] == type_measurement]

        for i in range(len(filter_vars)):
            # Build a mask instead of writing a helper column onto a slice.
            keep = data[filter_vars[i]].apply(FilterCases, filter_cases = filter_cases[i]).astype(bool)
            data = data[keep]

    ic(data.shape[0])

    if data.shape[0] == 0:
        # NOTE(review): this webhook URL is a credential committed to source;
        # consider loading it from a secret or environment variable.
        url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
        message = "Not enough data for {measurement_name}".format(measurement_name = measurement_name)
        webhook = DiscordWebhook(url = url_disc, content = message)
        webhook.execute()

        raise ValueError("Void data to forecast")

    data_output = {
        "value": data["value"].tolist(),
        "time_registered": data["ds"].tolist(),
        "asset_name": data["asset_name"].tolist()
    }

    with open(output_data_forecast, "w") as file:
        json.dump(data_output, file)

In [141]:
def ProcessData(input_data_path: InputPath(str), hourly_aggregate, minute_aggregate ,min_date, max_date, output_data_forecast: OutputPath(str)):
    """Resample the downloaded data onto a regular time grid, per asset.

    The input JSON (keys ``value``, ``time_registered``, ``asset_name``) is
    aggregated into hourly slots when ``hourly_aggregate`` is one of
    ``"mean"``, ``"sum"`` or ``"max"``; otherwise into one-minute slots when
    ``minute_aggregate`` is one of those values; otherwise it is passed through
    unchanged. Every slot of every day between ``min_date`` and ``max_date``
    is emitted for every asset.

    When ``minute_aggregate == "max"`` the series is handled as a running
    counter: empty slots repeat the last seen value, the raw series is kept in
    ``value_cummulative`` and ``value`` becomes the increase since the previous
    slot (0 for the first slot). For any other setting empty slots get 0.

    Args:
        input_data_path: Path of the JSON file to read.
        hourly_aggregate: Aggregation used for hourly slots, or any other
            value to disable hourly resampling.
        minute_aggregate: Aggregation used for minute slots; ``"max"`` also
            enables the counter handling described above.
        min_date: First day of the grid, in any format ``maya.when`` accepts.
        max_date: Last day of the grid, in any format ``maya.when`` accepts.
        output_data_forecast: Path of the JSON file that is written.
    """

    import maya
    from datetime import datetime
    import json
    import pandas as pd
    from icecream import ic

    min_date = datetime.strftime(maya.when(min_date).datetime(), "%Y-%m-%d")
    max_date = datetime.strftime(maya.when(max_date).datetime(), "%Y-%m-%d")

    ic(hourly_aggregate)

    with open(input_data_path) as file:
        data_str = json.load(file)

    data = pd.DataFrame(data_str)

    def ResampleAssets(aggregate, by_minute):
        """Aggregate every asset into hourly or one-minute slots and fill gaps."""
        slot_format = "%Y-%m-%d %H:%M:00" if by_minute else "%Y-%m-%d %H:00:00"
        minutes = range(60) if by_minute else (0,)
        # NOTE(review): the counter handling is keyed on ``minute_aggregate``
        # even for hourly resampling, as in the original code - confirm this
        # should not depend on ``hourly_aggregate`` instead.
        cumulative = minute_aggregate == "max"

        def ToSlot(str_):
            return datetime.strftime(maya.parse(str_).datetime(), slot_format)

        # The grid is identical for every asset, so build it once.
        slots = [
            "{date} {H:02d}:{M:02d}:00".format(date = day.strftime("%Y-%m-%d"), H = hour, M = minute)
            for day in pd.date_range(min_date, max_date)
            for hour in range(24)
            for minute in minutes
        ]

        frames = []
        records = []
        for asset_name in pd.unique(data.asset_name):
            asset_data = data[data.asset_name == asset_name]
            grouped = (asset_data
                .assign(slot = asset_data["time_registered"].apply(ToSlot))
                .groupby("slot")["value"]
                .agg(aggregate)
            )
            aggregated = grouped.tolist()
            # Dictionary lookup replaces a full scan of the frame per slot.
            value_by_slot = dict(zip(grouped.index.tolist(), aggregated))
            last_value = aggregated[0]

            asset_records = []
            for slot in slots:
                if slot in value_by_slot:
                    value_ = value_by_slot[slot]
                    if cumulative:
                        last_value = value_
                elif cumulative:
                    value_ = last_value
                else:
                    value_ = 0
                asset_records.append({
                    "time_registered": slot,
                    "value": value_,
                    "asset_name": asset_name
                })

            if cumulative:
                frame = pd.DataFrame(asset_records)
                # Keep the raw counter; the hourly path used to omit this
                # column, which made the export below fail with a KeyError.
                frame["value_cummulative"] = frame["value"].copy()
                frame["value"] = (frame["value"] - frame["value"].shift(1)).fillna(0)
                frames.append(frame)
            else:
                records.extend(asset_records)

        if cumulative:
            return pd.concat(frames, ignore_index = True)
        return pd.DataFrame(records)

    if hourly_aggregate in ["mean","sum", "max"]:
        output_data = ResampleAssets(hourly_aggregate, by_minute = False)
    elif minute_aggregate in ["max", "sum", "mean"]:
        output_data = ResampleAssets(minute_aggregate, by_minute = True)
    else:
        output_data = data

    ic(output_data)

    data_output = {
        "value": output_data["value"].tolist(),
        "time_registered": output_data["time_registered"].tolist(),
        "asset_name": output_data["asset_name"].tolist()
    }

    if minute_aggregate == "max":
        data_output["value_cummulative"] = output_data["value_cummulative"].tolist()

    with open(output_data_forecast, "w") as file:
        json.dump(data_output, file)



In [142]:
def ForecastProcess(input_data_path: InputPath(str),input_thresholds_path : InputPath(str), measurement_name,
    path_minio,
    access_key,
    secret_key,
    mode,
    url_pilot,
    diff_time,
    pilot_name,
    send_forecast,
    mlpipeline_metrics_path: OutputPath('Metrics')
    ):

    import maya
    from discord_webhook import DiscordWebhook
    import json
    from icecream import ic
    import requests
    import pandas as pd
    from prophet.serialize import model_to_json
    from minio import Minio
    import boto3
    from tqdm import tqdm
    from sklearn.metrics import mean_absolute_error, r2_score
    from datetime import datetime

    with open(input_thresholds_path) as file:
        thresholds = json.load(file)

    try:
        client = Minio(
            path_minio,
            access_key=access_key,
            secret_key=secret_key,
            secure = False
        )

        list_objects = client.list_objects("test")
        for obj_ in list_objects:
            ic(obj_._object_name)
    except:
        url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
        message = "Cannot access minio server correctly - read data."
        webhook = DiscordWebhook(url = url_disc, content = message)
        webhook.execute()
    
    measures_per_hour = int(60/int(diff_time))
    time_prediction = maya.now().epoch

    def GetDateInfo(ds_str, time_value):
        """Parse ``ds_str`` with maya and return one of its date parts as text.

        ``time_value`` selects the part: ``"year"``, ``"month"``, ``"weekday"``
        or ``"hour"``. Any other selector yields ``None``.
        """
        parsed = maya.parse(ds_str)
        if time_value not in ("year", "month", "weekday", "hour"):
            return None
        return str(getattr(parsed, time_value))
        
    def ManageData(data_ds):
        """Add calendar and lag features to ``data_ds`` in place and return it.

        The ``year``, ``month``, ``weekday`` and ``hour`` columns are derived
        from ``ds``; a calendar column with at most two distinct values is
        replaced by a boolean flag marking its first distinct value. The
        ``prev_val_1`` .. ``prev_val_5`` columns hold ``y`` shifted by 1 to 5
        rows.
        """
        for part in ["year", "month", "weekday", "hour"]:
            data_ds[part] = data_ds["ds"].apply(GetDateInfo, time_value = part)

        for part in ["year", "month", "weekday", "hour"]:
            distinct = pd.unique(data_ds[part])
            if len(distinct) <= 2:
                data_ds[part] = (data_ds[part] == distinct[0])

        for lag in range(1, 6):
            data_ds["prev_val_{lag}".format(lag = lag)] = data_ds["y"].shift(lag)

        return data_ds



    def ForecastData(data, asset_name, measurement_name, metrics_list, measures_per_hour, diff_time, mode = "no notifications"):
        """Train a model for one asset and return its forecast rows.

        Series with at least 10 distinct values are fitted with Prophet: the
        last two days are held out for the test score, three days past the
        training set are predicted, and the serialized model is uploaded to
        MinIO (falling back to the S3 endpoint when that fails). Other series
        are fitted with a CatBoost classifier on calendar and lag features and
        rolled forward one step at a time for 24 hours.

        Args:
            data: Frame with ``time_registered``, ``value`` and ``asset_name``
                columns and, optionally, ``value_cummulative``.
            asset_name: Asset whose rows are forecast.
            measurement_name: Measurement name, used in file names and messages.
            metrics_list: List the metrics of this asset are appended to.
            measures_per_hour: Number of rows per hour in ``data``.
            diff_time: Minutes between two consecutive rows.
            mode: Unused; kept for compatibility with existing callers.

        Returns:
            Tuple ``(forecast, metrics_list)`` where ``forecast`` holds only the
            rows whose ``ds`` is later than the last observed timestamp.
        """
        # NOTE(review): the webhook URL and the S3 keys used below are
        # credentials committed to source; consider loading them from secrets.
        url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"

        def Notify(message):
            """Post ``message`` to the Discord channel."""
            DiscordWebhook(url = url_disc, content = message).execute()

        asset_rows = data[data.asset_name == asset_name]
        # Work on a copy: feature columns are added to this frame later on.
        data_ds = asset_rows[["time_registered", "value"]].copy()
        try:
            last_cummulative_value = asset_rows["value_cummulative"].tolist()[-1]
        except (KeyError, IndexError):
            # Column missing (non-cumulative data) or no rows for this asset.
            last_cummulative_value = 0
        data_ds.columns = ["ds", "y"]
        max_date = max(data_ds.ds)
        ic(data_ds.shape[0])

        if len(pd.unique(data_ds.y)) >= 10:
            rows_per_day = 24 * measures_per_hour
            # Two held-out days plus one day of real future.
            horizon = 3 * rows_per_day

            n_train = int(data_ds.shape[0] - 2 * rows_per_day)
            train_data = data_ds[0:n_train]
            test_data = data_ds[n_train:]

            from prophet import Prophet

            m = Prophet(daily_seasonality=True, weekly_seasonality=True, changepoint_prior_scale = 0.05)
            m.fit(train_data)
            future = m.make_future_dataframe(periods= horizon , freq="{minutes}T".format(minutes = diff_time))
            forecast = m.predict(future)

            yhat = forecast["yhat"].tolist()
            forecast_test = yhat[-horizon:-rows_per_day]
            try:
                # Everything before the horizon is the in-sample fit. The slice
                # used to assume 4 measures per hour regardless of the data.
                r2_score_train = r2_score(train_data["y"].tolist(), yhat[:-horizon])
            except ValueError:
                r2_score_train = 0

            real_vals_test = test_data["y"].tolist()

            if len(forecast_test) == len(real_vals_test):
                r2_score_test = r2_score(real_vals_test, forecast_test)
                metrics = {
                    'metrics': [
                        {
                        'name': 'r2_score_test',
                        'numberValue':  float(r2_score_test),
                        'format': "PERCENTAGE"
                        },
                        {
                            'name': 'r2_score_train',
                            "numberValue": float(r2_score_train),
                            "format": "PERCENTAGE"
                        },
                        {
                            # NOTE(review): named "asset_number" but carries the
                            # asset name - confirm what consumers expect.
                            "name": "asset_number",
                            "numberValue": asset_name,
                            "format": "RAW"
                        }
                    ]}

                metrics_list.append(metrics)
            else:
                ic(len(forecast_test))
                ic(len(real_vals_test))

            with open("/tmp/model_prophet.json", 'w') as fout:
                fout.write(model_to_json(m))  # Save model

            f_name = "model_{domain}_{asset}.json"\
                .format(domain = measurement_name, asset = asset_name)
            try:
                result = client.fput_object(
                    "test", f_name, "/tmp/model_prophet.json"
                )

                print(
                    "created {0} object; etag: {1}, version-id: {2}".format(
                        result.object_name, result.etag, result.version_id,
                    ),
                )
            except Exception:
                # MinIO is unavailable (or was never initialised): report it
                # and store the model on the fallback S3 endpoint instead.
                Notify("Model not saved for {measurement_name} - {asset_name}".format(measurement_name = measurement_name, asset_name = asset_name))

                s3 = boto3.resource(
                    service_name='s3',
                    aws_access_key_id='QyvycO9kc2cm58K8',
                    aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
                    endpoint_url='https://s3.tebi.io'
                )

                for bucket in s3.buckets.all():
                    ic(bucket.name)

                # Upload a new file
                f_name = "model_{pilot}_{domain}_{asset}.json"\
                .format(pilot = pilot_name,domain = measurement_name, asset = asset_name)
                with open('/tmp/model_prophet.json', 'rb') as model_file:
                    s3.Bucket('test-pf').put_object(Key=f_name, Body=model_file)

                Notify("Model sent to tebi for {measurement_name} - {asset_name}".format(measurement_name = measurement_name, asset_name = asset_name))

        else:
            from catboost import CatBoostClassifier, Pool
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import accuracy_score

            # NOTE(review): the prev_val_* lags are floats (NaN in the first
            # rows) yet declared categorical, and the rolled-forward rows use
            # integer calendar values while training uses strings/booleans -
            # confirm CatBoost accepts both.
            cat_features = ["year", "month", "weekday", "hour", "prev_val_1",
                            "prev_val_2","prev_val_3","prev_val_4","prev_val_5"]

            data_ds = ManageData(data_ds)

            X_train, X_test, Y_train, Y_test = train_test_split(data_ds.drop(["y", "ds"], axis = 1), data_ds.y, test_size = 0.2)

            train_pool = Pool(data = X_train, label = Y_train, cat_features = cat_features)
            test_pool = Pool(data = X_test, label = Y_test, cat_features = cat_features)

            catboost_model = CatBoostClassifier(
                iterations = 20,
                learning_rate = 0.5,
                depth = 12
            )

            catboost_model.fit(train_pool)

            yhat_test = catboost_model.predict(test_pool)
            yhat_train = catboost_model.predict(train_pool)

            # Each prediction set is scored against its own labels.
            accuracy_score_train = accuracy_score(Y_train, yhat_train)
            accuracy_score_test = accuracy_score(Y_test, yhat_test)

            data_ds["yhat"] = last_cummulative_value

            step_minutes = int(diff_time)
            # Each predicted row becomes the input of the next step.
            for _ in range(24 * measures_per_hour):
                last_row = data_ds.iloc[-1]
                ds_obj = maya.parse(last_row["ds"]).add(minutes = step_minutes)
                dict_input = [{
                    "year": ds_obj.year,
                    "month": ds_obj.month,
                    "weekday": ds_obj.weekday,
                    "hour": ds_obj.hour,
                    "prev_val_1": last_row["y"],
                    "prev_val_2": last_row["prev_val_1"],
                    "prev_val_3": last_row["prev_val_2"],
                    "prev_val_4": last_row["prev_val_3"],
                    "prev_val_5": last_row["prev_val_4"]
                }]
                pred_pool = Pool(data = pd.DataFrame(dict_input), cat_features = cat_features)
                pred_value = catboost_model.predict(pred_pool)[0]
                dict_input[0]["y"] = pred_value
                dict_input[0]["ds"] = datetime.strftime(ds_obj.datetime(), "%Y-%m-%d %H:%M:%S")
                # Running total: previous total plus the predicted increment.
                dict_input[0]["yhat"] = last_row["yhat"] + float(pred_value)
                data_ds = pd.concat([data_ds, pd.DataFrame(dict_input)],ignore_index = True)

            forecast = data_ds

            metrics = {
                'metrics': [
                    {
                    'name': 'accuracy_train',
                    'numberValue':  float(accuracy_score_train),
                    'format': "PERCENTAGE"
                    },
                    {
                        'name': 'accuracy_test',
                        "numberValue": float(accuracy_score_test),
                        "format": "PERCENTAGE"
                    },
                    {
                        "name": "asset_number",
                        "numberValue": asset_name,
                        "format": "RAW"
                    }
                ]}

            metrics_list.append(metrics)

            Notify("Model accuracy {asset_name}: train -> {accuracy_train}, test -> {accuracy_test}".format(asset_name = asset_name, accuracy_train = accuracy_score_train, accuracy_test = accuracy_score_test))

        return forecast[forecast.ds > max_date], metrics_list


    if measurement_name == "electricity_meter":
        domain_ = "electricity"
    elif measurement_name == "heat_meter":
        domain_ = "heat"
    else:
        url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
        message = "There is a problem with the measurement_name, {measurement_name}".format(measurement_name = measurement_name)
        webhook = DiscordWebhook(url = url_disc, content = message)
        webhook.execute()
        domain_ = "electricity"
        # raise ValueError

    asset_list_dict = {
        "electricity_meter": ["building1", "building2", "pv_panel_1", "wind_farm_1"],
        "heat_meter": ["building1", "building2", "solar_collector1"],
        "energy_meter": ["psnc", "office"],
        "pv": ["office", "de-nieuwe-dokken-pv-0198-xxxxx853","de-nieuwe-dokken-pv-0198-xxxxx4B4","de-nieuwe-dokken-pv-0198-xxxxx9C0", 
        "de-nieuwe-dokken-pv-017A-xxxxx9A1" ]
    }


    asset_list = asset_list_dict[measurement_name]

    with open(input_data_path) as file:
        data_str = json.load(file)
    
    data = pd.DataFrame(data_str)
    metrics_list = []
    for asset_name in asset_list:

        ic(asset_name)
        # get notification code for anomaly high and low

        url_notification_definition = "http://api-swagger-ren-prototype.apps.paas-dev.psnc.pl/api/notification/definition"
        payload={}
        headers = {}

        response = requests.request("GET", url_notification_definition, headers=headers, data=payload)
        try:
            dict_notifications = response.json()
        except:
            dict_notifications = []

        for notif in dict_notifications:
            if notif["message"] == "message.anomaly.high":
                code_high = notif["code"]
            elif notif["message"] == "message.anomaly.low":
                code_low = notif["code"]
        
        # get asset_id for asset_name

        url_asset_name = "http://api-swagger-ren-prototype.apps.paas-dev.psnc.pl/api/assets?name={asset_name}".format(asset_name = asset_name)
        try:
            response = requests.request("GET", url_asset_name, headers=headers, data=payload)
            dict_asset = response.json()[0]
            id_asset = dict_asset["id"]
        except:
            id_asset = -1

        id_dashboard = 1

        try:
            threshold_min = thresholds[measurement_name][asset_name][0]
        except:
            threshold_min = 0
        
        try:
            threshold_max = thresholds[measurement_name][asset_name][1]
        except:
            threshold_max = 10000

        try:
            forecasted_data, metrics_list = ForecastData(data, asset_name, measurement_name, metrics_list, measures_per_hour, diff_time)
            max_ds = forecasted_data["ds"].tolist()[-1]
        except:
            forecasted_dict = {
                "ds": [],
                "yhat": []
            }
            forecasted_data = pd.DataFrame(forecasted_dict)
            metrics_list = []
        
        
        try:
            forecasted_data.to_csv('/tmp/forecast_test_{asset_name}.csv'.format(asset_name = asset_name), index = False)
            data_to_send = open('/tmp/forecast_test_{asset_name}.csv'.format(asset_name = asset_name), 'rb')
            f_name = "forecast_test_{pilot}_{asset_name}.csv".format(pilot = pilot_name, asset_name = asset_name)
            s3 = boto3.resource(
                service_name='s3',
                aws_access_key_id='QyvycO9kc2cm58K8',
                aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
                endpoint_url='https://s3.tebi.io'
            )
            s3.Bucket('test-pf').put_object(Key=f_name, Body=data_to_send)
            url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
            message = "Data File: {f_name} Saved to Tebi".format(f_name = f_name)
            webhook = DiscordWebhook(url = url_disc, content = message)
            webhook.execute()
        except:
            url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
            message = "Unable to save data to tebi"
            webhook = DiscordWebhook(url = url_disc, content = message)
            webhook.execute()

            message = "Values for {asset_name}: {list_values}".format(asset_name = asset_name,list_values = forecasted_data.yhat.tolist())
            webhook = DiscordWebhook(url = url_disc, content = message)
            webhook.execute()
        for index, row in tqdm(forecasted_data.iterrows(), total = forecasted_data.shape[0]):
            time_ = str(row["ds"])
            time_epoch = maya.parse(time_).epoch
            value = row["yhat"]

            if asset_name in ["building1", "building2", "psnc", "office"]:
                direction_energy = "in"
                type_ = "None"
            elif measurement_name == "pv":
                direction_energy = "out"
                type_ = "renewable"
            elif measurement_name == "office":
                direction_energy = "in"
                type_ = "None"
            else:
                direction_energy = "out"
                type_ = "renewable"
            data_post = {
                    "bucket": "renergetic",
                    "measurement": measurement_name,
                    "fields":{
                        "energy": value,
                        "time": time_,
                    },
                    "tags":{
                        "domain": domain_,
                        "typeData": "forecasting",
                        "direction": direction_energy,
                        "prediction_window": "24h",
                        "asset_name": asset_name,
                        "measurement_type": type_,
                        "time_prediction": time_prediction
                    }
                }

            # SEND NOTIFICATION
            if mode == "none":
                if value < threshold_min:
                    date_from = maya.parse(time_).epoch
                    mode = "lower"

                    if time_ == max_ds:
                        date_to = maya.parse(time_).add(minutes = 15).epoch
                        dict_post = {
                            "notification_code": code_low,
                            "date_from": date_from,
                            "date_to": date_to,
                            "asset_id": id_asset,
                            "dashboard_id": id_dashboard
                        }
                        url = "http://api-swagger-ren-prototype.apps.paas-dev.psnc.pl/api/notification"
                        headers = {
                            "Content-Type": "application/json"
                        }
                        response = requests.request("POST", url, headers=headers, data=json.dumps(dict_post))
                        status_code = response.status_code

                        url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
                        message = "Anomaly detect between {date_from} and {date_to} to asset {asset_name}. Response of Notification {status_code}".\
                            format(date_from = date_from, date_to = date_to, status_code = status_code, asset_name = asset_name)
                        webhook = DiscordWebhook(url = url_disc, content = message)
                        webhook.execute()
                
                if value > threshold_max:
                    date_from = maya.parse(time_).epoch
                    mode = "upper"

                    if time_ == max_ds:
                        date_to = maya.parse(time_).add(minutes = 15).epoch
                        dict_post = {
                            "notification_code": code_low,
                            "date_from": date_from,
                            "date_to": date_to,
                            "asset_id": id_asset,
                            "dashboard_id": id_dashboard
                        }
                        url = "http://api-swagger-ren-prototype.apps.paas-dev.psnc.pl/api/notification"
                        headers = {
                            "Content-Type": "application/json"
                        }
                        response = requests.request("POST", url, headers=headers, data=json.dumps(dict_post))
                        status_code = response.status_code

                        url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
                        message = "Anomaly detect between {date_from} and {date_to} to asset {asset_name}. Response of Notification {status_code}".\
                            format(date_from = date_from, date_to = date_to, status_code = status_code, asset_name = asset_name)
                        webhook = DiscordWebhook(url = url_disc, content = message)
                        webhook.execute()
                
            elif mode == "lower":
                if value > threshold_min or time_ == max_ds:
                    date_to = maya.parse(time_).epoch
                    dict_post = {
                        "notification_code": code_low,
                        "date_from": date_from,
                        "date_to": date_to,
                        "asset_id": id_asset,
                        "dashboard_id": id_dashboard
                    }

                    url = "http://api-swagger-ren-prototype.apps.paas-dev.psnc.pl/api/notification"
                    headers = {
                        "Content-Type": "application/json"
                    }
                    response = requests.request("POST", url, headers=headers, data=json.dumps(dict_post))
                    status_code = response.status_code

                    url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
                    message = "Anomaly detect between {date_from} and {date_to} to asset {asset_name}. Response of Notification {status_code}".\
                        format(date_from = date_from, date_to = date_to, status_code = status_code, asset_name = asset_name)
                    webhook = DiscordWebhook(url = url_disc, content = message)
                    webhook.execute()

                    if value > threshold_max:
                        mode = "upper"
                        date_from = maya.parse(time_).epoch
                    else:
                        mode = "none"
            
            elif (mode == "upper" or time_ == max_ds) and (mode != "no notification"):
                if value < threshold_max:
                    date_to = maya.parse(time_).epoch
                    dict_post = {
                        "notification_code": code_high,
                        "date_from": date_from*1000,
                        "date_to": date_to*1000,
                        "asset_id": id_asset,
                        "dashboard_id": id_dashboard
                    }

                    if value < threshold_min:
                        date_from = maya.parse(time_).epoch
                        mode = "lower"
                    else:
                        mode = "none"

                    url = "http://api-swagger-ren-prototype.apps.paas-dev.psnc.pl/api/notification"
                    headers = {
                        "Content-Type": "application/json"
                    }
                    response = requests.request("POST", url, headers=headers, data=json.dumps(dict_post))
                    status_code = response.status_code

                    url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
                    message = "Anomaly detect between {date_from} and {date_to} to asset {asset_name}. Response of Notification {status_code}".\
                        format(date_from = date_from, date_to = date_to, status_code = status_code, asset_name = asset_name)
                    webhook = DiscordWebhook(url = url_disc, content = message)
                    webhook.execute()


                

            # url = "http://influx-api-ren-prototype.apps.paas-dev.psnc.pl/api/measurement"
            url = url_pilot

            headers = {
                "Content-Type": "application/json"
            }

            try:
                if send_forecast == "yes":
                    response = requests.request("POST", url, headers=headers, data=json.dumps(data_post))
                    status_code = response.status_code
                else:
                    status_code = 200
            except:
                url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
                message = "Error in updating value for measurement name: {measurement_name} in asset: {asset_name} in time {time_pred}"\
                    .format(measurement_name = "electricity_meter", asset_name = asset_name, time_pred = data_post["fields"]["time"])
                webhook = DiscordWebhook(url = url_disc, content = message)
                webhook.execute()
                status_code = 200
            
            if status_code > 299:
                url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
                message = "Error in sending the value for measurement name: {measurement_name} in asset: {asset_name} in time {time_pred}"\
                    .format(measurement_name = measurement_name, asset_name = asset_name, time_pred = data_post["fields"]["time"])
                webhook = DiscordWebhook(url = url_disc, content = message)
                webhook.execute()

    with open("/tmp/metrics_{domain}.json".format(domain = domain_), "w") as file:
        json.dump(metrics_list, file)
    
    

    s3 = boto3.resource(
                service_name='s3',
                aws_access_key_id='QyvycO9kc2cm58K8',
                aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
                endpoint_url='https://s3.tebi.io'
            )
    data = open("/tmp/metrics_{domain}.json".format(domain = domain_), 'rb')
    f_name = "metrics_{domain}_latest.json"\
    .format(domain = measurement_name)
    s3.Bucket('test-pf').put_object(Key=f_name, Body=data)
    


    url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
    message = "Forecasting done for {domain}".format(domain = domain_)
    webhook = DiscordWebhook(url = url_disc, content = message)
    webhook.execute()


In [143]:
def ExportModelToMinio(input_model_path: InputPath(str),measurement_name, 
    path_minio = "minio.kubeflow-renergetic.svc:9000",
    access_key = "minio",
    secret_key = "DaTkKc45Hxr1YLR4LxR2xJP2"
    ):
    """Load a JSON-serialized model from disk and open a MinIO client.

    Args:
        input_model_path: Path of the JSON file holding the serialized model.
        measurement_name: Not used by the current body.
        path_minio: MinIO endpoint passed to the ``Minio`` constructor.
        access_key: Access key passed to the ``Minio`` constructor.
        secret_key: Secret key passed to the ``Minio`` constructor.

    Returns:
        None.
    """
    # NOTE(review): the credentials above are hardcoded as parameter defaults;
    # consider supplying them from the environment or a secret store instead.

    from minio import Minio
    import json

    # json.load() requires the open file object; the previous call passed no
    # argument and therefore raised TypeError on every invocation.
    with open(input_model_path) as file:
        model_serialized = json.load(file)

    client = Minio(
        path_minio,
        access_key=access_key,
        secret_key=secret_key,
    )
    # TODO: neither `model_serialized` nor `client` is used after this point,
    # so nothing is uploaded yet.


In [144]:
def REN_Forecast_Test_Pipeline(url_pilot,
    diff_time:int,
    filter_vars = [],
    filter_case = [],
    url = "minio-kubeflow-renergetic.apps.dcw1-test.paas.psnc.pl",
    access_key="minio",
    secret_key="DaTkKc45Hxr1YLR4LxR2xJP2",
    min_date = "yesterday",
    max_date = "today",
    mode = "no notifications",
    list_measurements = ["electricity_meter", "heat_meter"],
    key_measurement = "energy",
    type_measurement = "simulation",
    pilot_name = "Virtual",
    hourly_aggregate = "no",
    minute_aggregate = "no",
    send_forecast = "yes"
    ):
    # Pipeline definition: one thresholds task, then for every entry of
    # `list_measurements` a download -> process -> forecast chain.
    #
    # Plain comments are used instead of a docstring on purpose, so the
    # function's __doc__ stays exactly as it was.

    # Environment variable attached to the download and forecast tasks.
    home_env = V1EnvVar(name='HOME', value='/tmp')

    # --- Component factories (built in the same order as before) ---
    fetch_packages = ["requests", "numpy", "maya", "pandas", "icecream", "tqdm", "discord-webhook", "retry"]
    fetch_op = comp.create_component_from_func(
        GetData,
        packages_to_install = fetch_packages,
        output_component_file = "download_data_op_component.yaml",
    )

    thresholds_op = comp.create_component_from_func(
        GetThresholds,
        packages_to_install = ["requests"],
        output_component_file = "thresholds_component.yaml",
    )

    prepare_packages = ["maya", "pandas", "icecream"]
    prepare_op = comp.create_component_from_func(
        ProcessData,
        packages_to_install = prepare_packages,
        output_component_file = "process_data_op_component.yaml",
    )

    predict_packages = [
        "requests", "numpy", "maya", "pandas", "icecream", "prophet",
        "discord-webhook", "tqdm", "minio", "boto3", "scikit-learn",
    ]
    predict_op = comp.create_component_from_func(
        ForecastProcess,
        packages_to_install = predict_packages,
        output_component_file = "forecast_data_op_component.yaml",
    )

    # --- Tasks ---
    # The thresholds task is created once, outside the loop, and its output is
    # handed to every forecast task.
    thresholds_task = thresholds_op()

    with dsl.ParallelFor(list_measurements) as measurement:
        # Download step, with explicit memory/CPU requests and limits.
        fetch_task = fetch_op(
            measurement, min_date, max_date, url_pilot,
            type_measurement, key_measurement, filter_vars, filter_case,
        )
        fetch_task = (
            fetch_task
            .add_env_variable(home_env)
            .set_memory_request('500M')
            .set_memory_limit('1Gi')
            .set_cpu_request('1')
            .set_cpu_limit('2')
        )

        # NOTE(review): the notebook's recorded output for this cell is
        # "TypeError: ProcessData() missing 1 required positional argument:
        # 'max_date'" -- verify this argument list against ProcessData's
        # signature (not visible from here).
        prepare_task = prepare_op(
            fetch_task.output,
            hourly_aggregate,
            minute_aggregate,
            min_date,
            max_date,
        )

        # NOTE(review): this `if` is evaluated while the pipeline function
        # itself runs, not inside a task; presumably `hourly_aggregate` is a
        # pipeline-parameter placeholder at that point rather than the runtime
        # string -- TODO confirm the override behaves as intended.
        if hourly_aggregate in ["sum", "mean"]:
            diff_time = 60

        predict_task = predict_op(
            prepare_task.output,
            thresholds_task.output,
            measurement,
            url,
            access_key,
            secret_key,
            mode,
            url_pilot,
            diff_time,
            pilot_name,
            send_forecast,
        )
        forecast_task = predict_task.add_env_variable(home_env)
                       
    
# Compile the pipeline function and write the resulting package to
# "Forecast_Data_Pipeline.yaml".
compiler.Compiler().compile(
    pipeline_func=REN_Forecast_Test_Pipeline,
    package_path="Forecast_Data_Pipeline.yaml",
)
    

    
    



TypeError: ProcessData() missing 1 required positional argument: 'max_date'