In [100]:
import kfp

from typing import NamedTuple
import kfp.components as comp
from kfp import compiler, dsl
from kfp import dsl
from kfp.components import InputPath, OutputPath
from kubernetes.client.models import V1EnvVar

import time

### Contents
1. [Get Thresholds](#Thresholds)
2. [Download Data](#Download)
3. [Process Data](#Process)
4. [Forecast with previous model](#Forecast_Previous)

# Thresholds

In [101]:
def GetThresholds(url_pilot: str, name_pilot: str,output_thresholds_path: OutputPath(str)):
    """Download the thresholds of a pilot and store them as a JSON file.

    Args:
        url_pilot: Base URL of the pilot API. Ignored when ``name_pilot`` is
            ``"Virtual"``, in which case a fixed prototype endpoint is used.
        name_pilot: Name of the pilot; ``"Virtual"`` selects the prototype API.
        output_thresholds_path: Path where the downloaded payload is written as
            JSON. If the request itself fails, an empty object (``{}``) is
            written so the file always exists.
    """

    import requests
    import json

    def GetRequest(url, headers = None, payload = None):
        """GET ``url`` with TLS verification disabled and return the parsed body.

        Returns the decoded JSON body, or a dict with ``status_code`` and
        ``text`` when the body is not valid JSON.
        """

        from urllib3.exceptions import InsecureRequestWarning
        import warnings
        import contextlib

        # Fresh containers per call instead of shared mutable defaults.
        headers = {} if headers is None else headers
        payload = {} if payload is None else payload

        old_merge_environment_settings = requests.Session.merge_environment_settings

        @contextlib.contextmanager
        def no_ssl_verification():
            opened_adapters = set()

            def merge_environment_settings(self, url, proxies, stream, verify, cert):
                # Verification happens only once per connection so we need to close
                # all the opened adapters once we're done. Otherwise, the effects of
                # verify=False persist beyond the end of this context manager.
                opened_adapters.add(self.get_adapter(url))

                settings = old_merge_environment_settings(self, url, proxies, stream, verify, cert)
                settings['verify'] = False

                return settings

            requests.Session.merge_environment_settings = merge_environment_settings

            try:
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore', InsecureRequestWarning)
                    yield
            finally:
                requests.Session.merge_environment_settings = old_merge_environment_settings

                for adapter in opened_adapters:
                    try:
                        adapter.close()
                    except Exception:
                        # Best effort: a failing close must not mask the request result.
                        pass

        with no_ssl_verification():
            response = requests.request("GET", url, headers = headers, data = payload)

        try:
            return response.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses; hand back the raw answer.
            return {
                "status_code": response.status_code,
                "text": response.text
            }

    if name_pilot != "Virtual":
        url_ = "{url_pilot}/api-postgre/1.0/api/threshold".format(
            url_pilot = url_pilot
        )
    else:
        url_ = "http://api-ren-prototype.apps.paas-dev.psnc.pl/api/threshold"

    try:
        thresholds = GetRequest(url_)
    except Exception as error:
        # Keep the step best-effort, but leave a trace of why no thresholds were written.
        print("Could not download thresholds from {url}: {error}".format(url = url_, error = error))
        thresholds = {}

    with open(output_thresholds_path, "w") as file:
        json.dump(thresholds, file)
    


# Download

In [102]:
def GetData(measurement_name: str, min_date: str, max_date: str,url_pilot : str, pilot_name:str, type_measurement :str,key_measurement : str,
            filter_vars:list , filter_cases: list, output_data_forecast: OutputPath(str), output_data_metric : OutputPath(str)):
    """Download the measurements of a pilot, filter them and write two JSON files.

    Args:
        measurement_name: Measurement requested from the measurement API.
        min_date: Start of the download window (any format ``maya.when`` accepts).
        max_date: End of the download window (any format ``maya.when`` accepts).
        url_pilot: Base URL of the pilot API (unused for the ``"Virtual"`` pilot).
        pilot_name: Pilot name; ``"Virtual"`` selects the prototype endpoint.
        type_measurement: Value of the ``type`` column kept for the forecast file.
        key_measurement: Name of the field holding the value when the record
            has no ``energy`` field.
        filter_vars: Column names to filter on, applied in order.
        filter_cases: For each entry of ``filter_vars``, the accepted values.
        output_data_forecast: Path of the JSON written with the rows whose
            ``type`` equals ``type_measurement``.
        output_data_metric: Path of the JSON written with all filtered rows.

    Raises:
        ValueError: If ``min_date``/``max_date`` cannot be parsed, or if no row
            is left for the forecast (a Discord alert is sent first).
    """

    import requests # To REQUIREMENTS
    import json
    import pandas as pd # To REQUIREMENTS
    import maya # To REQUIREMENTS
    from datetime import datetime
    from tqdm import tqdm
    from icecream import ic
    from discord_webhook import DiscordWebhook
    from retry import retry # TO REQUIREMENTS

    # Columns produced by DataFrameAssests; also used to build an empty frame
    # so that an empty download still reaches the "not enough data" alert.
    ASSET_COLUMNS = [
        "asset_name", "value", "ds", "type", "measurement_type",
        "direction", "domain", "id_sensor", "interpolation"
    ]

    #Functions definitions

    def GetRequest(url, headers = None, payload = None):
        """GET ``url`` with TLS verification disabled and return the parsed body.

        Returns the decoded JSON body, or a dict with ``status_code`` and
        ``text`` when the body is not valid JSON.
        """

        from urllib3.exceptions import InsecureRequestWarning
        import warnings
        import contextlib

        headers = {} if headers is None else headers
        payload = {} if payload is None else payload

        old_merge_environment_settings = requests.Session.merge_environment_settings

        @contextlib.contextmanager
        def no_ssl_verification():
            opened_adapters = set()

            def merge_environment_settings(self, url, proxies, stream, verify, cert):
                # Verification happens only once per connection so we need to close
                # all the opened adapters once we're done. Otherwise, the effects of
                # verify=False persist beyond the end of this context manager.
                opened_adapters.add(self.get_adapter(url))

                settings = old_merge_environment_settings(self, url, proxies, stream, verify, cert)
                settings['verify'] = False

                return settings

            requests.Session.merge_environment_settings = merge_environment_settings

            try:
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore', InsecureRequestWarning)
                    yield
            finally:
                requests.Session.merge_environment_settings = old_merge_environment_settings

                for adapter in opened_adapters:
                    try:
                        adapter.close()
                    except Exception:
                        pass

        with no_ssl_verification():
            response = requests.request("GET", url, headers = headers, data = payload)

        try:
            return response.json()
        except ValueError:
            return {
                "status_code": response.status_code,
                "text": response.text
            }

    def DownloadAssetsData(measurement_name, url_pilot,bucket = "renergetic", min_date = "yesterday", max_date = "tomorrow"):
        """Download the raw records day by day and return them as one list."""

        # The original built these ValueErrors without raising them, so a bad
        # date only surfaced later as an unrelated unbound-variable error.
        try:
            min_date_from = maya.when(min_date).datetime()
        except Exception as error:
            raise ValueError("Please introduce correct time format for MIN_DATE") from error

        try:
            max_date_from = maya.when(max_date).datetime()
        except Exception as error:
            raise ValueError("Please introduce correct time format for MAX_DATE") from error

        datelist = pd.date_range(min_date_from, max_date_from)

        data_ = []
        for i in tqdm(range(len(datelist)-1)):
            from_ = datetime.strftime(datelist[i], "%Y-%m-%d 00:00:00")
            to_ = datetime.strftime(datelist[i+1], "%Y-%m-%d 00:00:00")

            if pilot_name == "Virtual":
                url = "http://influx-api-ren-prototype.apps.paas-dev.psnc.pl/api/measurement/data?measurements={measurement_name}&from={from_}&to={to_}"\
                    .format(measurement_name = measurement_name, from_ = from_, to_= to_)
            else:
                url = url_pilot + "/api-measurement/1.0/api/measurement/data?measurements={measurement_name}&from={from_}&to={to_}"\
                    .format(measurement_name = measurement_name, from_ = from_, to_= to_)
            info_ = GetRequest(url)
            if type(info_) == list:
                data_.extend(info_)
            elif type(info_) == dict:
                # A dict answer is the error payload of the API / GetRequest.
                print("Error")
                print(from_)
                print(to_)
        return data_

    def DataFrameAssests(list_data, name_field):
        """Flatten the raw records into a DataFrame with ``ASSET_COLUMNS``.

        Records that lack a mandatory key or whose value is not numeric are
        skipped. Missing optional tags are stored as the string ``"None"``.
        """
        # Output column -> tag it is read from.
        optional_tags = {
            "measurement_type": "measurement_type",
            "direction": "direction",
            "domain": "domain",
            "id_sensor": "sensor_id",
            "interpolation": "interpolation_method",
        }

        dicts = []
        for data in list_data:
            try:
                fields = data["fields"]
                tags = data["tags"]
                name_value = "energy" if "energy" in fields.keys() else name_field

                dict_ = {
                    "asset_name": tags["asset_name"],
                    "value": float(fields[name_value]),
                    "ds": fields["time"],
                    # Two spellings of the same tag are accepted.
                    "type": tags.get("type_data", tags.get("typeData", "None")),
                }
                for column, tag in optional_tags.items():
                    dict_[column] = tags.get(tag, "None")

                dicts.append(dict_)
            except Exception:
                # Malformed record: skip it, as before.
                continue
        return pd.DataFrame(dicts, columns = ASSET_COLUMNS)

    @retry(tries= 3)
    def DownloadAndProcess(measurement_name, url_pilot, min_date, max_date, key_measurement):
        list_ = DownloadAssetsData(measurement_name, url_pilot,min_date = min_date, max_date = max_date)
        return DataFrameAssests(list_, key_measurement)

    def FilterData(data, filter_vars, filter_cases):
        """Keep the rows whose ``filter_vars[i]`` value is in ``filter_cases[i]``."""
        for column, accepted in zip(filter_vars, filter_cases):
            # Boolean mask instead of a helper column written onto a slice.
            keep = data[column].apply(lambda value: value in accepted).astype(bool)
            data = data[keep]

        return data

    def SendAlert(data):
        """Notify Discord and abort when there is nothing left to forecast."""
        ic(data.shape[0])

        if data.shape[0] == 0:
            # NOTE(review): this webhook URL is a secret committed in source;
            # it should be injected through configuration and rotated.
            url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
            message = "Not enough data for {measurement_name}".format(measurement_name = measurement_name)
            webhook = DiscordWebhook(url = url_disc, content = message)
            webhook.execute()

            raise ValueError("Void data to forecast")

    # Code Execution

    data_all = DownloadAndProcess(measurement_name, url_pilot, min_date, max_date, key_measurement)
    data_filtered = FilterData(data_all, filter_vars, filter_cases)
    data_to_train = data_filtered[data_filtered.type == type_measurement]
    SendAlert(data_to_train)

    data_output = {
        "value": data_to_train["value"].tolist(),
        "time_registered": data_to_train["ds"].tolist(),
        "asset_name": data_to_train["asset_name"].tolist()
    }

    with open(output_data_forecast, "w") as file:
        json.dump(data_output, file)

    data_output_metrics = {
        "value": data_filtered["value"].tolist(),
        "asset_name": data_filtered["asset_name"].tolist(),
        "time_registered": data_filtered["ds"].tolist(),
        "type": data_filtered["type"].tolist()
    }

    with open(output_data_metric, "w") as file:
        json.dump(data_output_metrics, file)

# Process

In [103]:
def ProcessData(input_data_path: InputPath(str), hourly_aggregate, minute_aggregate ,min_date, max_date, output_data_forecast: OutputPath(str)):
    """Resample the downloaded measurements onto a regular hourly or minutely grid.

    The input JSON is loaded into a DataFrame with the columns ``value``,
    ``time_registered`` and ``asset_name``. For every asset, the values are
    grouped per hour (when ``hourly_aggregate`` is ``"mean"``, ``"sum"`` or
    ``"max"``) or otherwise per minute (when ``minute_aggregate`` is one of
    those), and one row is emitted for every slot of every day between
    ``min_date`` and ``max_date`` (both included). Empty slots get ``0``,
    except for ``"max"`` where the last seen value is carried forward. With
    ``"max"`` the carried values are kept in ``value_cummulative`` and
    ``value`` becomes the difference with the previous slot.

    Args:
        input_data_path: JSON file to read.
        hourly_aggregate: Aggregation for the hourly grid; takes precedence
            over ``minute_aggregate``.
        minute_aggregate: Aggregation for the minutely grid.
        min_date: First day of the grid (any format ``maya.when`` accepts).
        max_date: Last day of the grid (any format ``maya.when`` accepts).
        output_data_forecast: Path of the JSON written with the result.
    """

    import maya
    from datetime import datetime
    import json
    import pandas as pd
    from icecream import ic
    from tqdm import tqdm

    min_date = datetime.strftime(maya.when(min_date).datetime(), "%Y-%m-%d")
    max_date = datetime.strftime(maya.when(max_date).datetime(), "%Y-%m-%d")

    ic(hourly_aggregate)
    ic(minute_aggregate)

    with open(input_data_path) as file:
        data_str = json.load(file)

    data = pd.DataFrame(data_str)

    def _ResampleAssets(data, aggregate, bucket_format, minutes_in_hour):
        """Aggregate ``data`` per asset and time bucket and fill the whole grid.

        ``bucket_format`` is the strftime pattern that truncates a timestamp to
        its bucket; ``minutes_in_hour`` lists the minutes emitted for each hour
        (``(0,)`` for an hourly grid, ``range(60)`` for a minutely one).
        """
        frames = []

        for asset_name in tqdm(pd.unique(data.asset_name)):
            # Copy so the helper column is not written onto a slice of ``data``.
            data_iter = data[data.asset_name == asset_name].copy()
            data_iter["bucket"] = data_iter["time_registered"].apply(
                lambda str_: datetime.strftime(maya.parse(str_).datetime(), bucket_format)
            )
            aggregated = data_iter.groupby("bucket")["value"].agg(aggregate)

            # Dict lookup instead of scanning the frame for every grid slot.
            values_by_bucket = aggregated.to_dict()
            last_value = aggregated.iloc[0]

            rows = []
            for ds_obj in pd.date_range(min_date, max_date):
                date_str = ds_obj.strftime("%Y-%m-%d")
                for hour in range(24):
                    for minute in minutes_in_hour:
                        ds_str = "{date} {H:02d}:{M:02d}:00".format(date = date_str, H = hour, M = minute)

                        if ds_str in values_by_bucket:
                            value_ = values_by_bucket[ds_str]
                            if aggregate == "max":
                                # Remember it for the following empty slots.
                                last_value = value_
                        elif aggregate == "max":
                            value_ = last_value
                        else:
                            value_ = 0

                        rows.append({
                            "time_registered": ds_str,
                            "value": value_,
                            "asset_name": asset_name
                        })

            asset_frame = pd.DataFrame(rows)
            if aggregate == "max":
                # Differences are computed per asset so they never mix two assets.
                asset_frame["value_cummulative"] = asset_frame["value"].copy()
                asset_frame["value"] = asset_frame["value"].diff().fillna(0)
            frames.append(asset_frame)

        if not frames:
            return pd.DataFrame(columns = ["time_registered", "value", "asset_name"])
        return pd.concat(frames, ignore_index = True)

    if hourly_aggregate in ["mean","sum", "max"]:
        print("hourly process")
        output_data = _ResampleAssets(data, hourly_aggregate, "%Y-%m-%d %H:00:00", (0,))
    elif minute_aggregate in ["max", "sum", "mean"]:
        print("minutely process")
        output_data = _ResampleAssets(data, minute_aggregate, "%Y-%m-%d %H:%M:00", range(60))
    else:
        output_data = data

    data_output = {
        "value": output_data["value"].tolist(),
        "time_registered": output_data["time_registered"].tolist(),
        "asset_name": output_data["asset_name"].tolist()
    }

    # Only present when the aggregation that actually ran was "max".
    if "value_cummulative" in output_data.columns:
        data_output["value_cummulative"] = output_data["value_cummulative"].tolist()

    with open(output_data_forecast, "w") as file:
        json.dump(data_output, file)



# Get Weather Data From Open Meteo API

In [104]:
def DownloadWeatherData_OpenMeteo(input_weather_influx: InputPath(str), city_name: str, start_date:str, output_weather_data : OutputPath(str)):
    """Provide hourly weather data, from the Influx file or from Open-Meteo.

    The file at ``input_weather_influx`` is read as feather, then as CSV. If
    it cannot be read or holds no rows, the data is downloaded from the
    Open-Meteo archive and forecast APIs for ``city_name`` instead.

    Args:
        input_weather_influx: File produced by the Influx download step.
        city_name: City to geocode; ``"Virtual"`` is mapped to ``"Madrid"``.
        start_date: First day requested from the archive API (any format
            ``maya.when`` accepts).
        output_weather_data: Path of the output, written as feather, or as
            CSV when feather serialization fails.
    """
    import maya
    from datetime import datetime
    import requests
    import pandas as pd

    WEATHER_COLUMNS = ['temperature_2m', 'cloudcover', 'shortwave_radiation',
        'direct_radiation', 'diffuse_radiation', 'direct_normal_irradiance']

    def ManageDateTime(ds_obj):
        return datetime.strftime(maya.parse(ds_obj).datetime(), "%Y-%m-%d %H:%M:%S")

    class InvalidArgument(Exception):
        "Input correctly City name or Lat AND Lon for city"
        pass

    def GetWeatherData(city = "None", lon = "no", lat= "no", start_date= "yesterday", end_date= "today"):
        """Download archive ("real") and forecast rows and return them stacked."""

        # Input city or lat/lon + start date and end date

        if city != "None":
            url = "https://geocoding-api.open-meteo.com/v1/search?name={city_name}&count=10&language=en&format=json".format(city_name = city)
            geo_ = requests.get(url)
            try:
                results_ = geo_.json()["results"][0]
                lat = results_["latitude"]
                lon = results_["longitude"]
            except (KeyError, IndexError):
                raise InvalidArgument
        elif lat == "no" or lon == "no":
            raise InvalidArgument

        start_date_parsed = datetime.strftime(maya.when(start_date).datetime(), "%Y-%m-%d")
        end_date_parsed = datetime.strftime(maya.when(end_date).datetime(), "%Y-%m-%d")
        url = "https://archive-api.open-meteo.com/v1/archive?latitude={lat}&longitude={lon}&start_date={start_date}&end_date={end_date}&hourly=temperature_2m,cloudcover,shortwave_radiation,direct_radiation,diffuse_radiation,direct_normal_irradiance"\
            .format(lat = lat, lon = lon, start_date = start_date_parsed, end_date = end_date_parsed)
        archive_response = requests.get(url)
        try:
            data = pd.DataFrame(archive_response.json()["hourly"])
        except (ValueError, KeyError) as error:
            # Fail here with the server answer instead of crashing later on a
            # frame built from the response object.
            raise RuntimeError(
                "Open-Meteo archive request failed ({status}): {text}".format(
                    status = archive_response.status_code, text = archive_response.text
                )
            ) from error
        data["ds"] = data["time"].apply(ManageDateTime)

        url = "https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}&past_days=10&hourly=temperature_2m,cloudcover,shortwave_radiation,direct_radiation,diffuse_radiation,direct_normal_irradiance"\
            .format(latitude = lat, longitude = lon)
        forecast_data = requests.get(url)
        forecast_data = pd.DataFrame(forecast_data.json()["hourly"])
        forecast_data["ds"] = forecast_data["time"].apply(ManageDateTime)

        data["type"] = "real"
        forecast_data["type"] = "forecast"

        return pd.concat([data, forecast_data], ignore_index= True)

    def ProcessWeatherData(weather_data):
        """Prefer archive rows with a temperature; fill the rest with forecast rows."""
        real = weather_data[weather_data.type == "real"]
        real = real[real["temperature_2m"].notna()]

        forecast = weather_data[weather_data.type == "forecast"]
        # Forecast rows are only used for timestamps the archive does not cover.
        forecast = forecast[~forecast["ds"].isin(real["ds"])]

        merged = pd.concat([real, forecast], ignore_index= True)
        return merged[["ds"] + WEATHER_COLUMNS].rename(columns = {"ds": "ds_hour"})

    def ReadInfluxWeather(path):
        """Return the Influx weather frame, or None when it cannot be read."""
        for reader in (pd.read_feather, pd.read_csv):
            try:
                return reader(path)
            except Exception:
                continue
        return None

    weather_influx = ReadInfluxWeather(input_weather_influx)

    if city_name == "Virtual":
        city_name = "Madrid"

    # ``== None`` on a DataFrame compares element-wise and cannot be used in an
    # ``if``; an empty frame carries no weather either, so both fall back.
    if weather_influx is None or weather_influx.empty:
        weather_data = GetWeatherData(city_name, start_date = start_date)
        weather_data = ProcessWeatherData(weather_data)
    else:
        weather_data = weather_influx

    try:
        weather_data.to_feather(output_weather_data)
    except Exception:
        weather_data.to_csv(output_weather_data)
    


# Download Data from Influx DB

In [105]:
def DownloadDataFromInfluxDB(timestamp:float, output_weather_info_path: OutputPath(str)):
    """Write an empty DataFrame to ``output_weather_info_path``.

    The frame is stored as feather; if that fails it is stored as CSV without
    the index. ``timestamp`` is accepted but not used.
    """
    from pandas import DataFrame

    empty_frame = DataFrame()
    try:
        empty_frame.to_feather(output_weather_info_path)
    except:
        empty_frame.to_csv(output_weather_info_path, index = False)

# Metrics Model

In [106]:
def CalculateForecastMetrics(input_data_metric_path: InputPath(str), asset_name, mae_threshold: float) -> bool:
    """Tell whether the forecast error of an asset exceeds ``mae_threshold``.

    Args:
        input_data_metric_path: JSON file with the columns ``value``,
            ``asset_name``, ``time_registered`` and ``type`` (``"real"`` or
            ``"forecast"`` rows are the ones compared).
        asset_name: Asset whose rows are evaluated.
        mae_threshold: Largest accepted mean absolute error.

    Returns:
        True when the asset has no rows, when its rows span fewer than 30
        days, when the error cannot be computed, or when the mean absolute
        error over the last 7 days is above ``mae_threshold``; False otherwise.
    """

    import json
    import maya
    from datetime import datetime
    import pandas as pd
    from sklearn.metrics import mean_absolute_error
    from icecream import ic

    with open(input_data_metric_path) as file:
        data_metrics = json.load(file)

    data_metrics = pd.DataFrame(data_metrics)
    data_metrics = data_metrics[data_metrics.asset_name == asset_name]

    if data_metrics.empty:
        # Nothing to evaluate: answer like the other "cannot evaluate" paths
        # instead of failing on max()/min() of an empty column.
        return True

    max_ds = max(data_metrics.time_registered)
    min_ds = min(data_metrics.time_registered)

    if (maya.when(max_ds)- maya.when(min_ds)).days < 30:
        return True

    date_metric = datetime.strftime(maya.when(max_ds).add(days = -7).datetime(), "%Y-%m-%d")
    recent = data_metrics[data_metrics.time_registered >= date_metric]

    data_metrics_real = recent[recent.type == "real"][["time_registered", "value"]]
    data_metrics_forecast = recent[recent.type == "forecast"][["time_registered", "value"]]
    data_metrics_real.columns = ["ds", "value_real"]
    data_metrics_forecast.columns = ["ds", "value_forecast"]

    # Only timestamps present in both series can be compared.
    compared = pd.merge(data_metrics_real, data_metrics_forecast, on = "ds")
    try:
        mae_metric = mean_absolute_error(compared["value_real"], compared["value_forecast"])
    except ValueError:
        # No comparable rows: force a value above the threshold.
        mae_metric = mae_threshold + 1

    ic(mae_metric)
    ic(mae_threshold)

    # Plain bool rather than a numpy boolean.
    return bool(mae_metric > mae_threshold)

# Forecast_Previous

In [107]:
def PredictFromPreviousModel(input_data_path:InputPath(str), input_weather_path: InputPath(str),
        name_pilot, measurement_name, asset_name, name_model, max_date, num_days, diff_time,
        forecast_data_path: OutputPath(str)):
    """Forecast an asset with the most recently updated stored model.

    The stats file ``stats_{name_pilot}_{measurement_name}_{asset_name}.json``
    is downloaded from the ``test-pf`` bucket and the ``last_update_date`` of
    its ``catboost`` and ``prophet`` entries are compared to pick a model type.

    Returns:
        False when the stats file cannot be downloaded or read, or when both
        dates are equal ("No models trained"); True when the catboost entry is
        the most recent one (no forecast is written in that case); None
        (implicitly) after writing the Prophet forecast to
        ``forecast_data_path`` as CSV.

    Note:
        The ``name_model`` and ``max_date`` arguments are overwritten inside
        the function before they are read.
    """

    import maya
    from discord_webhook import DiscordWebhook
    import json
    from icecream import ic
    import requests
    import pandas as pd
    from prophet.serialize import model_to_json, model_from_json
    from catboost import CatBoostClassifier, Pool
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report, accuracy_score
    from minio import Minio
    import boto3
    from tqdm import tqdm
    from sklearn.metrics import mean_absolute_error, r2_score
    from datetime import datetime

    def ManageDateHour(ds_obj):
            # Truncate a timestamp string to the start of its hour.
            return datetime.strftime(maya.parse(ds_obj).datetime(), "%Y-%m-%d %H:00:00")

    def ModifyData(data, asset_name):
        # Keep the rows of one asset as a ds/y frame and report its latest
        # timestamp and last cumulative value (0 when the column is missing
        # or the asset has no rows).
        data_ds = data[data.asset_name == asset_name][["time_registered", "value"]]
        try:
            last_cummulative_value = data[data.asset_name == asset_name]["value_cummulative"].tolist()[-1]
        except:
            last_cummulative_value = 0
        data_ds.columns = ["ds", "y"]
        if data_ds.shape[0] == 0:
            max_date = datetime.strftime(maya.when("now").datetime(),"%Y-%m-%d %H:%M:%S")
        else:
            max_date = max(data_ds.ds)
        ic(max_date)
        ic(last_cummulative_value)
        ic(data_ds.shape)
        ic(len(pd.unique(data_ds.y)))

        return data_ds, max_date, last_cummulative_value

    # NOTE(review): the S3 access key and secret are hard-coded here (twice);
    # they should come from configuration/secrets and be rotated.
    s3 = boto3.resource(
        service_name='s3',
        aws_access_key_id='QyvycO9kc2cm58K8',
        aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
        endpoint_url='https://s3.tebi.io'
    )

    s3_client = boto3.client('s3',
        aws_access_key_id='QyvycO9kc2cm58K8',
        aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
        endpoint_url='https://s3.tebi.io'
    )

    # model_Ghent_pv_de-nieuwe-dokken-pv-017A-xxxxx9A1.json
    with open(input_data_path) as file:
        data_str = json.load(file)
    
    data = pd.DataFrame(data_str)
    # The max_date argument is replaced by the latest timestamp found in the data.
    data, max_date, last_cummulative_value = ModifyData(data, asset_name)
    metrics_list = []  # not used below
    ic(asset_name)

    # NOTE(review): only feather is attempted here; if the producer of this
    # file fell back to CSV this read fails - confirm against the pipeline.
    weather_data = pd.read_feather(input_weather_path)
    
    # The whole bucket is listed, but list_objects is not used afterwards.
    my_bucket = s3.Bucket('test-pf')
    list_objects = []
    for my_bucket_object in my_bucket.objects.all():
        list_objects.append(my_bucket_object.key)
    
    try:
        file_stats = "stats_{name_pilot}_{measurement_name}_{asset_name}.json".format(
            name_pilot = name_pilot,measurement_name = measurement_name, asset_name = asset_name
        )
        
        # The stats file is downloaded into the current working directory.
        with open(file_stats, 'wb') as f:
            s3_client.download_fileobj('test-pf', file_stats, f)
        
        with open(file_stats) as f:
            stats_asset_models = json.load(f)
        
        # A model type missing from the stats counts as last updated on 1 Jan 1970.
        if "catboost" in stats_asset_models.keys():
            last_date_catboost = stats_asset_models["catboost"]["last_update_date"]
        else:
            last_date_catboost = datetime.strftime(maya.when("1 Jan 1970").datetime(), format = "%Y-%m-%d %H:%M:%S")
        
        if "prophet" in stats_asset_models.keys():
            last_date_prophet = stats_asset_models["prophet"]["last_update_date"]
        else:
            last_date_prophet = datetime.strftime(maya.when("1 Jan 1970").datetime(), format = "%Y-%m-%d %H:%M:%S")
        
        # NOTE(review): the dates are compared as stored; presumably strings
        # in "%Y-%m-%d %H:%M:%S" form so that ordering is chronological - confirm.
        if last_date_prophet == last_date_catboost:
            raise ValueError("No models trained")
        elif last_date_catboost > last_date_prophet:
            type_ = "catboost"
        else:
            type_ = "prophet"

    except:
        # Any failure above (download, parsing, "No models trained") ends here.
        return False
    
    # Overwrites the name_model argument; the new value is not read below.
    name_model = "model_{name_pilot}_{measurement_name}_{asset_name}_{type_}".format(
        name_pilot = name_pilot,measurement_name = measurement_name, asset_name = asset_name, type_ = type_
    )

    if type_ == "prophet":
        # NOTE(review): the model is read from a local 'model_prophet.json';
        # nothing in this function downloads it - confirm where it comes from.
        with open('model_prophet.json', 'r') as fin:
            m = model_from_json(fin.read())

        # Forecast horizon: from two days before the last Prophet update up to
        # num_days after the latest data point.
        from_date_obj = maya.parse(last_date_prophet).add(days = -2)
        to_date_obj = maya.when(max_date).add(days = num_days)
        days_forecast = (to_date_obj - from_date_obj).days
        # NOTE(review): assumes diff_time and num_days are numeric (minutes
        # between measures / days); periods is a float here - TODO confirm.
        measures_per_hour = 60/diff_time
        future = m.make_future_dataframe(periods= 24*(3 + days_forecast)*measures_per_hour , freq="{minutes}T".format(minutes = diff_time))
        future["ds"] = future["ds"].apply(str)
        future["ds_hour"] = future["ds"].apply(ManageDateHour)
        # Inner merge: timestamps without a weather row are dropped.
        future = pd.merge(future, weather_data, on = "ds_hour")

        forecast = m.predict(future)

        # Only rows after the latest data point are written.
        forecast[forecast.ds > max_date].to_csv(forecast_data_path, index = False)

    else:
        # catboost is the most recent model: nothing is forecast here.
        return True





# Train_Forecast

In [108]:
def ForecastProcess(input_data_path: InputPath(str), input_weather_path: InputPath(str),
    measurement_name,
    path_minio,
    access_key,
    secret_key,
    mode,
    url_pilot,
    diff_time,
    pilot_name,
    send_forecast,
    asset_name,
    num_days,
    mode_prophet,
    daily_seasonality,
    weekly_seasonality,
    mlpipeline_metrics_path: OutputPath('Metrics'),
    forecast_data_path: OutputPath(str)
    ):

    import maya
    from discord_webhook import DiscordWebhook
    import json
    from icecream import ic
    import requests
    import pandas as pd
    from prophet.serialize import model_to_json
    from catboost import CatBoostClassifier, Pool
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report, accuracy_score
    from minio import Minio
    import boto3
    from tqdm import tqdm
    from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
    from datetime import datetime

    from darts.models import RNNModel
    from darts import TimeSeries
    from darts.dataprocessing.transformers import Scaler
    from darts.utils.timeseries_generation import datetime_attribute_timeseries



    # Best-effort connectivity check: build the MinIO client and list the
    # objects of the "test" bucket. A failure is only reported to Discord and
    # execution continues; in that case `client` may be left undefined for the
    # nested helpers that use it later.
    try:
        client = Minio(
            path_minio,
            access_key=access_key,
            secret_key=secret_key,
            secure = False
        )

        list_objects = client.list_objects("test")
        for obj_ in list_objects:
            ic(obj_._object_name)
    except Exception:
        # `except Exception` (not a bare `except:`) so KeyboardInterrupt and
        # SystemExit are not silently turned into a Discord message.
        # NOTE(review): the webhook URL embeds a token; consider injecting it
        # through configuration instead of source code.
        url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
        message = "Cannot access minio server correctly - read data."
        webhook = DiscordWebhook(url = url_disc, content = message)
        webhook.execute()
    

    def ForecastData(data, asset_name, measurement_name, metrics_list, measures_per_hour, diff_time, weather_data ,mode_prophet,daily_seasonality, weekly_seasonality,mode = "no notifications"):
        
        # Generic Processing Functions

        def ModifyData(data, asset_name):
            """Select one asset's rows and reshape them into a (ds, y) frame.

            Args:
                data: frame with at least `asset_name`, `time_registered` and
                    `value` columns; `value_cummulative` is optional.
                asset_name: value of `data.asset_name` to keep.

            Returns:
                (data_ds, max_date, last_cummulative_value) where `data_ds` has
                columns `ds` and `y`, `max_date` is the largest `ds` (or the
                current time formatted as "%Y-%m-%d %H:%M:%S" when there are no
                rows) and `last_cummulative_value` is the last
                `value_cummulative` of the asset, or 0 when it cannot be read.
            """
            asset_rows = data[data.asset_name == asset_name]
            data_ds = asset_rows[["time_registered", "value"]]

            # Falls back to 0 when the column is missing or the asset has no rows.
            try:
                last_cummulative_value = asset_rows["value_cummulative"].tolist()[-1]
            except:
                last_cummulative_value = 0

            data_ds.columns = ["ds", "y"]

            has_rows = data_ds.shape[0] != 0
            max_date = (
                max(data_ds.ds)
                if has_rows
                else datetime.strftime(maya.when("now").datetime(),"%Y-%m-%d %H:%M:%S")
            )

            # Debug traces (icecream echoes the expression text, so keep these as-is).
            ic(max_date)
            ic(last_cummulative_value)
            ic(data_ds.shape)
            ic(len(pd.unique(data_ds.y)))

            return data_ds, max_date, last_cummulative_value
        # Categorical Processing Functions

        def GetDateInfo(ds_str, time_value):
            """Return one calendar component of `ds_str` as a string.

            `ds_str` is parsed with `maya.parse`. `time_value` selects the
            component: "year", "month", "weekday" or "hour". Any other
            selector yields None.
            """
            parsed = maya.parse(ds_str)
            if time_value not in ("year", "month", "weekday", "hour"):
                return None
            return str(getattr(parsed, time_value))
            
        def ManageData(data_ds, num_prevs = 24):
            """Add calendar and lagged-value features to a (ds, y) frame.

            Columns are written onto the frame that is passed in. The returned
            frame is that frame with its first `num_prevs + 1` rows removed.

            Returns:
                (data_ds, names_prevs_vars, cat_features_names): the feature
                frame, the names of the `prev_val_*` columns, and the calendar
                column names followed by those lag column names.
            """
            date_parts = ["year", "month", "weekday", "hour"]

            # Calendar components, stringified by GetDateInfo.
            for part in date_parts:
                data_ds[part] = data_ds["ds"].apply(GetDateInfo, time_value = part)

            # A component with at most two distinct values becomes a boolean
            # "equals the first distinct value" flag.
            for part in date_parts:
                distinct_vals = pd.unique(data_ds[part])
                if len(distinct_vals) <= 2:
                    data_ds[part] = (data_ds[part] == list(distinct_vals)[0])

            # Lag features: y shifted by 1..num_prevs rows, stored as strings.
            for lag in range(1, num_prevs + 1):
                data_ds["prev_val_{i}".format(i = lag)] = data_ds["y"].shift(lag).apply(str)
            data_ds = data_ds[(num_prevs + 1):]

            names_prevs_vars = [col for col in data_ds.columns.values if "prev_val" in col]
            cat_features_names = list(date_parts) + names_prevs_vars

            return data_ds, names_prevs_vars, cat_features_names
        
        def Train_CatBoost(data_ds, cat_features_names):
            """Fit a CatBoostClassifier on the feature frame and report accuracy.

            Every column except `y` and `ds` is used as a feature and `y` is
            the label. 20% of the rows are held out by `train_test_split`.

            Returns:
                (catboost_model, accuracy_score_train, accuracy_score_test)
            """
            features = data_ds.drop(["y", "ds"], axis = 1)
            X_train, X_test, Y_train, Y_test = train_test_split(features, data_ds.y, test_size = 0.2)

            # Wrap both splits in CatBoost pools, declaring the categorical columns.
            train_pool = Pool(data = X_train, label = Y_train, cat_features = cat_features_names)
            test_pool = Pool(data = X_test, label = Y_test, cat_features = cat_features_names)

            catboost_model = CatBoostClassifier(iterations = 10, learning_rate = 1, depth = 8)

            print("Model To Train")
            catboost_model.fit(train_pool)
            print("Model trained")

            pred_test = catboost_model.predict(test_pool)
            pred_train = catboost_model.predict(train_pool)
            print("Y hat obtained")

            accuracy_score_train = accuracy_score(Y_train, pred_train)
            accuracy_score_test = accuracy_score(Y_test, pred_test)
            ic(accuracy_score_train)
            ic(accuracy_score_test)

            return catboost_model, accuracy_score_train, accuracy_score_test

        def Save_CatBoost(catboost_model, pilot_name, measurement_name, asset_name):
            """Serialize the CatBoost model and upload it to the 'test-pf' S3 bucket.

            The model is written to /tmp/catboost_model.cbm in "cbm" format,
            uploaded under the key
            "model_{pilot}_{domain}_{asset}_latest_catboost.cbm", and a Discord
            notification is sent afterwards.

            Args:
                catboost_model: trained model exposing `save_model`.
                pilot_name, measurement_name, asset_name: used to build the
                    object key and the notification text.
            """
            local_path = "/tmp/catboost_model.cbm"
            catboost_model.save_model(local_path, format = "cbm")

            # NOTE(review): credentials are hardcoded in source; they should be
            # injected (environment / secret store) and rotated.
            s3 = boto3.resource(
                service_name='s3',
                aws_access_key_id='QyvycO9kc2cm58K8',
                aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
                endpoint_url='https://s3.tebi.io'
            )

            for bucket in s3.buckets.all():
                ic(bucket.name)

            f_name = "model_{pilot}_{domain}_{asset}_latest_catboost.cbm"\
            .format(pilot = pilot_name,domain = measurement_name, asset = asset_name)

            # Upload through a context manager so the file handle is always
            # closed (it was previously left open).
            with open(local_path, 'rb') as model_file:
                s3.Bucket('test-pf').put_object(Key=f_name, Body=model_file)

            url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
            message = "Model sent to tebi for {measurement_name} - {asset_name}".format(measurement_name = measurement_name, asset_name = asset_name)
            webhook = DiscordWebhook(url = url_disc, content = message)
            webhook.execute()
        def Predict_CatBoost(catboost_model, data_ds, last_cummulative_value, num_days, measures_per_hour, diff_time, cat_features_names, names_prevs_vars):
            """Roll the CatBoost model forward one step at a time and append the predictions.

            For each of `num_days * 24 * measures_per_hour` steps, a feature row
            is built from the last row of `data_ds`, the model predicts the next
            `y`, and the new row is appended so the following step can use it.

            Args:
                catboost_model: fitted model used through `predict`.
                data_ds: frame with `ds`, `y` and the `prev_val_*` columns; a
                    `yhat` column is written onto it before the loop starts.
                last_cummulative_value: initial value of the `yhat` column.
                num_days, measures_per_hour: loop bounds.
                diff_time: minutes added to the last `ds` for each new row.
                cat_features_names: categorical column names given to the Pool.
                names_prevs_vars: names of the `prev_val_*` lag columns.

            Returns:
                The frame with one appended row per forecast step.
            """
            # Every existing row starts with the same cumulative value.
            data_ds["yhat"] = last_cummulative_value
            for day_ in range(num_days):
                for i in tqdm(range(24)):
                    for j in range(measures_per_hour):
                        last_row = data_ds.iloc[-1]
                        # Timestamp of the row being predicted.
                        ds_obj = maya.parse(last_row["ds"]).add(minutes = int(diff_time))
                        # NOTE(review): ManageData stores these components as str(...)
                        # (or as booleans); here the raw attribute values are passed.
                        # Confirm that the model treats both encodings the same.
                        dict_input = {
                            "year": ds_obj.year,
                            "month": ds_obj.month,
                            "weekday": ds_obj.weekday,
                            "hour": ds_obj.hour
                        }
                        # Shift the lag window: prev_val_1 takes the last y and
                        # prev_val_k takes the last row's prev_val_{k-1}.
                        for k_var in names_prevs_vars:
                            # Lag index = text after the 9-character "prev_val_" prefix.
                            k = int(k_var[9:])
                            if k == 1:
                                dict_input[k_var] = str(last_row["y"])
                            else:
                                dict_input[k_var] = str(last_row["prev_val_{i}".format(i = k -1)])
                        # From here on `dict_input` is a one-element list of records.
                        dict_input = [dict_input]
                        data_input = pd.DataFrame(dict_input)
                        pred_pool = Pool(
                            data = data_input, 
                            cat_features = cat_features_names
                            )
                        # Indexing with [0][0] assumes a 2-D prediction array — TODO confirm.
                        pred_value = catboost_model.predict(pred_pool)[0][0]
                        dict_input[0]["y"] = pred_value
                        dict_input[0]["ds"] = datetime.strftime(ds_obj.datetime(), "%Y-%m-%d %H:%M:%S")
                        # yhat is a running total: previous yhat plus the predicted value.
                        dict_input[0]["yhat"] = last_row["yhat"] + float(pred_value)
                        data_add = pd.DataFrame(dict_input)
                        # The frame grows by one row per step (it is copied on each concat).
                        data_ds = pd.concat([data_ds, data_add],ignore_index = True)
            return data_ds

        def Metrics_CatBoost(accuracy_score_train, accuracy_score_test, metrics_list):
            """Append one CatBoost metrics record to `metrics_list` and return the list.

            The record has the shape {'metrics': [...]} with train accuracy,
            test accuracy and an "asset_number" entry. The "asset_number" value
            is `asset_name` from the enclosing scope.
            """
            def _entry(name, value, fmt):
                # Keep the key order name / numberValue / format.
                return {'name': name, 'numberValue': value, 'format': fmt}

            entries = [
                _entry('accuracy_train', float(accuracy_score_train), "PERCENTAGE"),
                _entry('accuracy_test', float(accuracy_score_test), "PERCENTAGE"),
                _entry("asset_number", asset_name, "RAW"),
            ]

            metrics_list.append({'metrics': entries})
            return metrics_list

        # Prophet Functions
        
        def ManageDateTime(ds_obj):
            """Parse `ds_obj` with maya and format it as "%Y-%m-%d %H:%M:%S"."""
            parsed_dt = maya.parse(ds_obj).datetime()
            return parsed_dt.strftime("%Y-%m-%d %H:%M:%S")

        def ManageDateMinute(ds_obj):
            """Parse `ds_obj` with maya and format it with the seconds forced to 00."""
            parsed_dt = maya.parse(ds_obj).datetime()
            return parsed_dt.strftime("%Y-%m-%d %H:%M:00")
        def ManageDateHour(ds_obj):
            """Parse `ds_obj` with maya and format it with minutes and seconds forced to 00."""
            parsed_dt = maya.parse(ds_obj).datetime()
            return parsed_dt.strftime("%Y-%m-%d %H:00:00")

        def Train_Prophet(train_data, num_days, measures_per_hour, diff_time, weather_data, mode_prophet, daily_seasonality, weekly_seasonality):
            """Fit a Prophet model with weather regressors and forecast past the training data.

            Args:
                train_data: frame with `ds` and `y`; its `ds` column is
                    rewritten in place to minute resolution.
                num_days: forecast days; three extra days are added to the horizon.
                measures_per_hour: number of samples per hour.
                diff_time: sampling step in minutes (used for the frequency string).
                weather_data: frame with `ds_hour` and the five regressor
                    columns added below; its `ds_hour` column is rewritten in
                    place as strings.
                mode_prophet: passed to Prophet as `seasonality_mode`.
                daily_seasonality, weekly_seasonality: passed through to Prophet.

            Returns:
                (forecast, m): the frame returned by `m.predict` and the fitted model.
            """
            from prophet import Prophet

            # In-place write on the caller's frame: ds is truncated to the minute.
            train_data["ds"] = train_data["ds"].apply(ManageDateMinute)
            # Average y over rows that share the same minute.
            train_data = train_data[["ds", "y"]].groupby("ds").mean().reset_index(level = "ds")
            # Hour-resolution key used to join the weather rows.
            train_data["ds_hour"] = train_data["ds"].apply(ManageDateHour)
            weather_data["ds_hour"] = weather_data["ds_hour"].apply(str)

            ic(train_data["ds_hour"].tolist()[-20:])
            ic(weather_data["ds_hour"].tolist()[-20:])
            # pd.merge defaults to an inner join, so rows without a matching
            # weather hour are dropped.
            train_data = pd.merge(train_data, weather_data, on = "ds_hour")

            ic(train_data.shape[0])

            min_date = maya.parse(min(train_data.ds))
            max_date = maya.parse(max(train_data.ds))
            days_train = (max_date - min_date).days

            # Yearly seasonality is enabled only with at least 365 days of training data.
            if days_train >= 365:
                yearly_seasonality = True
            else:
                yearly_seasonality = False
            # Define Model to be trained
            m = Prophet(daily_seasonality=daily_seasonality, weekly_seasonality=weekly_seasonality, yearly_seasonality = yearly_seasonality,changepoint_prior_scale = 0.05, seasonality_mode=mode_prophet)

            # Extra regressors; each must be a column of both train_data and future.
            m.add_regressor('shortwave_radiation')
            m.add_regressor('temperature_2m')
            m.add_regressor("direct_radiation")
            m.add_regressor("diffuse_radiation")
            m.add_regressor("direct_normal_irradiance")

            # Train Model
            m.fit(train_data)
            # Horizon: (3 + num_days) days at measures_per_hour samples per hour.
            future = m.make_future_dataframe(periods= 24*(3 + num_days)*measures_per_hour , freq="{minutes}T".format(minutes = diff_time))
            future["ds"] = future["ds"].apply(str)
            future["ds_hour"] = future["ds"].apply(ManageDateHour)
            # Inner join again: future timestamps without weather data are not predicted.
            future = pd.merge(future, weather_data, on = "ds_hour")
            
            forecast = m.predict(future)

            print(forecast.tail(5))

            return forecast, m

        def GetMetricsProphet(forecast,train_data, test_data, num_days, dict_asset, metrics_list, date_train):
            """Score a Prophet forecast against train and test data and record the metrics.

            Args:
                forecast: frame with `ds` and `yhat`; its `ds` column is
                    converted to strings in place.
                train_data, test_data: frames with `ds` and `y`.
                num_days: unused; kept for interface compatibility.
                dict_asset: unused; kept for interface compatibility.
                metrics_list: list that receives one {'metrics': [...]} record.
                date_train: lower bound (compared against `forecast.ds`) that
                    selects the forecast rows used as test predictions.

            Returns:
                `metrics_list`. A record is appended only when the number of
                test predictions equals the number of test rows; otherwise
                both lengths are traced and the list is returned unchanged.
            """
            # In-place on purpose: the caller keeps using this same frame and
            # filters it by comparing `ds` with `max_date` afterwards.
            forecast["ds"] = forecast["ds"].apply(str)

            # Computed outside the try below so it can never be left unbound: a
            # failure here now surfaces as the real error instead of a later NameError.
            forecast_test = forecast[forecast.ds >= date_train]["yhat"].tolist()

            try:
                train_scored = pd.merge(train_data, forecast[["ds", "yhat"]], on = "ds")
                real_vals_train = train_scored["y"].tolist()
                forecast_train = train_scored["yhat"].tolist()
                r2_score_train = r2_score(real_vals_train, forecast_train)
                mae_score_train = mean_absolute_error(real_vals_train, forecast_train)
            except Exception:
                # Train scores are best-effort; fall back to 0 when they cannot be computed.
                r2_score_train = 0
                mae_score_train = 0

            real_vals_test = test_data["y"].tolist()

            if len(forecast_test) == len(real_vals_test):
                r2_score_test = r2_score(real_vals_test, forecast_test)
                mae_score_test = mean_absolute_error(real_vals_test, forecast_test)
                metrics = {
                    'metrics': [
                        {
                            'name': 'r2_score_test',
                            'numberValue':  float(r2_score_test),
                            'format': "PERCENTAGE"
                        },
                        {
                            'name': 'r2_score_train',
                            "numberValue": float(r2_score_train),
                            "format": "PERCENTAGE"
                        },
                        {
                            # NOTE(review): named "asset_number" but carries
                            # `asset_name` from the enclosing scope.
                            "name": "asset_number",
                            "numberValue": asset_name,
                            "format": "RAW"
                        },
                        {
                            "name": "mae_train",
                            "numberValue": mae_score_train,
                            "format": "RAW"
                        },
                        {
                            "name": "mae_test",
                            "numberValue": mae_score_test,
                            "format": "RAW"
                        }
                    ]}

                metrics_list.append(metrics)

            else:
                ic(len(forecast_test))
                ic(len(real_vals_test))

            return metrics_list

        def SaveModelProphet(model, measurement_name, asset_name, pilot_name):
            """Serialize a Prophet model to JSON and store it, preferring MinIO.

            The model is written to /tmp/model_prophet.json and uploaded to the
            MinIO bucket "test" as "model_{domain}_{asset}.json". If that
            upload fails for any reason, a Discord message is sent and the file
            is uploaded to the 'test-pf' S3 bucket as
            "model_{pilot}_{domain}_{asset}_latest_prophet.json", followed by a
            second Discord message.
            """
            local_path = "/tmp/model_prophet.json"
            with open(local_path, 'w') as fout:
                fout.write(model_to_json(model))  # Save model

            f_name = "model_{domain}_{asset}.json"\
                .format(domain = measurement_name, asset = asset_name)
            try:
                result = client.fput_object(
                    "test", f_name, local_path
                )

                print(
                    "created {0} object; etag: {1}, version-id: {2}".format(
                        result.object_name, result.etag, result.version_id,
                    ),
                )
            except Exception:
                # Also reached when `client` was never created (NameError).
                url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
                message = "Model not saved for {measurement_name} - {asset_name}".format(measurement_name = measurement_name, asset_name = asset_name)
                webhook = DiscordWebhook(url = url_disc, content = message)
                webhook.execute()

                # NOTE(review): credentials are hardcoded in source; they should
                # be injected (environment / secret store) and rotated.
                s3 = boto3.resource(
                    service_name='s3',
                    aws_access_key_id='QyvycO9kc2cm58K8',
                    aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
                    endpoint_url='https://s3.tebi.io'
                )

                for bucket in s3.buckets.all():
                    ic(bucket.name)

                # Fallback upload; the context manager closes the handle that
                # was previously leaked.
                f_name = "model_{pilot}_{domain}_{asset}_latest_prophet.json"\
                .format(pilot = pilot_name,domain = measurement_name, asset = asset_name)
                with open(local_path, 'rb') as model_file:
                    s3.Bucket('test-pf').put_object(Key=f_name, Body=model_file)

                message = "Model sent to tebi for {measurement_name} - {asset_name}".format(measurement_name = measurement_name, asset_name = asset_name)
                webhook = DiscordWebhook(url = url_disc, content = message)
                webhook.execute()

        # LSTM Functions
        def Train_LSTM(data, split_proportion, diff_time, num_days=1, 
                    measures_per_hour=1, n_epochs=100, batch_size=16):
            """Train a darts LSTM (RNNModel) on a (ds, y) frame and forecast ahead.

            Args:
                data: pandas DataFrame with `ds` and `y` columns.
                split_proportion: float strictly between 0 and 1 used to place
                    the train/validation split.
                diff_time: sampling step in minutes (used for frequency strings).
                num_days: forecast days; two extra days are added to the horizon.
                measures_per_hour: number of samples per hour.
                n_epochs, batch_size: passed to RNNModel.

            Returns:
                (forecast, forecast_test, my_model): the historical forecasts
                concatenated with the future forecast as a (ds, y) frame, a
                slice of its `y` values, and the fitted model.

            Raises:
                ValueError: if `data` is not a DataFrame with `ds` and `y`, or
                    if `split_proportion` is outside (0, 1).
                AssertionError: if the combined forecast has gaps at the
                    `diff_time`-minute frequency.
            """
            
            

            if not isinstance(data, pd.DataFrame) or 'ds' not in data.columns or 'y' not in data.columns:
                raise ValueError("The input data must be a pandas DataFrame with 'ds' and 'y' columns.")

            if not (0 < split_proportion < 1):
                raise ValueError("The split_proportion must be a float between 0 and 1.")

            # fill missing values with the last available value
            data = data.fillna(method='ffill')
            
            # Create a time series
            series = TimeSeries.from_dataframe(data, 'ds', 'y',fill_missing_dates=True, freq="{minutes}T".format(minutes = diff_time))

            # Create training and validation sets:
            # NOTE(review): the offset is expressed in hours while len(series)
            # counts samples; for diff_time != 60 the split point may not sit at
            # split_proportion of the series — confirm intent.
            train, val = series.split_after(pd.Timestamp(series.start_time() + pd.Timedelta(hours=int(len(series) * split_proportion))))

            # Normalize the time series (note: we avoid fitting the transformer on the validation set)
            transformer = Scaler()
            train_transformed = transformer.fit_transform(train)
            val_transformed = transformer.transform(val)
            series_transformed = transformer.transform(series)

            # predict *num_days* days ahead
            # (the horizon actually covers 2 + num_days days of samples)
            pred_ahead = 24 * (2 + num_days) * measures_per_hour

            my_model = RNNModel(
                input_chunk_length=2 * pred_ahead,
                model="LSTM",
                hidden_dim=25, 
                n_rnn_layers=1,
                dropout=0.2,
                training_length=pred_ahead,
                batch_size=batch_size,
                n_epochs=n_epochs,
                optimizer_kwargs={"lr": 1e-3},
                model_name="data_RNN",
                log_tensorboard=True,
                random_state=42,
                force_reset=True,
                save_checkpoints=True,
            )

            my_model.fit(
                train_transformed,
                val_series=val_transformed,
                verbose=True,
            )
            
            # Back-test on the scaled full series without retraining.
            # NOTE(review): the start is one hour before the validation start
            # regardless of diff_time — confirm intent.
            historical_forecast = my_model.historical_forecasts(
                                            series_transformed,
                                            start=pd.Timestamp(val.start_time() - pd.Timedelta(hours=1)),
                                            forecast_horizon=pred_ahead,
                                            retrain=False,
                                            verbose=True,
                                        )
            
            historical_forecast = transformer.inverse_transform(historical_forecast)
            historical_forecast = historical_forecast.pd_dataframe().reset_index()
            # rename columns
            historical_forecast.columns = ['ds', 'y']
            historical_forecast.columns.name = None

            # Predict
            forecast = my_model.predict(n=pred_ahead, series=val_transformed)

            # Inverse-transform forecasts and obtain the real predicted values
            forecast = transformer.inverse_transform(forecast)
            forecast = forecast.pd_dataframe().reset_index()
            forecast.columns.name = None

            # Stack the back-test rows and the future rows into one frame.
            forecast = pd.concat([historical_forecast, forecast], axis=0).reset_index(drop=True)

            # Check the dataframe if the frequency is always {diff_time} minute
            full_range = pd.date_range(forecast['ds'].iloc[0], forecast['ds'].iloc[-1], freq="{minutes}T".format(minutes = diff_time))
            assert full_range.difference(forecast['ds']).shape[0] == 0

            # y values from (2 + num_days) days before the end up to one day before the end.
            forecast_test = forecast["y"].tolist()[-24*measures_per_hour*(2 + num_days):-24*measures_per_hour]

            return forecast, forecast_test, my_model
        
        def GetMetricsLSTM(forecast, forecast_test, train_data, test_data, num_days, dict_assets, metrics_list):
            """Score an LSTM forecast against train and test data and record the metrics.

            Args:
                forecast: frame with `ds` and `y` (predicted values).
                forecast_test: list of predictions compared with `test_data.y`.
                train_data, test_data: frames with `ds` and `y`.
                num_days: unused; kept for interface compatibility.
                dict_assets: unused; kept for interface compatibility.
                metrics_list: list that receives one {'metrics': [...]} record.

            Returns:
                `metrics_list`. A record is appended only when `forecast_test`
                and the test rows have the same length; otherwise both lengths
                are traced and the list is returned unchanged.
            """
            try:
                # Restrict both frames to the timestamps they have in common.
                forecast_train = forecast[forecast['ds'].isin(train_data['ds'])].reset_index(drop=True)
                real_vals_train = train_data[train_data['ds'].isin(forecast['ds'])].reset_index(drop=True)
                r2_score_train = r2_score(real_vals_train['y'].to_list(), forecast_train['y'].to_list())
                mae_score_train = mean_absolute_error(real_vals_train['y'].to_list(), forecast_train['y'].to_list())
            except Exception:
                # Both fallbacks are required: `mae_score_train` used to be left
                # unbound here, which raised NameError when the record was built.
                r2_score_train = 0
                mae_score_train = 0

            real_vals_test = test_data["y"].tolist()

            if len(forecast_test) == len(real_vals_test):
                r2_score_test = r2_score(real_vals_test, forecast_test)
                mae_score_test = mean_absolute_error(real_vals_test, forecast_test)
                metrics = {
                    'metrics': [
                        {
                            'name': 'r2_score_test',
                            'numberValue':  float(r2_score_test),
                            'format': "PERCENTAGE"
                        },
                        {
                            'name': 'r2_score_train',
                            "numberValue": float(r2_score_train),
                            "format": "PERCENTAGE"
                        },
                        {
                            # NOTE(review): named "asset_number" but carries
                            # `asset_name` from the enclosing scope.
                            "name": "asset_number",
                            "numberValue": asset_name,
                            "format": "RAW"
                        },
                        {
                            "name": "mae_train",
                            "numberValue": mae_score_train,
                            "format": "RAW"
                        },
                        {
                            "name": "mae_test",
                            "numberValue": mae_score_test,
                            "format": "RAW"
                        }
                    ]}

                metrics_list.append(metrics)

            else:
                ic(len(forecast_test))
                ic(len(real_vals_test))
            return metrics_list

        def SaveModelLSTM(model, measurement_name, asset_name, pilot_name):
            """Save an LSTM model to disk and store it, preferring MinIO.

            The model is saved to /tmp/lstm_model.pt and uploaded to the MinIO
            bucket "test" as "model_{domain}_{asset}.pt". If that upload fails
            for any reason, a Discord message is sent and the file is uploaded
            to the 'test-pf' S3 bucket as
            "model_{pilot}_{domain}_{asset}_latest_lstm.pt", followed by a
            second Discord message.

            Raises:
                Whatever `model.save` raises; that call is not guarded.
            """
            local_path = "/tmp/lstm_model.pt"
            model.save(local_path)

            f_name = "model_{domain}_{asset}.pt"\
                .format(domain = measurement_name, asset = asset_name)
            try:
                result = client.fput_object(
                    "test", f_name, local_path
                )

                print(
                    "created {0} object; etag: {1}, version-id: {2}".format(
                        result.object_name, result.etag, result.version_id,
                    ),
                )
            except Exception:
                # Also reached when `client` was never created (NameError).
                url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
                message = "Model not saved for {measurement_name} - {asset_name}".format(measurement_name = measurement_name, asset_name = asset_name)
                webhook = DiscordWebhook(url = url_disc, content = message)
                webhook.execute()

                # NOTE(review): credentials are hardcoded in source; they should
                # be injected (environment / secret store) and rotated.
                s3 = boto3.resource(
                    service_name='s3',
                    aws_access_key_id='QyvycO9kc2cm58K8',
                    aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
                    endpoint_url='https://s3.tebi.io'
                )

                for bucket in s3.buckets.all():
                    ic(bucket.name)

                # Fallback upload; the context manager closes the handle that
                # was previously leaked.
                f_name = "model_{pilot}_{domain}_{asset}_latest_lstm.pt"\
                .format(pilot = pilot_name,domain = measurement_name, asset = asset_name)
                with open(local_path, 'rb') as model_file:
                    s3.Bucket('test-pf').put_object(Key=f_name, Body=model_file)

                message = "Model sent to tebi for {measurement_name} - {asset_name}".format(measurement_name = measurement_name, asset_name = asset_name)
                webhook = DiscordWebhook(url = url_disc, content = message)
                webhook.execute()


        ############

        print("Modifying Data")
        print(maya.now().rfc2822())

        data_ds, max_date, last_cummulative_value = ModifyData(data, asset_name)

        print("Start Training")
        
        if len(pd.unique(data_ds.y)) >= 20:

            date_train = datetime.strftime(maya.parse(max_date).add(days = -1).datetime(), "%Y-%m-%d")
            train_data = data_ds[data_ds.ds < date_train]
            test_data = data_ds[data_ds.ds >= date_train]

            # Train Prophet
            print("Training Prophet")
            print(maya.now().rfc2822())
            forecast_prophet, model = Train_Prophet(train_data, num_days, measures_per_hour, diff_time, weather_data, mode_prophet, daily_seasonality, weekly_seasonality)
            metrics_list_prophet = GetMetricsProphet(forecast_prophet,train_data, test_data, num_days, dict_assets, metrics_list, date_train)
            
            # Train LSTM
            print("Training LSTM")
            print(maya.now().rfc2822())

            try:
                ic(diff_time)
                forecast_lstm, forecast_test, model = Train_LSTM(data=train_data, split_proportion=0.9, diff_time=diff_time,
                                                            num_days=num_days, measures_per_hour=measures_per_hour, 
                                                            n_epochs=25)
                metrics_list_lstm = GetMetricsLSTM(forecast_lstm, forecast_test, train_data, test_data, num_days, dict_assets, metrics_list)
            except ValueError:
                metrics_list_lstm = [{
                    "name": "mae_test",
                    "numberValue": -1
                }]
            print("Finish Training")
            print(maya.now().rfc2822())
            ############

            # Compare Models

            forecast = forecast_prophet
            metrics_list = metrics_list_prophet
            
            print("Metrics Prophet")

            for metric in metrics_list:
                try:
                    print(metric["name"])
                    print(metric["numberValue"])
                except:
                    print(metrics_list)
                    break

            print("Metrics LSTM")

            for metric in metrics_list_lstm:
                try:
                    print(metric["name"])
                    print(metric["numberValue"])
                except:
                    print(metrics_list)
                    break

            ###########
            
            SaveModelProphet(model, measurement_name, asset_name, pilot_name)

            try:
                SaveModelLSTM(model, measurement_name, asset_name, pilot_name)
            except:
                print("LSTM Model Not Saved")

        elif data_ds.shape[0] < 10:
            print("Not enough values")
            forecast = data_ds

        else:
            data_ds, names_prevs_vars, cat_features_names = ManageData(data_ds)
            
            catboost_model, accuracy_score_train, accuracy_score_test = Train_CatBoost(data_ds, cat_features_names)

            forecast = Predict_CatBoost(catboost_model,
                            data_ds, 
                            last_cummulative_value,
                            num_days, measures_per_hour, diff_time,
                            cat_features_names, names_prevs_vars)

            Save_CatBoost(catboost_model, pilot_name, measurement_name, asset_name)
            metrics_list = Metrics_CatBoost(accuracy_score_train, accuracy_score_test,
                                            metrics_list)

        ic(metrics_list)
        return forecast[forecast.ds > max_date], metrics_list

    
    # Get Parameters
    
    # Driver of ForecastProcess: load the inputs, run ForecastData, then persist
    # the forecast and the metrics (local files, S3 bucket 'test-pf', Discord).
    dict_assets = {}
    measures_per_hour = int(60/int(diff_time))
    num_days = int(num_days)


    with open(input_data_path) as file:
        data_str = json.load(file)

    data = pd.DataFrame(data_str)
    metrics_list = []
    ic(asset_name)

    weather_data = pd.read_feather(input_weather_path)

    class TestError(Exception):
        "Input correctly City name or Lat AND Lon for city"
        pass

    try:
        ic(diff_time)
        forecasted_data, metrics_list = ForecastData(data, asset_name, measurement_name, metrics_list, measures_per_hour, diff_time, weather_data, mode_prophet, daily_seasonality, weekly_seasonality)
    except TestError:
        # Fallback: empty forecast with the expected columns and no metrics.
        forecasted_dict = {
            "ds": [],
            "yhat": []
        }
        forecasted_data = pd.DataFrame(forecasted_dict)
        metrics_list = []


    # NOTE(review): the S3 credentials and the Discord webhook token below are
    # hardcoded in source; they should be injected (environment / secret store)
    # and rotated.
    url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"

    try:
        csv_path = '/tmp/forecast_test_{asset_name}.csv'.format(asset_name = asset_name)
        forecasted_data.to_csv(csv_path, index = False)
        f_name = "forecast_test_{pilot}_{asset_name}.csv".format(pilot = pilot_name, asset_name = asset_name)
        s3 = boto3.resource(
            service_name='s3',
            aws_access_key_id='QyvycO9kc2cm58K8',
            aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
            endpoint_url='https://s3.tebi.io'
        )
        # The context manager closes the handle that was previously leaked.
        with open(csv_path, 'rb') as data_to_send:
            s3.Bucket('test-pf').put_object(Key=f_name, Body=data_to_send)
        message = "Data File: {f_name} Saved to Tebi".format(f_name = f_name)
        webhook = DiscordWebhook(url = url_disc, content = message)
        webhook.execute()
    except Exception:
        # Upload is best-effort: report the failure and post the raw values instead.
        message = "Unable to save data to tebi"
        webhook = DiscordWebhook(url = url_disc, content = message)
        webhook.execute()

        message = "Values for {asset_name}: {list_values}".format(asset_name = asset_name,list_values = forecasted_data.yhat.tolist())
        webhook = DiscordWebhook(url = url_disc, content = message)
        webhook.execute()

    # Clamp negative predictions to zero before writing the component output.
    # NOTE(review): raises KeyError when the forecast has no `yhat` column
    # (the "Not enough values" branch of ForecastData returns ds/y only).
    forecasted_data["yhat"] = forecasted_data["yhat"].apply(lambda x : max(x,0))

    forecasted_data.to_csv(forecast_data_path, index = False)

    # NOTE(review): `mlpipeline_metrics_path` is never written in this function;
    # the metrics only go to /tmp and S3 — confirm whether that is intended.
    # NOTE(review): the local file name uses the fixed domain "electricity"
    # while the uploaded key uses `measurement_name`.
    domain_ = "electricity"
    metrics_path = "/tmp/metrics_{domain}.json".format(domain = domain_)

    with open(metrics_path, "w") as file:
        json.dump(metrics_list, file)



    s3 = boto3.resource(
                service_name='s3',
                aws_access_key_id='QyvycO9kc2cm58K8',
                aws_secret_access_key='tKtUrdQzQgWfhfBwhbQF3yGbyZ43oPn92iGAT7g0',
                endpoint_url='https://s3.tebi.io'
            )
    f_name = "metrics_{domain}_latest.json"\
    .format(domain = measurement_name)
    # The context manager closes the handle that was previously leaked.
    with open(metrics_path, 'rb') as metrics_file:
        s3.Bucket('test-pf').put_object(Key=f_name, Body=metrics_file)



    message = "Forecasting done for {domain} and asset name : {asset_name}".format(domain = domain_, asset_name = asset_name)
    webhook = DiscordWebhook(url = url_disc, content = message)
    webhook.execute()


# Send_Data

In [109]:
def CheckSendForecast(send_forecast:str) -> bool:
    """Translate the ``send_forecast`` pipeline option into a boolean gate.

    Args:
        send_forecast: Expected to be "yes" or "no".

    Returns:
        True only when ``send_forecast`` is exactly "yes". Every other value
        returns False; a value that is neither "yes" nor "no" additionally
        sends a warning to the Discord webhook.
    """
    if send_forecast == "yes":
        return True
    if send_forecast == "no":
        return False

    # Imported lazily: the webhook client is only needed on the invalid-option path.
    from discord_webhook import DiscordWebhook

    # NOTE(review): this webhook URL is a credential hardcoded in source; consider injecting it.
    url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
    message = "Forecast not sent: the option must be 'yes' or 'no', please check this. The option sent was: {option}".format(option = send_forecast)
    DiscordWebhook(url = url_disc, content = message).execute()
    return False

In [110]:
def SendForecast(input_forecast_data_path: InputPath(str),url_pilot:str, pilot_name:str, asset_name : str, measurement_name: str, key_measurement: str, num_days):
    """Post every forecasted point of one asset/measurement to the measurement API.

    Reads the forecast CSV (columns ``ds`` and ``yhat``), looks up the
    measurement's domain, direction and type, and sends one POST per row.
    A failed POST is reported to Discord and recorded; it does not stop the loop.

    Args:
        input_forecast_data_path: Path of the forecast CSV to publish.
        url_pilot: Base URL of the pilot API.
        pilot_name: Pilot identifier; "Virtual" uses the built-in asset catalogue
            and the prototype measurement endpoint instead of ``url_pilot``.
        asset_name: Asset the forecast belongs to.
        measurement_name: Measurement (sensor) name the forecast belongs to.
        key_measurement: Field name under which each forecasted value is sent.
        num_days: Forecast horizon in days; sent as hours in the ``prediction_window`` tag.
    """
    from discord_webhook import DiscordWebhook
    import json
    import pandas as pd
    from icecream import ic
    import requests
    import maya
    from tqdm import tqdm
    from datetime import datetime

    from urllib3.exceptions import InsecureRequestWarning
    import warnings
    import contextlib

    # NOTE(review): this webhook URL is a credential hardcoded in source; consider injecting it.
    url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
    json_headers = {
        "Content-Type": "application/json"
    }

    ## Function Definition ##

    @contextlib.contextmanager
    def no_ssl_verification():
        """Temporarily force ``verify=False`` on every requests session (shared by GET and POST)."""
        old_merge_environment_settings = requests.Session.merge_environment_settings
        opened_adapters = set()

        def merge_environment_settings(self, url, proxies, stream, verify, cert):
            # Verification happens only once per connection so we need to close
            # all the opened adapters once we're done. Otherwise, the effects of
            # verify=False persist beyond the end of this context manager.
            opened_adapters.add(self.get_adapter(url))

            settings = old_merge_environment_settings(self, url, proxies, stream, verify, cert)
            settings['verify'] = False

            return settings

        requests.Session.merge_environment_settings = merge_environment_settings

        try:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', InsecureRequestWarning)
                yield
        finally:
            requests.Session.merge_environment_settings = old_merge_environment_settings

            for adapter in opened_adapters:
                try:
                    adapter.close()
                except Exception:
                    # Best-effort cleanup: a failing close must not mask the request result.
                    pass

    def NotifyDiscord(message):
        """Send ``message`` to the Discord webhook."""
        DiscordWebhook(url = url_disc, content = message).execute()

    def GetRequest(url, headers = None, payload = None):
        """GET ``url`` without SSL verification; return the JSON body, or status/text if it is not JSON."""
        with no_ssl_verification():
            response = requests.request("GET", url, headers = headers or {}, data = payload or {})

        try:
            return response.json()
        except Exception:
            return {
                "status_code": response.status_code,
                "text": response.text
            }

    def GetFeaturesMeasurement(dicts_measurements, dicts_assets, asset_name, measurement_name):
        """Return (domain, direction, type) for the measurement, or "None" strings when not found."""
        domain_ = "None"
        direction = "None"
        type_ = "None"

        # First choice: the measurements listing, matched by measurement and asset name.
        for dict_ in dicts_measurements:
            if dict_["name"] == measurement_name:
                try:
                    if dict_["asset"]["name"] == asset_name:
                        print("Asset Found")
                        domain_ = dict_["domain"]
                        direction = dict_["direction"]
                        type_ = dict_["type"]["name"]
                except Exception:
                    # Entries without the expected nested fields are skipped.
                    continue

        # Fallback: the assets listing, searched only when nothing was found above.
        if domain_ == "None" and direction == "None" and type_ == "None":
            for dict_ in dicts_assets:
                if dict_["name"] == asset_name:
                    for meas in dict_["measurements"]:
                        if meas["sensor_name"] == measurement_name:
                            print("Measurement Found")
                            domain_ = meas["domain"]
                            direction = meas["direction"]
                            type_ = meas["type"]["name"]

                            break

        return domain_, direction, type_

    def GetMeasurementInfo(pilot_name, url_pilot, measurement_name, asset_name):
        """Fetch (or, for the Virtual pilot, hard-code) the catalogues and resolve the measurement features."""
        if pilot_name != "Virtual":
            url_measurements = "{url_pilot}/api-postgre/1.0/api/measurements".format(
                        url_pilot = url_pilot
                    )
            url_assets = "{url_pilot}/api-postgre/1.0/api/assets".format(
                        url_pilot = url_pilot
                    )

            dict_measurement = GetRequest(url_measurements)
            dict_asset = GetRequest(url_assets)

        else:

            dict_asset= [
                        {"name": "building1", "measurements":[{"sensor_name": "heat_meter", "domain": "heat","direction":"in", "type":{"name": "power_wh"}}, 
                                                                {"sensor_name": "electricity_meter", "domain": "electricity","direction":"in", "type":{"name": "power_wh"}}]},
                        {"name": "building2", "measurements":[{"sensor_name": "heat_meter", "domain": "heat","direction":"in", "type":{"name": "power_wh"}}, 
                                                                {"sensor_name": "electricity_meter", "domain": "electricity","direction":"in", "type":{"name": "power_wh"}}]},
                        {"name": "gas_boiler1", "measurements":[{"sensor_name": "heat_meter", "domain": "heat","direction":"out", "type":{"name": "power_wh"}}]},
                        {"name": "gas_boiler2", "measurements":[{"sensor_name": "heat_meter", "domain": "heat","direction":"out", "type":{"name": "power_wh"}}]},
                        {"name": "cogenerator1", "measurements":[{"sensor_name": "electricity_meter", "domain": "electricity","direction":"out", "type":{"name": "power_wh"}}]},
                        {"name": "cogenerator2", "measurements":[{"sensor_name": "electricity_meter", "domain": "electricity","direction":"out", "type":{"name": "power_wh"}}]},
                        {"name": "wind_farm_1", "measurements":[{"sensor_name": "electricity_meter", "domain": "electricity","direction":"out", "type":{"name": "power_wh"}}]},
                        {"name": "pv_panels_1", "measurements":[{"sensor_name": "electricity_meter", "domain": "electricity","direction":"out", "type":{"name": "power_wh"}}]},
                        {"name": "solar_collector1", "measurements":[{"sensor_name": "heat_meter", "domain": "heat","direction":"out", "type":{"name": "power_wh"}}]}

                    ]
            dict_measurement = []

        return GetFeaturesMeasurement(dict_measurement, dict_asset, asset_name, measurement_name)

    def GetPostData(time_, value,
                    measurement_name,
                    asset_name,  domain_,
                    direction_energy, type_,
                    time_prediction):
        """Build the JSON payload for one forecasted point."""
        data_post = {
                "bucket": "renergetic",
                "measurement": measurement_name,
                "fields":{
                    key_measurement: value,
                    "time": time_,
                },
                "tags":{
                    "domain": domain_,
                    "type_data": "forecasting",
                    "direction": direction_energy,
                    "prediction_window": "{hours}h".format(hours = int(num_days) * 24),
                    "asset_name": asset_name,
                    "measurement_type": type_,
                    "time_prediction": time_prediction
                }
            }
        return data_post

    def PostData(data_post, url_pilot, pilot_name):
        """POST one payload without SSL verification and return the HTTP status code."""
        if pilot_name == "Virtual":
            url = "http://influx-api-ren-prototype.apps.paas-dev.psnc.pl/api/measurement"
        else:
            url = url_pilot + "/api-measurement/1.0/api/measurement"

        with no_ssl_verification():
            # Headers are a fixed constant instead of a variable captured from the caller's loop.
            response = requests.request("POST", url, headers=json_headers, data=json.dumps(data_post))

        return response.status_code

    ## Procedure ##

    forecasted_data = pd.read_csv(input_forecast_data_path)
    domain_, direction_, type_ = GetMeasurementInfo(pilot_name, url_pilot,measurement_name, asset_name)
    ic(type_)
    ic(direction_)
    ic(domain_)
    time_prediction = datetime.strftime(maya.now().datetime(), "%Y-%m-%d %H:%M:%S")

    # The two known meters always override whatever domain the catalogue returned.
    if measurement_name == "electricity_meter":
        domain_ = "electricity"
    elif measurement_name == "heat_meter":
        domain_ = "heat"

    values_ok = []
    values_not_ok = []
    # Pre-bound so the final debug output also works when the forecast has no rows.
    data_post = None

    for index, row in tqdm(forecasted_data.iterrows(), total = forecasted_data.shape[0]):
        time_ = str(row["ds"])
        value = row["yhat"]

        data_post = GetPostData(time_, value,
                                measurement_name= measurement_name,
                                domain_ = domain_, asset_name= asset_name, direction_energy= direction_,
                                type_ = type_, time_prediction= time_prediction)

        try:
            status_code = PostData(data_post, url_pilot, pilot_name)

        except Exception:
            # Report the measurement actually being sent (was hard-coded to "electricity_meter").
            message = "Error in updating value for measurement name: {measurement_name} in asset: {asset_name} in time {time_pred}"\
                .format(measurement_name = measurement_name, asset_name = asset_name, time_pred = data_post["fields"]["time"])
            NotifyDiscord(message)
            # Treated as a failed request so the handling below records it too.
            status_code = 400

        if status_code > 299:
            ic(time_)
            ic(value)
            message = "Error in sending the value for measurement name: {measurement_name} in asset: {asset_name} in time {time_pred}"\
                .format(measurement_name = measurement_name, asset_name = asset_name, time_pred = data_post["fields"]["time"])
            NotifyDiscord(message)
            # Dump the payload only for the first failure to keep the log readable.
            if len(values_not_ok) == 0:
                print(data_post)
                print(url_pilot)

            values_not_ok.append(time_)

        else:
            values_ok.append(time_)

    ic(values_ok)
    ic(values_not_ok)
    ic(data_post)
    ic(url_pilot)
        


    

In [111]:
def CheckSendNotification(send_notifications_check:str) -> bool:
    """Decide whether the notification step should run.

    Args:
        send_notifications_check: Notification mode string.

    Returns:
        False when the mode is exactly "no notifications", True for any other value.
    """
    notifications_disabled = send_notifications_check == "no notifications"
    return not notifications_disabled

In [112]:
def SendNotification(forecast_data_path: InputPath(str), threshold_data_path: InputPath(str), asset_name, pilot_name, url_pilot):
    """Scan a forecast for threshold violations and post anomaly notifications.

    Walks the forecast rows (columns ``ds`` and ``yhat``) in order, groups
    consecutive values below the minimum or above the maximum threshold into
    ranges, and posts one notification per range. Each posted notification is
    also announced on Discord. A summary of the results is printed at the end.

    Args:
        forecast_data_path: Path of the forecast CSV to scan.
        threshold_data_path: Path of the JSON file with thresholds keyed by asset name.
        asset_name: Asset the forecast belongs to.
        pilot_name: Pilot identifier; "Virtual" uses the prototype API endpoints.
        url_pilot: Base URL of the pilot API (used when the pilot is not "Virtual").

    Raises:
        ValueError: If a notification request cannot be sent or is answered with
            a status code above 299.
    """
    import json
    import pandas as pd
    from discord_webhook import DiscordWebhook
    import maya
    import requests
    from icecream import ic
    import numpy as np

    def GetNotificationCodes(pilot_name, url_pilot):
        """Return {"code_high", "code_low"} from the notification definitions, or {} on any failure.

        Errors are handled explicitly here instead of through a blanket
        error-suppressing decorator, so the lookup stays best-effort but always
        returns a dict.
        """
        if pilot_name == "Virtual":
            url_notification_definition = "http://api-ren-prototype.apps.paas-dev.psnc.pl/api/notification/definition"
        else:
            url_notification_definition = "{url_pilot}/api-postgre/1.0/api/notification/definition".format(
            url_pilot = url_pilot
        )

        try:
            response = requests.request("GET", url_notification_definition, headers={}, data={})
            if response.status_code > 299:
                ic(url_notification_definition)
                raise ValueError("The request was not successful")
            dict_notifications = response.json()

            code_high = 0
            code_low = 0
            for notif in dict_notifications:
                if notif["message"] == "message.anomaly.high":
                    code_high = notif["code"]
                elif notif["message"] == "message.anomaly.low":
                    code_low = notif["code"]
        except Exception as error:
            print("Unable to retrieve notification codes: {error}".format(error = error))
            return {}

        return {
            "code_high": code_high,
            "code_low": code_low
        }

    def ObtainCodes(codes):
        """Return (code_high, code_low), defaulting each missing code to 0."""
        return codes.get("code_high", 0), codes.get("code_low", 0)

    def GetIds(asset_name, pilot_name, url_pilot):
        """Return (asset id, dashboard id, measurement id); -1 marks an id that could not be resolved."""
        payload = {}
        headers = {}

        if pilot_name == "Virtual":
            url_asset_name = "http://api-ren-prototype.apps.paas-dev.psnc.pl/api/assets?name={asset_name}".format(asset_name = asset_name)
        else:
            url_asset_name = "{url_pilot}/api-postgre/1.0/api/assets?name={asset_name}".format(url_pilot = url_pilot, asset_name = asset_name)

        try:
            response = requests.request("GET", url_asset_name, headers=headers, data=payload)
            dict_asset = response.json()[0]
            id_asset = dict_asset["id"]
        except Exception:
            dict_asset = {}
            id_asset = -1

        # The first measurement of the asset is used as the notification's measurement.
        if "measurements" in dict_asset and len(dict_asset["measurements"]) > 0:
            id_measurement = dict_asset["measurements"][0]["id"]
        else:
            id_measurement = -1

        id_dashboard = 1

        return id_asset, id_dashboard, id_measurement

    def PostNotification(notification_code, date_from, date_to, id_asset, id_dashboard, value, measurement_id, time_, name_pilot, url_pilot):
        """POST one notification and announce it on Discord; return (response, status_code).

        The ``date_to`` argument is replaced by ``time_`` plus 15 minutes before posting.
        """
        date_to = maya.parse(time_).add(minutes = 15).epoch
        dict_post = {
            "notification_code": notification_code,
            "date_from": date_from,
            "date_to": date_to,
            "asset": id_asset,
            "dashboard": id_dashboard,
            "value": value,
            "measurement": measurement_id,
        }
        if name_pilot == "Virtual":
            url = "http://api-ren-prototype.apps.paas-dev.psnc.pl/api/notification"
        else:
            url = "{url_pilot}/api-postgre/1.0/api/notification".format(
                url_pilot = url_pilot
            )
        headers = {
            "Content-Type": "application/json"
        }
        try:
            response = requests.request("POST", url, headers=headers, data=json.dumps(dict_post))
        except Exception as error:
            print(url)
            print(dict_post)
            # Chain the cause so the original failure stays visible in the traceback.
            raise ValueError("Notification request to {url} could not be sent".format(url = url)) from error

        status_code = response.status_code
        if status_code > 299:
            print(response.text)
            print(response.status_code)
            print(url)
            print(dict_post)
            raise ValueError("Notification request to {url} failed with status {status_code}".format(url = url, status_code = status_code))

        # NOTE(review): this webhook URL is a credential hardcoded in source; consider injecting it.
        url_disc = "https://discord.com/api/webhooks/1002537248622923816/_9XY9Hi_mjzh2LTVqnmSKXlIFJ5rgBO2b8xna5pynUrzALgtC4aXSFq89uMdlW_v-ZzT"
        message = "Anomaly detect between {date_from} and {date_to} to asset {asset_name}. Response of Notification {status_code}".\
            format(date_from = date_from, date_to = date_to, status_code = status_code, asset_name = asset_name)
        webhook = DiscordWebhook(url = url_disc, content = message)
        webhook.execute()

        return response, status_code

    def NotificationProcess(forecast_data, code_low, code_high, id_asset, id_dashboard, measurement_id, name_pilot):
        """Post one notification per out-of-threshold range; return (success messages, failure messages)."""
        failed_notifications = []
        success_notification = []

        # Nothing to scan; max() below would raise on an empty column.
        if forecast_data.empty:
            return success_notification, failed_notifications

        def SendAndRecord(notification_code, date_from, date_to, value_notification, time_):
            """Post one notification and file the outcome under success or failure."""
            response, status_code = PostNotification(notification_code, date_from, date_to, id_asset, id_dashboard, value_notification, measurement_id, time_, name_pilot, url_pilot)
            # PostNotification currently raises on status > 299, so the failure
            # branch only matters if that ever changes.
            if status_code > 299:
                failed_notifications.append("Failed Notification send from {date_from} to {date_to}".format(date_from = date_from, date_to = date_to))
                failed_notifications.append(response.text)
            else:
                success_notification.append("Success Notification send from {date_from} to {date_to}".format(date_from = date_from, date_to = date_to))

        max_ds = max(forecast_data["ds"])
        mode = "none"
        values = []

        for index,row in forecast_data.iterrows():
            value = row["yhat"]
            time_ = str(row["ds"])
            print(mode)
            # SEND NOTIFICATION
            if mode == "none":
                if value < threshold_min:
                    date_from = maya.parse(time_).epoch
                    mode = "lower"

                    if time_ == max_ds:
                        # The range starts and ends on the last row, so it has a single point.
                        SendAndRecord(code_low, date_from, date_from, value, time_)
                    else:
                        values.append(value)
                if value > threshold_max:
                    date_from = maya.parse(time_).epoch
                    mode = "upper"

                    if time_ == max_ds:
                        SendAndRecord(code_high, date_from, date_from, value, time_)
                    else:
                        values.append(value)
            elif mode == "lower":
                if value > threshold_min or time_ == max_ds:
                    # The low range ends here: report the mean of the collected values.
                    date_to = maya.parse(time_).epoch
                    value_notification = np.mean(values) if len(values) > 0 else 0
                    SendAndRecord(code_low, date_from, date_to, value_notification, time_)

                    if value > threshold_max:
                        mode = "upper"
                        date_from = maya.parse(time_).epoch
                        values = [value]
                    else:
                        values = []
                        mode = "none"
                else:
                    values.append(value)
            elif mode == "upper":
                if value < threshold_max or time_ == max_ds:
                    # The high range ends here: it is reported with the high-anomaly code.
                    date_to = maya.parse(time_).epoch
                    value_notification = np.mean(values) if len(values) > 0 else 0
                    SendAndRecord(code_high, date_from, date_to, value_notification, time_)

                    if value < threshold_min:
                        date_from = maya.parse(time_).epoch
                        mode = "lower"
                        values = [value]
                    else:
                        values = []
                        mode = "none"
                else:
                    values.append(value)

        return success_notification, failed_notifications

    with open(threshold_data_path) as file:
        dict_threshold = json.load(file)
        try:
            threshold_min = dict_threshold[asset_name]
        except Exception:
            threshold_min = 0

        try:
            threshold_max = dict_threshold[asset_name]
        except Exception:
            threshold_max = 1000000000000000

        # NOTE(review): the values read from the file are overridden by these fixed
        # thresholds (and min > max), which looks like a test setting - confirm
        # before relying on the thresholds file.
        threshold_min = 10
        threshold_max = 0

    forecast_data = pd.read_csv(forecast_data_path)

    codes = GetNotificationCodes(pilot_name, url_pilot)
    if codes is None:
        codes = {}
    code_high, code_low = ObtainCodes(codes)
    id_asset, id_dashboard, id_measurement = GetIds(asset_name, pilot_name, url_pilot)
    success_notifications, failed_notifications = NotificationProcess(forecast_data, code_low, code_high, id_asset, id_dashboard, id_measurement, pilot_name)

    print("SUCCESS")
    for not_ in success_notifications:
        print(not_)

    print("-----------")

    print("FAILED")
    for not_ in failed_notifications:
        print(not_)



# AUXILIARY

In [113]:
def ExportModelToMinio(input_model_path: InputPath(str),measurement_name, 
    path_minio = "minio.kubeflow-renergetic.svc:9000",
    access_key = "minio",
    secret_key = "DaTkKc45Hxr1YLR4LxR2xJP2"
    ):
    """Load a JSON-serialized model and open a MinIO client for it.

    NOTE(review): the function stops after creating the client; nothing is
    uploaded yet and ``measurement_name`` is not used.
    NOTE(review): the default ``secret_key`` is a credential hardcoded in source.

    Args:
        input_model_path: Path of the JSON file holding the serialized model.
        measurement_name: Measurement the model belongs to (currently unused).
        path_minio: Host and port of the MinIO server.
        access_key: MinIO access key.
        secret_key: MinIO secret key.
    """
    from minio import Minio
    import json

    with open(input_model_path) as file:
        # json.load needs the open file handle; calling it with no argument raised TypeError.
        model_serialized = json.load(file)
    client = Minio(
        path_minio,
        access_key=access_key,
        secret_key=secret_key,
    )


In [114]:
def Get_List_Assets(measurement_name, dict_assets) -> dict:
    """Return the entry stored under ``measurement_name`` in a JSON-encoded mapping.

    Args:
        measurement_name: Key to look up.
        dict_assets: JSON string encoding a mapping keyed by measurement name.

    Returns:
        The value stored under ``measurement_name``.
    """
    import json

    assets_by_measurement = json.loads(dict_assets)
    # Debug trace: the key, the decoded mapping and its Python type.
    for item in (measurement_name, assets_by_measurement, type(assets_by_measurement)):
        print(item)
    return assets_by_measurement[measurement_name]

# PIPELINE

In [115]:
def REN_Forecast_Test_Pipeline(url_pilot,
    diff_time:int,
    filter_vars:list = [],
    filter_case:list = [],
    url = "minio-kubeflow-renergetic.apps.dcw1-test.paas.psnc.pl",
    access_key="minio",
    secret_key="DaTkKc45Hxr1YLR4LxR2xJP2",
    min_date = "5 May 2023",
    max_date = "today",
    mode = "no notifications",
    list_measurements:list = ["electricity_meter", "heat_meter"],
    dict_assets : dict = {
        "electricity_meter": ["building1", "building2"],
        "heat_meter": ["building1", "building2"]
    },
    key_measurement = "energy",
    type_measurement = "simulated",
    pilot_name = "Virtual",
    hourly_aggregate = "no",
    minute_aggregate = "no",
    num_days: int = 1,
    send_forecast = "no",
    mae_threshold:float = 1000000,
    mode_prophet: str = "additive",
    daily_seasonality:int = 10,
    weekly_seasonality: int = 10,
    timestamp: float = time.time()
    ):
    """Kubeflow pipeline: download, process and forecast data per measurement and asset.

    Builds one component per notebook function, then wires them as follows:
    thresholds and weather data are fetched once; for every measurement the
    data is downloaded and processed; for every asset of that measurement the
    forecast-metrics check selects either the train-and-forecast step or the
    forecast-from-previous-model step; the forecast is then optionally sent
    and optionally checked for notifications.

    NOTE(review): the default ``secret_key`` is a credential hardcoded in source.
    NOTE: the ``timestamp`` default is evaluated once, when this function is defined.

    Args:
        url_pilot: Base URL of the pilot API, passed to most steps.
        diff_time: Passed to both forecast steps.
        filter_vars: Passed to the data download step.
        filter_case: Passed to the data download step.
        url: Passed to the train-and-forecast step together with the two keys below.
        access_key: Passed to the train-and-forecast step.
        secret_key: Passed to the train-and-forecast step.
        min_date: Lower date bound for the download, processing and weather steps.
        max_date: Upper date bound for the download, processing and previous-model forecast steps.
        mode: Notification mode; passed to CheckSendNotification and the train-and-forecast step.
        list_measurements: Measurements iterated by the outer parallel loop.
        dict_assets: Mapping from which Get_List_Assets picks the entry for each measurement.
        key_measurement: Passed to the download and send-forecast steps.
        type_measurement: Passed to the data download step.
        pilot_name: Pilot identifier, passed to most steps.
        hourly_aggregate: Passed to the processing step.
        minute_aggregate: Passed to the processing step.
        num_days: Passed to the forecast and send-forecast steps.
        send_forecast: Option checked by CheckSendForecast; also passed to the train-and-forecast step.
        mae_threshold: Passed to the forecast-metrics check.
        mode_prophet: Passed to the train-and-forecast step.
        daily_seasonality: Passed to the train-and-forecast step.
        weekly_seasonality: Passed to the train-and-forecast step.
        timestamp: Passed to the InfluxDB weather download step.
    """

    # HOME=/tmp is attached to the download and train-and-forecast tasks below.
    env_var = V1EnvVar(name='HOME', value='/tmp')
    # Each create_component_from_func call also writes the component spec to the given YAML file.
    download_data_op = comp.create_component_from_func(
        GetData, packages_to_install = ["requests", "numpy", "maya","pandas", "icecream", "tqdm", "discord-webhook", "retry"], output_component_file = "download_data_op_component.yaml")
    download_weather_open_meteo_op = comp.create_component_from_func(
        DownloadWeatherData_OpenMeteo, output_component_file= "open_meteo_component.yaml", packages_to_install=["requests", "numpy", "maya","pandas", "icecream", "tqdm", "discord-webhook", "retry", "pyarrow"]
    )
    download_weather_influx_db_op = comp.create_component_from_func(
        DownloadDataFromInfluxDB, output_component_file="weather_influx_db_component.yaml", packages_to_install=["pandas", "pyarrow"]
    )

    check_metrics_forecast_op = comp.create_component_from_func(
        CalculateForecastMetrics, packages_to_install=["maya", "icecream", "pandas","scikit-learn"], output_component_file = "metric_check_op.yaml"
    )

    get_thresholds_op = comp.create_component_from_func(
        GetThresholds, packages_to_install= ["requests"], output_component_file= "thresholds_component.yaml"
    )
    get_list_op = comp.create_component_from_func(
        Get_List_Assets, output_component_file= "get_list_component.yaml"
    )
    process_data_op = comp.create_component_from_func(
        ProcessData, packages_to_install= ["maya", "pandas", "icecream", "tqdm"], output_component_file= "process_data_op_component.yaml"
    )
    # The two forecast components run on a prebuilt image instead of installing packages at start-up.
    forecast_and_train_data_op = comp.create_component_from_func(
        ForecastProcess, packages_to_install = [],base_image= "adcarras/ren-docker-forecast:0.0.1", output_component_file = "forecast_data_op_component.yaml")
    forecast_data_op = comp.create_component_from_func(
        PredictFromPreviousModel, packages_to_install= [], base_image= "adcarras/ren-docker-forecast:0.0.1", output_component_file= "forecast_from_previous.yaml"
    )
    check_send_forecast_op = comp.create_component_from_func(
        CheckSendForecast, packages_to_install=["discord-webhook"], output_component_file= "check_send_forecast_component.yaml"
    )
    send_forecast_op = comp.create_component_from_func(SendForecast, packages_to_install=["requests", "numpy", "maya","pandas", "icecream", "discord-webhook", "tqdm", "minio", "boto3"], output_component_file= "send_forecast_comp.yaml")

    check_send_notification_op = comp.create_component_from_func(
        CheckSendNotification, output_component_file= "check_send_notification.yaml"
    )

    send_notification_op = comp.create_component_from_func(
        SendNotification, packages_to_install=["pandas", "discord-webhook", "maya", "fuckit", "icecream"],output_component_file="send_notification.yaml"
    )

    # BEGIN PIPELINE DEFINITION

    # Steps that run once, outside the per-measurement loop.
    get_thresholds_task = get_thresholds_op(url_pilot, pilot_name)

    download_weather_influx_task = download_weather_influx_db_op(timestamp)

    download_weather_open_meteo_task = download_weather_open_meteo_op(download_weather_influx_task.output, pilot_name, min_date)

   
    

    # Outer fan-out: one branch per measurement.
    with dsl.ParallelFor(list_measurements) as measurement:
        download_task = (download_data_op(measurement, min_date, max_date, url_pilot,pilot_name, type_measurement, key_measurement, filter_vars, filter_case).add_env_variable(env_var)
                            .set_memory_request('2Gi')
                            .set_memory_limit('4Gi')
                            .set_cpu_request('2')
                            .set_cpu_limit('4'))
        process_task = (process_data_op(download_task.outputs["output_data_forecast"], 
                        hourly_aggregate,
                        minute_aggregate,
                        min_date, 
                        max_date)
                        .set_memory_request('2Gi')
                            .set_memory_limit('4Gi')
                            .set_cpu_request('2')
                            .set_cpu_limit('4'))
        
        get_list_task = (get_list_op(measurement, dict_assets))

        # The two gates are evaluated per measurement and reused by every asset branch.
        check_send_forecast_task = check_send_forecast_op(send_forecast)
        check_send_notification_task = check_send_notification_op(mode)

        
        # Inner fan-out: one branch per asset of the current measurement.
        with dsl.ParallelFor(get_list_task.output) as asset:
            check_forecast_task = (check_metrics_forecast_op(download_task.outputs["output_data_metric"], asset, mae_threshold = mae_threshold)
                                   .set_memory_request('2Gi')
                                    .set_memory_limit('4Gi')
                                    .set_cpu_request('2')
                                    .set_cpu_limit('4'))
            # Metrics check returned True: run the train-and-forecast component.
            with dsl.Condition(check_forecast_task.output == True):
                forecast_train_task = (forecast_and_train_data_op(process_task.output, download_weather_open_meteo_task.output,
                measurement, 
                url, 
                access_key, 
                secret_key, 
                mode,
                url_pilot,
                diff_time,
                pilot_name,
                send_forecast,
                asset,
                num_days,
                mode_prophet, daily_seasonality, weekly_seasonality).add_env_variable(env_var)
                .set_memory_request('2Gi')
                .set_memory_limit('4Gi')
                .set_cpu_request('2')
                .set_cpu_limit('4')
                )

                with dsl.Condition(check_send_forecast_task.output == True):
                    send_forecast_task = send_forecast_op(forecast_train_task.outputs["forecast_data"], url_pilot, pilot_name, asset, measurement, key_measurement, num_days)

                with dsl.Condition(check_send_notification_task.output == True):
                    send_notification_task = send_notification_op(forecast_train_task.outputs["forecast_data"], get_thresholds_task.output, asset,pilot_name, url_pilot)
            
            # Metrics check returned False: forecast with the previous-model component instead.
            with dsl.Condition(check_forecast_task.output == False):
                forecast_task = forecast_data_op(process_task.output, download_weather_open_meteo_task.output, 
                                                 pilot_name, measurement, asset, "", 
                                                 max_date, num_days, diff_time)
                with dsl.Condition(check_send_forecast_task.output == True):
                    send_forecast_task = send_forecast_op(forecast_task.outputs["forecast_data"], url_pilot, pilot_name, asset, measurement, key_measurement, num_days)

                with dsl.Condition(check_send_notification_task.output == True):
                    send_notification_task = send_notification_op(forecast_task.outputs["forecast_data"], get_thresholds_task.output, asset,pilot_name, url_pilot)

# Compile the pipeline function into the workflow package file.
compiler.Compiler().compile(
    pipeline_func=REN_Forecast_Test_Pipeline,
    package_path="Forecast_Data_Pipeline.yaml",
)
    

    
    