In [18]:
import pandas as pd
from sqlalchemy import *
from create_ref_mariadb import mariadb_connection
from create_ref_mongodb import mongodb_connection
import pymongo


In [19]:
from pprint import pprint
import pandas as pd

# Handle returned by the project helper imported from create_ref_mongodb; it is
# passed to last_weather_forecast_to_df below, which queries its `forecast` attribute.
client = mongodb_connection()

def last_weather_forecast_to_df(client):
    """Flatten the most recently extracted forecast document into a DataFrame.

    Parameters
    ----------
    client
        Object whose ``forecast`` attribute offers a pymongo-style
        ``find(...).sort(...).limit(...)`` chain.

    Returns
    -------
    pd.DataFrame
        One row per (wind farm, forecast step) with the columns
        ``forecast_date`` (taken from ``dt_txt``), ``windspeed`` (taken from
        ``wind.speed``) and ``windfarm_id``. When the collection holds no
        document, or the latest document carries no forecast step, an empty
        frame with those three columns is returned instead of failing.
    """
    empty_columns = ["windspeed", "forecast_date", "windfarm_id"]

    # Only the newest extract is needed: sort descending and keep one document.
    cursor = client.forecast.find({}, {"data": 1}).sort("extract_date", -1).limit(1)
    latest_document = next(iter(cursor), None)
    if latest_document is None:
        return pd.DataFrame(columns=empty_columns)

    records = []
    for windfarm in latest_document["data"]:
        for forecast in windfarm["list"]:
            record = {}
            # Walk the keys in document order so the resulting column order
            # follows the stored document; every other key is ignored.
            for key, value in forecast.items():
                if key == "dt_txt":
                    record["forecast_date"] = value
                elif key == "wind":
                    record["windspeed"] = value["speed"]
            record["windfarm_id"] = windfarm["windfarm_id"]
            records.append(record)

    if not records:
        return pd.DataFrame(columns=empty_columns)
    return pd.DataFrame(records)

# Build the forecast table from the most recent stored document and preview it.
df_forecast = last_weather_forecast_to_df(client)

df_forecast.head()


Unnamed: 0,windspeed,forecast_date,windfarm_id
0,4.12,2023-06-07 15:00:00,1ec6d7a1-4b96-67a4-9358-df2d2b033685
1,4.75,2023-06-07 18:00:00,1ec6d7a1-4b96-67a4-9358-df2d2b033685
2,3.06,2023-06-07 21:00:00,1ec6d7a1-4b96-67a4-9358-df2d2b033685
3,2.55,2023-06-08 00:00:00,1ec6d7a1-4b96-67a4-9358-df2d2b033685
4,1.73,2023-06-08 03:00:00,1ec6d7a1-4b96-67a4-9358-df2d2b033685


In [20]:
# Join every turbine to its power-curve samples: each returned row is one
# (windspeed, power) point together with the turbine, farm and coordinates.
power_curve_sql = """SELECT p.windturbine_id, windspeed, power, windfarm_id,latitude,longitude
        FROM mariadb_itw.windturbines as wt inner join mariadb_itw.powercurves as p
        ON p.windturbine_id = wt.windturbine_id ;"""

eng = mariadb_connection()
df_power_curve = pd.read_sql(power_curve_sql, con=eng)

df_power_curve.head()

Unnamed: 0,windturbine_id,windspeed,power,windfarm_id,latitude,longitude
0,1ec6d7ce-93f7-6ef8-abda-53533bbf8f7e,0.0,0.0,1ec6d7a1-4b96-67a4-9358-df2d2b033685,48.7136,4.44707
1,1ec6d7ce-93f7-6ef8-abda-53533bbf8f7e,0.1,0.0,1ec6d7a1-4b96-67a4-9358-df2d2b033685,48.7136,4.44707
2,1ec6d7ce-93f7-6ef8-abda-53533bbf8f7e,0.2,0.0,1ec6d7a1-4b96-67a4-9358-df2d2b033685,48.7136,4.44707
3,1ec6d7ce-93f7-6ef8-abda-53533bbf8f7e,0.3,0.0,1ec6d7a1-4b96-67a4-9358-df2d2b033685,48.7136,4.44707
4,1ec6d7ce-93f7-6ef8-abda-53533bbf8f7e,0.4,0.0,1ec6d7a1-4b96-67a4-9358-df2d2b033685,48.7136,4.44707


In [64]:
def forecast_power_by_turbine(
    df_forecast: pd.DataFrame,
    df_power_curve: pd.DataFrame,
    step_hours: float = 3,
) -> pd.DataFrame:
    """Estimate per-turbine power and energy for every forecast timestamp.

    Each forecast wind speed is rounded to one decimal and matched exactly
    against the power-curve samples of the same wind farm, so every turbine
    of that farm contributes one row per matching forecast row.

    Parameters
    ----------
    df_forecast : pd.DataFrame
        Must contain ``windfarm_id``, ``forecast_date`` and ``windspeed``.
    df_power_curve : pd.DataFrame
        Must contain ``windturbine_id``, ``windfarm_id``, ``windspeed`` and
        ``power``. ``latitude`` / ``longitude`` are dropped when present.
    step_hours : float, default 3
        Multiplier turning ``power_kw`` into ``prod_kwh`` (the time span each
        forecast row is taken to cover).

    Returns
    -------
    pd.DataFrame
        Columns of ``df_power_curve`` (``power`` renamed to ``power_kw``,
        coordinates removed, ``windspeed`` being the matched curve sample)
        plus ``forecast_date`` and ``prod_kwh``, sorted by forecast date,
        wind farm and turbine with a fresh 0..n-1 index. An empty forecast or
        a forecast without any matching curve sample yields an empty frame
        with the same columns.
    """
    join_keys = ["windfarm_id", "windspeed"]

    # Built-in round() is applied value by value on purpose: it is correctly
    # rounded, whereas Series.round() can land on the neighbouring 0.1 step
    # for inputs such as 4.35, which would select a different curve sample.
    rounded_speed = pd.Series(
        [round(float(speed), 1) for speed in df_forecast["windspeed"]],
        index=df_forecast.index,
        dtype="float64",
    )
    forecast_keys = (
        df_forecast[["windfarm_id", "forecast_date"]]
        .assign(windspeed=rounded_speed)
        # merge() pairs missing keys with each other; a missing key must not
        # match anything, exactly as with an equality comparison.
        .dropna(subset=join_keys)
    )

    # One vectorised join replaces the per-row filter + concat loop.
    return (
        df_power_curve
        .merge(forecast_keys, on=join_keys, how="inner")
        .drop(columns=["latitude", "longitude"], errors="ignore")
        .rename(columns={"power": "power_kw"})
        .sort_values(by=["forecast_date", "windfarm_id", "windturbine_id"])
        .assign(prod_kwh=lambda frame: frame["power_kw"] * step_hours)
        .reset_index(drop=True)
    )

# Run the estimate on the forecast and power-curve frames loaded above, then preview it.
df_test = forecast_power_by_turbine(df_forecast, df_power_curve)

df_test.head()

Unnamed: 0,windturbine_id,windspeed,power_kw,windfarm_id,forecast_date,prod_kwh
0,1ec6d7ce-93f7-6ef8-abda-53533bbf8f7e,4.1,125.6,1ec6d7a1-4b96-67a4-9358-df2d2b033685,2023-06-07 15:00:00,376.8
1,1ec6d7ce-93fa-6edc-a613-53533bbf8f7e,4.1,125.6,1ec6d7a1-4b96-67a4-9358-df2d2b033685,2023-06-07 15:00:00,376.8
2,1ec6d7ce-93fd-6272-a3af-53533bbf8f7e,4.1,125.6,1ec6d7a1-4b96-67a4-9358-df2d2b033685,2023-06-07 15:00:00,376.8
3,1ec6d7ce-93ff-639c-9afd-53533bbf8f7e,4.1,125.6,1ec6d7a1-4b96-67a4-9358-df2d2b033685,2023-06-07 15:00:00,376.8
4,1ec6d7ce-9401-646c-b141-53533bbf8f7e,4.1,125.6,1ec6d7a1-4b96-67a4-9358-df2d2b033685,2023-06-07 15:00:00,376.8


In [76]:
def max_power_by_turbine(eng) -> pd.DataFrame:
    """Fetch the highest power-curve value recorded for every turbine.

    ``eng`` is handed to ``pd.read_sql`` as ``con``. The result holds one row
    per ``windturbine_id``; the aggregate column is not aliased in the query,
    so it carries whatever name the database assigns to ``max(power)``.
    """
    max_power_query = """SELECT windturbine_id, max(power)
            FROM mariadb_itw.powercurves
            GROUP BY windturbine_id;"""
    return pd.read_sql(max_power_query, con=eng)

# Request a handle from the project helper again, then preview the per-turbine maxima.
eng = mariadb_connection()

max_power_by_turbine(eng).head()

Unnamed: 0,windturbine_id,max(power)
0,1ec6d7ce-93f7-6ef8-abda-53533bbf8f7e,2000.0
1,1ec6d7ce-93fa-6edc-a613-53533bbf8f7e,2000.0
2,1ec6d7ce-93fd-6272-a3af-53533bbf8f7e,2000.0
3,1ec6d7ce-93ff-639c-9afd-53533bbf8f7e,2000.0
4,1ec6d7ce-9401-646c-b141-53533bbf8f7e,2000.0


In [None]:
# Sum the remaining numeric columns per turbine and forecast timestamp; the
# two grouping columns become the index of the result.
df_test2 = (
    df_test
    .drop(columns=["windspeed", "windfarm_id"])
    .groupby(by=["windturbine_id", "forecast_date"])
    .sum()
)