In [1]:
import pandas as pd
import numpy as np
import duckdb

import mlflow

In [2]:
# Caminho do banco
db_path = "../../data/duckdb/database.duckdb"

# Conexão com o banco DuckDB
con = duckdb.connect(db_path)

# Define raiz do projeto (ajuste conforme seu ambiente)
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Carrega os dados da camada bronze
df = con.execute("""
    SELECT * FROM feature.previsao_consumo
""").df()

# Gerar dados de inferência

In [3]:
max_date = df['date'].max()

df = df[df['date'] == max_date]

In [4]:
def atualizar_colunas_lag(df: pd.DataFrame, coluna_alvo: str, prefixo_lag: str, num_lags: int) -> pd.DataFrame:
    """
    Atualiza as colunas de lag:
        lag_{n-1} -> lag_n
        ...
        lag_1 -> lag_2
        coluna_alvo -> lag_1

    Parâmetros:
        df (pd.DataFrame): DataFrame com a coluna alvo e colunas de lag.
        coluna_alvo (str): Nome da coluna atual (ex: 'consumption_kwh').
        prefixo_lag (str): Prefixo usado nas colunas de lag (ex: 'consumption_kwh_lag').
        num_lags (int): Quantidade total de lags (ex: 3 cria lag_1 até lag_3).

    Retorna:
        pd.DataFrame atualizado com as colunas de lag movidas.
    """
    df = df.copy()

    # Move de trás para frente
    for i in range(num_lags, 1, -1):
        df[f"{prefixo_lag}_{i}"] = df[f"{prefixo_lag}_{i-1}"]

    # lag_1 recebe o valor atual da coluna alvo
    df[f"{prefixo_lag}_1"] = df[coluna_alvo]

    return df

In [5]:
df = atualizar_colunas_lag(df, coluna_alvo='consumption_kwh', prefixo_lag='consumption_kwh_lag', num_lags=14)
df = atualizar_colunas_lag(df, coluna_alvo='temperature', prefixo_lag='temperature_lag', num_lags=14)
df = atualizar_colunas_lag(df, coluna_alvo='humidity', prefixo_lag='humidity_lag', num_lags=14)

# Inferência de Modelo

In [6]:
def carregar_ultimo_modelo(model_name: str):
    """
    Carrega o modelo mais recente registrado no MLflow Model Registry com o nome fornecido.

    Parâmetros:
        model_name (str): Nome do modelo registrado no MLflow (ex: 'previsao_consumo_SP_0')

    Retorna:
        modelo carregado (usualmente um XGBoostRegressor ou PyFunc)
    """
    # Caminho padrão para o último modelo em produção (você pode usar 'latest' também)
    model_uri = f"models:/{model_name}/latest"

    # Carrega o modelo
    model = mlflow.sklearn.load_model(model_uri)

    return model

In [7]:
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

resultados = []
features = [col for col in df.columns if col not in ['client_id', 'date', 'consumption_kwh', 'cluster', 'region','humidity','temperature']]


grupos = df.groupby(['cluster', 'region'])

for (cluster, region), grupo_df in grupos:

    nome_modelo = f"previsao_cliente_{region}_{cluster}"
    modelo = carregar_ultimo_modelo(nome_modelo)

    y_pred = modelo.predict(grupo_df[features])

    output_df = grupo_df[['client_id']]
    output_df['region'] = region
    output_df['forecast'] = y_pred

    resultados.append(output_df)

resultados = pd.concat(resultados)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['region'] = region
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['forecast'] = y_pred


# Salvar Previsões

In [8]:
con.execute("""
CREATE TABLE IF NOT EXISTS output.previsao_consumo (
    client_id VARCHAR,
    region VARCHAR,
    forecast DOUBLE
)
""")

<duckdb.duckdb.DuckDBPyConnection at 0x2eb2e575270>

In [9]:
# Limpa dados se as tabelas já existirem
con.execute("DELETE FROM output.previsao_consumo")

<duckdb.duckdb.DuckDBPyConnection at 0x2eb2e575270>

In [10]:
# Registra como tabelas temporárias
con.register("resultados", resultados)

<duckdb.duckdb.DuckDBPyConnection at 0x2eb2e575270>

In [11]:
# Insere os dados nas tabelas gold
con.execute("INSERT INTO output.previsao_consumo SELECT * FROM resultados")

<duckdb.duckdb.DuckDBPyConnection at 0x2eb2e575270>

In [12]:
con.close()