In [32]:
import polars as pl
import networkx as nx

from pathlib import Path
from intercambios.extract import DataExtractor

# --- Configuration -----------------------------------------------------------
# CSV inputs, resolved relative to the notebook's working directory.
TOPO_PATH = r"topo.csv"
DATA_PATH = r"data.csv"
# NOTE(review): the two paths below are absolute, user-specific Windows paths;
# the notebook will not run on another machine until they are made configurable.
PATH_ACCDB = r"C:\Users\felipe.bastidas\PyProyectos\prg_20240128\Datos\Model PRGdia_Full_Definitivo Solution\Model PRGdia_Full_Definitivo Solution.accdb"
TOPOLOGY_PATH = r"C:\Users\felipe.bastidas\PyProyectos\prg_20240128\Antecedentes\topologia.xlsx"

# Build the extractor from the Access database and the topology workbook, then
# run the extraction. Later cells read its `gen`, `cmg` and `nodes` attributes,
# presumably populated by `extract_data()` -- TODO confirm in intercambios.extract.
data_extractor = DataExtractor(Path(PATH_ACCDB),Path(TOPOLOGY_PATH))
data_extractor.extract_data()

# Topology edge list (used later to build the graph) and the data CSV.
# `try_parse_dates=True` asks polars to infer date/datetime columns on read.
df_topo = pl.read_csv(TOPO_PATH)
df_data = pl.read_csv(DATA_PATH, try_parse_dates=True)

In [166]:
# Reshape the generator results from long format (one row per property) to wide
# format (one column per property) and keep only the "Capacity Curtailed" column.
gen_pivot = (
    data_extractor.gen
    .pivot(
        values="value",
        columns="property",
        index=["generator", "datetime"]
    )
    .select(["generator","datetime","Capacity Curtailed"])
)

# Per (node, datetime): the marginal-cost value `cmg` and the summed curtailment
# of the generators joined onto that node.
data_process = (
        data_extractor.cmg
        # Left join keeps every cmg row. The next join needs a "generator"
        # column, presumably contributed by `nodes` -- TODO confirm its schema.
        .join(
            data_extractor.nodes,
            on="node",
            how="left")
        # Attach each generator's curtailment for the same timestamp.
        .join(
            gen_pivot,
            on=["generator", "datetime"],
            how="left"
        )
        # NOTE(review): this fills nulls across the whole frame, not only
        # "Capacity Curtailed"; a missing `cmg` would become 0 and then pass
        # the `cmg <= 0` checks used further down -- confirm this is intended.
        .fill_null(0)
        .group_by(["node", "datetime"])
        .agg(
            # `cmg` is taken from the first row of each (node, datetime) group.
            pl.col("cmg").first().alias("cmg"),
            pl.col("Capacity Curtailed").sum().alias("curt"),
        )
        # Converted to a LazyFrame, so every consumer must call `.collect()`.
        .lazy()
)
data_process.collect()

node,datetime,cmg,curt
str,datetime[μs],f64,f64
"""LVegas110""",2024-02-01 17:00:00,53.834143,0.0
"""Oeste220""",2024-01-29 04:00:00,87.706497,0.0
"""Tocopilla220_B…",2024-02-02 18:00:00,45.635315,0.0
"""Atacama220_BP1…",2024-01-29 05:00:00,84.886524,0.0
"""LoAguirre500""",2024-01-29 03:00:00,0.0,0.0
"""Esmeralda110""",2024-01-28 21:00:00,87.203505,0.0
"""Trupan220""",2024-01-31 21:00:00,0.0,0.0
"""Miraje220""",2024-02-01 05:00:00,80.182986,0.0
"""Degan110""",2024-02-01 07:00:00,68.264248,0.0
"""LosChangos220""",2024-02-01 10:00:00,-0.255763,0.0


In [192]:
from dataclasses import dataclass

import datetime as dt

@dataclass
class DataSchema:
    """One result record: a timestamp and the curtailment value computed for it."""

    # Timestamp of the record (a single datetime, despite the plural name).
    dates: dt.datetime
    # Curtailment total computed for that timestamp; units are not stated here.
    curt: float

def hour_curtailment(cmg: pl.DataFrame, nodes: list[str]) -> float:
    """Sum the ``curt`` column over the rows of ``cmg`` whose node is in ``nodes``.

    ``cmg`` is expected to hold a single ``datetime`` value (one hour).

    Args:
        cmg: Frame with at least ``node``, ``datetime`` and ``curt`` columns.
        nodes: Node names whose curtailment is added up.

    Returns:
        The summed curtailment, or ``0.0`` when no row matches ``nodes``
        (previously this case raised because ``.item()`` was called on an
        empty frame).

    Raises:
        ValueError: from ``DataFrame.item()`` if the matching rows span more
            than one ``datetime``, so the result would not be a single value.
    """
    per_hour = (
        cmg
        .filter(pl.col("node").is_in(nodes))
        .group_by("datetime")
        .agg(pl.col("curt").sum().alias("curt"))
    )
    # No matching rows means there is nothing to sum; return an explicit zero
    # instead of letting `.item()` fail on a 0-row frame.
    if per_hour.is_empty():
        return 0.0
    return per_hour.select("curt").item()

def get_data(cmg: pl.DataFrame, date: dt.datetime) -> pl.DataFrame:
    """Return the rows of ``cmg`` whose ``datetime`` column equals ``date``."""
    matches_date = pl.col("datetime").eq(date)
    return cmg.filter(matches_date)

def get_hours(cmg: pl.DataFrame, node: str) -> pl.DataFrame:
    """Return the sorted timestamps at which ``node`` has ``cmg <= 0``.

    The result is a single-column frame (``datetime``) in ascending order.
    """
    is_node = pl.col("node") == node
    non_positive_cmg = pl.col("cmg") <= 0
    hours = cmg.filter(is_node, non_positive_cmg).select("datetime")
    return hours.sort("datetime")

def check_cmg(cmg: pl.DataFrame, nodo: str) -> bool:
    """Tell whether ``nodo`` counts as having a non-positive ``cmg``.

    Args:
        cmg: Frame with ``node`` and ``cmg`` columns, expected to contain at
            most one row per node (i.e. a single hour).
        nodo: Node name to look up.

    Returns:
        ``True`` when ``nodo`` has no row in ``cmg`` or when its ``cmg`` value
        is ``<= 0``; ``False`` otherwise.

    Raises:
        ValueError: from ``DataFrame.item()`` if ``nodo`` has more than one row.
    """
    # Filter once and reuse the result; this runs for every node the graph
    # traversal visits, so the lookup should not be evaluated twice.
    valores = cmg.filter(pl.col("node") == nodo).select("cmg")
    if valores.is_empty():
        # Nodes without any cmg record are treated as non-positive.
        return True
    return valores.item() <= 0

def obtener_nodos_con_valor_negativo(G: nx.Graph, nodo_inicial: str, cmg: pl.DataFrame) -> list[str]:
    """List the nodes reachable from ``nodo_inicial`` that ``check_cmg`` flags.

    The graph is walked breadth-first from ``nodo_inicial``; each reached node
    is tested with ``check_cmg(cmg, node)`` and kept when that returns ``True``.
    Values come from the ``cmg`` frame, not from node attributes of ``G``.

    Args:
        G: Graph to traverse.
        nodo_inicial: Node where the traversal starts.
        cmg: Frame handed unchanged to ``check_cmg`` for every visited node.

    Returns:
        Flagged nodes in breadth-first visiting order. Empty when
        ``nodo_inicial`` is not in ``G`` (a message is printed in that case).
    """
    # Local import keeps this cell runnable without touching the import cell.
    from collections import deque

    # Nothing to explore when the starting node is not part of the graph.
    if nodo_inicial not in G:
        print(f"El nodo inicial {nodo_inicial} no se encuentra en el grafo.")
        return []

    nodos_con_valor_negativo: list[str] = []
    # Nodes are marked when they are enqueued, so each one enters the queue at
    # most once; the visiting order is the same as marking them on dequeue.
    visitados = {nodo_inicial}
    cola = deque([nodo_inicial])  # deque gives O(1) pops from the left

    while cola:
        nodo_actual = cola.popleft()
        if check_cmg(cmg, nodo_actual):
            nodos_con_valor_negativo.append(nodo_actual)

        # NOTE(review): neighbours are expanded even when the current node is
        # not flagged, so the walk covers the whole connected component rather
        # than only a contiguous flagged region -- confirm this is intended.
        for vecino in G[nodo_actual]:
            if vecino not in visitados:
                visitados.add(vecino)
                cola.append(vecino)

    return nodos_con_valor_negativo

In [196]:
# Graph built from the topology table: one edge per ('Nodo', 'Central') row.
G = nx.from_pandas_edgelist(df_topo.to_pandas(), 'Nodo', 'Central')

# Node used both to pick the hours of interest and as the traversal start.
START_NODE = "Andes220"

# Materialise the lazy pipeline once instead of on every loop iteration.
cmg_curt = data_process.collect()

curt_data = []
# `get_hours` yields a single-column frame, so each row is a 1-tuple.
for (date,) in get_hours(cmg_curt, START_NODE).iter_rows():
    data = get_data(cmg_curt, date)  # all nodes for this hour
    nodes = obtener_nodos_con_valor_negativo(G, START_NODE, data)
    curt = hour_curtailment(data, nodes)
    curt_data.append(DataSchema(date, curt))

In [220]:
# Turn the per-hour records into plain dicts keyed by the output column names.
aux = [
    {'datetime': record.dates, 'curt': record.curt}
    for record in curt_data
]
df_curt = pl.DataFrame(aux)

# Preview: the first 24 rows dated 2024-01-29 00:00 or later.
df_curt.filter(pl.col("datetime") >= dt.datetime(2024, 1, 29, 0)).head(24)

datetime,curt
datetime[μs],f64
2024-01-29 09:00:00,592.565162
2024-01-29 10:00:00,817.731803
2024-01-29 11:00:00,1120.411339
2024-01-29 12:00:00,1200.00147
2024-01-29 13:00:00,1397.090399
2024-01-29 14:00:00,1541.460476
2024-01-29 15:00:00,1608.782336
2024-01-29 16:00:00,1543.614328
2024-01-29 17:00:00,1394.533
2024-01-29 18:00:00,595.759004


In [215]:
# Upper bound applied to the hourly curtailment (same units as `curt`).
# NOTE(review): the meaning of this limit is not stated in the notebook -- confirm.
CURT_CAP = 80

# Timestamps covered by the cmg data; selected once and reused for both ends
# of the hourly range below.
cmg_datetimes = data_extractor.cmg.select("datetime")

# Hourly grid over the whole cmg period, with the capped curtailment joined on.
(
    pl.DataFrame()
    .select(
        pl.datetime_range(
            start=cmg_datetimes.min().item(),
            end=cmg_datetimes.max().item(),
            interval="1h"
        ).alias("datetime"),
    )
    # Left join keeps every hour, including those without a computed value.
    .join(df_curt, on="datetime", how="left")
    .with_columns(
        pl.when(pl.col("curt").gt(CURT_CAP))
        .then(CURT_CAP)
        .otherwise(pl.col("curt"))
        .alias("curt")
    )
    # Hours without a computed value are reported as 0.
    .fill_null(0)
)

datetime,curt
datetime[μs],f64
2024-01-28 00:00:00,0.0
2024-01-28 01:00:00,0.0
2024-01-28 02:00:00,0.0
2024-01-28 03:00:00,0.0
2024-01-28 04:00:00,0.0
2024-01-28 05:00:00,0.0
2024-01-28 06:00:00,0.0
2024-01-28 07:00:00,0.0
2024-01-28 08:00:00,0.0
2024-01-28 09:00:00,80.0
