#### Import and Merge Generation Data from .parquet Files

In [None]:
import os
import glob
import pandas as pd

# Folder containing the .parquet files. Built with os.path.join so the same
# relative location resolves on Windows ("..\data\raw") and on POSIX systems.
folder_path = os.path.join("..", "data", "raw")

# Find all .parquet files in the folder. glob does not guarantee any ordering
# of its results, so sort them to make the import (and therefore the row order
# of the merged DataFrame) reproducible between runs and machines.
parquet_files = sorted(glob.glob(os.path.join(folder_path, "*.parquet")))

if not parquet_files:
    print("No Parquet files were found in the specified folder.")
else:
    # Load each Parquet file and collect the resulting DataFrames
    df_list = []
    for file in parquet_files:
        print(f"Importing {file}...")
        df_temp = pd.read_parquet(file)
        df_list.append(df_temp)

    # Stack all DataFrames into a single one with a fresh 0..n-1 index
    df_merged = pd.concat(df_list, ignore_index=True)
    print(f"\nMerged {len(df_list)} DataFrames.")
    print(f"The resulting DataFrame has {df_merged.shape[0]} rows and {df_merged.shape[1]} columns.")

    # df_merged is now in memory and ready for further processing;
    # show the first few rows as a quick sanity check.
    display(df_merged.head())

Importing ..\data\raw\post_despacho_20130101.parquet...
Importing ..\data\raw\post_despacho_20130102.parquet...
Importing ..\data\raw\post_despacho_20130103.parquet...
Importing ..\data\raw\post_despacho_20130104.parquet...
Importing ..\data\raw\post_despacho_20130105.parquet...
Importing ..\data\raw\post_despacho_20130106.parquet...
Importing ..\data\raw\post_despacho_20130107.parquet...
Importing ..\data\raw\post_despacho_20130108.parquet...
Importing ..\data\raw\post_despacho_20130109.parquet...
Importing ..\data\raw\post_despacho_20130110.parquet...
Importing ..\data\raw\post_despacho_20130111.parquet...
Importing ..\data\raw\post_despacho_20130112.parquet...
Importing ..\data\raw\post_despacho_20130113.parquet...
Importing ..\data\raw\post_despacho_20130114.parquet...
Importing ..\data\raw\post_despacho_20130115.parquet...
Importing ..\data\raw\post_despacho_20130116.parquet...
Importing ..\data\raw\post_despacho_20130117.parquet...
Importing ..\data\raw\post_despacho_20130118.par

#### Normalize Strings and Date Format

In [None]:
# --- Define Functions ---

import unicodedata
import pandas as pd

def normalize_string(s):
    """
    Return a lowercase, accent-free version of a string.

    The text is lowercased, decomposed with Unicode NFKD so that accented
    letters split into a base letter plus combining marks, and the combining
    marks are then discarded.

    Parameters:
        s (str): The input string.

    Returns:
        str: The normalized string.
    """
    decomposed = unicodedata.normalize('NFKD', s.lower())
    # combining() returns 0 for characters that are not combining marks
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    return ''.join(base_chars)

def normalize_central_column(df, column="CENTRAL"):
    """
    Lowercase and strip accents from every value of one column, in place.

    Values are first cast to str (so non-string values, including missing
    ones, become their string representation) and then passed through
    normalize_string.

    Parameters:
        df (pd.DataFrame): DataFrame containing the column; it is modified in place.
        column (str): Column name to normalize (default "CENTRAL").

    Returns:
        pd.DataFrame: The same DataFrame object, with the column normalized.
    """
    as_text = df[column].astype(str)
    df[column] = as_text.apply(normalize_string)
    return df

def format_fecha_column(df, column="FECHA"):
    """
    Parse a date column into datetime values, in place.

    Values are expected in the form '%Y-%m-%dT%H:%M:%S'; anything that does
    not parse with that format becomes NaT instead of raising.

    Parameters:
        df (pd.DataFrame): DataFrame containing the date column; modified in place.
        column (str): Column name to format (default "FECHA").

    Returns:
        pd.DataFrame: The same DataFrame object, with the column parsed.
    """
    iso_format = '%Y-%m-%dT%H:%M:%S'
    parsed = pd.to_datetime(df[column], format=iso_format, errors='coerce')
    df[column] = parsed
    return df

def remove_unwanted_units(df, unit_col="CENTRAL"):
    """
    Drop the rows whose unit name appears in a fixed exclusion list.

    Matching is exact, so the column is expected to hold already-normalized
    (lowercase, accent-free) names.

    Parameters:
        df (pd.DataFrame): DataFrame with the unit column.
        unit_col (str): Name of the unit column (default "CENTRAL").

    Returns:
        pd.DataFrame: The rows of df whose unit is not excluded (original index kept).
    """
    exclude_units = [
        "arroyo barril", "cayman", "dajabon",
        "los mina 1", "los mina 2",
        "haina 3",
        "puerto plata 1", "puerto plata 2",
        "santo domingo 5", "santo domingo 8",
        "timbeque 1", "timbeque 2",
    ]
    is_excluded = df[unit_col].isin(exclude_units)
    return df[~is_excluded]

def fill_missing_with_next_available(df, date_col='FECHA', central_col='CENTRAL', max_offset=14):
    """
    Fills calendar days that have no rows at all by copying the rows of a later
    day that falls on the same weekday.

    For each missing date, whole-week offsets (7, 14, ... days, up to max_offset)
    are tried in increasing order. The rows of the first candidate date that has
    data are duplicated with their date set to the missing date. If no candidate
    has data, a message is printed and the date stays missing.

    Note: the date column of the input DataFrame is converted to datetime and
    normalized to midnight in place.

    Parameters:
        df (pd.DataFrame): DataFrame with at least the date_col and central_col.
        date_col (str): Name of the date column (anything pd.to_datetime accepts).
        central_col (str): Name of the plant column.
        max_offset (int): Maximum number of days offset to check (default is 14).

    Returns:
        pd.DataFrame: New DataFrame with the original rows plus the filled rows,
                      sorted by central_col then date_col, with a fresh index.
    """
    df[date_col] = pd.to_datetime(df[date_col]).dt.normalize()

    first_date = df[date_col].min()
    last_date = df[date_col].max()
    if pd.isna(first_date):
        # Empty input (or no valid dates): there is no range to scan, and
        # pd.date_range would raise on NaT bounds.
        return df.sort_values(by=[central_col, date_col]).reset_index(drop=True)

    all_dates = pd.date_range(start=first_date, end=last_date)
    present_dates = pd.to_datetime(df[date_col].unique())
    missing_dates = all_dates.difference(present_dates)

    fill_frames = []

    for missing_date in missing_dates:
        # Only whole-week offsets land on the same weekday, so step by 7.
        for offset in range(7, max_offset + 1, 7):
            candidate_date = missing_date + pd.Timedelta(days=offset)
            # Candidates are looked up in the original data only, never in
            # rows that were themselves filled in.
            df_candidate = df[df[date_col] == candidate_date]
            if not df_candidate.empty:
                # Copy the whole day at once; this keeps the column dtypes,
                # unlike rebuilding the rows one by one.
                filled_day = df_candidate.copy()
                filled_day[date_col] = missing_date
                fill_frames.append(filled_day)
                break
        else:
            print(f"No matching record found to fill missing date {missing_date.date()}")

    df_filled = pd.concat([df, *fill_frames], ignore_index=True)
    df_filled = df_filled.sort_values(by=[central_col, date_col]).reset_index(drop=True)
    return df_filled
def standardize_central_names(df, column="CENTRAL"):
    """
    Rename known name variants of a central to their canonical name, in place.

    Only values that are exactly equal to one of the variants below are
    replaced; the values are expected to be lowercase already.

    Variant -> canonical name:
      - "central hidroelectrica hatillo 2" -> "hatillo 2"
      - "parque fotovoltaico bayahonda (bayasol)" -> "parque fotovoltaico bayasol"
      - "parque fotovoltaico montecristi solar1" -> "parque fotovoltaico montecristi solar 1"
      - "parque eolico los guzmancito 2" -> "parque eolico los guzmancitos 2"
      - "hatillo" -> "hatillo 1"
      - "juancho los cocos 1" -> "los cocos 1"
      - "aes andres xxxxxx" -> "aes andres"

    Parameters:
        df (pd.DataFrame): DataFrame containing the central names; modified in place.
        column (str): Name of the column to standardize (default "CENTRAL").

    Returns:
        pd.DataFrame: The same DataFrame object, with standardized central names.
    """
    rename_pairs = [
        ("central hidroelectrica hatillo 2", "hatillo 2"),
        ("parque fotovoltaico bayahonda (bayasol)", "parque fotovoltaico bayasol"),
        ("parque fotovoltaico montecristi solar1", "parque fotovoltaico montecristi solar 1"),
        ("parque eolico los guzmancito 2", "parque eolico los guzmancitos 2"),
        ("hatillo", "hatillo 1"),
        ("juancho los cocos 1", "los cocos 1"),
        ("aes andres xxxxxx", "aes andres"),
    ]
    canonical_names = dict(rename_pairs)

    renamed = df[column].replace(canonical_names)
    df[column] = renamed
    return df
def remove_daily_duplicates_by_max_hours(df, date_col="FECHA", unit_col="CENTRAL"):
    """
    Removes duplicate rows for the same unit (central) and date by keeping only the row
    with the highest sum of hour columns (H1 to H24).

    Assumes the date column is already normalized (i.e., time set to midnight).
    The input DataFrame is not modified: the per-row hour total is computed as a
    separate Series instead of being written into a temporary column.

    Rows are selected by index label, so the index is expected to be unique
    (e.g. freshly reset). Rows whose date or unit is missing do not belong to
    any group and are therefore not part of the result.

    Parameters:
        df (pd.DataFrame): DataFrame containing at least the columns specified in date_col,
                           unit_col, and hour columns (H1...H24).
        date_col (str): Name of the date column (default "FECHA").
        unit_col (str): Name of the central column (default "CENTRAL").

    Returns:
        pd.DataFrame: One row per (date, unit) combination, ordered by date then unit,
                      keeping the original index labels. If the DataFrame has no hour
                      columns it is returned unchanged.
    """
    # Identify hour columns (H1 to H24) present in the DataFrame
    hour_cols = [f"H{i}" for i in range(1, 25) if f"H{i}" in df.columns]
    if not hour_cols:
        print("No hour columns found in the DataFrame.")
        return df

    # Per-row total of the hour columns (missing hours count as 0)
    hour_sum = df[hour_cols].sum(axis=1, skipna=True)

    # For each (date, unit) group, the index label of the row with the largest total;
    # ties resolve to the first such row.
    idx = hour_sum.groupby([df[date_col], df[unit_col]]).idxmax()

    return df.loc[idx].copy()

def aggregate_unit_groups(df, group_mapping, date_col="FECHA", unit_col="CENTRAL"):
    """
    Aggregates rows for unit groups based on a mapping.

    For each group defined in group_mapping (where the key is the unified unit name and the value
    is a list of variants), the function:
      1. Filters rows where unit_col is in the variants.
      2. Groups these rows by the date (date_col) and sums the hourly columns (H1 to H24).
      3. Creates new rows with the unified unit name (the key).
      4. Removes the original rows for these variants from the DataFrame.
      5. Appends the aggregated rows back to the DataFrame.

    The aggregated rows carry only date_col, unit_col and the hourly columns; every
    other column of df is left missing (NaN) on those rows.

    Note: the date column of the input DataFrame is converted to datetime and
    normalized to midnight in place.

    Parameters:
      df (pd.DataFrame): DataFrame containing at least the date_col, unit_col, and hourly columns (H1 ... H24).
      group_mapping (dict): Dictionary where keys are the unified unit names and values are lists of variants.
      date_col (str): Name of the date column (default "FECHA").
      unit_col (str): Name of the unit column (default "CENTRAL").

    Returns:
      pd.DataFrame: DataFrame with aggregated rows for each specified group, sorted by
                    unit then date with a fresh index. If df has no hourly columns it
                    is returned unchanged.
    """
    # Identify hourly columns (H1 to H24) that exist in the DataFrame
    hour_cols = [f"H{i}" for i in range(1, 25) if f"H{i}" in df.columns]
    if not hour_cols:
        print("No hourly columns found in the DataFrame.")
        return df

    # Normalize the date column (set time to midnight)
    df[date_col] = pd.to_datetime(df[date_col]).dt.normalize()

    aggregated_frames = []
    for unified_unit, variants in group_mapping.items():
        df_group = df[df[unit_col].isin(variants)]
        if df_group.empty:
            continue
        # One row per date holding the summed hourly values of all variants
        df_agg = df_group.groupby(date_col, as_index=False)[hour_cols].sum()
        df_agg[unit_col] = unified_unit
        aggregated_frames.append(df_agg)

    # Remove original rows that belong to any of the variants in the mapping
    all_variants = {variant for variants in group_mapping.values() for variant in variants}
    df_remaining = df[~df[unit_col].isin(all_variants)]

    # Combine the remaining rows with whatever was aggregated. When nothing was
    # aggregated only df_remaining is concatenated: no empty placeholder frame
    # takes part, so the column dtypes (notably the datetime column) are untouched.
    df_result = pd.concat([df_remaining, *aggregated_frames], ignore_index=True)

    # Sort by unit and date for clarity
    df_result = df_result.sort_values(by=[unit_col, date_col]).reset_index(drop=True)

    return df_result

def aggregate_los_mina(df, date_col="FECHA", unit_col="CENTRAL"):
    """
    Aggregates rows for 'los mina' by processing two groups:
      - Group 1: Sum hourly values from rows with unit in ["los mina 5", "los mina 6", "los mina 7"].
      - Group 2: For rows with unit in ["parque energetico los mina cc parcial", "parque energetico los mina cc total"],
                 select the row with the maximum hour sum (H1 to H24) for each day.

    For each day with data in either group, a new aggregated row with unit "los mina" is created,
    with hourly values equal to the sum of Group 1 and Group 2 values. A day that is present in
    only one group uses 0 for the other group. Rows of either group whose date is missing (NaT)
    are dropped without producing an aggregated row.

    The aggregated rows carry only date_col, unit_col and the hourly columns; every other
    column of df is left missing (NaN) on those rows.

    Note: the date column of the input DataFrame is converted to datetime and
    normalized to midnight in place.

    Parameters:
      df (pd.DataFrame): DataFrame containing at least the date_col, unit_col, and hourly columns (H1 ... H24).
      date_col (str): Name of the date column (assumed to be datetime or will be normalized).
      unit_col (str): Name of the unit/central column.

    Returns:
      pd.DataFrame: DataFrame with the original rows for these groups removed and replaced with
                    aggregated rows, sorted by unit then date with a fresh index. If df has no
                    hourly columns it is returned unchanged (apart from the date normalization).
    """
    # Ensure the date column is datetime and normalized (time set to midnight)
    df[date_col] = pd.to_datetime(df[date_col]).dt.normalize()

    # Determine hourly columns (H1 to H24) available in df
    hour_cols = [f"H{i}" for i in range(1, 25) if f"H{i}" in df.columns]
    if not hour_cols:
        print("No hourly columns found.")
        return df

    # Define the variant groups
    group1_variants = ["los mina 5", "los mina 6", "los mina 7"]
    group2_variants = ["parque energetico los mina cc parcial", "parque energetico los mina cc total"]

    in_group1 = df[unit_col].isin(group1_variants)
    in_group2 = df[unit_col].isin(group2_variants)

    # Per-day hourly values contributed by each group, indexed by date
    daily_parts = []

    if in_group1.any():
        # Group 1: sum of all variant rows of the day
        daily_parts.append(df.loc[in_group1].groupby(date_col)[hour_cols].sum())

    if in_group2.any():
        # Group 2: the single row of the day with the largest hour total.
        # The index is reset so idxmax labels identify rows unambiguously.
        df_group2 = df.loc[in_group2, [date_col] + hour_cols].reset_index(drop=True)
        hour_totals = df_group2[hour_cols].sum(axis=1, skipna=True)
        best_rows = hour_totals.groupby(df_group2[date_col]).idxmax()
        daily_parts.append(df_group2.loc[best_rows.to_numpy()].set_index(date_col)[hour_cols])

    # Rows that do not belong to either group are kept as they are
    pieces = [df[~(in_group1 | in_group2)]]

    if daily_parts:
        all_dates = daily_parts[0].index
        for part in daily_parts[1:]:
            all_dates = all_dates.union(part.index)

        # fill_value=0 only applies to days a group has no data for; a missing
        # hour inside an existing row stays NaN and propagates into the total.
        totals = daily_parts[0].reindex(all_dates, fill_value=0)
        for part in daily_parts[1:]:
            totals = totals + part.reindex(all_dates, fill_value=0)

        if not totals.empty:
            df_aggregated = totals.rename_axis(date_col).reset_index()
            df_aggregated.insert(1, unit_col, "los mina")
            pieces.append(df_aggregated)

    # Combine the remaining rows with the new aggregated rows (no empty
    # placeholder frame is concatenated when there is nothing to aggregate)
    df_result = pd.concat(pieces, ignore_index=True)

    # Sort by unit and date
    df_result = df_result.sort_values(by=[unit_col, date_col]).reset_index(drop=True)

    return df_result


In [None]:
# Name variants (fuel- or configuration-specific) that are summed into a single
# plant, keyed by the unified plant name used after aggregation (step 7).
group_mapping = {
    "aes andres": ["aes andres fo", "aes andres gn", "aes andres"],
    "cespm 1": ["cespm 1 fo", "cespm 1 gn", "cespm 1"],
    "cespm 2": ["cespm 2 fo", "cespm 2 gn", "cespm 2"],
    "cespm 3": ["cespm 3 fo", "cespm 3 gn", "cespm 3"],
    "estrella del mar 2": ["estrella del mar 2 cfo", "estrella del mar 2 cgn", "estrella del mar 2 sfo", "estrella del mar 2 sgn"],
    "estrella del mar 3": ["estrella del mar 3 ccp", "estrella del mar 3 cct", "estrella del mar 3 cs", "estrella del mar 3 sgn", "estrella del mar 3"],
    "powership azua": ["powership azua kps 26", "powership azua kps 60", "powership azua"],
    "los origenes": ["los origenes power plant fuel oil", "los origenes power plant gas natural", "los origenes"],
    "quisqueya 1": ["quisqueya 1 fo", "quisqueya 1 gn", "quisqueya 1 san pedro", "quisqueya 1 san pedro fo",
                     "quisqueya 1 san pedro gn", "quisqueya 1b san pedro", "quisqueya 1b san pedro fo",
                     "quisqueya 1b san pedro gn", "quisqueya 1"],
    "quisqueya 2": ["quisqueya 2", "quisqueya 2 fo", "quisqueya 2 gn"],
    "san felipe": ["san felipe cc", "san felipe vap", "san felipe"]
}

# Run the cleaning steps in order; each step receives the result of the previous one.
df_merged = (
    df_merged
    # 1. Normalize the 'CENTRAL' column (lowercase, no accents)
    .pipe(normalize_central_column)
    # 2. Parse the 'FECHA' column into datetime
    .pipe(format_fecha_column)
    # 3. Remove unwanted units
    .pipe(remove_unwanted_units, unit_col="CENTRAL")
    # 4. Fill missing dates by copying records from the same weekday in a later week
    .pipe(fill_missing_with_next_available, date_col='FECHA', central_col='CENTRAL')
    # 5. Standardize central names
    .pipe(standardize_central_names, column="CENTRAL")
    # 6. Remove daily duplicates by keeping the row with the highest hour sum
    .pipe(remove_daily_duplicates_by_max_hours, date_col="FECHA", unit_col="CENTRAL")
    # 7. Sum the name variants of each plant into one row per day
    .pipe(aggregate_unit_groups, group_mapping, date_col="FECHA", unit_col="CENTRAL")
    # 8. Build the combined 'los mina' rows
    .pipe(aggregate_los_mina, date_col="FECHA", unit_col="CENTRAL")
)

In [None]:

# Sanity check: after aggregation there should be exactly one 'los mina' row per day
is_los_mina = df_merged["CENTRAL"] == "los mina"
los_mina_counts = df_merged[is_los_mina].groupby("FECHA").size()
print("Records for 'los mina' per day (should be 1 for each date):")
print(los_mina_counts.head())

Records for 'los mina' per day (should be 1 for each date):
FECHA
2013-01-01    1
2013-01-02    1
2013-01-03    1
2013-01-04    1
2013-01-05    1
dtype: int64


In [None]:
# Sanity check for one aggregated group ("aes andres"): expect one row per day
is_aes_andres = df_merged["CENTRAL"] == "aes andres"
aes_counts = df_merged[is_aes_andres].groupby("FECHA").size()
print("Records for 'aes andres' per day (should be 1 for each date):")
print(aes_counts.head())

Records for 'aes andres' per day (should be 1 for each date):
FECHA
2013-01-01    1
2013-01-02    1
2013-01-03    1
2013-01-04    1
2013-01-05    1
dtype: int64


In [None]:
# Show the DataFrame produced by the cleaning and aggregation steps as the cell output
df_merged

Unnamed: 0,GRUPOS,INDICE,GRUPO,EMPRESA,CENTRAL,FECHA,H1,H2,H3,H4,...,H15,H16,H17,H18,H19,H20,H21,H22,H23,H24
0,,,,,aes andres,2013-01-01,207.00,241.0,217.0,220.0,...,237.0,265.0,234.0,220.00,254.00,229.00,236.00,220.00,260.00,260.00
1,,,,,aes andres,2013-01-02,239.00,275.0,277.0,249.0,...,246.0,250.0,248.0,238.00,284.00,273.00,260.00,224.00,210.00,256.00
2,,,,,aes andres,2013-01-03,224.00,261.0,223.0,239.0,...,279.0,276.0,231.0,246.00,280.00,262.00,246.00,230.00,230.00,230.00
3,,,,,aes andres,2013-01-04,256.00,227.0,202.0,230.0,...,276.0,273.0,266.0,261.00,241.00,253.00,219.00,276.00,240.00,256.00
4,,,,,aes andres,2013-01-05,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
423552,4 - Hidroeléctrica,4.0,Hidroeléctrica,EGEHID,valdesia 2,2025-03-12,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,1.74,22.88,22.37,21.79,21.77,22.89,22.21
423553,4 - Hidroeléctrica,4.0,Hidroeléctrica,EGEHID,valdesia 2,2025-03-13,2.29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00,0.00,3.22,20.41,10.18,0.00,0.00
423554,4 - Hidroeléctrica,4.0,Hidroeléctrica,EGEHID,valdesia 2,2025-03-14,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00,17.44,22.46,22.45,22.45,22.45,5.23
423555,4 - Hidroeléctrica,4.0,Hidroeléctrica,EGEHID,valdesia 2,2025-03-15,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00


#### Inspect Each Central Unit and Its Record Count

In [None]:
# Count how many rows each value of the 'CENTRAL' column has
central_counts = df_merged['CENTRAL'].value_counts().reset_index()
central_counts.columns = ['CENTRAL', 'count']

# Show every row of the result. The row limit is lifted only while displaying
# and is reset to its default afterwards, even if display() raises.
pd.set_option('display.max_rows', None)
try:
    display(central_counts)
finally:
    pd.reset_option('display.max_rows')

Unnamed: 0,CENTRAL,count
0,aes andres,4458
1,aguacate 1,4458
2,aguacate 2,4458
3,aniana vargas 1,4458
4,aniana vargas 2,4458
5,baiguaque 1,4458
6,baiguaque 2,4458
7,barahona carbon,4458
8,cespm 3,4458
9,cespm 2,4458


In [None]:
# Lift the row limit so the whole table is shown
# (the option is left changed after this cell runs)
pd.set_option('display.max_rows', None)

# Order the per-plant counts alphabetically by plant name and show them
central_counts_sorted = central_counts.sort_values('CENTRAL')
display(central_counts_sorted)

Unnamed: 0,CENTRAL,count
0,aes andres,4458
1,aguacate 1,4458
2,aguacate 2,4458
3,aniana vargas 1,4458
4,aniana vargas 2,4458
5,baiguaque 1,4458
6,baiguaque 2,4458
7,barahona carbon,4458
79,bersal,3859
80,brazo derecho,3846


In [None]:
# Hourly columns (H1 through H24) that exist in the DataFrame
hour_cols = [col for col in (f"H{i}" for i in range(1, 25)) if col in df_merged.columns]

# Highest hourly output of each row, stored as a new 'peak_hourly' column of df_merged
df_merged["peak_hourly"] = df_merged[hour_cols].max(axis=1)

# Index label of the row holding each plant's largest peak_hourly
idx = df_merged.groupby("CENTRAL")["peak_hourly"].idxmax()

# One row per plant: its name, its maximum generation and the date it occurred
peak_columns = {"peak_hourly": "max_generated", "FECHA": "date_of_max"}
peak_rows = df_merged.loc[idx, ["CENTRAL", *peak_columns]]
peak_df = peak_rows.rename(columns=peak_columns).reset_index(drop=True)

print("Peak generation by plant (value and date):")
peak_df

Peak generation by plant (value and date):


Unnamed: 0,CENTRAL,max_generated,date_of_max
0,aes andres,317.0,2017-05-03
1,aguacate 1,30.0,2013-01-02
2,aguacate 2,47.38,2019-08-09
3,aniana vargas 1,0.3,2013-05-04
4,aniana vargas 2,0.32,2017-04-27
5,baiguaque 1,0.55,2013-10-16
6,baiguaque 2,1.2,2016-07-21
7,barahona carbon,54.45,2019-03-19
8,bersal,24.35,2015-02-09
9,brazo derecho,3.0,2017-02-05


In [None]:
import pandas as pd

# Hourly columns (H1 to H24) that exist in the DataFrame
hour_cols = [col for col in (f"H{i}" for i in range(1, 25)) if col in df_merged.columns]

# Work on a copy of the "estrella del mar 2" rows so df_merged is not modified
is_estrella_2 = df_merged["CENTRAL"] == "estrella del mar 2"
df_est = df_merged[is_estrella_2].copy()

# Largest hourly value of each day
df_est["max_gen"] = df_est[hour_cols].max(axis=1)

# Keep the days whose largest hourly value is above 120, showing only date and value
exceeds_threshold = df_est["max_gen"] > 120
df_filtered = df_est[exceeds_threshold]
result = df_filtered[["FECHA", "max_gen"]]

print("Dates for 'estrella del mar 2' with maximum hourly generation > 120:")
print(result)

Dates for 'estrella del mar 2' with maximum hourly generation > 120:
            FECHA  max_gen
100325 2014-06-17   133.00
101699 2018-03-22   124.24
101750 2018-05-12   122.16


#### Buscador por nombre

In [None]:
import pandas as pd

# Show every row and column in full, without truncation
for option_name, option_value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ('display.width', 1000),          # or adjust to your screen
    ('display.max_colwidth', None),   # show the full content of each column
):
    pd.set_option(option_name, option_value)

# Select all rows of one plant by its exact (normalized) name
is_wanted_central = df_merged['CENTRAL'] == 'los mina 7'
df_buscar_central = df_merged[is_wanted_central]

# Show the matching rows
display(df_buscar_central)


Unnamed: 0,GRUPOS,INDICE,GRUPO,EMPRESA,CENTRAL,FECHA,H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15,H16,H17,H18,H19,H20,H21,H22,H23,H24,peak_hourly


#### Buscar por Fecha


In [None]:
import pandas as pd

# Show every row and column in full, without truncation
for option_name, option_value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ('display.width', 1000),          # or adjust to your screen
    ('display.max_colwidth', None),   # show the full content of each column
):
    pd.set_option(option_name, option_value)

# Select the rows of every plant for a single date
is_wanted_date = df_merged['FECHA'] == '2018-12-29'
df_buscar_fecha = df_merged[is_wanted_date]

# Show the matching rows
display(df_buscar_fecha)


Unnamed: 0,GRUPOS,INDICE,GRUPO,EMPRESA,CENTRAL,FECHA,H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15,H16,H17,H18,H19,H20,H21,H22,H23,H24,peak_hourly
2188,,,,,aes andres,2018-12-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6646,3 - Hidroeléctrica,3.0,Hidroeléctrica,EGEHID,aguacate 1,2018-12-29,27.07,26.67,26.74,25.9,26.1,26.61,26.7,26.34,26.46,26.12,25.85,25.5,26.08,26.0,26.62,26.82,26.79,26.2,26.72,25.99,25.08,25.39,25.0,25.0,27.07
11104,3 - Hidroeléctrica,3.0,Hidroeléctrica,EGEHID,aguacate 2,2018-12-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.83,26.27,26.18,25.93,25.71,4.32,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,26.27
15562,3 - Hidroeléctrica,3.0,Hidroeléctrica,EGEHID,aniana vargas 1,2018-12-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0,0.2
20020,3 - Hidroeléctrica,3.0,Hidroeléctrica,EGEHID,aniana vargas 2,2018-12-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2
24478,3 - Hidroeléctrica,3.0,Hidroeléctrica,EGEHID,baiguaque 1,2018-12-29,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2
28936,3 - Hidroeléctrica,3.0,Hidroeléctrica,EGEHID,baiguaque 2,2018-12-29,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3
33394,1 - Térmica,1.0,Térmica,EGEHAINA,barahona carbon,2018-12-29,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.48,52.0,52.0,52.48
37253,1 - Térmica,1.0,Térmica,BERSAL,bersal,2018-12-29,16.0,16.0,16.0,16.0,0.0,0.0,0.0,0.0,0.0,16.0,16.0,0.0,0.0,6.0,6.0,6.0,6.0,16.0,6.0,16.0,6.0,0.0,0.0,0.0,16.0
41099,3 - Hidroeléctrica,3.0,Hidroeléctrica,EGEHID,brazo derecho,2018-12-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
