# Hydropower Capacty Factor
- Define pAvailability for each Hydropower Reservoir
- Define pVREgenProfile for ROR

In [1]:
import pandas as pd
import os

## 1. Load Generation Data

In [21]:
filename = 'pGenDataExcel_clean.csv'
folder_epm = os.path.join('..', '..', 'epm', 'input', 'data_capp')
file_path = os.path.join(folder_epm, 'supply', filename)
if not os.path.exists(file_path):
    # Display the total path to the file, not relative
    raise FileNotFoundError(f"The file {os.path.abspath(file_path)} does not exist. Please check the path.")
else:
    print(f"File {file_path} found. Proceeding with the analysis.")

# Load the generation data
data_gen = pd.read_csv(file_path, index_col=None, header=[0])

# Filter hydropower plants
data_gen = data_gen[data_gen['tech'].isin(['ROR', 'ReservoirHydro'])]

data_gen.set_index(['gen'], inplace=True)

# Display the first few rows of the generation data
print(data_gen.head())

File ../../epm/input/data_capp/supply/pGenDataExcel_clean.csv found. Proceeding with the analysis.
                                   zone            tech   fuel  Status  StYr  \
gen                                                                            
Lauca                            Angola  ReservoirHydro  Water       1  2017   
Cambambe 2                       Angola  ReservoirHydro  Water       1  2016   
Capanda                          Angola  ReservoirHydro  Water       1  2004   
Cambambe 1                       Angola  ReservoirHydro  Water       1  1962   
1smallhydrodro2012_hydro_angola  Angola  ReservoirHydro  Water       1  2012   

                                 RetrYr  Capacity  DescreteCap  fuel2  \
gen                                                                     
Lauca                              2050    2070.0          NaN    NaN   
Cambambe 2                         2050     700.0          NaN    NaN   
Capanda                            2050     520.

## 2. Load the African Hydropower Atlas

Download the African Hydropower Atlas from the [African Hydropower Atlas](https://www.hydroshare.org/resource/5e8ebdc3bfd24207852539ecf219d915/) website, and put it in the `input` folder.

The file should be named `African_Hydropower_Atlas_v2-0.xlsx`.

In [3]:
file_atlas = os.path.join('input', 'African_Hydropower_Atlas_v2-0.xlsx')
if not os.path.exists(file_atlas):
    raise FileNotFoundError(f"The file {file_atlas} does not exist. Please download the African Hydropower Atlas and place it in the input folder.")
else:
    print(f"File {file_atlas} found. Proceeding with the analysis.")

File input/African_Hydropower_Atlas_v2-0.xlsx found. Proceeding with the analysis.


In [6]:
data_atlas = pd.read_excel(file_atlas, sheet_name='2 - Hydrofleet2020', index_col=None, skiprows=None, header=0)
data_atlas.rename(columns={'Name': 'Unit Name'}, inplace=True)
data_atlas.set_index(['Country', 'Unit Name'], inplace=True)

data_atlas = data_atlas.droplevel('Country')
# Rename columns to match the expected format
cols = pd.MultiIndex.from_product([['baseline', 'dry', 'wet'], range(1, 13)], names=['scenario', 'month'])
# Add scenarios to the columns
data_atlas = data_atlas.set_axis(cols, axis=1)

print(data_atlas.head())

scenario        baseline                                                    \
month                 1    2    3    4    5         6         7         8    
Unit Name                                                                    
Ighil Emda      0.857215  1.0  1.0  1.0  1.0  0.388244  0.035388  0.003170   
Erraguene       1.000000  1.0  1.0  1.0  1.0  0.446470  0.041604  0.023276   
Mansouria       1.000000  1.0  1.0  1.0  1.0  0.446470  0.041604  0.023276   
Darguina        0.857215  1.0  1.0  1.0  1.0  0.388244  0.035388  0.003170   
Souk El Djemaa       NaN  NaN  NaN  NaN  NaN       NaN       NaN       NaN   

scenario                            ...  wet                                \
month                 9         10  ...   3    4    5         6         7    
Unit Name                           ...                                      
Ighil Emda      0.203179  0.185520  ...  1.0  1.0  1.0  0.669888  0.061060   
Erraguene       0.075531  0.092803  ...  1.0  1.0  1.0  1.00000

## 3. Merge Generation Data with Hydropower Atlas
Separate between ROR and ReservoirHydro technologies, and then merge the data with the hydropower atlas.

In [14]:
seasons_dict = {
    1: 2,
    2: 2,
    3: 2,
    4: 2,
    5: 1,
    6: 1,
    7: 1,
    8: 1,
    9: 1,
    10: 2,
    11: 2,
    12: 2
}  # grouping months into 4 seasons, to define according to user


In [13]:
data_ror = data_gen[data_gen['tech'] == 'ROR'].copy()
data_reservoir = data_gen[data_gen['tech'] == 'ReservoirHydro'].copy()

### Handle Hydropower Reservoirs

In [19]:
for s in data_atlas.columns.get_level_values('scenario').unique():
    # Filter the data for the current scenario
    data_atlas_scenario = data_atlas.xs(s, level='scenario', axis=1)

    # Keep only the index from data_gen
    result = data_reservoir.join(data_atlas_scenario, how="left")

    # Keep only columns from data_atlas
    result = result[data_atlas_scenario.columns]

    # Group by the seasons defined in seasons_dict
    result = result.T.groupby(seasons_dict).mean().T

    # Add 'Q{}' prefix to the column names
    result.columns = [f'Q{col}' for col in result.columns]

    filename = f'pAvailability_{s}.csv'
    result.to_csv(os.path.join('output', filename))
    print(f"Saved {filename} with shape {result.shape}.")

Saved pAvailability_baseline.csv with shape (67, 2).
Saved pAvailability_dry.csv with shape (67, 2).
Saved pAvailability_wet.csv with shape (67, 2).


### Handle Run-of-River (ROR) Plants

In [37]:
def generate_pVREgenProfile(result, template):
    """Generate pVREgenProfile for ROR plants.

    This function takes a scenario identifier and a template DataFrame,
    and generates a pVREgenProfile DataFrame for Run-of-River (ROR) plants.
    It reshapes the result DataFrame to match the template structure,
    broadcasting the values across all hour columns.

    Parameters:
        s: str
            The scenario identifier, used for naming the output file.
        template: pd.DataFrame
            The template DataFrame that defines the structure of the output.
    """

    result_reset = result.reset_index()  # so 'gen' becomes a column
    result_long = result_reset.melt(id_vars='gen', var_name='season', value_name='value')

    daytypes = template.reset_index()[['season', 'daytype']].drop_duplicates()
    merged = result_long.merge(daytypes, on='season', how='left')

    # get hour columns from template
    hour_cols = template.columns.difference(['season', 'daytype'])

    # broadcast the value across all hour columns
    for col in hour_cols:
        merged[col] = merged['value']

    merged_final = merged.drop(columns=['value'])
    merged_final = merged_final.set_index(['gen', 'season', 'daytype'])
    merged_final.index.names = ['gen', 'q', 'd']

    return merged_final

In [38]:
template = pd.read_csv(os.path.join(folder_epm, 'pHours.csv'), index_col=[0, 1], header=0)


In [40]:
for s in data_atlas.columns.get_level_values('scenario').unique():
    # Filter the data for the current scenario
    data_atlas_scenario = data_atlas.xs(s, level='scenario', axis=1)

    # Keep only the index from data_gen
    result = data_reservoir.join(data_atlas_scenario, how="left")

    # Keep only columns from data_atlas
    result = result[data_atlas_scenario.columns]

    # Group by the seasons defined in seasons_dict
    result = result.T.groupby(seasons_dict).mean().T

    # Add 'Q{}' prefix to the column names
    result.columns = [f'Q{col}' for col in result.columns]
    result.columns.names = ['season']

    result = generate_pVREgenProfile(result, template)

    result.to_csv(os.path.join('output', f'pVREgenProfile_{s}.csv'))
    print(f'Saved pVREgenProfile_{s}.csv with shape {result.shape}.')


Saved pVREgenProfile_baseline.csv with shape (670, 24).
Saved pVREgenProfile_dry.csv with shape (670, 24).
Saved pVREgenProfile_wet.csv with shape (670, 24).
