# In [None]:
import premise as ps
import gc
import os
from dotenv import load_dotenv
import bw2data as bd

def premise_generator(
    base_db: str,
    base_db_version: str,
    scenarios: list[dict],
    overwrite_bw_db: bool = False,
    keep_uncertainty_data: bool = True,
    use_multiprocessing: bool = True,
    batch_size: int = 5,
) -> None:
    """
    Generate premise databases for a list of scenarios, batch by batch.

    Scenarios whose database already exists in Brightway are skipped, unless
    ``overwrite_bw_db`` is True, in which case the existing database is
    removed and regenerated. The remaining scenarios are split into
    consecutive batches of at most ``batch_size`` entries, and each batch is
    handed to ``_process_batch``.

    Parameters
    ----------
    base_db : str
        The name of the base database to use.
    base_db_version : str
        The version of the base database to use.
    scenarios : list
        A list of scenarios, where each scenario is a dictionary of the form:
        {
            "model": str,
            "pathway": str,
            "year": int
        }
    overwrite_bw_db : bool, optional
        Whether or not to overwrite existing databases, by default False
    keep_uncertainty_data : bool, optional
        Whether or not to keep uncertainty data, by default True
    use_multiprocessing : bool, optional
        Whether or not to use multiprocessing, by default True
    batch_size : int, optional
        The maximum number of scenarios to process in each batch, by default 5.
        Must be at least 1. Batches are processed one after another, so a
        smaller batch size means fewer scenarios are held in memory at once.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1, or if the ``encryption_key``
        environment variable is missing or empty.
    """
    # Reject unusable batch sizes before anything else happens: the scenario
    # validation below may delete existing databases, and a zero or negative
    # step would then either fail inside range() or silently generate nothing.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}.")

    # The key is read from the environment after load_dotenv() has run.
    load_dotenv()
    encryption_key = os.getenv("encryption_key")

    if not encryption_key:
        raise ValueError(
            "Encryption key not found. Please ask the maintainers for a .env file with an encryption key."
        )

    valid_scenarios = _validate_scenarios(scenarios, overwrite_bw_db)

    # Process scenarios in batches
    for i in range(0, len(valid_scenarios), batch_size):
        batch_scenarios = valid_scenarios[i : i + batch_size]
        _process_batch(
            batch_scenarios,
            base_db,
            base_db_version,
            encryption_key,
            use_multiprocessing,
            keep_uncertainty_data,
        )

def _validate_scenarios(
    scenarios: list, overwrite_bw_db: bool = False
) -> list:
    """
    Filter out scenarios whose Brightway database already exists.

    For every scenario the database name is derived and looked up in the
    Brightway database registry. If the database exists and
    ``overwrite_bw_db`` is False, the scenario is dropped so the existing
    database is neither overwritten nor recalculated. If ``overwrite_bw_db``
    is True, the existing database is removed and the scenario is kept.

    Parameters
    ----------
    scenarios : list
        A list of scenarios, where each scenario is a dictionary of the form:
        {
            "model": str,
            "pathway": str,
            "year": int
        }
    overwrite_bw_db : bool, optional
        Whether or not to overwrite existing databases, by default False

    Returns
    -------
    list
        The scenarios that still need to be generated, in their original order.
    """
    # Derive the names with the same helper that _process_batch uses when
    # writing, so the existence check looks at exactly the names that will be
    # written. Computing them all up front also means a malformed scenario
    # fails before any database has been removed.
    names = _get_a_list_of_scenario_names(scenarios)

    valid_scenarios = []
    for scenario, name in zip(scenarios, names):
        if name in bd.databases:
            if not overwrite_bw_db:
                print(f"Database {name} already exists, skipping.")
                continue
            print(f"Database {name} already exists, removing.")
            # Remove the database through the registry, as bw2data recommends,
            # so that the stored data and the metadata entry stay in sync.
            del bd.databases[name]
        valid_scenarios.append(scenario)
    return valid_scenarios

def _process_batch(
    batch_scenarios: list[dict],
    base_db: str,
    base_db_version: str,
    encryption_key: str,
    use_multiprocessing: bool,
    keep_uncertainty_data: bool,
) -> None:
    """
    Build the premise databases for one batch and write them to Brightway.

    Parameters
    ----------
    batch_scenarios : list
        The scenarios of this batch, each a dictionary of the form:
        {
            "model": str,
            "pathway": str,
            "year": int
        }
    base_db : str
        The name of the base database, passed to premise as ``source_db``.
    base_db_version : str
        The version of the base database, passed as ``source_version``.
    encryption_key : str
        The encryption key, passed to premise as ``key``.
    use_multiprocessing : bool
        Whether or not to use multiprocessing.
    keep_uncertainty_data : bool
        Whether or not to keep uncertainty data.
    """
    # One Brightway database name per scenario, in the same order.
    db_names = _get_a_list_of_scenario_names(batch_scenarios)
    print("Generating premise database for the current batch.")

    premise_options = {
        "scenarios": batch_scenarios,
        "source_db": base_db,
        "source_version": base_db_version,
        "key": encryption_key,
        "use_multiprocessing": use_multiprocessing,
        "keep_uncertainty_data": keep_uncertainty_data,
    }
    new_database = ps.NewDatabase(**premise_options)
    new_database.update_all()

    print("Writing database to brightway")
    new_database.write_db_to_brightway(db_names)

    # Drop the database object and force a garbage-collection pass before
    # returning (presumably to free memory before the next batch starts).
    del new_database
    gc.collect()

def _get_a_list_of_scenario_names(scenarios: list[dict]) -> list[str]:
    """
    Takes a list of dictionaries and returns a list of formatted strings.

    Each dictionary in the list should have the keys 'model', 'pathway', and 'year'.
    The function returns a list of strings in the format "pathway-model-year".

    Args:
    scenarios (list of dict): A list of dictionaries with keys 'model', 'pathway', and 'year'.

    Returns:
    list of str: A list of formatted strings.
    """

    return [
        f"{scenario['pathway']}_{scenario['model']}_{scenario['year']}"
        for scenario in scenarios
    ]
