### New setup

In [1]:
import os
import pandas as pd
import yaml
                

In [2]:
# Load the configuration from the YAML file
def load_config(config_file):
    with open(config_file, 'r') as file:
        config = yaml.safe_load(file)
    return config

In [3]:
# Function to process unchanged files
def process_files(base_dir):
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith('csv'):
                file_path = os.path.join(root, file)
                df = pd.read_csv(file_path)
                yield df, file_path

In [4]:
def process_unchanged_files(config, main_folder, output_path):
    # Process unchanged files
    unchanged_files = config['parameters']['unchanged_files'] #list of files
    # Load each CSV file from the main folder
    for df, file_path in process_files(main_folder):
        param_name = os.path.splitext(os.path.basename(file_path))[0]
        if param_name in unchanged_files:
            #print(f"Skipping {param_name} as it is in the no_change_parameters list.")
            file_path = os.path.join(output_path, f'{param_name}.csv')
            df.to_csv(file_path)
            
 

In [5]:
def process_changed_files(config, main_folder, output_path):
    changed_files = config['parameters']['changed_files'] # list of file with parameters
    
    #Load each CSV file from the main folder
    for df, file_path in process_files(main_folder):
        #combined_new_regions = pd.DataFrame()
        param_name = os.path.splitext(os.path.basename(file_path))[0]
        # Construct the full file path
        file_path = os.path.join(output_path, f'{param_name}.csv')
        for file in changed_files:
            if param_name == file['file_name']:
                method = file['method']
                index_count = file['index_count']
                regions_to_split = file['regions_to_split']
                new_regions = file['new_regions']
                split_ratio = file['split_ratio']
                print(param_name, method, index_count, regions_to_split, new_regions, split_ratio)
                df.set_index(df.columns[:index_count].tolist(), inplace=True)
                
                # Process regions to split
                for region in regions_to_split:
                    if region in df.index.get_level_values('Region'):
                        print("True")
                        # Filter rows for the region to split
                        region_data = df.loc[region]
                        # Create new DataFrame for the split regions
                        new_region_dfs = []
                        for new_region, ratio in split_ratio.items():
                            new_data = region_data.copy()
                            new_data['Value'] *= ratio
                            new_data['Region'] = new_region
                            new_data.reset_index(inplace=True)
                            new_region_dfs.append(new_data) 
                        combined_new_regions = pd.concat(new_region_dfs, ignore_index=True)
                        # Add the new regions back to the original DataFrame
                        df.reset_index(inplace=True)  # Temporarily reset the index
                        df = pd.concat([df, combined_new_regions], ignore_index=True)

                        # Reapply multi-index  
                        df.set_index(df.columns[:index_count].tolist(), inplace=True)

                        # Save the DataFrame to the constructed path
                        df.to_csv(file_path)   

                    else:
                        print("do not have region in dataset")
                        df.to_csv(file_path)    

                        combined_new_regions = pd.concat(new_region_dfs, ignore_index=True)            

In [6]:
pd.set_option('display.max_rows', None)
base_dir = "/Users/shwetat/Projects/Genesys-mod_data_repo/GENeSYS_MOD.data/Data/Parameters"
output_dir = "/Users/shwetat/Projects/Genesys-mod_data_repo/GENeSYS_MOD.data/DataNew"
config = load_config("config_disaggregation.yaml")
process_unchanged_files(config, base_dir, output_dir)
process_changed_files(config, base_dir, output_dir)
#trade_files_(config, base_dir, output_dir)


Par_TotalAnnualMaxActivity copy 3 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
True
Par_REMinProductionTarget copy 3 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
do not have region in dataset
Par_ReserveMargin copy 2 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
True
Par_ModalSplitByFuel copy 4 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
True
Par_CapitalCost copy 3 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
do not have region in dataset
Par_TechnologyDiscountRate copy 2 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
True
Par_ResidualCapacity copy 3 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
True
Par_GrowthRateTradeCapacity copy 4 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
True
Par_TradeCapacity copy 4 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
True
Par_RegionalAnnualEmissionLimit copy 3 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
True
Par_AnnualExogenousEmission copy 3 ['NO'] ['NO1', 'NO2'] {'NO1': 1.0, 'NO2': 1.0}
do not have region in dataset
Par_RegionalMo

In [None]:
                """ #Combine the new region DataFrames
                combined_new_regions = pd.concat(new_region_dfs, ignore_index=True)

                # Add the new regions back to the original DataFrame
                df.reset_index(inplace=True)  # Temporarily reset the index
                df = pd.concat([df, combined_new_regions], ignore_index=True)

                # Reapply multi-index  
                df.set_index(df.columns[:index_count].tolist(), inplace=True)

                # Construct the full file path
                file_path = os.path.join(output_path, f'{param_name}.csv')

                # Save the DataFrame to the constructed path
                df.to_csv(file_path) """

In [26]:
# Example function to update DataFrame with new regions
def add_new_regions(df, region_to_split, new_regions):
    """
    Function to add new regions to the DataFrame by copying values of a given region.
    
    Args:
    - df (pd.DataFrame): Input DataFrame
    - region_to_split (str): The region whose data needs to be copied
    - new_regions (list): List of new regions to add
    
    Returns:
    - pd.DataFrame: Updated DataFrame with new regions
    """
    # Filter rows matching the region_to_split
    rows_to_copy = df[df['Region'] == region_to_split]
    
    # Create new rows by duplicating rows_to_copy for each new region
    for new_region in new_regions:
        new_rows = rows_to_copy.copy()
        new_rows['Region'] = new_region
        df = pd.concat([df, new_rows], ignore_index=True)
    
    return df

# Example usage
if __name__ == "__main__":
    # Load DataFrame (replace with your actual file path)
    file_path = "/Users/shwetat/Projects/Genesys-mod_data_repo/GENeSYS_MOD.data/Data/Parameters/Par_GeneralDiscountRate/Par_GeneralDiscountRate.csv"  # Change this to the actual file path
    df = pd.read_csv(file_path)
    
    # Parameters for the regions to copy and add
    region_to_split = "NO"  # Region to split (source region)
    new_regions = ["NO1", "NO2"]  # New regions to create
    
    # Update DataFrame
    updated_df = add_new_regions(df, region_to_split, new_regions)
    
    # Save updated DataFrame
    #updated_file_path = "updated_" + file_path
    updated_df.to_csv('generaldiscountrate.csv', index=False)

In [63]:
def single_indexed(config, main_folder, output_path):
    single_index_file = config['parameters']['changed_files1'] # list of file with parameters
    
    #Load each CSV file from the main folder
    for df, file_path in process_files(main_folder):
        #combined_new_regions = pd.DataFrame()
        param_name = os.path.splitext(os.path.basename(file_path))[0]
       
        # Construct the full file path
        file_path = os.path.join(output_path, f'{param_name}.csv')
        for file in single_index_file:
            if param_name == file['file_name']:
                print(param_name)
                method = file['method']
                
                regions_to_split = file['regions_to_split']
                new_regions = file['new_regions']
                split_ratio = file['split_ratio']
                df = df.set_index(['Region'])
                #print(df.head())
                
                
                # Process regions to split
                for region in regions_to_split:
                    if region in df.index.get_level_values('Region'):
                        print(df.head())
                        # Filter rows for the region to split
                    region_data = df.loc[region]
                    print(region_data.head())
                    # Create new DataFrame for the split regions
                    new_region_dfs = []
                    for new_region, ratio in split_ratio.items():
                        #print(new_region, ratio)
                        new_data = region_data.copy()

                        new_data['Value'] *= ratio
                        new_data['Region'] = new_region
                        new_region_dfs.append(new_data) 
                        combined_new_regions = pd.concat(new_region_dfs, ignore_index=True)
                        # Add the new regions back to the original DataFrame
                        
                        df = pd.concat([df, combined_new_regions], ignore_index=True)

                        # Reapply multi-index  
                        

                        # Save the DataFrame to the constructed path
                        df.to_csv(file_path)   

                    else:
                        print("do not have region in dataset")
                        df.to_csv(file_path)    

                        combined_new_regions = pd.concat(new_region_dfs, ignore_index=True)

In [64]:
single_indexed(config, base_dir, output_dir)

Par_GeneralDiscountRate
        Value  Unnamed: 2     Unit      Source  Updated at  \
Region                                                       
AT       0.05         NaN  Percent  Assumption  08.09.2023   
BE       0.05         NaN  Percent  Assumption  08.09.2023   
BG       0.05         NaN  Percent  Assumption  08.09.2023   
CH       0.05         NaN  Percent  Assumption  08.09.2023   
CZ       0.05         NaN  Percent  Assumption  08.09.2023   

                                      Updated by  
Region                                            
AT      Konstantin Löffler <kl@wip.tu-berlin.de>  
BE      Konstantin Löffler <kl@wip.tu-berlin.de>  
BG      Konstantin Löffler <kl@wip.tu-berlin.de>  
CH      Konstantin Löffler <kl@wip.tu-berlin.de>  
CZ      Konstantin Löffler <kl@wip.tu-berlin.de>  
Value               0.05
Unnamed: 2           NaN
Unit             Percent
Source        Assumption
Updated at    08.09.2023
Name: NO, dtype: object
do not have region in dataset
