There are (too) many technologies. These can potentially create problems (such as memory overload). Here we combine some technologies that are very similar.

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from pathlib import Path
import shutil
# import some functions from the help_functions.py file
from help_functions import combine_tech, reorder_columns

Choose which version we want to combine (single-agent or multi-agent).

In [3]:
# Define the supported versions. The chosen value is used as a folder name
# when the input and output paths are built.
# NOTE(review): the saved cell outputs further down this notebook show paths
# under `single_agent` (no trailing "s") - confirm the folder name on disk.
SUPPORTED_VERSIONS = ['single_agents', 'Ofgem_agents']

# Set the version (must be one of the SUPPORTED_VERSIONS)
version = SUPPORTED_VERSIONS[0]  # Change this to your desired version

# Fail fast on a typo or an unsupported value instead of building paths
# that point at a folder which does not exist.
if version not in SUPPORTED_VERSIONS:
    raise ValueError(
        f"Unsupported version {version!r}; choose one of {SUPPORTED_VERSIONS}"
    )

In [4]:
# input data folder
data_folder = Path.cwd().parent / 'MUSE_Files' / 'Buildings' / version

# folder that receives the reduced set of input files
output_folder = Path.cwd().parent / 'MUSE_Files' / 'Buildings' / 'tech_reduced' / version

# Verify the input first: without this check an empty output folder is created
# and the notebook only fails later, at the first pd.read_csv call.
if not data_folder.is_dir():
    raise FileNotFoundError(f'Input folder not found: {data_folder}')

# Create the output folder (and any missing parents) if it doesn't exist yet
output_folder.mkdir(parents=True, exist_ok=True)

print(f'Input folder: {data_folder}')
print(f'Output folder: {output_folder}')


Input folder: c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\single_agents
Output folder: c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\tech_reduced\single_agents


First process the Technodata.csv

Combine technologies whose rows are identical in every column except "ProcessName" and "MaxCapacityAddition". The combined row joins the process names with "; " and keeps the largest "MaxCapacityAddition".

In [5]:
# Read Technodata.csv, then split off the row directly below the header
# (the unit row, re-attached before saving) from the actual data rows.
technodata_raw = pd.read_csv(data_folder / 'Technodata.csv')
unit_row = technodata_raw.iloc[[0]]
df = technodata_raw.drop(index=0).reset_index(drop=True)

FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\jyang8\\MUSE_models\\MUSE_UK_Buildings\\MUSE_Files\\Buildings\\single_agents\\Technodata.csv'

In [None]:
# Define the columns to group by (all except 'ProcessName' and 'MaxCapacityAddition')
merged_columns = ['ProcessName', 'MaxCapacityAddition']
group_by_columns = [col for col in df.columns if col not in merged_columns]

# Group rows that are identical in every grouping column.
# dropna=False matters: by default groupby silently discards every row that
# has an empty cell (NaN) in any grouping column, which would delete technologies.
grouped_tech = df.groupby(group_by_columns, as_index=False, dropna=False).agg({
    # Combined name lists every member, e.g. "techA; techB"
    'ProcessName': lambda names: '; '.join(names),
    # Series.max() skips NaN; the built-in max() gives an order-dependent
    # result as soon as one value fails to parse as a number.
    'MaxCapacityAddition': lambda values: pd.to_numeric(values, errors='coerce').max(),
})

# Mapping: combined ProcessName -> list of original ProcessNames.
# It is read straight from the groups rather than re-matching rows by equality,
# which is quadratic and can never match a group containing NaN (NaN != NaN).
# The default sort order is kept so the mapping follows the row order of grouped_tech.
# NOTE(review): if a ProcessName occurs in several rows (e.g. one per year) that
# fall into different groups, it ends up under more than one combined name - verify.
member_names = df.groupby(group_by_columns, dropna=False)['ProcessName'].agg(list)
process_name_mapping = {'; '.join(names): names for names in member_names}

# Every original row must belong to exactly one group
assert sum(len(names) for names in member_names) == len(df), "rows lost while grouping"

# put "ProcessName" as the first column
grouped_tech = reorder_columns(grouped_tech, "ProcessName")
# re-add the unit row
grouped_tech = pd.concat([unit_row, grouped_tech], ignore_index=True)

# Save the grouped DataFrame to a new CSV file
grouped_tech.to_csv(output_folder / 'Technodata.csv', index=False)



Second, process the ExistingCapacity.csv

In [None]:
def map_process_name(process_name):
    """Translate an original ProcessName into its combined ProcessName.

    Scans the global ``process_name_mapping`` (combined name -> list of
    original names) in insertion order and returns the first combined name
    whose list contains ``process_name``. A name found in no list is returned
    unchanged.
    """
    candidates = (
        combined
        for combined, originals in process_name_mapping.items()
        if process_name in originals
    )
    return next(candidates, process_name)

In [None]:
# read the existing capacity file (kept untouched so this cell can be re-run safely)
existing_capacity_raw = pd.read_csv(data_folder / 'ExistingCapacity.csv')

# Apply the mapping to the ProcessName column on a copy of the raw table
existing_capacity_mapped = existing_capacity_raw.assign(
    ProcessName=existing_capacity_raw['ProcessName'].apply(map_process_name)
)

# Rows are combined per technology AND region. Grouping on ProcessName alone
# would add capacities of different regions together and keep only the first
# RegionName.
key_columns = ['ProcessName', 'RegionName']
# Everything that is neither a key nor the unit label is summed
numerical_columns = [
    col for col in existing_capacity_mapped.columns
    if col not in key_columns + ['Unit']
]

# Sum the capacities of the merged technologies; keep the first unit label.
# dropna=False keeps rows whose key cell is empty instead of silently dropping them.
ExistingCapacity_df = (
    existing_capacity_mapped
    .groupby(key_columns, as_index=False, dropna=False)
    .agg({**{col: 'sum' for col in numerical_columns}, 'Unit': 'first'})
)

# Write the columns in the same order as the input file
ExistingCapacity_df = ExistingCapacity_df[list(existing_capacity_raw.columns)]

# save the file
ExistingCapacity_df.to_csv(output_folder / 'ExistingCapacity.csv', index=False)

CommIn.csv and CommOut.csv

In [None]:
for file in ['CommIn.csv', 'CommOut.csv']:
    # read the commodity file; dedicated names are used so the Technodata
    # `df` / `unit_row` from the earlier cell are not overwritten
    comm_raw = pd.read_csv(data_folder / file)

    # separate the unit row (first row below the header); it is added back later
    comm_unit_row = comm_raw.iloc[[0]]

    # the actual data
    data = comm_raw.iloc[1:].reset_index(drop=True)

    # Identifier columns: rows are only averaged when ALL of these agree.
    # Grouping on ProcessName alone would average rows belonging to different
    # regions / years / levels into a single row and keep only the first of each.
    key_columns = [
        col for col in ['ProcessName', 'RegionName', 'Time', 'Level']
        if col in data.columns
    ]
    # Every remaining column is treated as a numerical column to average
    numerical_columns = [col for col in data.columns if col not in key_columns]

    # Convert numerical columns to numeric, coercing errors to NaN
    for col in numerical_columns:
        data[col] = pd.to_numeric(data[col], errors='coerce')

    # Apply the mapping to the ProcessName column
    data['ProcessName'] = data['ProcessName'].apply(map_process_name)

    # Average the numerical columns of the technologies that were merged.
    # dropna=False keeps rows whose key cell is empty instead of dropping them.
    combined_rows = (
        data
        .groupby(key_columns, as_index=False, dropna=False)[numerical_columns]
        .mean()
    )

    # add the unit row back (concat aligns on column names and keeps the
    # column order of the first frame, i.e. the order of the input file)
    combined_rows = pd.concat([comm_unit_row, combined_rows], ignore_index=True)

    # save the file
    combined_rows.to_csv(output_folder / file, index=False)

    print(f"{file} saved successfully")


CommIn.csv saved successfully
CommOut.csv saved successfully


TechnodataTimeslices.csv

In [None]:
# Read the timeslice table and rename each technology to its combined name
timeslices_raw = pd.read_csv(data_folder / 'TechnodataTimeslices.csv')
timeslices_raw['ProcessName'] = timeslices_raw['ProcessName'].apply(map_process_name)

# Rows sharing all of these identifiers are merged into one row ...
group_columns = ['ProcessName', 'RegionName', 'Time', 'season', 'period']
# ... whose value columns are the mean over the merged rows
average_columns = ['UtilizationFactor', 'MinimumServiceFactor']

# Only the group and average columns are present in the result.
# NOTE(review): unlike the other input files, no unit row is split off here -
# confirm that TechnodataTimeslices.csv has none.
TechnodataTimeslices = (
    timeslices_raw
    .groupby(group_columns, as_index=False)
    .agg(dict.fromkeys(average_columns, 'mean'))
)

TechnodataTimeslices.to_csv(output_folder / 'TechnodataTimeslices.csv', index=False)

The other MUSE input files stay the same, so we just need to copy them over.

In [None]:
# Source: the original input folder of the selected version
source_dir = data_folder

# Destination: the tech_reduced output folder
destination_dir = output_folder

# Ensure the destination directory exists
destination_dir.mkdir(exist_ok=True)

# Copy every CSV that is not already present in the destination. Files that
# already exist there (such as the ones written by the cells above) are kept.
for file in source_dir.glob("*.csv"):
    destination_path = destination_dir / file.name
    if destination_path.exists():
        continue
    shutil.copy(file, destination_path)
    print(f"Copied: {file} to {destination_path}")

# Copy the settings file: only the first .toml found is handled
toml_file = next(source_dir.glob("*.toml"), None)
if toml_file is not None:
    destination_path = destination_dir / toml_file.name
    if destination_path.exists():
        print(f"TOML already exists: {destination_path}")
    else:
        shutil.copy(toml_file, destination_path)
        print(f"Copied TOML: {toml_file} to {destination_path}")

print("copy operation completed.")

Copied: c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\single_agent\Agents.csv to c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\tech_reduced\single_agent\Agents.csv
Copied: c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\single_agent\Consumption2010.csv to c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\tech_reduced\single_agent\Consumption2010.csv
Copied: c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\single_agent\Consumption2020.csv to c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\tech_reduced\single_agent\Consumption2020.csv
Copied: c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\single_agent\Consumption2030.csv to c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\tech_reduced\single_agent\Consumption2030.csv
Copied: c:\Users\jyang8\MUSE_models\MUSE_UK_Buildings\MUSE_Files\Buildings\single_agent\Consumption2040.csv to c:\Users\jy

====================================END===================================