### This notebook combines the Residential and Service sectors into a single sector, i.e. the Buildings sector.
### The following CSV files from each of the two sectors will be combined:
   * 1. "Consumption*.csv"
   * 2. "TechnodataTimeslices.csv" 
   * 3. "Technodata.csv"
   * 4. "GlobalCommodities.csv"
   * 5. "CommIn.csv" 
   * 6.  "CommOut.csv"
   * 7. "Projections.csv"
   * 8. "ExistingCapacity.csv" 
   * 9. "Agent.csv" (**) 

===============================================================

In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# (e.g. help_functions) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from pathlib import Path
import os
# import numpy as np
# import shutil # for moving/copying files

To combine these CSV files, we import four helper functions from `help_functions`:
- merge_by_column
- merge_by_row
- merge_by_column_and_row
- merge_by_row_technodata

In [3]:
from help_functions import (
    merge_by_column,
    merge_by_column_and_row,
    merge_by_row,
    merge_by_row_technodata,
)

## Locate data folders that will be used for merge

In [5]:
# Anchor every path below on the process's current working directory
current_dir = Path.cwd()

Choose which version we want to combine (single agent or multi-agent).

In [7]:
# Versions this notebook knows how to combine
SUPPORTED_VERSIONS = ['single_agent', 'Ofgem_agents']

# Set the version (must be one of the SUPPORTED_VERSIONS)
version = SUPPORTED_VERSIONS[1]  # Change this to your desired version

# Fail fast if `version` was edited by hand to something unsupported, instead of
# silently creating an output folder for a version that has no input data.
if version not in SUPPORTED_VERSIONS:
    raise ValueError(
        f"Unsupported version {version!r}; expected one of {SUPPORTED_VERSIONS}"
    )

# All folders are siblings located one level above the current working directory.
# Adjust `project_root` if the notebook lives somewhere else.
project_root = current_dir.parent

residential_folder = project_root / 'Residential' / 'MUSE_Files' / version
# NOTE(review): unlike the residential and output folders, the service folder has
# no `version` sub-folder -- confirm this is where the Service CSV files live.
service_folder = project_root / 'Service' / 'MUSE_Files'
output_folder = project_root / 'Buildings' / 'MUSE_Files' / version

# Ensure the output folder exists, and create it if it doesn't
output_folder.mkdir(parents=True, exist_ok=True)

# Print paths to confirm
print(f"Residential folder path: {residential_folder}")
print(f"Service folder path: {service_folder}")
print(f"Output folder path: {output_folder}")

# Flag missing input folders here, where the cause is obvious, rather than
# letting the merge cells further down fail or skip files.
for label, folder in (("Residential", residential_folder), ("Service", service_folder)):
    if not folder.is_dir():
        print(f"WARNING: {label} folder does not exist: {folder}")


Residential folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Residential\MUSE_Files\Ofgem_agents
Service folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Service\MUSE_Files
Output folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents


## Merge "Consumption*.csv"

In [8]:
# Column that both sector files are merged on
column_name = 'Timeslice'

# Collect every regular file in the residential folder named "Consumption*.csv".
# iterdir() yields entries in arbitrary order, so sort them to make the
# processing (and log) order reproducible between runs and machines.
residential_files = sorted(
    f for f in residential_folder.iterdir()
    if f.is_file() and f.name.startswith("Consumption") and f.suffix == '.csv'
)

# Make an empty match visible; otherwise the loop below would do nothing silently.
if not residential_files:
    print(f"No Consumption*.csv files found in {residential_folder}.")

# Merge each residential file with the service file of the same name
for residential_file in residential_files:
    # The service input and the merged output share the residential file's name
    service_file = service_folder / residential_file.name
    output_file = output_folder / residential_file.name

    if service_file.exists():
        merge_by_column(residential_file, service_file, column_name, output_file)
    else:
        # No counterpart: nothing is merged for this file, only reported
        print(f"Service file for {residential_file.name} not found in {service_folder}.")


Service file for Consumption2010.csv not found in c:\Users\jyang8\MUSE_models\MUSE_buildings\Service\MUSE_Files.
Service file for Consumption2020.csv not found in c:\Users\jyang8\MUSE_models\MUSE_buildings\Service\MUSE_Files.
Service file for Consumption2030.csv not found in c:\Users\jyang8\MUSE_models\MUSE_buildings\Service\MUSE_Files.
Service file for Consumption2040.csv not found in c:\Users\jyang8\MUSE_models\MUSE_buildings\Service\MUSE_Files.
Service file for Consumption2050.csv not found in c:\Users\jyang8\MUSE_models\MUSE_buildings\Service\MUSE_Files.


## Merge "TechnodataTimeslices.csv" 

In [113]:
# Combine the Service and Residential timeslice files with the row-wise merge helper
FileName = 'TechnodataTimeslices.csv'
TechnodataTimeslices1, TechnodataTimeslices2 = (
    sector_folder / FileName for sector_folder in (service_folder, residential_folder)
)
output_file = output_folder / FileName

# Kept as the cell's last expression so the helper's return value is displayed
merge_by_row(TechnodataTimeslices1, TechnodataTimeslices2, output_file)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\TechnodataTimeslices.csv with duplicates removed.


Unnamed: 0,ProcessName,RegionName,Time,season,period,UtilizationFactor,MinimumServiceFactor
0,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,UK,2010,A,Day,1.000000,0.000000
1,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,UK,2010,A,Evening,1.000000,0.000000
2,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,UK,2010,A,Night,1.000000,0.000000
3,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,UK,2010,A,Peak,1.000000,0.000000
4,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,UK,2010,P,Day,1.000000,0.000000
...,...,...,...,...,...,...,...
4971,RWNAWHTRG01 [RES.WATER.NEW-AVERAGE: .01.NGA.BO...,UK,2010,S,Peak,0.153123,0.145466
4972,RWNAWHTRG01 [RES.WATER.NEW-AVERAGE: .01.NGA.BO...,UK,2010,W,Day,0.104837,0.099595
4973,RWNAWHTRG01 [RES.WATER.NEW-AVERAGE: .01.NGA.BO...,UK,2010,W,Evening,0.053023,0.050372
4974,RWNAWHTRG01 [RES.WATER.NEW-AVERAGE: .01.NGA.BO...,UK,2010,W,Night,0.100217,0.095206


## Merge "Technodata.csv" 

In [121]:
# merge_by_row_technodata is imported together with the other merge helpers in the
# import cell at the top of the notebook, so this cell no longer imports it itself.
FileName = 'Technodata.csv'
Technodata1 = service_folder / FileName      # Service input
Technodata2 = residential_folder / FileName  # Residential input
output_file = output_folder / FileName

# Kept as the cell's last expression so the helper's return value is displayed
merge_by_row_technodata(Technodata1, Technodata2, output_file)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Technodata.csv with duplicates removed.


Unnamed: 0_level_0,RegionName,Time,cap_par,cap_exp,fix_par,fix_exp,var_par,var_exp,MaxCapacityAddition,MaxCapacityGrowth,TotalCapacityLimit,TechnicalLife,UtilizationFactor,ScalingSize,efficiency,InterestRate,Fuel,EndUse,Agent2,Agent1
ProcessName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Unit,-,year,MGBP2020/PJ_a,-,MGBP2020/PJ_a,-,MGBP2020/PJ,-,PJ,%,PJ,years,-,PJ,%,-,-,-,new,new
RCEOTHER00 [RES.COOKING: .00.OTHER.KETTLE.MICROWAVE],UK,2010,61.177609885735,1,0.0,1,0,1,2.0,100,100000000.0,5.0,1.0,1,0.8,0.1,ELC,RES.COOKING.OTHER,0,1
RCEOTHER01 [RES.COOKING: .01.OTHER.KETTLE.MICROWAVE],UK,2010,61.177609885735,1,0.0,1,0,1,100000000.0,100,100000000.0,5.0,1.0,1,0.8,0.1,ELC,RES.COOKING.OTHER,0,1
RCHEHOB00 [RES.COOKING: .00.HOB.ELECTRIC.],UK,2010,164.642857142857,1,8.23214285714286,1,0,1,2.0,100,100000000.0,14.0,1.0,1,1.09656574908097,0.1,ELC,RES.COOKING.HOBS,0,1
RCHEHOB01 [RES.COOKING: .01.HOB.ELECTRIC.],UK,2010,164.642857142857,1,8.23214285714286,1,0,1,100000000.0,100,100000000.0,14.0,1.0,1,1.09656574908097,0.1,ELC,RES.COOKING.HOBS,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SWLDSTD01 [SER.WATER.LOW-CONSUMPTION: .01.STANDALONE.DELIVERY.],UK,2010,0.000604577645330003,1,0.000604577645330003,1,0,1,100000000.0,100,100000000.0,50.0,1.0,1,1.0,0.1,NGA,SER.HOT-WATER.LOW-CONSUMPTION,1,0
SWLWHTRE00 [SER.WATER.LOW-CONSUMPTION: .00.ELC.RESISTANCE.STANDALONE.],UK,2010,11.0641722133044,1,1.10641722133044,1,0,1,2.0,100,100000000.0,15.0,1.0,1,0.85,0.1,ELC,SER.HOT-WATER.LOW-CONSUMPTION,1,0
SWLWHTRE01 [SER.WATER.LOW-CONSUMPTION: .01.ELC.RESISTANCE.STANDALONE.],UK,2010,11.0641722133044,1,1.10641722133044,1,0,1,100000000.0,100,100000000.0,15.0,1.0,1,0.85,0.1,ELC,SER.HOT-WATER.LOW-CONSUMPTION,1,0
SWLWHTRG00 [SER.WATER.LOW-CONSUMPTION: .00.NGA.BOILER.STD.STANDALONE.],UK,2010,10.6826490335353,1,1.06826490335353,1,0,1,2.0,100,100000000.0,15.0,1.0,1,0.8,0.1,NGA,SER.HOT-WATER.LOW-CONSUMPTION,1,0


## Merge "GlobalCommodities.csv"

In [115]:
# Combine the Service and Residential commodity lists with the row-wise merge helper
FileName = 'GlobalCommodities.csv'
output_file = output_folder.joinpath(FileName)
GlobalCommodities1 = service_folder.joinpath(FileName)
GlobalCommodities2 = residential_folder.joinpath(FileName)

# Kept as the cell's last expression so the helper's return value is displayed
merge_by_row(GlobalCommodities1, GlobalCommodities2, output_file)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\GlobalCommodities.csv with duplicates removed.


Unnamed: 0,Commodity,CommodityType,CommodityName
0,NGA,Energy,NGA
1,BOG,Energy,BOG
2,BIOMASS,Energy,BIOMASS
3,ELC,Energy,ELC
4,OIL,Energy,OIL
5,HCO,Energy,HCO
6,LFO,Energy,LFO
7,WOOD,Energy,WOOD
8,HYDROGEN,Energy,HYDROGEN
9,PELLETS,Energy,PELLETS


## Merge "CommIn.csv" 

In [116]:

FileName = 'CommIn.csv'
CommIn1 = service_folder / FileName      # Service input
CommIn2 = residential_folder / FileName  # Residential input
output_file = output_folder / FileName

# Merge the two sector files; the result is read back from output_file below
merge_by_column_and_row(CommIn1, CommIn2, output_file)

# Parse the merged file once (it used to be read twice in a single expression)
merged_CommIn = pd.read_csv(output_file)

# Reorder the columns: ["ProcessName", "RegionName", "Time", "Level"] come first,
# all remaining columns keep their existing relative order.
reorder_cols = ["ProcessName", "RegionName", "Time", "Level"]
other_cols = [col for col in merged_CommIn.columns if col not in reorder_cols]
merged_CommIn = merged_CommIn[reorder_cols + other_cols]

# Move the "Unit" row to the first position; all other rows keep their order
is_unit_row = merged_CommIn['ProcessName'] == 'Unit'
unit_row = merged_CommIn[is_unit_row]
non_unit_rows = merged_CommIn[~is_unit_row]
merged_CommIn = pd.concat([unit_row, non_unit_rows], ignore_index=True)

# Overwrite the merged file with the tidied table
merged_CommIn.to_csv(output_file, index=False)




Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\CommIn.csv with duplicates removed, 'Unit' rows combined, and missing values filled with 0.


## Merge "CommOut.csv"

In [117]:
FileName = 'CommOut.csv'
CommOut1 = service_folder / FileName      # Service input
CommOut2 = residential_folder / FileName  # Residential input
output_file = output_folder / FileName

# Merge the two sector files; the result is read back from output_file below
merge_by_column_and_row(CommOut1, CommOut2, output_file)

# Parse the merged file once (it used to be read twice in a single expression)
merged_CommOut = pd.read_csv(output_file)

# Similar to "CommIn", reorder the columns so that
# ["ProcessName", "RegionName", "Time"] come first.
# There is no "Level" column in this file.
reorder_cols = ["ProcessName", "RegionName", "Time"]
other_cols = [col for col in merged_CommOut.columns if col not in reorder_cols]
merged_CommOut = merged_CommOut[reorder_cols + other_cols]

# Keep the "Unit" row in the first position, mirroring the CommIn post-processing.
# This is a no-op when the row is already first or when there is no such row.
is_unit_row = merged_CommOut['ProcessName'] == 'Unit'
merged_CommOut = pd.concat(
    [merged_CommOut[is_unit_row], merged_CommOut[~is_unit_row]],
    ignore_index=True,
)

# Overwrite the merged file with the tidied table
merged_CommOut.to_csv(output_file, index=False)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\CommOut.csv with duplicates removed, 'Unit' rows combined, and missing values filled with 0.


## Merge "Projections.csv"

In [118]:
# Combine the Service and Residential projections, keyed on the "Time" column
FileName = 'Projections.csv'
Projections1, Projections2 = service_folder / FileName, residential_folder / FileName
output_file = output_folder / FileName

# Kept as the cell's last expression so the helper's return value is displayed
merge_by_column(
    Projections1,
    Projections2,
    "Time",
    output_file,
)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Projections.csv


Unnamed: 0,RegionName,Attribute,Time,NGA,BOG,BIOMASS,ELC,OIL,HCO,LFO,...,RES.COOLING,RES.COMPUTERS,RES.REFRIGERATORS,RES.LIGHTING,RES.OTHER,RES.WET.APPLIANCES,RES.SPACE-HEAT.EXISTING-AVERAGE,RES.SPACE-HEAT.NEW-AVERAGE,RES.HOT-WATER.EXISTING-AVERAGE,RES.HOT-WATER.NEW-AVERAGE
0,UK,CommodityPrice,2010,12.552,0,10.019,43.342675,19.794434,19.794434,19.794434,...,0,0,0,0,0,0,0,0,0,0
1,UK,CommodityPrice,2015,12.4795,0,11.8485,45.822469,19.794434,17.9035,17.9035,...,0,0,0,0,0,0,0,0,0,0
2,UK,CommodityPrice,2020,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
3,UK,CommodityPrice,2025,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
4,UK,CommodityPrice,2030,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
5,UK,CommodityPrice,2035,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
6,UK,CommodityPrice,2040,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
7,UK,CommodityPrice,2045,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
8,UK,CommodityPrice,2050,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0


## Merge "ExistingCapacity.csv"

In [119]:
FileName = 'ExistingCapacity.csv'
# Build the output path once instead of re-deriving it for every read and write
output_file = output_folder / FileName

# Merge the two sector files; the result is read back from output_file below
merge_by_row(service_folder / FileName, residential_folder / FileName, output_file)

# Parse the merged file once (it used to be read twice in a single expression)
merged_ExistingCapacity = pd.read_csv(output_file)

# Reorder the columns: ["ProcessName", "RegionName", "Unit"] come first,
# all remaining columns keep their existing relative order.
reorder_cols = ["ProcessName", "RegionName", "Unit"]
other_cols = [col for col in merged_ExistingCapacity.columns if col not in reorder_cols]
merged_ExistingCapacity = merged_ExistingCapacity[reorder_cols + other_cols]

# Overwrite the merged file with the reordered table
merged_ExistingCapacity.to_csv(output_file, index=False)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\ExistingCapacity.csv with duplicates removed.


## Merge "Agent.csv" *

In [120]:
# Combine the agent definitions of both sectors with the row-wise merge helper.
# Input order in this cell is Residential first, then Service.
FileName = 'Agent.csv'
output_file = output_folder.joinpath(FileName)
Agent1 = residential_folder.joinpath(FileName)
Agent2 = service_folder.joinpath(FileName)

# The helper's return value is stored rather than displayed
merged_agents = merge_by_row(Agent1, Agent2, output_file)


Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Agent.csv with duplicates removed.


====================================END=========================================