### This Python script combines the Residential and Services sectors into one, a.k.a. the Buildings sector.
### The following csv files from each sector will be combined: 
   * 1. "Consumption*.csv"
   * 2. "TechnodataTimeslices.csv" 
   * 3. "Technodata.csv"
   * 4. "GlobalCommodities.csv"
   * 5. "CommIn.csv" 
   * 6. "CommOut.csv"
   * 7. "Projections.csv"
   * 8. "ExistingCapacity.csv" 
   * 9. "Agent.csv" (**) 

===============================================================

In [17]:
import pandas as pd
from pathlib import Path
import os
import numpy as np
import shutil # for moving/copying files

### In order to combine these CSV files, we import 3 functions:
- merge_by_column
- merge_by_row
- merge_by_column_and_row

In [18]:
from help_functions import merge_by_column,merge_by_row,merge_by_column_and_row

## Locate data folders and files that will be used for merge

In [19]:
# Everything is resolved relative to the directory the kernel was started in.
current_dir = Path.cwd()

# The sector folders are siblings of the current working directory, and each
# keeps its CSV files under "MUSE_Files". Adjust these if the notebook is run
# from a different location.
sectors_root = current_dir.parent
residential_folder = sectors_root / 'Residential' / 'MUSE_Files'
service_folder = sectors_root / 'Service' / 'MUSE_Files'
output_folder = sectors_root / 'Buildings' / 'MUSE_Files'

# Create the output folder (and any missing parents); an existing folder is fine.
output_folder.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so they can be checked before any merge runs.
print(f"Residential folder path: {residential_folder}")
print(f"Service folder path: {service_folder}")
print(f"Output folder path: {output_folder}")


Residential folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Residential\MUSE_Files
Service folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Service\MUSE_Files
Output folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files


## Merge "Consumption*.csv"

In [20]:
# Column handed to merge_by_column as the merge key.
column_name = 'Timeslice'

# Every CSV in the residential folder whose file name begins with "Consumption".
residential_files = [
    entry
    for entry in residential_folder.iterdir()
    if entry.suffix == '.csv' and entry.name.startswith("Consumption")
]

# Pair each residential file with the same-named service file and merge the two
# into a file of that name in the output folder.
for residential_file in residential_files:
    service_file = service_folder / residential_file.name
    output_file = output_folder / residential_file.name

    # Without a service counterpart there is nothing to merge: report and move on.
    if not service_file.exists():
        print(f"Service file for {residential_file.name} not found in {service_folder}.")
        continue

    merge_by_column(residential_file, service_file, column_name, output_file)


Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Consumption2010.csv
Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Consumption2020.csv
Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Consumption2030.csv
Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Consumption2040.csv
Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Consumption2050.csv


## Merge "TechnodataTimeslices.csv" 

In [21]:
# Merge the Service and Residential "TechnodataTimeslices.csv" with
# merge_by_row and write the result under the same name in the output folder.
FileName = 'TechnodataTimeslices.csv'
output_file = output_folder / FileName

merge_by_row(
    service_folder / FileName,      # first input: Service sector file
    residential_folder / FileName,  # second input: Residential sector file
    output_file,
)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\TechnodataTimeslices.csv with duplicates removed.


## Merge "Technodata.csv" 

In [22]:
# Merge the Service and Residential "Technodata.csv" with merge_by_row and
# write the result under the same name in the output folder.
FileName = 'Technodata.csv'
output_file = output_folder / FileName

merge_by_row(
    service_folder / FileName,      # first input: Service sector file
    residential_folder / FileName,  # second input: Residential sector file
    output_file,
)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Technodata.csv with duplicates removed.


## Merge "GlobalCommodities.csv"

In [23]:
# Merge the Service and Residential "GlobalCommodities.csv" with merge_by_row
# and write the result under the same name in the output folder.
FileName = 'GlobalCommodities.csv'
output_file = output_folder / FileName

merge_by_row(
    service_folder / FileName,      # first input: Service sector file
    residential_folder / FileName,  # second input: Residential sector file
    output_file,
)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\GlobalCommodities.csv with duplicates removed.


## Merge "CommIn.csv" 

In [24]:

FileName = 'CommIn.csv'
CommIn1 = service_folder / FileName
CommIn2 = residential_folder / FileName
output_file = output_folder / FileName

# Write the combined Service + Residential table to output_file.
merge_by_column_and_row(CommIn1, CommIn2, output_file)

# Identifier columns that must lead the merged file, in this order.
reorder_cols = ["ProcessName", "RegionName", "Time", "Level"]

# Load the merged file a single time, then move the identifier columns to the
# front while keeping every other column in its existing order.
# A KeyError is raised here if any of reorder_cols is missing from the file.
merged_CommIn = pd.read_csv(output_file)
other_cols = [col for col in merged_CommIn.columns if col not in reorder_cols]
merged_CommIn = merged_CommIn[reorder_cols + other_cols]

# Split off the row(s) whose ProcessName is "Unit" and put them first,
# followed by all remaining rows; the index is renumbered from 0.
unit_row = merged_CommIn[merged_CommIn['ProcessName'] == 'Unit']
non_unit_rows = merged_CommIn[merged_CommIn['ProcessName'] != 'Unit']
merged_CommIn = pd.concat([unit_row, non_unit_rows], ignore_index=True)

# Overwrite the merged file with the reordered table (no index column).
merged_CommIn.to_csv(output_file, index=False)




Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\CommIn.csv with duplicates removed, 'Unit' rows combined, and missing values filled with 0.


## Merge "CommOut.csv"

In [25]:
FileName = 'CommOut.csv'
CommOut1 = service_folder / FileName
CommOut2 = residential_folder / FileName
output_file = output_folder / FileName

# Write the combined Service + Residential table to output_file.
merge_by_column_and_row(CommOut1, CommOut2, output_file)

# Identifier columns that must lead the merged file, in this order.
# This file is reordered without a "Level" column.
reorder_cols = ["ProcessName", "RegionName", "Time"]

# Load the merged file a single time, then move the identifier columns to the
# front while keeping every other column in its existing order.
# A KeyError is raised here if any of reorder_cols is missing from the file.
merged_CommOut = pd.read_csv(output_file)
other_cols = [col for col in merged_CommOut.columns if col not in reorder_cols]
merged_CommOut = merged_CommOut[reorder_cols + other_cols]

# NOTE(review): unlike the CommIn cell, the "Unit" row is not moved to the top
# here; presumably merge_by_column_and_row already leaves it first - confirm.

# Overwrite the merged file with the reordered table (no index column).
merged_CommOut.to_csv(output_file, index=False)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\CommOut.csv with duplicates removed, 'Unit' rows combined, and missing values filled with 0.


## Merge "Projections.csv"

In [26]:
# Merge the Service and Residential "Projections.csv" with merge_by_column,
# using the "Time" column as the merge key.
FileName = 'Projections.csv'
output_file = output_folder / FileName
Projections1, Projections2 = service_folder / FileName, residential_folder / FileName

# Kept as the last expression so the cell still displays whatever the helper returns.
merge_by_column(Projections1, Projections2, "Time", output_file)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Projections.csv


Unnamed: 0,RegionName,Attribute,Time,NGA,BOG,BIOMASS,ELC,OIL,HCO,LFO,...,RES.COOLING,RES.COMPUTERS,RES.REFRIGERATORS,RES.LIGHTING,RES.OTHER,RES.WET.APPLIANCES,RES.SPACE-HEAT.EXISTING-AVERAGE,RES.SPACE-HEAT.NEW-AVERAGE,RES.HOT-WATER.EXISTING-AVERAGE,RES.HOT-WATER.NEW-AVERAGE
0,UK,CommodityPrice,2010,12.552,0,10.019,43.342675,19.794434,19.794434,19.794434,...,0,0,0,0,0,0,0,0,0,0
1,UK,CommodityPrice,2015,12.4795,0,11.8485,45.822469,19.794434,17.9035,17.9035,...,0,0,0,0,0,0,0,0,0,0
2,UK,CommodityPrice,2020,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
3,UK,CommodityPrice,2025,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
4,UK,CommodityPrice,2030,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
5,UK,CommodityPrice,2035,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
6,UK,CommodityPrice,2040,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
7,UK,CommodityPrice,2045,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0
8,UK,CommodityPrice,2050,11.083333,0,12.692744,57.166667,19.794434,16.748,16.748,...,0,0,0,0,0,0,0,0,0,0


## Merge "ExistingCapacity.csv"

In [27]:
FileName = 'ExistingCapacity.csv'
# Build the output path once instead of recomputing it for every use.
output_file = output_folder / FileName

# Write the combined Service + Residential table to output_file.
merge_by_row(service_folder / FileName, residential_folder / FileName, output_file)

# Identifier columns that must lead the merged file, in this order.
reorder_cols = ["ProcessName", "RegionName", "Unit"]

# Load the merged file a single time, then move the identifier columns to the
# front while keeping every other column in its existing order.
# A KeyError is raised here if any of reorder_cols is missing from the file.
merged_ExistingCapacity = pd.read_csv(output_file)
other_cols = [col for col in merged_ExistingCapacity.columns if col not in reorder_cols]
merged_ExistingCapacity = merged_ExistingCapacity[reorder_cols + other_cols]

# Overwrite the merged file with the reordered table (no index column).
merged_ExistingCapacity.to_csv(output_file, index=False)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\ExistingCapacity.csv with duplicates removed.


## "Agent.csv" * 

In [28]:
# NOTE(review): this cell merges "GlobalCommodities.csv", not "Agent.csv", and
# repeats a merge already performed earlier in the notebook. It looks like a
# copy-paste leftover; confirm whether it can be removed.
FileName = 'GlobalCommodities.csv'
output_file = output_folder / FileName

merge_by_row(
    service_folder / FileName,      # first input: Service sector file
    residential_folder / FileName,  # second input: Residential sector file
    output_file,
)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\GlobalCommodities.csv with duplicates removed.


In [29]:
# Merge the Residential and Service "Agent.csv" with merge_by_row. Here the
# Residential file is passed first, unlike the other merge_by_row cells.
FileName = 'Agent.csv'
output_file = output_folder / FileName
Agent1, Agent2 = residential_folder / FileName, service_folder / FileName

# Keep the helper's return value for further inspection.
merged_agents = merge_by_row(Agent1, Agent2, output_file)


Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Agent.csv with duplicates removed.


====================================END=========================================