### This Python notebook combines the Residential and Service sectors into a single sector, the Buildings sector.
### The following CSV files from each of the two sectors will be combined:
   * 1. "Consumption*.csv"
   * 2. "TechnodataTimeslices.csv" 
   * 3. "Technodata.csv"
   * 4. "GlobalCommodities.csv"
   * 5. "CommIn.csv" 
   * 6.  "CommOut.csv"
   * 7. "Projections.csv"
   * 8. "ExistingCapacity.csv" 
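   * 9. "Agent.csv" (**) 

(**) In the `Ofgem_agents` version the residential and merged files are named "Agents.csv"; the service file is always "Agent.csv".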

===============================================================

In [18]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
import pandas as pd
from pathlib import Path
import os
# import numpy as np
# import shutil # for moving/copying files

In order to combine these CSV files, we import 3 functions:
- merge_by_column
- merge_by_row
- merge_by_column_and_row

In [20]:
from help_functions import merge_by_column,merge_by_row,merge_by_column_and_row

## Locate data folders that will be used for merge

In [21]:
# Directory the kernel was started in; the data folders are located relative to it.
current_dir = Path.cwd()

Choose which version we want to combine (single agent or multi-agent).

In [22]:
# Versions of the Residential data set that this notebook can combine
SUPPORTED_VERSIONS = ['single_agent', 'Ofgem_agents']

# Set the version (must be one of the SUPPORTED_VERSIONS)
version = SUPPORTED_VERSIONS[1]  # Change this to your desired version

# Fail early on a typo: the version is part of the output path, so an unknown
# value would otherwise silently create a new, empty output folder.
if version not in SUPPORTED_VERSIONS:
    raise ValueError(
        f"Unsupported version {version!r}; choose one of {SUPPORTED_VERSIONS}"
    )

# Define paths relative to the parent of the current working directory
# (adjust as needed depending on the location of your notebook).
residential_folder = current_dir.parent / 'Residential' / 'MUSE_Files' / version
# The Service path has no per-version subfolder: the same folder is used for every version.
service_folder = current_dir.parent / 'Service' / 'MUSE_Files'
output_folder = current_dir.parent / 'Buildings' / 'MUSE_Files' / version

# Flag missing input folders here, where the cause is obvious, rather than
# letting each merge cell fail later.
for _label, _folder in (("Residential", residential_folder), ("Service", service_folder)):
    if not _folder.is_dir():
        print(f"WARNING: {_label} folder not found: {_folder}")

# Ensure the output folder exists, and create it if it doesn't
output_folder.mkdir(parents=True, exist_ok=True)

# Print paths to confirm
print(f"Residential folder path: {residential_folder}")
print(f"Service folder path: {service_folder}")
print(f"Output folder path: {output_folder}")


Residential folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Residential\MUSE_Files\Ofgem_agents
Service folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Service\MUSE_Files
Output folder path: c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents


## Merge  "Consumption*.csv"

In [23]:
# Column passed to merge_by_column as the key to merge on
column_name = 'Timeslice'

# All CSV files in the residential folder whose name starts with "Consumption".
# iterdir() yields entries in arbitrary order, so sort them to process (and log)
# the files in a stable order on every run.
residential_files = sorted(
    f for f in residential_folder.iterdir()
    if f.name.startswith("Consumption") and f.suffix == '.csv'
)

# Make an empty match visible instead of silently doing nothing
if not residential_files:
    print(f"No Consumption*.csv files found in {residential_folder}.")

# Merge each residential file with the service file of the same name
for residential_file in residential_files:
    # Derive the corresponding service file path and output file path
    service_file = service_folder / residential_file.name
    output_file = output_folder / residential_file.name

    # Only merge when the service counterpart exists; otherwise report it
    if service_file.exists():
        merge_by_column(residential_file, service_file, column_name, output_file)
    else:
        print(f"Service file for {residential_file.name} not found in {service_folder}.")


Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\Consumption2010.csv
Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\Consumption2020.csv
Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\Consumption2030.csv
Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\Consumption2040.csv
Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\Consumption2050.csv


## Merge "TechnodataTimeslices.csv" 

In [24]:
FileName = 'TechnodataTimeslices.csv'
TechnodataTimeslices1 = service_folder / FileName      # Service-sector input
TechnodataTimeslices2 = residential_folder / FileName  # Residential-sector input
output_file = output_folder / FileName

try:
    merge_by_row(TechnodataTimeslices1, TechnodataTimeslices2, output_file)
except Exception as exc:
    # Keep the notebook running, but report why the merge failed instead of hiding the cause
    print(f"Error merging {FileName}: {type(exc).__name__}: {exc}")

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\TechnodataTimeslices.csv with duplicates removed.


## Merge "Technodata.csv" 

In [25]:
# Technodata.csv is merged with its own helper rather than the generic merge_by_row
from help_functions import merge_by_row_technodata

FileName = 'Technodata.csv'
Technodata1 = service_folder / FileName      # Service-sector input
Technodata2 = residential_folder / FileName  # Residential-sector input
output_file = output_folder / FileName

try:
    merge_by_row_technodata(Technodata1, Technodata2, output_file)
except Exception as exc:
    # Keep the notebook running, but report why the merge failed instead of hiding the cause
    print(f"Error merging {FileName}: {type(exc).__name__}: {exc}")

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\Technodata.csv with duplicates removed.


## Merge "GlobalCommodities.csv"

In [26]:
FileName = 'GlobalCommodities.csv'
GlobalCommodities1 = service_folder / FileName      # Service-sector input
GlobalCommodities2 = residential_folder / FileName  # Residential-sector input
output_file = output_folder / FileName

try:
    merge_by_row(GlobalCommodities1, GlobalCommodities2, output_file)
except Exception as exc:
    # Keep the notebook running, but report why the merge failed instead of hiding the cause
    print(f"Error merging {FileName}: {type(exc).__name__}: {exc}")

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\GlobalCommodities.csv with duplicates removed.


## Merge "CommIn.csv" 

In [27]:

FileName = 'CommIn.csv'
CommIn1 = service_folder / FileName      # Service-sector input
CommIn2 = residential_folder / FileName  # Residential-sector input
output_file = output_folder / FileName

# ["ProcessName", "RegionName", "Time", "Level"] will be the first four columns of the merged file
reorder_cols = ["ProcessName", "RegionName", "Time", "Level"]

try:
    merge_by_column_and_row(CommIn1, CommIn2, output_file)
except Exception as exc:
    # Report the cause and skip the post-processing: after a failed merge the
    # output file is either missing or left over from an earlier run.
    print(f"Error merging {FileName}: {type(exc).__name__}: {exc}")
else:
    # Read the merged file once, then put the key columns first
    merged_raw = pd.read_csv(output_file)
    other_cols = [col for col in merged_raw.columns if col not in reorder_cols]
    merged_CommIn = merged_raw[reorder_cols + other_cols]

    # Move the "Unit" row to the first position
    unit_row = merged_CommIn[merged_CommIn['ProcessName'] == 'Unit']  # Identify the "Unit" row
    non_unit_rows = merged_CommIn[merged_CommIn['ProcessName'] != 'Unit']  # Exclude the "Unit" row
    merged_CommIn = pd.concat([unit_row, non_unit_rows], ignore_index=True)

    # Overwrite the merged file with the reordered table
    merged_CommIn.to_csv(output_file, index=False)




Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\CommIn.csv with duplicates removed, 'Unit' rows combined, and missing values filled with 0.


## Merge "CommOut.csv"

In [28]:
FileName = 'CommOut.csv'
CommOut1 = service_folder / FileName      # Service-sector input
CommOut2 = residential_folder / FileName  # Residential-sector input
output_file = output_folder / FileName

# Similar to "CommIn": ["ProcessName", "RegionName", "Time"] will be the first three columns.
# There is no "Level" column in this file.
reorder_cols = ["ProcessName", "RegionName", "Time"]

try:
    merge_by_column_and_row(CommOut1, CommOut2, output_file)
except Exception as exc:
    # Report the cause and skip the post-processing: after a failed merge the
    # output file is either missing or left over from an earlier run.
    print(f"Error merging {FileName}: {type(exc).__name__}: {exc}")
else:
    # Read the merged file once, then put the key columns first
    merged_raw = pd.read_csv(output_file)
    other_cols = [col for col in merged_raw.columns if col not in reorder_cols]
    merged_CommOut = merged_raw[reorder_cols + other_cols]

    # NOTE(review): unlike the CommIn cell, the "Unit" row is not moved to the top here — confirm this is intended.
    # Overwrite the merged file with the reordered table
    merged_CommOut.to_csv(output_file, index=False)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\CommOut.csv with duplicates removed, 'Unit' rows combined, and missing values filled with 0.


## Merge "Projections.csv"

In [29]:
FileName = 'Projections.csv'
Projections1 = service_folder / FileName      # Service-sector input
Projections2 = residential_folder / FileName  # Residential-sector input
output_file = output_folder / FileName

try:
    merge_by_column(Projections1, Projections2, "Time", output_file) # merging based on the "Time" column
except Exception as exc:
    # Keep the notebook running, but report why the merge failed instead of hiding the cause
    print(f"Error merging {FileName}: {type(exc).__name__}: {exc}")

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\Projections.csv


## Merge "ExistingCapacity.csv" 

In [30]:
FileName = 'ExistingCapacity.csv'

ExistingCapacity1 = service_folder / FileName      # Service-sector input
ExistingCapacity2 = residential_folder / FileName  # Residential-sector input
output_file = output_folder / FileName

# ["ProcessName", "RegionName", "Unit"] will be the first three columns (for readability)
reorder_cols = ["ProcessName", "RegionName", "Unit"]

try:
    merge_by_row(ExistingCapacity1, ExistingCapacity2, output_file)
except Exception as exc:
    # Report the cause and skip the post-processing: after a failed merge the
    # output file is either missing or left over from an earlier run.
    print(f"Error merging {FileName}: {type(exc).__name__}: {exc}")
else:
    # Read the merged file once, then put the key columns first
    merged_raw = pd.read_csv(output_file)
    other_cols = [col for col in merged_raw.columns if col not in reorder_cols]
    merged_ExistingCapacity = merged_raw[reorder_cols + other_cols]

    # Save
    merged_ExistingCapacity.to_csv(output_file, index=False)

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\ExistingCapacity.csv with duplicates removed.


## Merge "Agent.csv" / "Agents.csv" (file name depends on the version)

In [31]:
# Depending on the version, the file name is slightly different: the residential
# (and merged) file is 'Agent.csv' for single_agent and 'Agents.csv' for
# Ofgem_agents, while the service file is 'Agent.csv' in both cases.
if version == 'single_agent':
    Agent1 = residential_folder / 'Agent.csv'
    Agent2 = service_folder / 'Agent.csv'
    output_file = output_folder / 'Agent.csv'

elif version == 'Ofgem_agents':
    Agent1 = residential_folder / 'Agents.csv'
    Agent2 = service_folder / 'Agent.csv'
    output_file = output_folder / 'Agents.csv'

else:
    # Without this branch an unknown version would reuse stale (or undefined) paths
    raise ValueError(
        f"Unsupported version {version!r}; expected 'single_agent' or 'Ofgem_agents'"
    )

# Reset first so a failed merge cannot leave a result from an earlier run behind
merged_agents = None

# NOTE(review): the residential file is passed first here, whereas the other
# merge_by_row cells pass the service file first — confirm the order is intended.
try:
    merged_agents = merge_by_row(Agent1, Agent2, output_file)
except Exception as exc:
    # Name the file actually being written, and report why the merge failed
    print(f"Error merging {output_file.name}: {type(exc).__name__}: {exc}")

Merged files saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Buildings\MUSE_Files\Ofgem_agents\Agents.csv with duplicates removed.


====================================END=========================================