In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from citylearn.citylearn import CityLearnEnv
from citylearn.agents.sac import SAC
import pandas as pd
from collections import defaultdict

# Meta-Learning Agent using SAC
class MetaRLAgent:
    def __init__(self, action_space, observation_space, num_tasks=3):
        self.num_tasks = num_tasks
        self.meta_model = SAC(observation_space, action_space)
        self.optimizer = optim.Adam(self.meta_model.parameters(), lr=0.001)

    def forward(self, task_data):
        """ Forward pass for meta-RL, adapting to a new task """
        return self.meta_model(task_data)
    
    def meta_train(self, envs, num_iterations=100, episodes=2):
        """ Meta-training loop """
        for iteration in range(num_iterations):
            meta_loss = 0
            for episode in range(episodes):
                for task_env in envs:
                    task_data = task_env.reset()  
                    loss = self.update(task_data)  # Perform task-specific update (inner loop)
                    meta_loss += loss
            
            # Meta-gradient update (outer loop)
            self.optimizer.zero_grad()
            meta_loss.backward()
            self.optimizer.step()
            print(f"Iteration {iteration + 1}/{num_iterations} complete.")
    
    def update(self, task_data):
        """ Inner loop update (task-specific adaptation) """
        task_loss = self.meta_model.learn(episodes=task_data) 
        return task_loss

# initialize environments (tasks)
def initialize_env(building_ids):
    return CityLearnEnv(schema='citylearn_challenge_2023_phase_3_1',
                        building_ids=building_ids, central_agent=False)

# Training the model on multiple buildings
def train_agent_on_buildings(building_scenarios, episodes=2):
    models = []
    for building_set in building_scenarios:
        # Initialize the building env
        env = initialize_env(building_set)
        obs_shape = env.observation_space[0].shape[0]
        print(f"Training on Buildings: {building_set}, Observation Size: {obs_shape}")

        # Initialize SAC agent
        model = SAC(env)
        
        model.learn(episodes=episodes) 

        # Store the trained model
        models.append(model)

    return models

# Train the Meta-RL agent on multiple buildings
building_scenarios = [
    [0],  # Train on Building 0
    [1],  # Train on Building 1
    [2],  # Train on Building 2
    [3],  # Train on Building 3
    [4]  # Train on Building 4
]

models2 = train_agent_on_buildings(building_scenarios)

print("Meta-RL training completed across multiple buildings!")

# Fine-tune and evaluate the model on unseen building (e.g., Building 6)
def fine_tune_and_evaluate(models, episodes=2):
    # Load the test environment for the unseen building
    test_env = initialize_env([5])

    meta_model = models[0]  # Using the model trained on Building 0
    meta_model.env = test_env 

    # Fine-tune the model on the unseen building
    print(f"\n Fine-tuning on unseen Building {test_building}...")
    meta_model.learn(episodes=episodes)

    # --- Test
    obs, _ = test_env.reset()
    done = False
    total_reward = 0
    while not done:
        actions = meta_model.predict(obs, deterministic=True)
        obs, reward, done, _, _ = test_env.step(actions)
        total_reward += reward

    # --- Evaluate KPIs for the unseen building ---
    kpis = test_env.evaluate()
    kpis = kpis.pivot(index="cost_function", columns="name", values="value").round(3)
    kpis = kpis.dropna(how="all")

    # --- Display KPIs ---
    print(f"\nKPIs for Unseen Building 6 (after fine-tuning):")
    display(kpis)
    print(f"Total Reward from Evaluation: {total_reward}")
    return kpis

# Fine-tuning and evaluating on Building 6 (unseen building)
kpis_unseen_building = fine_tune_and_evaluate(models2)


Couldn't import dot_parser, loading of dot files will not be possible.
Training on Buildings: [0], Observation Size: 30


  o = tensor(o).to(self.device)


Training on Buildings: [1], Observation Size: 30
Training on Buildings: [2], Observation Size: 30
Training on Buildings: [3], Observation Size: 30
Training on Buildings: [4], Observation Size: 30
Meta-RL training completed across multiple buildings!


NameError: name 'test_building' is not defined

In [5]:
def fine_tune_and_evaluate(models, episodes=2):
    # Load the test environment for the unseen building
    test_env = initialize_env([5])

    # Use the trained model (example: model trained on Building 0)
    meta_model = models[0]  # Using the model trained on Building 0
    meta_model.env = test_env  # Attach the test environment

    # Fine-tune the model on the unseen building
    print(f"\n Fine-tuning on unseen Building {6}...")
    meta_model.learn(episodes=episodes)

    # --- Test the adapted model ---
    obs, _ = test_env.reset()
    done = False
    total_reward = 0
    while not done:
        actions = meta_model.predict(obs, deterministic=True)
        obs, reward, done, _, _ = test_env.step(actions)
        #total_reward = total_reward + reward

    # --- Evaluate KPIs for the unseen building ---
    kpis = test_env.evaluate()
    kpis = kpis.pivot(index="cost_function", columns="name", values="value").round(3)
    kpis = kpis.dropna(how="all")

    # --- Display KPIs ---
    print(f"\nKPIs for Unseen Building {6} (after fine-tuning):")
    display(kpis)
    #print(f"Total Reward from Evaluation: {total_reward}")
    return kpis

# Fine-tuning and evaluating on Building 6 (unseen building)
kpis_unseen_building = fine_tune_and_evaluate(models2)
print(kpis_unseen_building)


 Fine-tuning on unseen Building 6...

KPIs for Unseen Building 6 (after fine-tuning):


name,Building_1,Building_2,Building_3,Building_4,Building_5,Building_6,District
cost_function,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
all_time_peak_average,,,,,,,0.894
annual_normalized_unserved_energy_total,0.023,0.022,0.017,0.018,0.021,0.019,0.02
carbon_emissions_total,0.825,1.061,0.937,0.815,0.989,0.842,0.911
cost_total,0.805,1.031,0.916,0.793,0.971,0.822,0.89
daily_one_minus_load_factor_average,,,,,,,0.997
daily_peak_average,,,,,,,0.908
discomfort_cold_delta_average,0.088,0.061,0.031,0.101,0.062,0.13,0.079
discomfort_cold_delta_maximum,3.849,2.826,2.72,2.882,2.23,3.57,3.013
discomfort_cold_delta_minimum,0.0,0.0,0.0,0.0,0.0,0.0,0.0
discomfort_cold_proportion,0.004,0.003,0.001,0.001,0.001,0.014,0.004


name                                           Building_1  Building_2  \
cost_function                                                           
all_time_peak_average                                 NaN         NaN   
annual_normalized_unserved_energy_total             0.023       0.022   
carbon_emissions_total                              0.825       1.061   
cost_total                                          0.805       1.031   
daily_one_minus_load_factor_average                   NaN         NaN   
daily_peak_average                                    NaN         NaN   
discomfort_cold_delta_average                       0.088       0.061   
discomfort_cold_delta_maximum                       3.849       2.826   
discomfort_cold_delta_minimum                       0.000       0.000   
discomfort_cold_proportion                          0.004       0.003   
discomfort_hot_delta_average                        0.727       0.285   
discomfort_hot_delta_maximum                       

In [None]:
print(kpis)