In [None]:
import numpy as np
import torch
import copy
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt
from collections import deque
import opendssdirect as dss
from SAC_MARL.maddpg import MADDPG
from SAC_MARL.replay_buffer import ReplayBuffer
from config import IEEE123_config

In [None]:
# Start from an empty OpenDSS session, run the IEEE 123-bus master script
# (path is relative to the notebook's working directory) and solve it once.
dss.Command('clear')
dss.Text.Command('Redirect "./123Bus/IEEE123Master.dss"')
dss.Solution.Solve()

In [None]:
# Experiment configuration for the IEEE 123-bus case; read as a module-level
# global by the environment class and the training/evaluation functions below.
args = IEEE123_config.get_args()

In [None]:
def generate_shuffled_weights(seed=32, total_numbers=91):
    """
    Build a reproducible, randomly ordered tensor of integer weights in [0, 10].

    Every integer from 0 to 10 appears at least once; the remaining
    ``total_numbers - 11`` entries are random integers from the same range.

    Note: this reseeds torch's global random number generator with ``seed``.

    Parameters:
    - seed (int): Seed passed to ``torch.manual_seed``.
    - total_numbers (int): Length of the returned tensor (the 11 fixed values included).

    Returns:
    - torch.Tensor: 1-D tensor of ``total_numbers`` integers in shuffled order.
    """
    torch.manual_seed(seed)

    # Values that must be present at least once: 0, 1, ..., 10
    guaranteed = torch.arange(0, 11)

    # Random filler so the pool reaches the requested length (upper bound 11 is exclusive)
    n_extra = total_numbers - len(guaranteed)
    extras = torch.randint(0, 11, (n_extra,))

    # Concatenate, then reorder with a random permutation of the indices
    pool = torch.cat((guaranteed, extras))
    order = torch.randperm(pool.size(0))
    return pool[order]

In [None]:
class World:
    """Multi-area switching environment on top of the OpenDSS IEEE 123-bus circuit.

    The feeder is split into ``num_areas`` regions, each with its own list of
    candidate switches (pairs of bus names). ``states`` holds one 0/1 flag per
    candidate switch; a flag becomes 1 once its switch has been requested and
    stays 1 until ``reset_world`` is called.

    Observations handed out by ``reset_world``, ``get_status`` and ``step`` are
    copies of the internal state, so later steps never modify an observation
    the caller is still holding.
    """

    def __init__(self, master_file="./123Bus/IEEE123Master.dss"):
        """Set up weights, agents, switch tables and the all-zero switch state.

        Parameters:
        - master_file (str): DSS master script that ``reset_network`` redirects
          to. The default is the same relative path used by the notebook's
          initial circuit-loading cell.
        """
        self.master_file = master_file

        # NOTE: generate_shuffled_weights() reseeds torch's global RNG; the agents
        # built right after presumably initialise from that state, so keep this order.
        self.weights = generate_shuffled_weights()

        self.num_areas = 5
        self.agent_n = [MADDPG(args, area_idx) for area_idx in range(self.num_areas)]
        # Individual handles kept for code that addresses the agents by name
        (self.agent_1, self.agent_2, self.agent_3,
         self.agent_4, self.agent_5) = self.agent_n

        # Limit compared against the summed load power in get_reward
        # (presumably kW, matching OpenDSS power reporting - confirm)
        self.P_gen = 2600

        # One list per area; each entry is a (bus1, bus2) pair a switch line can join
        self.regions_switches = [
            [('1.2','2.2'),('1.3','3.3'),('8.2','12.2'),('8.1','9.1'),('13.3','34.3'),('18.1','19.1'),('21.2','22.2'),('23.3','24.3'),('25r.1.3', '26.1.3'),('25.1.2.3','28.1.2.3')],
            [('35.1.2','36.1.2'),('40.3','41.3'),('42.2','43.2'),('44.1','45.1'),('44.1.2.3','47.1.2.3')],
            [('57.2','58.2'),('60.1.2.3','61.1.2.3'),('60.1.2.3','62.1.2.3')],
            [('67.1','68.1'),('72.3','73.3'),('76.1.2.3','77.1.2.3'),('76.1.2.3','86.1.2.3'),('97.1.2.3','98.1.2.3')],
            [('101.3','102.3'),('105.2','106.2'),('108.1','109.1')]
        ]

        self.states = self._fresh_states()

        self.done = 0

    def _fresh_states(self):
        """Return a new all-zero state: one flag per candidate switch, grouped by area."""
        return [[0] * len(area_switches) for area_switches in self.regions_switches]

    def reset_network(self):
        """Clear OpenDSS, re-run the master script and solve the base circuit."""
        dss.Command('clear')
        dss.Text.Command(f'Redirect "{self.master_file}"')
        dss.Solution.Solve()

    def reset_world(self):
        """Start a new episode.

        Clears the done flag, zeroes every switch flag and reloads the circuit.

        Returns:
        - list[list[int]]: a copy of the per-area switch flags (all zeros).
        """
        self.done = 0
        self.states = self._fresh_states()
        self.reset_network()
        # Hand out copies: step_area() mutates self.states in place, and the
        # caller keeps this value as the "previous observation" of a transition.
        return [list(area_state) for area_state in self.states]

    def _load_powers(self):
        """Solve the circuit and return the first TotalPowers() entry of every load."""
        dss.Solution.Solve()
        # Iterating dss.Loads is relied on to make each load the active circuit
        # element in turn, so CktElement refers to that load.
        return [dss.CktElement.TotalPowers()[0] for _ in dss.Loads]

    def _weighted_sum(self, powers):
        """Return sum(power * weight); falls back to unit weights when weights is None.

        Raises:
        - ValueError: if the number of weights differs from the number of powers.
        """
        if self.weights is None:
            self.weights = [1] * len(powers)

        if len(self.weights) != len(powers):
            raise ValueError("Length of weights must match the number of power values")

        return sum(p * w for p, w in zip(powers, self.weights))

    def get_reward(self):
        """Compute the shared reward for the current circuit state.

        Both the weighted and the plain load totals come from a single solve.
        If the plain total exceeds ``P_gen`` the episode is flagged done and a
        penalty of ``-10000 * excess`` is returned; otherwise the weighted
        total is the reward.
        """
        powers = self._load_powers()
        # Evaluated first so a weights/loads length mismatch is always reported
        weighted_total = self._weighted_sum(powers)
        total = sum(powers)

        if total > self.P_gen:
            self.done = 1
            return -10000 * (total - self.P_gen)
        return weighted_total

    def get_restored_power(self):
        """Solve the circuit and return the unweighted sum of load powers."""
        return sum(self._load_powers())

    def get_status(self, area_idx):
        """Return a copy of the switch flags of one area.

        Raises:
        - ValueError: if ``area_idx`` is not below ``num_areas``.
        """
        if area_idx >= self.num_areas:
            raise ValueError("Invalid area index")
        # Copy so that later in-place updates do not alter the returned observation
        return list(self.states[area_idx])

    def step(self, action_n, action_mask):
        """Apply one joint action and return observations, rewards and done flags.

        Parameters:
        - action_n: one 0/1 vector per area; entry k == 1 requests switch k of that area.
        - action_mask: one truthy/falsy value per area; a truthy value skips that
          area's action entirely.

        Returns:
        - (obs_n, reward_n, done_n): lists with one entry per agent. Reward and
          done are global quantities, so every agent receives the same value.
        """
        self.get_connected_pairs_for_regions(action_n, action_mask)

        for area_idx, (action, mask) in enumerate(zip(action_n, action_mask)):
            if not mask:
                self.step_area(area_idx, action)

        # The circuit does not change between agents, so solve and score it once
        reward = self.get_reward()
        done = self.get_done()

        n_agents = len(self.agent_n)
        obs_n = [self.get_status(agent_idx) for agent_idx in range(n_agents)]
        reward_n = [reward] * n_agents
        done_n = [done] * n_agents

        return obs_n, reward_n, done_n

    def create_switches(self, bus_pairs):
        """Add one line element per bus pair to the circuit, solving after each.

        The element is named ``Sw<bus1><bus2>`` using the bus names without
        their node suffixes.
        """
        # NOTE(review): Phases=3 is used even for pairs that list one or two nodes
        # (e.g. '1.2'); confirm that this is the intended connection.
        # NOTE(review): a pair requested again in a later step issues the same
        # "New Line" command a second time; confirm OpenDSS handles the redefinition.
        for pair in bus_pairs:
            bus1, bus2 = pair
            bus1_first = bus1.split('.')[0]
            bus2_first = bus2.split('.')[0]
            switch_command = f"New Line.Sw{bus1_first}{bus2_first} Phases=3 Bus1={bus1} Bus2={bus2} Switch=n  r1=1e-3 r0=1e-3 x1=0.000 x0=0.000 c1=0.000 c0=0.000"
            dss.run_command(switch_command)
            dss.Solution.Solve()

    def get_connected_pairs(self, switches, binary_list):
        """Return the entries of ``switches`` whose flag in ``binary_list`` equals 1."""
        return [switches[i] for i, flag in enumerate(binary_list) if flag == 1]

    def get_connected_pairs_for_regions(self, region_switches, action_mask):
        """Create the switches requested by every unmasked area.

        Parameters:
        - region_switches: per-area 0/1 action vectors (same layout as ``step``'s ``action_n``).
        - action_mask: per-area flags; areas with a truthy flag are skipped.

        Returns:
        - list: for each unmasked area, in order, the bus pairs that were requested.
        """
        region_results = [
            self.get_connected_pairs(self.regions_switches[i], region_switch)
            for i, (region_switch, mask) in enumerate(zip(region_switches, action_mask))
            if not mask
        ]
        for result in region_results:
            self.create_switches(result)
        return region_results

    def get_load_data(self):
        """Solve the circuit and return the weight-scaled sum of load powers.

        Raises:
        - ValueError: if the number of weights differs from the number of loads.
        """
        return self._weighted_sum(self._load_powers())

    def get_done(self):
        """Return the episode-done flag (1 once the generation limit was exceeded)."""
        return self.done

    def step_area(self, area_idx, action):
        """Merge an area's action into its switch flags.

        Raises:
        - ValueError: if ``area_idx`` is not below ``num_areas``.
        """
        if area_idx >= self.num_areas:
            raise ValueError("Invalid area index")

        area_state = self.states[area_idx]
        for switch_idx, act in enumerate(action):
            # Bitwise OR: a flag that is already 1 stays 1
            area_state[switch_idx] = int(area_state[switch_idx]) | int(act)

# Single shared environment instance; train() and evaluate_policy() use it as a module-level global.
world = World()

In [None]:
# Initialize agents: one MADDPG learner per area, built in index order
agent_n = [MADDPG(args, agent_id) for agent_id in range(5)]
agent_1, agent_2, agent_3, agent_4, agent_5 = agent_n

# Initialize parameters taken from the experiment configuration
max_train_steps = args.max_train_steps
episodes = args.episode_limit

# Filled by evaluate_policy() with one averaged reward per evaluation round
evaluate_rewards = []

# Shared experience storage and global step counter
replay_buffer = ReplayBuffer(args)
total_steps = 0

def decay_noise(noise_std, decay_factor, min_noise_std):
    """Scale ``noise_std`` by ``decay_factor`` without dropping below ``min_noise_std``."""
    decayed = noise_std * decay_factor
    return decayed if decayed > min_noise_std else min_noise_std

def store_and_train(obs_n, a_n, r_n, obs_next_n, done_n, replay_buffer, agent_n):
    """Record one transition and, once the buffer is large enough, update every agent.

    Training only starts when the buffer holds more than ``args.batch_size``
    transitions; each of the ``args.N`` agents is then trained in index order,
    receiving the buffer and the full agent list.
    """
    replay_buffer.store_transition(obs_n, a_n, r_n, obs_next_n, done_n)

    # Not enough stored experience yet: skip the update
    if not replay_buffer.current_size > args.batch_size:
        return

    for agent_id in range(args.N):
        learner = agent_n[agent_id]
        learner.train(replay_buffer, agent_n)

def bitwise_sum_tuples(actions_taken):
    """OR together all actions of each agent, element by element.

    Parameters:
    - actions_taken: iterable of collections (one per agent) of equal-length
      0/1 action tuples.

    Returns:
    - list[list[int]]: one combined vector per agent that has at least one
      recorded action; agents with no actions are left out.
    """
    combined = []
    for agent_actions in actions_taken:
        if agent_actions:
            # Width of the accumulator comes from any one recorded action
            sample = next(iter(agent_actions))
            merged = [0] * len(sample)
            for recorded in agent_actions:
                merged = [int(acc) | int(bit) for acc, bit in zip(merged, recorded)]
            combined.append(merged)
    return combined

def train():
    """Run the training loop on the module-level ``world`` with the module-level agents.

    One iteration of the outer loop is one episode; ``args.max_train_steps``
    episodes are run in total. ``total_steps`` counts every environment step
    taken across all episodes and is used as the TensorBoard x-axis and as the
    clock for periodic evaluation.

    Side effects: writes scalars to ``./runs/experiments``, fills the shared
    replay buffer, trains the agents, prints progress and shows two plots.
    """
    writer = SummaryWriter(log_dir='./runs/experiments')  # Specify your log directory here
    noise_std = args.noise_std_init
    min_noise_std = args.noise_std_min
    decay_factor = 0.85
    moving_avg_window = 10  # You can adjust this window size
    moving_avg_rewards = deque(maxlen=moving_avg_window)
    moving_avg_restored_powers = deque(maxlen=moving_avg_window)
    smoothed_rewards = []
    smoothed_restored_powers = []

    # Kept separate from the episode index so increments survive between episodes
    total_steps = 0
    # Number of evaluate_freq boundaries already handled
    evaluations_done = 0

    try:
        for episode_idx in tqdm(range(args.max_train_steps)):
            obs_n = world.reset_world()
            actions_taken = [set() for _ in range(args.N)]
            cumulative_reward = 0

            # Exploration noise shrinks once every 1000 episodes (including episode 0)
            if episode_idx % 1000 == 0:
                noise_std = decay_noise(noise_std, decay_factor, min_noise_std)
                print(f"Decaying noise: New noise_std = {noise_std}")

            for episode_step in range(args.episode_limit):
                a_n = [agent.choose_action(obs, noise_std) for agent, obs in zip(agent_n, obs_n)]
                # An agent repeating an action it already took this episode is masked out
                action_mask = [tuple(a) in actions_taken[i] for i, a in enumerate(a_n)]
                obs_next_n, r_n, done_n = world.step(copy.deepcopy(a_n), action_mask)
                store_and_train(obs_n, a_n, r_n, obs_next_n, done_n, replay_buffer, agent_n)
                obs_n = obs_next_n

                for i, a in enumerate(a_n):
                    actions_taken[i].add(tuple(a))

                total_steps += 1
                # Every agent receives the same reward, so the first entry is representative
                cumulative_reward += r_n[0]

                if any(done_n):
                    print(f"Episode ended in: {episode_step} steps with cumulative reward of: {cumulative_reward}, and noise std: {noise_std}")
                    break

            # Only the end-of-episode value is reported, so query it once here
            restored_power = world.get_restored_power()

            moving_avg_rewards.append(cumulative_reward)
            moving_avg_restored_powers.append(restored_power)

            smoothed_reward = np.mean(moving_avg_rewards)
            smoothed_restored_power = np.mean(moving_avg_restored_powers)

            smoothed_rewards.append(smoothed_reward)
            smoothed_restored_powers.append(smoothed_restored_power)

            # Log metrics to TensorBoard against the cumulative step count
            writer.add_scalar('Cumulative Reward', smoothed_reward, total_steps)
            writer.add_scalar('Restored Power', smoothed_restored_power, total_steps)

            print(f"Restored Power: {restored_power}")
            bitwise_sum = bitwise_sum_tuples(actions_taken)
            print(f"Actions taken are: {bitwise_sum}\n")
            print("-------------------------------------------------------------------------------")

            # Evaluate once each time the step count passes a multiple of evaluate_freq;
            # an exact-modulo test would miss boundaries crossed in the middle of an episode.
            boundaries_passed = total_steps // args.evaluate_freq
            if boundaries_passed > evaluations_done:
                evaluations_done = boundaries_passed
                evaluate_policy(total_steps, noise_std)
    finally:
        # Flush and release the event file even if training is interrupted
        writer.close()

    # Both series hold one smoothed value per episode
    plt.figure(1)
    plt.plot(smoothed_rewards)
    plt.xlabel('episode')
    plt.ylabel('Cumulative Reward')
    plt.title('Cumulative Reward per episode')
    plt.grid()
    plt.show()

    plt.figure(2)
    plt.plot(smoothed_restored_powers)
    plt.xlabel('episode')
    plt.ylabel('Restored Power')
    plt.title('Restored Power per episode')
    plt.grid()
    plt.show()

def evaluate_policy(total_steps, noise_std):
    """Run noise-free evaluation episodes and record their mean reward.

    Plays ``args.evaluate_times`` episodes on the module-level ``world`` with
    exploration noise 0, appends the average episode reward (first agent's
    reward stream) to ``evaluate_rewards`` and prints a summary.

    Parameters:
    - total_steps: training step count, used only in the printed summary.
    - noise_std: current training noise level, used only in the printed summary.
    """
    reward_total = 0
    for _ in range(args.evaluate_times):
        episode_reward = 0
        obs_n = world.reset_world()
        seen_actions = [set() for _ in range(args.N)]

        for _step in range(args.episode_limit):
            # Zero noise: agents act without exploration
            a_n = [agent.choose_action(obs, 0) for agent, obs in zip(agent_n, obs_n)]
            # Mask any agent that repeats an action it already took this episode
            action_mask = [tuple(a) in seen_actions[i] for i, a in enumerate(a_n)]
            obs_n, r_n, done_n = world.step(copy.deepcopy(a_n), action_mask)
            episode_reward += r_n[0]

            for i, a in enumerate(a_n):
                seen_actions[i].add(tuple(a))

            if all(done_n):
                break

        res_p = world.get_restored_power()
        print(f"Restored power: {res_p}")
        reward_total += episode_reward

    reward_total /= args.evaluate_times
    evaluate_rewards.append(reward_total)
    print(f"Total steps: {total_steps}\tEvaluate reward: {reward_total}\tNoise std: {noise_std}")

# Start training: runs the whole training loop and shows the summary plots when it finishes
train()


In [None]:
# Register the TensorBoard notebook extension so its magic command is available in this kernel.
%load_ext tensorboard

In [None]:
tensorboard --logdir=runs --port=1015