In [3]:
import os
import pickle
import collections
import random
from typing import Tuple

import numpy as np
import gymnasium as gym
from tqdm import tqdm
import pandas as pd

import selfhealing_env

In [4]:
# Build the SelfHealing-v0 environment. Every keyword below is passed straight
# through gym.make to the environment constructor registered by selfhealing_env.
env = gym.make(id='SelfHealing-v0',
               opt_framework='Gurobipy',
               solver='gurobi',
               data_file='Case_33BW_Data.xlsx',
               solver_display=False,
               min_disturbance=1,
               max_disturbance=1,
               vvo=False,
               Sb=100,
               V0=1.05,
               V_min=0.95,
               V_max=1.05)

# Number of expert episodes to record.
expert_episode_used = 500
# Not referenced anywhere in this cell; kept in case a later cell reads it.
steps = 5

# Column layout of the transition table produced by this cell.
expert_columns = ['D', 'step', 'old_obs', 'action', 'next_obs', 'done']

# Options handed to every env.reset call.
# NOTE(review): "External_RNG" is None and no seed is set in this cell, so the
# collected episodes are presumably not reproducible across runs -- confirm.
options = {
    "Specific_Disturbance": None,
    "Expert_Policy_Required": True,
    "External_RNG": None
}

# Transitions are accumulated in a plain list and turned into a DataFrame once
# at the end; enlarging a DataFrame row by row re-allocates it on every step.
expert_records = []
sampled_idx = 0
with tqdm(total=expert_episode_used,
          desc='Collecting Expert Data') as pbar:
    while sampled_idx < expert_episode_used:
        # Ensure that the env has a solution: keep resetting until the reset
        # info carries an expert policy.
        obs, info = env.reset(options=options)
        while info["Expert_Policy"] is None:
            obs, info = env.reset(options=options)
        D = info["Disturbance_Set"]
        sampled_idx += 1

        # Collect expert experience by replaying the expert's tie-line actions.
        Y = info["Expert_Policy"]["TieLine_Action"]  # a1-a5
        step = 0
        done = False
        while not done:
            # Snapshot the observation before stepping so the stored
            # pre-action state is independent of `obs`.
            old_obs = obs.copy()
            action = Y[step].item()
            next_obs, _, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            obs = next_obs
            expert_records.append({
                'D': D,
                'step': step,
                'old_obs': old_obs,
                'action': action,
                'next_obs': next_obs,
                'done': done,
            })
            step += 1
        pbar.update(1)

# One row per recorded transition, in collection order.
data = pd.DataFrame(expert_records, columns=expert_columns)




Collecting Expert Data: 100%|██████████| 500/500 [01:32<00:00,  5.38it/s]


In [5]:
# Persist the collected expert transitions to disk as a pickle file.
data.to_pickle(path='expert_data.pkl')

In [6]:
# Reload the expert transitions from the pickle file; `data` is rebound to the
# frame read from disk.
data = pd.read_pickle(filepath_or_buffer='expert_data.pkl')