In [299]:
import pandas as pd
import numpy as np
import math
import copy
from typing import List
from abc import ABC
import warnings
import os
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.optim import Adam

from torch.utils.data import Dataset, DataLoader
from torch.nn.utils import clip_grad_norm_

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.exceptions import DataConversionWarning

device = "cuda" if torch.cuda.is_available() else "cpu"
scaler = StandardScaler()
warnings.filterwarnings("ignore", category=DataConversionWarning)
np.set_printoptions(suppress=True)

## 1. 引入数据 + 数据预处理

In [358]:
# Cooling energy use:未预处理
# Heating energy use:未预处理
# Daylighting illuminance [Lux] (abbr.: DL):未预处理
# Thermally uncomfortable hours [h] (abbr.: UCH)：除100
# Embodied carbon: 除1000
# Operational Carbon (abbr.: OC)：log(x)
# Lifecycle cost (abbr.: LCost)：log10
# Initial cost (abbr.: ICost)：log10
# Payback Time (abbr.: PBT)：未预处理

variables_range = {
    "Overhang Depth":[0.4,0.6,0.8,1],
    "Roof insulation thickness":[0.1,0.15,0.2],
    "Wall insulation thickness":[0.03,0.04,0.05,0.06],
    "Insulation conductivity":[0.026,0.032,0.044],
    "Window conductivity":[1.8,2.3],
    "Window glazing thickness":[0.003, 0.006],
    "Infiltration rate":[0.00125,0.000625,0.0003125],
    "Light power":[7.5,15.],
    "Glazing layer number":[2,3],
    "HVAC COP":[3., 3.2],
    "WWR_att1":[0,1],
    "WWR_att2":[0,1],
    "Lighting control strategy_att1":[0,1],
    "Lighting control strategy_att2":[0,1],
    "PV Installation_att1":[0,1],
    "PV Installation_att2":[0,1],
}

predict_index = {
    'Cooling energy use':{"model_name":"model_state_dict_CEU.pth", "process_method":None,"model":None}, 
    'Heating energy use':{"model_name":"model_state_dict_HEU.pth", "process_method":None,"model":None},
    'Daylighting illuminance [Lux]':{"model_name":"model_state_dict_DL.pth", "process_method":None,"model":None}, 
    'Thermally uncomfortable hours [h]':{"model_name":"model_state_dict_UCH.pth", "process_method":100,"model":None},
    'Embodied carbon':{"model_name":"model_state_dict_EC.pth", "process_method":1000,"model":None},
    'Operational Carbon':{"model_name":"model_state_dict_OC.pth", "process_method":"le","model":None}, 
    'Initial Cost':{"model_name":"model_state_dict_ICost.pth", "process_method":"l10","model":None}, 
    'Lifecycle cost':{"model_name":"model_state_dict_LCost.pth", "process_method":"l10","model":None},
    'Payback Period':{"model_name":"model_state_dict_PBT.pth", "process_method":None,"model":None}
}

classes_num = np.array([len(v) for _,v in variables_range.items()])
id_to_range = [v for _,v in variables_range.items() ]

In [546]:
attributes = ['Overhang Depth', 'Roof insulation thickness',
              'Wall insulation thickness', 'Insulation conductivity',
              'Window conductivity', 'Window glazing thickness', 'Infiltration rate',
              'Light power', 'Glazing layer number', 'HVAC COP', 'WWR_att1', 'WWR_att2',
              'Lighting control strategy_att1', 'Lighting control strategy_att2',
              'PV Installation_att1', 'PV Installation_att2']
results = ['Cooling energy use', 'Heating energy use',
           'Daylighting illuminance [Lux]', 'Thermally uncomfortable hours [h]',
           'Embodied carbon','Operational Carbon', 'Initial Cost', 'Lifecycle cost','Payback Period']

## 2. 制作数据集

In [360]:
class myDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X.to_numpy())
        self.y = torch.FloatTensor(y.to_numpy())

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        x = self.X[index]
        y = self.y[index]
        return x, y

## 3. 定义模型

In [361]:
class MLP(nn.Module):
    def __init__(self, in_size, out_size = 1):
        super().__init__()
        self.linear_1 = nn.Linear(in_size, 32)
        self.linear_2 = nn.Linear(32, 64)
        self.linear_3 = nn.Linear(64, 128)
        self.linear_4 = nn.Linear(128, 64)
        self.linear_5 = nn.Linear(64, 32)
        self.linear_6 = nn.Linear(32, 1)
        self.para_init()
    
    def para_init(self):
        nn.init.xavier_normal_(self.linear_1.weight)
        nn.init.xavier_normal_(self.linear_2.weight)
        nn.init.xavier_normal_(self.linear_3.weight)
        nn.init.xavier_normal_(self.linear_4.weight)
        nn.init.xavier_normal_(self.linear_5.weight)
        nn.init.xavier_normal_(self.linear_6.weight)
    
    def forward(self, x):
        x = self.linear_1(x)
        x = nn.LeakyReLU()(x)
        x = self.linear_2(x)
        x = nn.LeakyReLU()(x)
        x = self.linear_3(x)
        x = nn.LeakyReLU()(x)
        x = self.linear_4(x)
        x = nn.LeakyReLU()(x)
        x = self.linear_5(x)
        x = nn.LeakyReLU()(x)
        x = self.linear_6(x)
        return x

## 4. 处理整体数据

In [362]:
# 引入数据
data = pd.read_excel("./data/Input2RL.xlsx", index_col = "No.")

# 对数据进行预处理：将文本信息转换为特征
data["WWR_att1"] = (data["WWR"]=="Reduced WWR").astype(int)
data["WWR_att2"] = (data["WWR"]=="Unchanged").astype(int)

data["Lighting control strategy_att1"] = (data["Lighting control strategy"]=="On/off control").astype(int)
data["Lighting control strategy_att2"] = (data["Lighting control strategy"]=="Continuous").astype(int)

data["PV Installation_att1"] = (data["PV Installation"]==False).astype(int)
data["PV Installation_att2"] = (data["PV Installation"]==True).astype(int)

# 丢掉不不为数值的数据
data = data.drop("WWR", axis=1)
data = data.drop("Lighting control strategy", axis=1)
data = data.drop("PV Installation", axis=1)

In [363]:
data_temp = copy.deepcopy(data)

# 对数据进行标准化操作
for a in attributes:
    data_temp[a] = scaler.fit_transform(data_temp[[a]])

for k, v in predict_index.items():
    index = k
    process_method = v["process_method"]
    model_name = v["model_name"]
    
    ######## 数据处理 ########
    if process_method==100:
        data_temp[index] = data_temp[index].apply(lambda x: round(x/100, 4)) 
    elif process_method==1000:
        data_temp[index] = data_temp[index].apply(lambda x: round(x/1000, 4)) 
    elif process_method == "l10":
        data_temp[index] = data_temp[index].apply(lambda x: round(math.log10(x), 4)) 
    elif process_method == "le":
        data_temp[index] = data_temp[index].apply(lambda x: round(math.log(x), 4)) 

    ######## 模型定义 ########
    model = MLP(in_size=16).to(device)
    model.load_state_dict(torch.load(f"./PTH Final/{model_name}"))
    predict_index[k]["model"] = model

In [364]:
def get_dataset(index):
    global attributes
    
    # 数据输入
    X = data_temp[attributes]
    # 预测数据
    y = data_temp[[index]]
    
    # 切分训练集、验证集、测试集
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state = 42)
    
    train_ds = myDataset(X_train, y_train)
    train_dl = DataLoader(train_ds, batch_size = 16, shuffle=True)
    
    valid_ds = myDataset(X_val, y_val)
    valid_dl = DataLoader(valid_ds, batch_size = 16)
    
    test_ds = myDataset(X_test, y_test)
    test_dl = DataLoader(test_ds, batch_size = 16)
    return train_dl, valid_dl, test_dl

In [365]:
def get_model(index):
    global predict_index
    model = predict_index[index]["model"]
    return model

In [366]:
def get_loss(index = None):
    if index == None:
        return "No Index is given"

    train_dl, valid_dl, test_dl = get_dataset(index)
    model = get_model(index)
    
    model.eval()
    with torch.no_grad():
        loss_list = []
        for x, y in test_dl:
            x = x.to(device)
            y = y.to(device)
            y_c = model(x)
            loss = nn.MSELoss()(y_c,y)
            loss_list.append(loss.item())
        loss_list = np.array(loss_list)
    return loss_list

In [367]:
# get_loss()函数的测试代码
for index in results:
    loss_list = get_loss(index)
    
    if process_method==100:
        loss = np.sqrt(loss_list)*100
    elif process_method==1000:
        loss = np.sqrt(loss_list)*1000
    elif process_method == "l10":
        loss = np.power(10,np.sqrt(loss_list))
    elif process_method == "le":
        loss = np.exp(np.sqrt(loss_list))
    elif process_method == None:
        loss = np.sqrt(loss_list)

    print(f"{index}, loss:{loss.mean()}")

Cooling energy use, loss:0.09502912775300838
Heating energy use, loss:0.20287503738034768
Daylighting illuminance [Lux], loss:0.1756702543189002
Thermally uncomfortable hours [h], loss:0.005668187461213296
Embodied carbon, loss:0.0021959944011468602
Operational Carbon, loss:0.016531975351301166
Initial Cost, loss:0.005080911458751396
Lifecycle cost, loss:0.005490686031925854
Payback Period, loss:0.2275413380377691


## 5. 编写环境

In [528]:
class MooSCH:
    def __init__(self, variables_range, predict_index, temp_data, origin_data):
        self.variables_range = variables_range
        self.predict_index = predict_index
        self.temp_data = temp_data
        self.origin_data = origin_data
        self.x_dim = len(variables_range)
        self.obj_dim = len(predict_index)
        self.scaler = StandardScaler()

    def scaled_data(self, u):
        indexes = list(self.variables_range.keys())
        v_list = []
        for v,idx in zip(u, indexes):
            self.scaler.fit(self.origin_data[[idx]])
            v_test = self.scaler.transform(pd.DataFrame([[v]], columns=[idx])).squeeze().tolist()
            v_list.append(v_test)
        v_list = np.array(v_list)
        return v_list
    
    def obj(self, u):
        # 注1：输入的是原值；然后需要scale data
        scaled_u = self.scaled_data(u)
        scaled_u = torch.FloatTensor(scaled_u).to(device)
        predict_outcomes = []
        for k,v in predict_index.items():
            process_method = v["process_method"]
            model = v["model"].to(device)
            model.eval()
            with torch.no_grad():
                outcome = model(scaled_u).cpu().numpy()

            if process_method==100:
                outcome_true = outcome*100
            elif process_method==1000:
                outcome_true = outcome*1000
            elif process_method == "l10":
                outcome_true = np.power(10,outcome)
            elif process_method == "le":
                outcome_true = np.exp(outcome)
            elif process_method == None:
                outcome_true = outcome
            
            predict_outcomes.append(outcome_true.tolist())
        predict_outcomes = np.array(predict_outcomes)
        return predict_outcomes

In [529]:
# # MooSCH()类的测试代码
# test_u = data.iloc[2,list(range(10)) + list(range(19,25))].values.tolist()
# testSCH = MooSCH(variables_range, predict_index, data_temp, data)
# np.set_printoptions(suppress=True)
# objectives = testSCH.obj(test_u)

# print(objectives.squeeze())
# print(data.iloc[2].values[10:19])

In [530]:
class MooTestSetting:
    def __init__(self, moo_prob):
        self.moo_prob = moo_prob
        self.variables_num = len(self.moo_prob.variables_range)
        self.class_num = np.array([len(v) for _,v in self.moo_prob.variables_range.items()])
        self.id_to_range = [v for _,v in self.moo_prob.variables_range.items() ]
        self.max_actions = [max(l) for l in self.id_to_range]

    def softmax(self, x):
        x_exp = np.exp(x - np.max(x))
        return x_exp / np.sum(x_exp)
    
    def _state_to_action(self, state):
        action_list = []
        for idx, ac in enumerate(state[:10]):
            ac = int(ac)
            action_list.append(self.id_to_range[idx][ac])   
        action_list.extend(state[10:])
        action_list = np.array(action_list)
        return action_list
    
    def obj_value(self,state):
        action = self._state_to_action(state)
        predict_outcome = self.moo_prob.obj(action)
        return predict_outcome

    def reward_value(self, elite_list, obj_value, elite_action_list, action):
        # 这里采用的是帕累托最优算法
        # elite_list中包含的是结果的集合
        
        m = self.moo_prob.obj_dim # objectives的数量
        num_dominated = 0
        reward = 1
        if_exist = False # 判断新进入的obj_value是否在elite_list中
        if_dominated = False # obj_value比elite_list中的数据要差
        if_dominate_others = False # obj_value比elite_list中的数据要好
        corr_sign = np.array([1,1,-1,1,1,1,1,1,1])

        for i, current_elite in enumerate(elite_list):
            if (current_elite == obj_value).all():
                # 如果obj_value已经在elite_list中存在，那么奖励为0
                reward = 0
                return reward

        dominated_indices = []
        for i, current_elite in enumerate(elite_list):
            # 如果current_elite != obj_value
            delta = current_elite - obj_value
            delta_sign = np.sign(delta)
            delta_sign = delta_sign.squeeze().astype("int64")
            delta_sum = np.sum(delta_sign == corr_sign)
            # print(delta_sign)
            # print(corr_sign)

            if delta_sum == m:
                # obj_value中的数据比current_elite中的数据要好【除了Daylighting illuminance，其他都要小】
                reward = 4 # 这么好的情况，给与一个正的奖励
                if_dominate_others = True
                dominated_indices.append(i)
            elif delta_sum  <= 6:
                # obj_value中的数据比current_elite中的数据要差【除了Daylighting illuminance，其他都要大】
                reward = -2 # 这么差的情况，给予一个负的奖励
                if_dominated = True
                break
            
        if if_dominated: # 当精英解支配了新解时
            # 如果elite_list中的个元素比obj_value要好，那么reward为-2。【说明新的action效果不好】
            return reward
        elif if_dominate_others: # 当新解支配了现有的精英解时
            # 如果obj_value要比elite_list中某些元素要好，那么reward为4。【说明新的action效果好】
            for idx in reversed(dominated_indices):
                elite_list.pop(idx)
                elite_action_list.pop(idx)
            elite_list.append(obj_value)
            elite_action_list.append(action)
            print(f"Found Obj Value:{obj_value}")
            return reward
        else:
            # 如果obj_value与elite_list不存在支配关系，那么reward为3
            elite_list.append(obj_value)
            elite_action_list.append(action)
            return reward

    def state_update(self, action):
        new_state = []
        for i, a in enumerate(action[:10]):
            idx = self.id_to_range[i].index(a)
            new_state.append(idx)
        new_state.extend(action[10:])
        new_state = np.array(new_state)
        return new_state

    def initial_state(self):
        state_list = []
        for k in self.class_num[:10]:
            state_p = self.softmax(np.random.rand(k).tolist())
            state = np.argmax(state_p)
            state_list.append(state)

        for a,b in zip(self.class_num[10::2], self.class_num[11::2]):
            a_p = self.softmax(np.random.rand(a).tolist())
            b_p = self.softmax(np.random.rand(b).tolist())
            if a_p[-1] > b_p[-1]:
                state_list.extend([1,0])
            elif a_p[-1] < b_p[-1]:
                state_list.extend([0,1])
            
        return state_list

In [531]:
class MooEnv:
    def __init__(self, moo_setting, *args):
        self.moo_setting = moo_setting

        self.init_state_func = self.moo_setting.initial_state
        self.update_state_func = self.moo_setting.state_update
        self.get_reward_func = self.moo_setting.reward_value
        self.get_obj_func = self.moo_setting.obj_value

        self.args = args
        self.elite_list = []
        self.elite_action_list = []
        self.state = None
        self.object = None

    def reset(self):
        self.state = self.init_state_func()
        self.object = self.get_obj_func(self.state)
        self.elite_list = [self.object]
        action = self.moo_setting._state_to_action(self.state)
        self.elite_action_list = [action]
        return self.state, self.elite_list

    def step(self, action):
        next_state = self.update_state_func(action)
        self.object = self.get_obj_func(next_state)
        reward = self.get_reward_func(self.elite_list, self.object, self.elite_action_list, action)
            
        self.state = next_state
        return self.state, reward

In [515]:
# 测试代码
# moo_prob = MooSCH(variables_range, predict_index, data_temp, data)
# moo_setting = MooTestSetting(moo_prob)
# moo = MooEnv(moo_setting)

In [532]:
def set_seed(seed = 42):
    os.environ['PYTHONHASHSEED'] = '42'
    random.seed(42)
    
    # 固定NumPy随机性
    np.random.seed(42)
    
    # 固定PyTorch随机性
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)
    
    # 确保完全可控的随机性
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

### 6. 定义Actor和Critic

In [533]:
# 定义Actor：输入的是state, 输出是一个39维的action
class Actor(nn.Module):
    def __init__(self, input_dim, classes_num):
        super().__init__()
        self.classes_num = classes_num
        # 定义共享主干层
        self.shared_feature_layer = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128,64),
            nn.ReLU()
        )
        # 策略网络分支
        self.policy_layer = nn.ModuleList([
            nn.Sequential(
                nn.Linear(64, num_class),
                nn.Softmax(dim=0)
            ) for num_class in self.classes_num
        ])

    def forward(self, state):
        shared_features = self.shared_feature_layer(state)
        policy = [layer(shared_features) for layer in self.policy_layer]

        return policy

In [534]:
# 定义critic: 输入的是state，输出一个q_value
class Critic(nn.Module):
    def __init__(self,input_dim):
        super().__init__()
        # 定义共享主干层
        self.shared_feature_layer = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128,64),
            nn.ReLU()
        )
        # 定义价值计算层
        self.value = nn.Linear(64,1)
    def forward(self, state):
        shared_features = self.shared_feature_layer(state)
        value = self.value(shared_features)
        return value

In [535]:
def train_iteration(initial_state, gamma, max_iter_step, actor_model, cirtic_model, optimizer_actor, optimizer_critic):
    global classes_num, id_to_range
    state_current = torch.FloatTensor(initial_state)
    
    step_reward = 0
    sum_reward = 0.0
    # avg_reward = 0.0

    for current_step in range(max_iter_step):

        optimizer_actor.zero_grad()
        optimizer_critic.zero_grad()
        

        value_current_out = cirtic_model(state_current)

        action_net_out = actor_model(state_current)

        action_list = []
        prob_list = []
        for idx, p in enumerate(action_net_out[:10]):
            action_idx = torch.multinomial(p,1)
            action = id_to_range[idx][action_idx]
            action_list.append(action)
            prob_list.append(p[action_idx].item())
            
        for p1,p2 in zip(action_net_out[10::2], action_net_out[11::2]):
            action_list.extend(torch.where(p1[-1] > p2[-1], torch.tensor([1, 0]), torch.tensor([0, 1])).tolist())
            prob_list.extend(torch.where(p1[-1] > p2[-1], torch.tensor([p1[-1], p2[0]]), torch.tensor([p1[0], p2[-1]])).tolist())


        action_list = torch.FloatTensor(action_list)
        prob_list = torch.FloatTensor(prob_list)

        state_next, step_reward = moo.step(action_list)
        state_next = torch.FloatTensor(state_next)
        value_next_out = cirtic_model(state_next)

        sum_reward += step_reward
        
        TD_err = step_reward + gamma* value_next_out - value_current_out
        log_prob = torch.log(prob_list).sum()

        loss_actor = -(TD_err * log_prob)
        loss_actor.backward(retain_graph=True)
        optimizer_actor.step()

        loss_critic = TD_err.pow(2).mean() # 这个其实是mseloss(): [(x - y) ^2]/N
        loss_critic.backward()
        optimizer_critic.step()

        state_current = state_next

        if (current_step+1) % 1000 == 0:
            print(f'Step: {current_step}, avg_reward = {sum_reward/(current_step+1)}, elite num = {len(moo.elite_list)}')

    return [sum_reward/max_iter_step, len(moo.elite_list)]

In [536]:
# 测试代码
seed = 42
set_seed(seed=seed)

# 定义环境
moo_prob = MooSCH(variables_range, predict_index, data_temp, data)
moo_setting = MooTestSetting(moo_prob)
moo = MooEnv(moo_setting)
# 初始化状态
initial_state = moo_setting.initial_state()

# 定义模型
actor = Actor(input_dim=16,classes_num=classes_num)
critic = Critic(input_dim=16)
optimizer_actor = optim.Adam(actor.parameters(), lr=1e-4)
optimizer_critic = optim.Adam(critic.parameters(), lr=1e-4)

results = train_iteration(
    initial_state = initial_state, 
    gamma = 0.99, 
    max_iter_step = 50000,
    actor_model = actor, 
    cirtic_model = critic, 
    optimizer_actor = optimizer_actor, 
    optimizer_critic = optimizer_critic
)

Step: 999, avg_reward = -1.988, elite num = 4
Step: 1999, avg_reward = -1.993, elite num = 4
Step: 2999, avg_reward = -1.9953333333333334, elite num = 4
Step: 3999, avg_reward = -1.9965, elite num = 4
Step: 4999, avg_reward = -1.9972, elite num = 4
Step: 5999, avg_reward = -1.9971666666666668, elite num = 5
Step: 6999, avg_reward = -1.9975714285714286, elite num = 5
Step: 7999, avg_reward = -1.997375, elite num = 5
Step: 8999, avg_reward = -1.9974444444444444, elite num = 5
Step: 9999, avg_reward = -1.9977, elite num = 5
Step: 10999, avg_reward = -1.9979090909090909, elite num = 5
Step: 11999, avg_reward = -1.9979166666666666, elite num = 5
Step: 12999, avg_reward = -1.998076923076923, elite num = 5
Step: 13999, avg_reward = -1.9980714285714285, elite num = 5
Step: 14999, avg_reward = -1.9982, elite num = 5
Step: 15999, avg_reward = -1.9983125, elite num = 5
Step: 16999, avg_reward = -1.998235294117647, elite num = 6
Step: 17999, avg_reward = -1.9983333333333333, elite num = 6
Step: 18

In [545]:
results

[-1.99896, 6]

In [557]:
def softmax(x):
    x_exp = np.exp(x - np.max(x))
    return x_exp / np.sum(x_exp)

In [558]:
dd = data[results].mean().values

In [560]:
data[results].mean().values

array([    19.08107276,     28.56071811,     39.7199386 ,   3150.82473265,
         6214.77242908,  91159.84986185,  66591.43173913, 168632.63809525,
           20.40138527])

In [561]:
data[results].mean()

Cooling energy use                       19.081073
Heating energy use                       28.560718
Daylighting illuminance [Lux]            39.719939
Thermally uncomfortable hours [h]      3150.824733
Embodied carbon                        6214.772429
Operational Carbon                    91159.849862
Initial Cost                          66591.431739
Lifecycle cost                       168632.638095
Payback Period                           20.401385
dtype: float64

In [559]:
softmax(x=dd)

array([0., 0., 0., 0., 0., 0., 0., 1., 0.])

In [537]:
el = np.array(moo.elite_list)

In [538]:
el

array([[[    18.55200195],
        [    32.52696609],
        [    65.41745758],
        [  3187.08886719],
        [  6349.30322266],
        [114271.6328125 ],
        [ 65807.21875   ],
        [164681.125     ],
        [    20.75338364]],

       [[    17.26791763],
        [    30.92500877],
        [    65.44688416],
        [  3196.20483398],
        [ 10707.97753906],
        [100379.2890625 ],
        [ 57676.09765625],
        [154064.015625  ],
        [    18.30887413]],

       [[    17.80431366],
        [    29.89082146],
        [    15.61038589],
        [  3180.19970703],
        [  9751.69140625],
        [ 67300.078125  ],
        [ 55348.8671875 ],
        [151152.46875   ],
        [    17.9585495 ]],

       [[    18.44833374],
        [    30.66171074],
        [    16.35787201],
        [  3178.36767578],
        [  9158.39160156],
        [ 58068.765625  ],
        [ 53119.9765625 ],
        [150405.171875  ],
        [    16.85069466]],

       [[    17.4846

In [539]:
eal = np.array(moo.elite_action_list)

In [540]:
eal

array([[0.4      , 0.15     , 0.03     , 0.026    , 1.8      , 0.006    ,
        0.0003125, 7.5      , 2.       , 3.       , 0.       , 1.       ,
        0.       , 1.       , 0.       , 1.       ],
       [0.4      , 0.1      , 0.06     , 0.044    , 1.8      , 0.003    ,
        0.0003125, 7.5      , 2.       , 3.2      , 0.       , 1.       ,
        0.       , 1.       , 0.       , 1.       ],
       [0.8      , 0.15     , 0.04     , 0.044    , 1.8      , 0.003    ,
        0.0003125, 7.5      , 3.       , 3.2      , 0.       , 1.       ,
        0.       , 1.       , 0.       , 1.       ],
       [0.6      , 0.2      , 0.03     , 0.044    , 1.8      , 0.003    ,
        0.0003125, 7.5      , 3.       , 3.2      , 0.       , 1.       ,
        0.       , 1.       , 0.       , 1.       ],
       [0.6      , 0.15     , 0.05     , 0.044    , 1.8      , 0.003    ,
        0.00125  , 7.5      , 3.       , 3.2      , 0.       , 1.       ,
        0.       , 1.       , 1.       , 0.     

In [541]:
# # 用于验证结果的测试代码
data[
(data["Overhang Depth"] == 0.4) & 
(data["Roof insulation thickness"] == 0.2) & 
(data["Wall insulation thickness"] == 0.05) &
(data["Insulation conductivity"] == 0.044) & 
(data["Window conductivity"] == 2.3) & 
(data["Window glazing thickness"] == 0.003) &
(data["Infiltration rate"] == 0.0003125) & 
(data["Light power"] == 7.5) & 
(data["Glazing layer number"] == 3.) &
(data["HVAC COP"] == 3.2) &
(data["WWR_att1"] == 0.) & 
(data["WWR_att2"] == 1.) & 
(data["Lighting control strategy_att1"] == 0.) &
(data["Lighting control strategy_att2"] == 1.) &
(data["PV Installation_att1"] == 1.) &
(data["PV Installation_att2"] == 0.) 
].values.squeeze()[10:19]

array([    16.76895216,     28.23942226,     20.13054407,   3182.04166667,
         5706.8945    , 101999.46755073,  46183.25565217, 137149.93116914,
           16.        ])

In [562]:
!pip freeze > requirement.txt