In [2]:
import warnings
import itertools
import pickle
import copy
import math
import bisect
import torch
import json
import sys
import os
import numpy as np
import pandas as pd
import torch.nn as nn
from transformers import BertModel
from itertools import combinations
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

from model import BertClassifier


F_P = {
    0: [0,1],
    1: [2,3],
    2: [4,5,6,7,8,9,10,11],
    3: [12,13],
}


class BertClassifier(nn.Module):
    
    def __init__(self, input_dim, output_dim, drop_rate=0.5):        
        super(BertClassifier, self).__init__()
        
        self.transform_dim = nn.Sequential(
            nn.Linear(input_dim, 768),
        )
        # bert-base-cased 768
        # bert-large-cased 1024
        self.bert = BertModel.from_pretrained("bert-base-cased")
        
        self.classifier = nn.Sequential(

            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(drop_rate),
            nn.Linear(512, output_dim),
            # nn.ReLU(),
            # nn.Dropout(drop_rate),
            # nn.Linear(128, 3)
            # nn.ReLU(),
            # nn.Dropout(drop_rate),
            # nn.Linear(128, 64),
            # nn.ReLU(),
            # nn.Dropout(drop_rate),
            # nn.Linear(64, 32),
            # nn.ReLU(),
            # nn.Dropout(drop_rate),
            # nn.Linear(32, 3)
            
            )
                        
    def forward(self, inputs):
        
        inputs = self.transform_dim(inputs) # (batch size, 10, 128)
        outputs = self.bert(inputs_embeds=inputs) # (batch_size, sequence_length, hidden_size)
        last_hidden_state = outputs.last_hidden_state # 32, 10, 768
        outputs = last_hidden_state[:,-1,:]  # 32, 768    
        outputs = self.classifier(outputs) # 32, 3
        return outputs
    

def generate_partition(f_part):  #input a dict
    setnum = len(f_part)  #子集數
    num_set = [i for i in range(setnum)]  #子集數list
    cardlist = [len(i) for i in f_part.values()]  #各子集cardinality
    f_list = [i for i in f_part.values()]  #list of list
    all_subsets = []
    for r in range(setnum+1):
        for s in combinations(num_set, r):
            tmp = []
            for t in s:
                tmp.extend(f_list[t])
            all_subsets.append(np.array(sorted(tmp)))
    # Create a hash table to store the index of each subset
    subset_index_table = {}
    for index, value in enumerate(all_subsets):
        subset_index_table[tuple(value)] = index
    return all_subsets, subset_index_table, len(all_subsets)


class Hypercube_wpart:
    '''
    A class to create a hypercube object which stores values of vertices
    '''    
    #輸入維度
    def __init__(self, partition):   #input a dict
        self.f_part = partition
        self.n_dim = len(self.f_part)
        self.elem_count = sum([len(i) for i in self.f_part.values()])
        # vertex_values is a dictionary to store the value of each vertex.
        # Because np.array is not hashable, we use tuple to represent the vertex.
        self.vertex_values = {}
        self.vertices, self.vertex_index, self.vertex_num = generate_partition(self.f_part)  #vertex 上的value一次考慮整個subset
        self.edges, self.edge_num = self.build_edges()
        self.differential_matrix = None
        self.weight_matrix = None
        self.generate_min_l2_norm_matrix()
    
    def build_edges(self):
        num_set = [i for i in range(self.n_dim)]  #子集數list
        s_set = set(num_set)  #轉集合
        cardlist = [len(i) for i in self.f_part.values()]  #各子集cardinality
        f_list = [i for i in self.f_part.values()]  #list of list
        #print(f'Receive {f_list}')
        s_subset = set(tuple(i) for i in f_list)
        edges = []
        for r in range(self.n_dim): 
            for v in combinations(num_set, r):
                v_set = set(v)
                adjunct_v = s_set - v_set
                for new_elem in adjunct_v:
                    d_set = v_set | {new_elem}
                    outlist, inlist = [],[]                    
                    for k in v_set:
                        outlist.extend(f_list[k])
                    for l in d_set:
                        inlist.extend(f_list[l])
                    edges.append(((np.array(sorted(outlist))),np.array(sorted(inlist))))
        return edges, len(edges)
    
    def get_elements(self, index):
        return tuple(self.f_part[index])

    def set_vertex_values(self, vertex_values):         #設置點值
        for v in vertex_values:                         #用鍵值來做查找
            self.vertex_values[v] = vertex_values[v]
        
    def does_edge_exist(self, v1, v2):
        if abs(len(v1)-len(v2))==1:
            interset = np.intersect1d(v1,v2)
            smaller = v1 if len(v1)<len(v2) else v2
            return True if np.array_equal(smaller, interset) else False
        else:
            return False
    
    # Establish the matrix A in the above formula: AX-Y
    def generate_differential_matrix(self):
        if self.differential_matrix is None:
            self.differential_matrix = np.zeros((self.edge_num+1, self.vertex_num))
            for i,v_pair in enumerate(self.edges):
                j = self.vertex_index[tuple(v_pair[1])]
                k = self.vertex_index[tuple(v_pair[0])]
                self.differential_matrix[i][j] = 1
                self.differential_matrix[i][k] = -1
            # Add one more equestion that x_0 = 0 into the matrix form
            self.differential_matrix[-1][0]=1
        return self.differential_matrix

    # Pre-calcuate "W=(A^T*A)^-1*A^T" for the formula "X = ((A^T*A)^-1*A^T)*Y
    def generate_min_l2_norm_matrix(self):
        matrix_A = self.generate_differential_matrix()
        matrix_A_T = np.transpose(matrix_A)
        self.weight_matrix = np.linalg.inv(matrix_A_T @ matrix_A) @ matrix_A_T

    def get_gradient_vector(self):
        gradient_vector = np.zeros(self.edge_num)
        for i,v_pair in enumerate(self.edges):
            gradient_vector[i] = self.vertex_values[tuple(v_pair[1])]-self.vertex_values[tuple(v_pair[0])]    
        return gradient_vector      
        
    def get_partial_gradient_vector(self,subset_i):  #feature->subset
        feature_i = self.get_elements(subset_i)
        partial_gradient_vector = np.zeros(self.edge_num)
        for i,v_pair in enumerate(self.edges):
            if (not set(feature_i).issubset(set(v_pair[0]))) and (set(feature_i).issubset(set(v_pair[1]))):
                partial_gradient_vector[i] = self.vertex_values[tuple(v_pair[1])]-self.vertex_values[tuple(v_pair[0])]    
        return partial_gradient_vector
    
    def resolve_vi(self, subset_i, phi_0=0):  #feature->subset
        pd = self.get_partial_gradient_vector(subset_i)
        # Append equation x_0=0 at the end of partial gradient vector.
        pd = np.append(pd, phi_0)
        vi = self.weight_matrix @ pd
        # Reconstruct the vertex values
        new_vertices = {}
        for i,v in enumerate(self.vertices):
            new_vertices[tuple(v)] = vi[i]
        return vi, new_vertices

In [3]:
#sample n = 1000
sample_num = 1000
df = pd.read_csv('data_with_output.csv')
df = df.drop(columns=['Output'])
scaler = StandardScaler()
X_standardized = scaler.fit_transform(df)
nf_df = pd.DataFrame(X_standardized)
print(nf_df.shape, '\n', nf_df.head())
s_df = nf_df.sample(n=sample_num, random_state=1000)
print(s_df)
s_df.to_csv(f'sample{sample_num}.csv')

(40976, 14) 
         0        1         2         3         4         5         6   \
0 -4.52677  0.05025 -1.046263 -0.543637 -0.597382 -1.525155 -0.339233   
1 -4.52677  3.02762 -0.437004  0.823823 -0.261885 -1.495607 -0.834969   
2 -4.52677  3.02762 -0.161470  0.451242  0.205512 -1.466059 -0.339233   
3 -4.52677  3.02762 -0.574794  0.737841  0.888293 -1.515306 -0.339233   
4 -4.52677  3.02762 -0.187324 -0.451514  2.848481 -1.517768  1.643708   

         7         8         9         10         11        12        13  
0 -0.423039  0.312691  0.434652 -0.189808 -10.071359 -0.636407 -1.197630  
1  0.053782  0.421800  0.420173 -0.014344  -0.057674 -0.157584  1.399181  
2 -0.416238  0.319511  0.246424  0.500908  -0.057674  0.142256 -0.945585  
3 -0.421906  0.530908  0.485328 -0.267792  -0.057674  0.519077 -0.290205  
4 -0.961784  0.449077  0.355017  0.360954  -0.057674  0.039486  2.032951  
             0         1         2         3         4         5         6   \
6023  -0.296354  0

In [4]:
# creat dataframe for saving vertex value
c,_,_ = generate_partition(F_P)
collist = [i.tolist() for i in c]
colname = [tuple(i) for i in collist]
vertex_df = pd.DataFrame(columns=colname)
print(vertex_df.shape,vertex_df)
if 'Output' in s_df.columns: 
    s_df = s_df.drop(columns=['Output'])

(0, 16) Empty DataFrame
Columns: [(), (0, 1), (2, 3), (4, 5, 6, 7, 8, 9, 10, 11), (12, 13), (0, 1, 2, 3), (0, 1, 4, 5, 6, 7, 8, 9, 10, 11), (0, 1, 12, 13), (2, 3, 4, 5, 6, 7, 8, 9, 10, 11), (2, 3, 12, 13), (4, 5, 6, 7, 8, 9, 10, 11, 12, 13), (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), (0, 1, 2, 3, 12, 13), (0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13), (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13), (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)]
Index: []


In [6]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
s1_model_path = 'stage_1_checkpoint.pth'
s1_model =  torch.load(s1_model_path, map_location=device).to(device)
json_file_path = 'controllable_para_v014_14.json'
tool_name = 'ASCVD'
with open(json_file_path, 'r') as f:
    params = json.load(f)[tool_name]
    print(params)
    f.close()
target_features = []

''' 
    The feature_translation is a list of tuples, each tuple contains two integers.
    The tuples record the correponding position of the ith feature in the 4*freature matrix
    as the input the prediction model.
    Ex.
    [(0, 0), (0, 1), 
     (1, 2), (1, 3), 
     (2, 4), (2, 5), (2, 6), (2, 7), (2, 8), (2, 9), (2, 10), (2, 11), 
     (3, 12),(3, 13)]
'''
# Flat the feature list and construct teh freature translation list to map feature into the input of the model
feature_translation = []
sub_op_num = 0
for entry in [sub_op for _, sub_op in params.items()]:
    if isinstance(entry, str):
        target_features.append(entry)
    else:
        target_features.extend(entry)
        feature_translation.extend([(sub_op_num,len(feature_translation)+j) for j in range(len(entry))])
        sub_op_num = sub_op_num+1 

print(target_features)
print(f"There are {sub_op_num} sub processes. So the feature translation is {feature_translation}")

def padding_zero(df, flag): 
    # 將一維參數matrix擴展為4維
    data_arr = df.to_numpy()
    result = []
    for data in data_arr:
        empty_arr = np.zeros((sub_op_num, len(feature_translation))) # chamber數 * 總參數數量
        for i, pos in enumerate(feature_translation):
            empty_arr[pos[0]][pos[1]] = data[i]
        if(flag == 1): # bert.py使用
            result.append(empty_arr)
        if(flag == 2): # bert_du.py使用
            result.append(empty_arr.tolist())
    
    if(flag == 1): # bert.py使用
        result = pd.DataFrame({'X': [result[i] for i in range(len(result))]})
    return result

{'EQ': ['X_-TACT_TIME_mean', 'X_-CONVEYOR_SPEED_mean'], 'PUMP': ['PUMP_high', 'PUMP_low'], 'CH': ['CLN1_over-etching-ratio', 'CLN1_EPT_time', 'clean_count', 'EPT_clean_count_ratio', 'NH3_TREAT_-RF_FREQ-max', 'NH3_TREAT_-RF_FREQ-range', 'NH3_TREAT_-RF_FREQ-mean', 'NP_3_-MFC_VOL_SIH4-range'], 'VENT': ['VENT_high', 'VENT_low'], 'y': 'Output'}
['X_-TACT_TIME_mean', 'X_-CONVEYOR_SPEED_mean', 'PUMP_high', 'PUMP_low', 'CLN1_over-etching-ratio', 'CLN1_EPT_time', 'clean_count', 'EPT_clean_count_ratio', 'NH3_TREAT_-RF_FREQ-max', 'NH3_TREAT_-RF_FREQ-range', 'NH3_TREAT_-RF_FREQ-mean', 'NP_3_-MFC_VOL_SIH4-range', 'VENT_high', 'VENT_low', 'Output']
There are 4 sub processes. So the feature translation is [(0, 0), (0, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6), (2, 7), (2, 8), (2, 9), (2, 10), (2, 11), (3, 12), (3, 13)]


In [15]:
# scaler = StandardScaler()
# X_standardized = scaler.fit_transform(s_df)
# nf_df = pd.DataFrame(X_standardized)
# print(nf_df.head())

         0         1         2         3         4         5         6   \
0 -0.304923  0.005068  0.621896  0.260290 -0.944142 -1.535447  2.065472   
1  0.742922  2.805651 -1.125247  0.730101  0.680975 -1.528042 -0.855299   
2 -1.003487  0.005068  0.765156  0.228976  0.619946  0.115899  1.578677   
3 -0.304923  0.005068 -0.204160  0.385580 -0.721909 -1.542852 -1.342094   
4 -1.003487 -1.328544  0.380797  0.542183 -0.612278  0.125772  0.605086   

         7         8         9         10        11        12        13  
0 -0.999699  0.554105  0.528570  0.320436 -0.060108  0.351599 -0.355518  
1  0.077931  0.581722  0.476775  1.071436 -0.060108 -0.111003  0.720120  
2 -0.866143  0.491967  0.439779  1.160648 -0.060108 -0.223048  0.214724  
3  1.509174 -2.014263 -2.223934 -1.633298 -0.060108  0.547788 -0.307819  
4 -0.668519 -2.552792 -2.793673 -1.963652 -0.060108  1.564819  0.646180  


In [7]:
# unit test of padding_zero function
print(nf_df.head(1))
result = padding_zero(nf_df, 1)
print([e for e in result.iloc[0]])
print(nf_df.shape)

        0        1         2         3         4         5         6   \
0 -4.52677  0.05025 -1.046263 -0.543637 -0.597382 -1.525155 -0.339233   

         7         8         9         10         11        12       13  
0 -0.423039  0.312691  0.434652 -0.189808 -10.071359 -0.636407 -1.19763  
[array([[ -4.52676957,   0.05025049,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        ,  -1.04626301,  -0.54363684,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        ,   0.        ,   0.        ,
         -0.59738229,  -1.52515499,  -0.3392332 ,  -0.42303916,
          0.31269149,   0.43465178,  -0.18980823, -10.07135875,
          0.        ,   0.        ],
       [  0.     

In [8]:
# Convert dataframe(n,1) in which element 'X' contains 4*feature array into nparray (n,4,14)
nf_df_4d = padding_zero(nf_df,flag=1)
nf_df_4d_arr = np.array([e for entry in nf_df_4d.values for e in entry])
print(nf_df_4d_arr.shape)

(40976, 4, 14)


In [9]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
s1_model.eval()
nf_df_4d_tensor = torch.tensor(nf_df_4d_arr,dtype=torch.float)
dataset = TensorDataset(nf_df_4d_tensor)
batch_size = int(nf_df_4d_tensor.size()[0])
loader = DataLoader(dataset, batch_size=batch_size)
outputs = []
with torch.no_grad():
    for batch_data in loader:
        # 将数据移到指定的设备上（如 CUDA 设备）
        batch_data = batch_data[0].to(device)
        
        # 将数据传递给模型进行推理
        batch_output = s1_model(batch_data)
        probs = (torch.nn.functional.softmax(batch_output, dim=1))
        # 将输出保存起来
        outputs += probs
print(len(outputs))
#print(outputs)

#取得模型平均和對應output
# output_arr = np.array([output.cpu().numpy()[0] for output in outputs])
# output_df = pd.DataFrame({'Output': output_arr})
# new_df= pd.concat([feature_df,output_df],axis=1)

KeyboardInterrupt: 

In [10]:
def model_inference(data):  #standardize dataframe
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    s1_model.eval()
    # scaler = StandardScaler()
    if 'Output' in data.columns:
        data = data.drop(columns=['Output'])
    # data_standardized_df = pd.DataFrame(scaler.fit_transform(data))
    #print(data.head())
    #print(f'Standardized data:\n{data_standardized_df.head(3)}')  # Debug
    
    data_4d = padding_zero(data,flag=1)
    
    #print(f'4D data:\n{data_4d.head()}')  # Debug
    
    data_4d_array = np.array([e for entry in data_4d.values for e in entry])
    data_4d_tensor = torch.tensor(data_4d_array,dtype=torch.float)
    
    #print(f'Tensor data shape: {data_4d_tensor.shape}')  # Debug 
    
    my_dataset = TensorDataset(data_4d_tensor)
    batch_size = min(256, int(data_4d_tensor.size()[0]))
    my_loader = DataLoader(my_dataset, batch_size=batch_size,num_workers=4)
    data_output = []
    with torch.no_grad():
        for batch_data in my_loader:
        # 将数据移到指定的设备上（如 CUDA 设备）
            batch_data = batch_data[0].to(device)
        # 将数据传递给模型进行推理
            batch_output = s1_model(batch_data)
            probs = (torch.nn.functional.softmax(batch_output, dim=1))
        # 将输出保存起来
            data_output += probs
    data_output_array = np.array([output.cpu().numpy()[0] for output in data_output])
    data_expectation_value = data_output_array.mean()
    return data_expectation_value, data_output_array

In [11]:
# 過濾掉FutureWarning
warnings.filterwarnings("ignore", category=FutureWarning)

# First, calculate the output mean of the experiment data
base_mean, details = model_inference(s_df)
print(f'Base mean: {base_mean}')

Base mean: 0.6264505982398987


In [12]:
para_num = 14
subset_sum = 4
#sample_num = 1000

vertices_fl = 'E_vi_202406020109.pkl'
# v_hist = vertices_fl.split('.')[0]+'_history.pkl'
# try:
#     with open(v_hist, 'rb') as f:
#         print(f'Loading form {v_hist}...')
#         v_history = pickle.load(f)
#         print(type(v_history))
# except FileNotFoundError:
#     v_history = {}

# print(f"History vertices: {v_history}")
#mean_exp = new_df['Output'].mean()
AUO_coalitions, _, _ = generate_partition(F_P) 
count_n = 0
if 'Output' in df.columns: 
    df = df.drop(columns=['Output'])
print(s_df.head())
print(nf_df.head())

             0         1         2         3         4         5         6   \
6023  -0.296354  0.050250  0.625376  0.250624 -0.800528 -1.515306  2.139444   
8192   0.761250  3.027620 -1.121511  0.680523  0.684116 -1.507919 -0.834969   
36611 -1.001423  0.050250  0.768615  0.221970  0.628361  0.131999  1.643708   
7854  -0.296354  0.050250 -0.200559  0.365270 -0.597504 -1.522693 -1.330704   
31514 -1.001423 -1.367545  0.384313  0.508570 -0.497349  0.141848  0.652238   

             7         8         9         10        11        12        13  
6023  -1.012265  0.565005  0.536005  0.320086 -0.057674  0.345084 -0.276528  
8192   0.051657  0.592282  0.485328  1.014444 -0.057674 -0.067252  0.692380  
36611 -0.880407  0.503631  0.449131  1.096928 -0.057674 -0.167123  0.237131  
7854   1.464693 -1.971767 -2.157093 -1.486293 -0.057674  0.519956 -0.233562  
31514 -0.685298 -2.503671 -2.714536 -1.791731 -0.057674  1.426479  0.625776  
        0        1         2         3         4         

In [13]:
for i_index in range(100):
    #print(f'index = {i_index}')
    instance = s_df.iloc[i_index]
    coalition_estimated_values = {}
    #print(f'coalition_estimated_values_start = {coalition_estimated_values}')
    eval_df = nf_df.sample(n=100)  #要不斷sample
    base_mean, details = model_inference(eval_df)
    #print(f'Base mean: {base_mean}')
    for coalition in AUO_coalitions: 
        vi_df = eval_df.copy()  #用copy()才不會去更改到原始的dataframe
        if len(coalition)!=0:
            vi_df.iloc[:,coalition] = instance.iloc[coalition]          
        #print(f'coalition = {coalition}')
        #print(vi_df.head(3))
        exp, details = model_inference(vi_df)
        #print(f'exp = {exp}')
        coalition_estimated_values[tuple(coalition)] =  exp - base_mean
        #print(f'coalition_estimated_values[tuple(coalition)] = {coalition_estimated_values[tuple(coalition)]}')
        count_n += 1  
        if count_n % 10 == 0:
            with open(vertices_fl, 'wb') as f:
                pickle.dump(vertex_df, f)
            print(f"Saved {count_n} vertex")
    #print(f'coalition_estimated_values = {coalition_estimated_values}')
    instance_df = pd.DataFrame([coalition_estimated_values])
    vertex_df = pd.concat([vertex_df, instance_df], ignore_index=True)            
   

with open(vertices_fl, 'wb') as f:
    pickle.dump(vertex_df, f)

Saved 10 vertex
Saved 20 vertex
Saved 30 vertex
Saved 40 vertex
Saved 50 vertex
Saved 60 vertex
Saved 70 vertex
Saved 80 vertex
Saved 90 vertex
Saved 100 vertex
Saved 110 vertex
Saved 120 vertex
Saved 130 vertex
Saved 140 vertex
Saved 150 vertex
Saved 160 vertex
Saved 170 vertex
Saved 180 vertex
Saved 190 vertex
Saved 200 vertex
Saved 210 vertex
Saved 220 vertex
Saved 230 vertex
Saved 240 vertex
Saved 250 vertex
Saved 260 vertex
Saved 270 vertex
Saved 280 vertex
Saved 290 vertex
Saved 300 vertex
Saved 310 vertex
Saved 320 vertex
Saved 330 vertex
Saved 340 vertex
Saved 350 vertex
Saved 360 vertex
Saved 370 vertex
Saved 380 vertex
Saved 390 vertex
Saved 400 vertex
Saved 410 vertex
Saved 420 vertex
Saved 430 vertex
Saved 440 vertex
Saved 450 vertex
Saved 460 vertex
Saved 470 vertex
Saved 480 vertex
Saved 490 vertex
Saved 500 vertex
Saved 510 vertex
Saved 520 vertex
Saved 530 vertex
Saved 540 vertex
Saved 550 vertex
Saved 560 vertex
Saved 570 vertex
Saved 580 vertex
Saved 590 vertex
Saved 

In [14]:
print(vertex_df.shape)

(100, 16)


In [18]:
for i_index in range(100,200):
    #print(f'index = {i_index}')
    instance = s_df.iloc[i_index]
    coalition_estimated_values = {}
    #print(f'coalition_estimated_values_start = {coalition_estimated_values}')
    eval_df = nf_df.sample(n=100)  #要不斷sample
    base_mean, details = model_inference(eval_df)
    #print(f'Base mean: {base_mean}')
    for coalition in AUO_coalitions: 
        vi_df = eval_df.copy()  #用copy()才不會去更改到原始的dataframe
        if len(coalition)!=0:
            vi_df.iloc[:,coalition] = instance.iloc[coalition]          
        #print(f'coalition = {coalition}')
        #print(vi_df.head(3))
        exp, details = model_inference(vi_df)
        #print(f'exp = {exp}')
        coalition_estimated_values[tuple(coalition)] =  exp - base_mean
        #print(f'coalition_estimated_values[tuple(coalition)] = {coalition_estimated_values[tuple(coalition)]}')
        count_n += 1  
        if count_n % 10 == 0:
            with open(vertices_fl, 'wb') as f:
                pickle.dump(vertex_df, f)
            print(f"Saved {count_n} vertex")
    #print(f'coalition_estimated_values = {coalition_estimated_values}')
    instance_df = pd.DataFrame([coalition_estimated_values])
    vertex_df = pd.concat([vertex_df, instance_df], ignore_index=True)            
   

with open(vertices_fl, 'wb') as f:
    pickle.dump(vertex_df, f)

Saved 1610 vertex
Saved 1620 vertex
Saved 1630 vertex
Saved 1640 vertex
Saved 1650 vertex
Saved 1660 vertex
Saved 1670 vertex
Saved 1680 vertex
Saved 1690 vertex
Saved 1700 vertex
Saved 1710 vertex
Saved 1720 vertex
Saved 1730 vertex
Saved 1740 vertex
Saved 1750 vertex
Saved 1760 vertex
Saved 1770 vertex
Saved 1780 vertex
Saved 1790 vertex
Saved 1800 vertex
Saved 1810 vertex
Saved 1820 vertex
Saved 1830 vertex
Saved 1840 vertex
Saved 1850 vertex
Saved 1860 vertex
Saved 1870 vertex
Saved 1880 vertex
Saved 1890 vertex
Saved 1900 vertex
Saved 1910 vertex
Saved 1920 vertex
Saved 1930 vertex
Saved 1940 vertex
Saved 1950 vertex
Saved 1960 vertex
Saved 1970 vertex
Saved 1980 vertex
Saved 1990 vertex
Saved 2000 vertex
Saved 2010 vertex
Saved 2020 vertex
Saved 2030 vertex
Saved 2040 vertex
Saved 2050 vertex
Saved 2060 vertex
Saved 2070 vertex
Saved 2080 vertex
Saved 2090 vertex
Saved 2100 vertex
Saved 2110 vertex
Saved 2120 vertex
Saved 2130 vertex
Saved 2140 vertex
Saved 2150 vertex
Saved 2160

In [20]:
print(vertex_df.shape)
print(vertex_df.iloc[0:5,:])

(200, 16)
    ()    (0, 1)    (2, 3)  (4, 5, 6, 7, 8, 9, 10, 11)  (12, 13)  \
0  0.0 -0.056516 -0.018734                    0.045142 -0.007646   
1  0.0 -0.001230 -0.003270                   -0.000125 -0.006132   
2  0.0  0.119478 -0.026200                   -0.011343  0.005535   
3  0.0 -0.078831  0.004747                   -0.341013 -0.012067   
4  0.0  0.079112 -0.027634                   -0.260073 -0.014424   

   (0, 1, 2, 3)  (0, 1, 4, 5, 6, 7, 8, 9, 10, 11)  (0, 1, 12, 13)  \
0     -0.106934                          0.020485       -0.078097   
1      0.054099                         -0.063368       -0.026565   
2      0.123736                          0.221338        0.119354   
3     -0.081727                         -0.353102       -0.099172   
4      0.059395                          0.129242        0.109980   

   (2, 3, 4, 5, 6, 7, 8, 9, 10, 11)  (2, 3, 12, 13)  \
0                          0.013593       -0.028639   
1                          0.031311       -0.008005   
2

In [22]:
for i_index in range(200,400):
    #print(f'index = {i_index}')
    instance = s_df.iloc[i_index]
    coalition_estimated_values = {}
    #print(f'coalition_estimated_values_start = {coalition_estimated_values}')
    eval_df = nf_df.sample(n=100)  #要不斷sample
    base_mean, details = model_inference(eval_df)
    #print(f'Base mean: {base_mean}')
    for coalition in AUO_coalitions: 
        vi_df = eval_df.copy()  #用copy()才不會去更改到原始的dataframe
        if len(coalition)!=0:
            vi_df.iloc[:,coalition] = instance.iloc[coalition]          
        #print(f'coalition = {coalition}')
        #print(vi_df.head(3))
        exp, details = model_inference(vi_df)
        #print(f'exp = {exp}')
        coalition_estimated_values[tuple(coalition)] =  exp - base_mean
        #print(f'coalition_estimated_values[tuple(coalition)] = {coalition_estimated_values[tuple(coalition)]}')
        count_n += 1  
        if count_n % 10 == 0:
            with open(vertices_fl, 'wb') as f:
                pickle.dump(vertex_df, f)
            print(f"Saved {count_n} vertex")
    #print(f'coalition_estimated_values = {coalition_estimated_values}')
    instance_df = pd.DataFrame([coalition_estimated_values])
    vertex_df = pd.concat([vertex_df, instance_df], ignore_index=True)            
   

with open(vertices_fl, 'wb') as f:
    pickle.dump(vertex_df, f)

Saved 3210 vertex
Saved 3220 vertex
Saved 3230 vertex
Saved 3240 vertex
Saved 3250 vertex
Saved 3260 vertex
Saved 3270 vertex
Saved 3280 vertex
Saved 3290 vertex
Saved 3300 vertex
Saved 3310 vertex
Saved 3320 vertex
Saved 3330 vertex
Saved 3340 vertex
Saved 3350 vertex
Saved 3360 vertex
Saved 3370 vertex
Saved 3380 vertex
Saved 3390 vertex
Saved 3400 vertex
Saved 3410 vertex
Saved 3420 vertex
Saved 3430 vertex
Saved 3440 vertex
Saved 3450 vertex
Saved 3460 vertex
Saved 3470 vertex
Saved 3480 vertex
Saved 3490 vertex
Saved 3500 vertex
Saved 3510 vertex
Saved 3520 vertex
Saved 3530 vertex
Saved 3540 vertex
Saved 3550 vertex
Saved 3560 vertex
Saved 3570 vertex
Saved 3580 vertex
Saved 3590 vertex
Saved 3600 vertex
Saved 3610 vertex
Saved 3620 vertex
Saved 3630 vertex
Saved 3640 vertex
Saved 3650 vertex
Saved 3660 vertex
Saved 3670 vertex
Saved 3680 vertex
Saved 3690 vertex
Saved 3700 vertex
Saved 3710 vertex
Saved 3720 vertex
Saved 3730 vertex
Saved 3740 vertex
Saved 3750 vertex
Saved 3760

In [24]:
print(vertex_df.tail)

<bound method NDFrame.tail of       ()    (0, 1)    (2, 3)  (4, 5, 6, 7, 8, 9, 10, 11)  (12, 13)  \
0    0.0 -0.056516 -0.018734                    0.045142 -0.007646   
1    0.0 -0.001230 -0.003270                   -0.000125 -0.006132   
2    0.0  0.119478 -0.026200                   -0.011343  0.005535   
3    0.0 -0.078831  0.004747                   -0.341013 -0.012067   
4    0.0  0.079112 -0.027634                   -0.260073 -0.014424   
..   ...       ...       ...                         ...       ...   
395  0.0 -0.064708 -0.005147                    0.101029 -0.007977   
396  0.0  0.055786 -0.020408                    0.059892 -0.034912   
397  0.0 -0.026820 -0.023915                    0.016681  0.003001   
398  0.0 -0.077867  0.017598                    0.081263 -0.020323   
399  0.0 -0.073594 -0.019889                    0.108478  0.007847   

     (0, 1, 2, 3)  (0, 1, 4, 5, 6, 7, 8, 9, 10, 11)  (0, 1, 12, 13)  \
0       -0.106934                          0.020485       

In [25]:
vertex_df.to_csv('vertex_val.csv', index=False) 

X_-TACT_TIME_mean          165.0
X_-CONVEYOR_SPEED_mean    4200.0
Name: 8192, dtype: float64

[0, 1]

Unnamed: 0,X_-TACT_TIME_mean,X_-CONVEYOR_SPEED_mean,PUMP_high,PUMP_low,CLN1_over-etching-ratio,CLN1_EPT_time,clean_count,EPT_clean_count_ratio,NH3_TREAT_-RF_FREQ-max,NH3_TREAT_-RF_FREQ-range,NH3_TREAT_-RF_FREQ-mean,NP_3_-MFC_VOL_SIH4-range,VENT_high,VENT_low
14635,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95
16943,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95
39741,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95
18373,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95
38402,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13747,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95
40005,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95
22449,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95
17346,165,4200,28942.83333,12625.12,0.007494,10008,2,5004.0,13988,451,13675.7692,1,16471.57333,7963.95


          ()    (0, 1)    (2, 3)  (4, 5, 6, 7, 8, 9, 10, 11)  (12, 13)  \
0   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
1   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
2   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
3   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
4   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
5   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
6   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
7   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
8   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
9   0.000000 -0.066063 -0.016101                   -0.006981 -0.001281   
10 -0.004485 -0.068757 -0.017998                    0.002006 -0.001748   
11 -0.004485 -0.068757 -0.017998                    0.002006 -0.001748   
12  0.000000 -0.056675 -0.012028      