In [30]:

import numpy as np
import itertools
class Hypercube:                #超立方體
    '''
    A class to create a hypercube object which stores values on vertices
    and values on the edges between neighboring vertices
    '''    
    #輸入維度、(點鍵值)、(點值)
    def __init__(self, n_vertices, vertex_keys = None, vertex_values = None):   
        self.n_vertices = n_vertices
        self.v_num = 2**self.n_vertices
        self.V = [np.array([])] + all_subsets(n_vertices)   #所有子集包含空集，即所有點
        self.V_value = {str(v) : 0 for v in self.V}         #所有點值  先設為0
        self.E = []                                         #所有邊
        self.E_value = {}                                   #邊值
        self.partial_gradient = {vertex : {} for vertex in range(n_vertices)}   #各個維度的partial gradient
        self.matrix = np.full((self.v_num,self.v_num),np.nan)
        self.partial_gradient_matrix = np.full((self.v_num,self.v_num),np.nan) 
        self.vi = [np.array([0]*(self.v_num)) for _ in range(self.n_vertices)]
        self.vi_matrix = [np.full((self.v_num,self.v_num),np.nan) for _ in range(self.n_vertices)] 
        
    def set_vertex_values(self, vertex_values):         #設置點值
        for v in vertex_values:                         #用鍵值來做查找
            self.V_value[v] = vertex_values[v]
            
        # edge values are the differences between neighboring vertex values
        #self._calculate_edges()
        
    '''def _calculate_edges(self):                 #計算邊值
        
        # calculate the usual gradients: the difference between neighboring edges
       # for i, v in enumerate(self.V):
            for _v in self.V[i+1:]:
                if self._vertices_form_a_valid_edge(v, _v):
                    self.E.append((v, _v))
                    self.E_value[str((v, _v))] = self.V_value[str(_v)] - self.V_value[str(v)]
        
        # calculate partial gradients
        for vertex in range(self.n_vertices):
            self.partial_gradient[vertex] = self.E_value.copy()
            for v, _v in self.E:
                is_relevant_edge_for_partial_gradient = (vertex in v and vertex not in _v) or (vertex in _v and vertex not in v)
                if not is_relevant_edge_for_partial_gradient:
                    self.partial_gradient[vertex][str((v, _v))] = 0'''
            
    def _vertices_form_a_valid_edge(self, v, _v):       #檢查交集和是否相鄰
        # vertices are neighbors in a hypercube
        # if they differ by exactly one element
        differ_in_size_by_1 = (abs(len(v) - len(_v)) == 1)
    
        the_intersection = np.intersect1d(v, _v)                #兩個集合的交集
        #print(type(v[0]),type(_v[0]),type(the_intersection[0]))
        intersection_is_nonempty = len(the_intersection) > 0 or len(v)==0 or len(_v) == 0
        is_intersection = False
        if len(the_intersection)>0:
            if len(the_intersection)==len(v) or len(the_intersection)==len(_v):
                is_intersection = True
        else:
            if len(v)==0 and len(_v)==1:
                is_intersection = True
       # print(is_intersection)
        return differ_in_size_by_1 and intersection_is_nonempty and is_intersection
    
    #create matrix for Hypercube
    
    def trans_to_matrix(self,feature_i):
        for i, v in enumerate(self.V):
            for j,_v in enumerate(self.V[i+1:]):
                if self._vertices_form_a_valid_edge(v, _v):
                    self.matrix[i][i+j+1] = self.V_value[str(_v)] - self.V_value[str(v)]
                    self.matrix[i+j+1][i] = self.V_value[str(v)] - self.V_value[str(_v)]
        
            
        self.partial_gradient_matrix = self.matrix.copy()
        for j, v in enumerate(self.V):
            for k,_v in enumerate(self.V[j+1:]):
                if self._vertices_form_a_valid_edge(v, _v):
                    is_relevant_edge_for_partial_gradient = (feature_i in v and feature_i not in _v) or (feature_i in _v and feature_i not in v)
                    if not is_relevant_edge_for_partial_gradient:
                        self.partial_gradient_matrix[j][j+k+1] = 0
                        self.partial_gradient_matrix[j+k+1][j] = 0
        self.vi[feature_i] = self.shapley_residuals_in_matrix()
        #self.vi[i] = self.shapley_residuals_in_matrix
    def shapley_residuals_in_matrix(self):
            derivative_i  = np.full((self.v_num,self.v_num),0)
            b_i = np.array([0]*self.v_num)
            for j  in range(self.v_num):
                for k in range(self.v_num):
                    if np.isnan(self.partial_gradient_matrix[j][k]):
                        continue
                    elif j == 0 or k ==0:
                        derivative_i[j][j] += 1

                        b_i[j] += - self.partial_gradient_matrix[j][k]
                    else:
                        derivative_i[j][j] += 1
                        derivative_i[j][k] += -1
                        b_i[j] += - self.partial_gradient_matrix[j][k]
            #print(derivative_i.shape,b_i.shape)
            A = derivative_i[1:,1:]
            print(A)
            print(np.linalg.det(A))
            B = b_i[1:]
            print(B)
            A_inverse = np.linalg.inv(A)
            vi = np.insert(np.dot(A_inverse,B),0,0)
            vi_V =  [np.array([])] + all_subsets(self.n_vertices)
            vi_V_value = {str(v) : 0 for v in vi_V} 
            for k,v in enumerate(vi_V):               
                print('k',k,'v',v)  
                print('vi_V_value:',vi_V_value,'v:',v) 
            print('vi_V_value:', vi_V_value)
            return vi
    
            
            
    #def trans_partial_matrix(self):
        
####################
def all_subsets(n_elts):
    '''
        returns a list of 2^{n_elts} lists
        each a different subset of {1, 2,...,n_elts}
    '''
    res = [np.array(list(itertools.combinations(set(range(n_elts)), i))) for i in range(n_elts)]
    res = {i : res[i] for i in range(n_elts)}
    res[n_elts] = np.array([i for i in range(n_elts)]).reshape(1,-1)
    return [res[i][j] for i in range(1,n_elts+1) for j in range(res[i].shape[0])]

def get_residual(old_cube, new_cube, vertex):   #計算殘差
    '''
    returns: residual dictionary
        
        { edge : ▼_player_v[edge] - ▼v_player[edge] for edge in old_cube }
    '''
    assert set(old_cube.E_value.keys()) == set(new_cube.E_value.keys())     #判斷兩個字典中鍵值的組合是否相同。assert:
    res = {}
    for e in old_cube.E_value.keys():
        res[e] = old_cube.partial_gradient[vertex][e] - new_cube.E_value[e] #對應某特徵的邊相減 即gradient_i_v - gradient_v_i(殘差)
    return res
count = [0,0,0,0,0,0,0,0,0,0,0,0,0,0]
#記得添加number of vertex
def residual_norm(old_cube, vertex_values, vertex,num_features):     #old_cube是原本的SHAP得到的立方體
    '''
    old_cube: v, our game
    vertex: player
    vertex_values: v_player, proposed game
    
    assumes that the order of the values in vertex_values align with the order of the values in old_cube.V
    
    returns: || ▼_player_v - ▼v_player ||
    '''
    if count[vertex]==0 :
        count[vertex] += 1
    new_cube = Hypercube(num_features)
    new_cube.set_vertex_values({str(_vertex) : vertex_values[j] for j, _vertex in enumerate(old_cube.V)})   #將數值設定成0.5
    return np.sum([(r)**2 for r in get_residual(old_cube, new_cube, vertex).values()]), get_residual(old_cube, new_cube, vertex).values() #計算所有residual造成的影響加總
#改一下參數
def compute_residuals_v(old_cube,vertex_of_v_i_cube,_v,num_features):            #(instance cube,算出來的v_i cube,這個cube的指定feature)
    new_vertex =  np.append(np.array(0), vertex_of_v_i_cube)
    new_c = Hypercube(num_features)
    coalitions = [np.array([])] + all_subsets(num_features)
    b = {}
    for i, coalition in enumerate(coalitions):
        b[str(coalition)] = new_vertex[i]
    new_c.set_vertex_values(b)
    res = get_residual(old_cube,new_c,_v)
    return(res.values())



In [None]:
A = Hypercube(14)


In [31]:
feature_num = 3
import numpy as np 
import itertools
def all_subsets(n_elts):
    '''
        returns a list of 2^{n_elts} lists
        each a different subset of {1, 2,...,n_elts}
    '''
    res = [np.array(list(itertools.combinations(set(range(n_elts)), i))) for i in range(n_elts)]
    res = {i : res[i] for i in range(n_elts)}
    res[n_elts] = np.array([i for i in range(n_elts)]).reshape(1,-1)
    return [res[i][j] for i in range(1,n_elts+1) for j in range(res[i].shape[0])]
def f_x(x_list):
    return 1*x_list[0]+ 2*x_list[1]*x_list[2]
pers = [np.array([])] + all_subsets(3)   #所有帶值subset
test_value = []
a={}
for i,per in enumerate(pers):
    x_l = [0,0,0]
    for j in range(3):
        if j in per:
            x_l[j] = 1
    test_value.append(f_x(x_l))
    a[str(per)] = test_value[i]
print(a)
test_cube = Hypercube(3)
test_cube.set_vertex_values(a)
from scipy.optimize import minimize 
x0 = np.array([0.5]*7)
f0 = lambda x : residual_norm(test_cube, np.append(np.array(0), x), 0,3)[0]
f1 = lambda x : residual_norm(test_cube, np.append(np.array(0), x), 1,3)[0]
f2 = lambda x : residual_norm(test_cube, np.append(np.array(0), x), 2,3)[0]

    #print('solving first cube...')
v0 = minimize(f0, x0)           #最小化殘差
    #print('..done')
    #print('solving second cube...')
v1 = minimize(f1, x0)
    #print('..done')

    #print('solving third cube...')
v2 = minimize(f2, x0)
    #print('..done')
    
# residual = ||▼_feature_cube - ▼cube_feature|| after optimization
b = compute_residuals_v(test_cube,v0.x,0,3)
gradient_vi = [v0.x, v1.x, v2.x]
res_r = []
#for k  in range(3):
    #b = compute_residuals_v(test_cube,gradient_vi[k],k,3)
    #round_b = [ round(k,2) for k in b ]
    #print(round_b)
    #res_r.append(sum([abs(r) for r in b]))
#print(test_cube.partial_gradient[1].values())
#print(test_cube.E_value.values())
#print(res_r)
test_cube.trans_to_matrix(1)
test_cube.shapley_residuals_in_matrix()
print('=========')
print(test_cube.matrix)
print(test_cube.partial_gradient_matrix)
print(test_cube.vi[1])


{'[]': 0, '[0]': 1, '[1]': 0, '[2]': 0, '[0 1]': 1, '[0 2]': 1, '[1 2]': 2, '[0 1 2]': 3}
[[ 3  0  0 -1 -1  0  0]
 [ 0  3  0 -1  0 -1  0]
 [ 0  0  3  0 -1 -1  0]
 [-1 -1  0  3  0  0 -1]
 [-1  0 -1  0  3  0 -1]
 [ 0 -1 -1  0  0  3 -1]
 [ 0  0  0 -1 -1 -1  3]]
384.0
[ 0  0 -2  0 -2  2  2]
k 0 v []
vi_V_value: {'[]': 0, '[0]': 0, '[1]': 0, '[2]': 0, '[0 1]': 0, '[0 2]': 0, '[1 2]': 0, '[0 1 2]': 0} v: []
k 1 v [0]
vi_V_value: {'[]': 0, '[0]': 0, '[1]': 0, '[2]': 0, '[0 1]': 0, '[0 2]': 0, '[1 2]': 0, '[0 1 2]': 0} v: [0]
k 2 v [1]
vi_V_value: {'[]': 0, '[0]': 0, '[1]': 0, '[2]': 0, '[0 1]': 0, '[0 2]': 0, '[1 2]': 0, '[0 1 2]': 0} v: [1]
k 3 v [2]
vi_V_value: {'[]': 0, '[0]': 0, '[1]': 0, '[2]': 0, '[0 1]': 0, '[0 2]': 0, '[1 2]': 0, '[0 1 2]': 0} v: [2]
k 4 v [0 1]
vi_V_value: {'[]': 0, '[0]': 0, '[1]': 0, '[2]': 0, '[0 1]': 0, '[0 2]': 0, '[1 2]': 0, '[0 1 2]': 0} v: [0 1]
k 5 v [0 2]
vi_V_value: {'[]': 0, '[0]': 0, '[1]': 0, '[2]': 0, '[0 1]': 0, '[0 2]': 0, '[1 2]': 0, '[0 1 2]': 0} v

In [None]:
# Define the objective function (assuming it works with CuPy arrays)
def objective_function(x):
    # Your objective function implementation goes here
    return cp.sum(x ** 2)

# Convert the initial guess to a CuPy array
x0 = cp.array([0.5] * 7)

# Use CuPy's minimize function to find the minimum
result = cp.optimize.minimize(objective_function, x0)

print("Optimized result:", result.x)

In [None]:
import itertools
import pandas as pd 
import numpy as np 
import shap
from shap import KernelExplainer # shap套件
from sklearn.ensemble import RandomForestClassifier #randomforest
#create dataset
x1 = np.random.randn(500)
x2 = np.random.randn(500)
x3 = np.random.randn(500)
# label depends on interaction of X1 and X2, and not at all on X3
y = np.intp(x1*x2< 1) 
df = pd.DataFrame({"Y":y, "X1":x1, "X2":x2, "X3":x3})
features = df.iloc[:,[1,2,3]]
labels = df.iloc[:,0]
#print(df.head(20))

#train model and kernelSHAP
# train random forest 
shapley_value_mean = [0,0,0]
resi = [0,0,0]
new_resi = [0,0,0]
new_n = [[0,0,0,0],[0,0,0,0],[0,0,0,0]]
model = RandomForestClassifier(n_estimators=25) 
model.fit(features, labels)  
def all_subsets(n_elts):
    '''
        returns a list of 2^{n_elts} lists
        each a different subset of {1, 2,...,n_elts}
    '''
    res = [np.array(list(itertools.combinations(set(range(n_elts)), i))) for i in range(n_elts)]
    res = {i : res[i] for i in range(n_elts)}
    res[n_elts] = np.array([i for i in range(n_elts)]).reshape(1,-1)
    return [res[i][j] for i in range(1,n_elts+1) for j in range(res[i].shape[0])]
#測試沒問題，acc94%
#acc = 0.
#sum = 0.
#pred = model.predict(features_test)
#for i in range(len(pred)):
#    sum += 1
#    if pred[i] == labels_test[i]:
#        acc += 1
#accuracy = acc / sum
#print('accuracy = ',accuracy)
# train explainer on the model and the data 
#挑選前50筆資料做為background data，background data的目的是提供base_line等等
explainer = KernelExplainer(model.predict_proba,features[:500],link='logit')
flag = 1
#print(features_test[:5])
for i in range(200):
    #print('see where')
    instance = features.values[i,:]         #features.values把features轉成numpy數組
    shap_values = explainer.shap_values(features.values[i,:])
#print(shap_values)
#shap.summary_plot(shap_values,features_test,show=False)
#處理subset
    coalition_estimated_values = {str(np.array([])): 0} #建立字典，key是將空numpy array字串化，值都是0
    coalitions = [np.array([])] + all_subsets(3)   #所有帶值subset
    #print('instance: ',instance)
#print('explainer.y: ',explainer.y)
   #print(explainer.y[:][:,1])
    #print('資料及預測模型平均:',np.mean(labels))
    
    for coalition in coalitions:                #製作合成資料集，並取得資料集中相同特徵子集的模型預測平均
        
        synth = pd.DataFrame(explainer.synth_data)
        if flag:
            flag = 0
            synth.to_csv('synth_data.csv', index=False) 
        for feature in coalition:
            synth = synth[synth[feature] == instance[feature]]      #符合相同特徵子集的合成資料
            model_mean = np.mean(labels)
            impact = np.mean(explainer.y[synth.index][:,1]) - model_mean    #計算符合相同特徵子集的合成資料的標籤平均值減掉原始資料集的baseline      
            coalition_estimated_values[str(coalition)] = impact
    #製作資料集
  
        
    #synth.to_csv('synth_data.csv', index=False) 
#print (model.predict_proba([instance]))
#print (model.predict_proba([instance])[:,1])
#coalition_estimated_values['[0 1 2]'] = np.mean(model.predict_proba([instance])[:,1] - model_mean)
   # print('coalition_estimated_values: ',coalition_estimated_values)
    cube = Hypercube(3)
    cube.set_vertex_values(coalition_estimated_values)
    #print(cube.E_value)
# constrained optimization: 
# the null vertex must always have value 0 since it represents the empty coalition,
# but all other vertices in the new cube are subject to the minimizer
# so x0 has 7 elements instead of 8, and we always append 
# a 0 to the head of the input array in the optimized functions
    from scipy.optimize import minimize 
    x0 = np.array([0.5]*7)
    f0 = lambda x : residual_norm(cube, np.append(np.array(0), x), 0)[0]
    f1 = lambda x : residual_norm(cube, np.append(np.array(0), x), 1)[0]
    f2 = lambda x : residual_norm(cube, np.append(np.array(0), x), 2)[0]

    #print('solving first cube...')
    v0 = minimize(f0, x0)           #最小化殘差
    #print('..done')
    #print('solving second cube...')
    v1 = minimize(f1, x0)
    #print('..done')

    #print('solving third cube...')
    v2 = minimize(f2, x0)
    #print('..done')
  
    v = [v0.x,v1.x,v2.x]
    #算residuals
    shapley_value = []
    res_r = []
    vi = [v0.x, v1.x, v2.x]
    #print(vi)
    for j in range(3):
        b = compute_residuals_v(cube,vi[j],j)
        res_r.append(sum([abs(r) for r in b]))
        shapley_value.append(vi[j][-1])
        shapley_value_mean[j] +=  abs(shapley_value[j])/200.
        resi[j] += res_r[j]/200.
    #new_res計算
    new_res = [0,0,0]
    #print('p_g: ',cube.partial_gradient[0])
    for k in range(3):
        p_g_e = []
        for e in cube.partial_gradient[k]:
            if cube.partial_gradient[k][e]!=0:
                p_g_e.append(cube.partial_gradient[k][e])
        #print('p_g_e:',p_g_e)
        p_g_e_n = (p_g_e - np.mean(p_g_e))/np.std(p_g_e)
        p_g_e_v = np.std(p_g_e)/np.mean(p_g_e)
        new_res[k] = abs(p_g_e_v)
        new_resi[k] += new_res[k]
        sum_list = [x+abs(y) for x,y in zip(new_n[k],p_g_e_n)]
        new_n[k] = sum_list
    #print('原始偏移量: ',cube.V_value["[0 1 2]"],'f0偏移: ',v0.x[-1],'f1偏移: ',v1.x[-1],'f2偏移: ',v2.x[-1],'偏移相加: ',v0.x[-1]+v1.x[-1]+v2.x[-1])
print('特徵與其餘特徵倆倆關係: ','f1:', new_n[0],'f2:',new_n[1],'f3:',new_n[2])
print('特徵的標準差偏移，值越大影響越大: ',new_resi)
print('SHapley_residuals: ',resi)
print(shapley_value_mean)


In [None]:
def trans_to_matrix(Hypercube,)

In [None]:
import numpy as np 
import itertools
def all_subsets(n_elts):
    '''
        returns a list of 2^{n_elts} lists
        each a different subset of {1, 2,...,n_elts}
    '''
    res = [np.array(list(itertools.combinations(set(range(n_elts)), i))) for i in range(n_elts)]
    res = {i : res[i] for i in range(n_elts)}
    res[n_elts] = np.array([i for i in range(n_elts)]).reshape(1,-1)
    return [res[i][j] for i in range(1,n_elts+1) for j in range(res[i].shape[0])]
def f_x(x_list):
    return 1*x_list[0]+ 2*x_list[1]*x_list[2]
pers = [np.array([])] + all_subsets(3)   #所有帶值subset
test_value = []
a={}
for i,per in enumerate(pers):
    x_l = [0,0,0]
    for j in range(3):
        if j in per:
            x_l[j] = 1
    test_value.append(f_x(x_l))
    a[str(per)] = test_value[i]
print(a)
test_cube = Hypercube(3)
test_cube.set_vertex_values(a)
from scipy.optimize import minimize 
x0 = np.array([0.5]*7)
f0 = lambda x : residual_norm(test_cube, np.append(np.array(0), x), 0,3)[0]
f1 = lambda x : residual_norm(test_cube, np.append(np.array(0), x), 1,3)[0]
f2 = lambda x : residual_norm(test_cube, np.append(np.array(0), x), 2,3)[0]

    #print('solving first cube...')
v0 = minimize(f0, x0)           #最小化殘差
    #print('..done')
    #print('solving second cube...')
v1 = minimize(f1, x0)
    #print('..done')

    #print('solving third cube...')
v2 = minimize(f2, x0)
    #print('..done')
test_cube.trans_to_matrix()
# residual = ||▼_feature_cube - ▼cube_feature|| after optimization
b = compute_residuals_v(test_cube,v0.x,0,3)
gradient_vi = [v0.x, v1.x, v2.x]
res_r = []
for k  in range(3):
    b = compute_residuals_v(test_cube,gradient_vi[k],k,3)
    round_b = [ round(k,2) for k in b ]
    print(round_b)
    res_r.append(sum([abs(r) for r in b]))
print(test_cube.partial_gradient[1].values())
print(test_cube.E_value.values())
print(res_r)



In [None]:
'''cube = Hypercube(3)
cube.set_vertex_values(coalition_estimated_values)
print(cube.E_value)
# constrained optimization: 
# the null vertex must always have value 0 since it represents the empty coalition,
# but all other vertices in the new cube are subject to the minimizer
# so x0 has 7 elements instead of 8, and we always append 
# a 0 to the head of the input array in the optimized functions
from scipy.optimize import minimize 
x0 = np.array([0.5]*7)
f0 = lambda x : residual_norm(cube, np.append(np.array(0), x), 0)
f1 = lambda x : residual_norm(cube, np.append(np.array(0), x), 1)
f2 = lambda x : residual_norm(cube, np.append(np.array(0), x), 2)

print('solving first cube...')
v0 = minimize(f0, x0)           #最小化殘差
print('..done')
print('solving second cube...')
v1 = minimize(f1, x0)
print('..done')

print('solving third cube...')
v2 = minimize(f2, x0)
print('..done')
    
# residual = ||▼_feature_cube - ▼cube_feature|| after optimization
residuals = [v0.fun, v1.fun, v2.fun]
print(residuals)'''

In [None]:
data1 = np.array([1, 2, 3, 4, 5])
data2 = np.array([10, 20, 30, 40, 50])

# 计算第一组数据集的标准差
std1 = np.std(data1)/np.mean(data1)

# 计算第二组数据集的标准差
std2 = np.std(data2)/np.mean(data2)

print(std1,std2)

In [None]:
a = [1,2,3]
b = [1,1,1]
c = []

In [None]:
import random
coalition_estimated_values = {str(np.array([])): 0} #建立字典，key是將空numpy array字串化，值都是0
coalitions = [np.array([])] + all_subsets(3)   #所有帶值subset
instance = [0,1,2]
dataset = []
num = 100
for coalition in coalitions:
    s = len(coalition)**4 + 1
    create_data = []
    for i in range(int(100/s)):
        create_data.append([random.random(),random.random(),random.random()])
    for feature in coalition:
        for data in create_data:
            data[feature] = instance[feature] 
    dataset += create_data
for data in dataset:
    print(data)

友達資料處理

In [16]:
import os
import time
import pandas as pd
import numpy as np
import json
import torch
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader
from data_process import read_selected_data, get_y,  split_data, compute_class_weights
from dataset import BertDataset
from model import BertClassifier
from training import train_model
from utils import draw_pics, initial_record
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
import matplotlib.pyplot as plt
total_params = 14
csv_file_path = '/hcds_vol/private/luffy/GANGAN-master/data/processed_data/v014_stage_1.csv'
json_file_path = '/hcds_vol/private/luffy/GANGAN-master/data/controllable_para_v014_14.json'
tool_name = 'ASCVD'
epochs = 50000
lr = 1e-5
batch_size = 1024
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
save_folder_name = 'stage-1-param_'+str(total_params)+'-batch_'+str(batch_size)+'-lr_'+str(lr)
with open(json_file_path, 'r') as f:
    params = json.load(f)
    f.close()
s1_df = pd.read_csv(csv_file_path)
#s1_df.shape
all_key = list(params[tool_name]) 
params_list = []                #取得json檔內的特徵
for key in all_key:
    all_param = params[tool_name][key]
    if(type(all_param) == list):
        for param in all_param:
             params_list.append(param)
    else:
        params_list.append(all_param)
    
# 取得Json檔內包含的特徵
s1_df = s1_df[params_list] 
#print(params)
s1_df.head(10)
feature_df = s1_df.drop(['DFT_CNT'], axis=1)


  from .autonotebook import tqdm as notebook_tqdm


In [17]:
scaler = StandardScaler()
X_standardized = scaler.fit_transform(feature_df)
nf_df = pd.DataFrame(X_standardized)
nf_df
param_group = [] # [2,2,4,2]
all_key = list(params[tool_name]) # ['EQ', 'PUMP', 'CH', 'VENT', 'y']
all_key.remove('y')

for key in all_key:
    all_value = params[tool_name][key]
    param_group.append(len(all_value))
param_group

[2, 2, 8, 2]

In [18]:
def padding_zero(df, tool_name, total_params, flag, params=params): 
    # 將一維參數matrix擴展為4維
    data_arr = df.to_numpy()
    result = []
    for i in range(len(data_arr)):
        arr_index = 0
        empty_arr = np.zeros((4,total_params)) # chamber數 * 總參數數量
        param_group_cp = param_group.copy()
        for j in range(len(empty_arr)):
            while(param_group_cp[j] > 0):
                empty_arr[j][arr_index] = data_arr[i][arr_index]
                param_group_cp[j] -= 1
                arr_index += 1
        
        if(flag == 1): # bert.py使用
            result.append(empty_arr)
        if(flag == 2): # bert_du.py使用
            result.append(empty_arr.tolist())
    
    if(flag == 1): # bert.py使用
        result = pd.DataFrame({'X': [result[i] for i in range(len(result))]})
    return result

In [19]:
nf_df_4d = padding_zero(nf_df,tool_name,total_params,flag=1)
nf_df_4d_object = nf_df_4d.to_numpy()
nf_df_4d_list = []
for i in range(len(nf_df_4d_object)):
    nf_df_4d_list.append(nf_df_4d_object[i][0])
nf_df_4d_arr = np.array(nf_df_4d_list)

友達資料模型預測及平均

In [20]:
from torch.utils.data import DataLoader, TensorDataset
s1_model_path = '/hcds_vol/private/luffy/GANGAN-master/model/predictor/stage_1_checkpoint.pth'
s1_model =  torch.load(s1_model_path).to(device)
s1_model.eval()
nf_df_4d_tensor = torch.tensor(nf_df_4d_arr,dtype=torch.float)
dataset = TensorDataset(nf_df_4d_tensor)
batch_size = 256
loader = DataLoader(dataset, batch_size=batch_size)
outputs = []
with torch.no_grad():
    for batch_data in loader:
        # 将数据移到指定的设备上（如 CUDA 设备）
        batch_data = batch_data[0].to(device)
        
        # 将数据传递给模型进行推理
        batch_output = s1_model(batch_data)
        probs = (torch.nn.functional.softmax(batch_output, dim=1))
        # 将输出保存起来
        outputs += probs

取得模型平均和對應output

In [21]:

output_arr = np.array([output.cpu().numpy()[0] for output in outputs])
output_df = pd.DataFrame({'Output': output_arr})
new_df = pd.concat([feature_df,output_df],axis=1)


In [34]:
import numpy as np 
import itertools
def all_subsets(n_elts):
    '''
        returns a list of 2^{n_elts} lists
        each a different subset of {1, 2,...,n_elts}
    '''
    res = [np.array(list(itertools.combinations(set(range(n_elts)), i))) for i in range(n_elts)]
    res = {i : res[i] for i in range(n_elts)}
    res[n_elts] = np.array([i for i in range(n_elts)]).reshape(1,-1)
    return [res[i][j] for i in range(1,n_elts+1) for j in range(res[i].shape[0])]
AUO_coalitions = [np.array([])] + all_subsets(14) 
coalition_estimated_values = {}
instance = new_df.iloc[0]
synth = new_df 
mean_exp = synth['Output'].mean()
count_n = 0
intervals = 0.15
for coalition in AUO_coalitions:
    synth = new_df 
    for feature in coalition:
       lower_limit = instance[params_list[feature]]*(1-intervals)
       upper_limit =  instance[params_list[feature]]*(1+intervals)
       synth = synth[(synth[params_list[feature]]>=lower_limit)&(synth[params_list[feature]]<=upper_limit)]
    if count_n==25:
        print('資料集長度:',len(synth))
        print('feature數:',len(coalition))
        count_n = 0
    count_n += 1
    impact = synth['Output'].mean() - mean_exp
    coalition_estimated_values[str(coalition)] = impact

資料集長度: 1
feature數: 2
資料集長度: 2450
feature數: 2
資料集長度: 1
feature數: 2
資料集長度: 1
feature數: 2
資料集長度: 112
feature數: 3
資料集長度: 329
feature數: 3
資料集長度: 1
feature數: 3
資料集長度: 14906
feature數: 3
資料集長度: 4924
feature數: 3
資料集長度: 573
feature數: 3
資料集長度: 1937
feature數: 3
資料集長度: 6141
feature數: 3
資料集長度: 3948
feature數: 3
資料集長度: 102
feature數: 3
資料集長度: 817
feature數: 3
資料集長度: 11415
feature數: 3
資料集長度: 6712
feature數: 3
資料集長度: 33765
feature數: 3
資料集長度: 110
feature數: 4
資料集長度: 56
feature數: 4
資料集長度: 127
feature數: 4
資料集長度: 5
feature數: 4
資料集長度: 6
feature數: 4
資料集長度: 56
feature數: 4
資料集長度: 126
feature數: 4
資料集長度: 3
feature數: 4
資料集長度: 59
feature數: 4
資料集長度: 59
feature數: 4
資料集長度: 22
feature數: 4
資料集長度: 1
feature數: 4
資料集長度: 325
feature數: 4
資料集長度: 2145
feature數: 4
資料集長度: 1
feature數: 4
資料集長度: 1
feature數: 4
資料集長度: 315
feature數: 4
資料集長度: 1400
feature數: 4
資料集長度: 420
feature數: 4
資料集長度: 1
feature數: 4
資料集長度: 1
feature數: 4
資料集長度: 993
feature數: 4
資料集長度: 159
feature數: 4
資料集長度: 215
feature數: 4
資料集長度: 2024
feature數: 4
資料集長度: 1
feature數: 4
資料集長

In [10]:
AUOcube = Hypercube(14)


In [11]:
AUOcube.set_vertex_values(coalition_estimated_values)

In [12]:
AUOcube.trans_to_matrix(feature_i=0)
AUOcube.shapley_residuals_in_matrix()

[[14  0  0 ...  0  0  0]
 [ 0 14  0 ...  0  0  0]
 [ 0  0 14 ...  0  0  0]
 ...
 [ 0  0  0 ... 14  0 -1]
 [ 0  0  0 ...  0 14 -1]
 [ 0  0  0 ... -1 -1 14]]


  r = _umath_linalg.det(a, signature=signature)


inf
[0. 0. 0. ... 0. 0. 0.]


: 

In [3]:
!nvidia -smi

/bin/bash: nvidia: command not found


In [None]:
AUOcube.trans_to_matrix()
AUOcube.shapley_residuals_in_matrix()

In [None]:
a = [1,2]
b = []
c = [1,2,3]
if a==c or b==c:
    print(1)
else: 
    print(0)

In [None]:
A = np.random.rand(10000, 10000)
b = np.random.rand(10000)
c = b/A
print(c.shape)

In [1]:
!nvidia-smi

Fri Mar 22 16:37:37 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.147.05   Driver Version: 525.147.05   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  Off |
|  0%   45C    P8    19W / 480W |     17MiB / 24564MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:07:00.0 Off |                  Off |
|  0%   43C    P8    19W / 480W |      6MiB / 24564MiB |      0%      Default |
|       