In [1]:
import numpy as np
from wsvm.application import Wsvm
from svm.application import Svm
from sklearn.svm import SVC
#from sklearn.metrics import f1_score
from sklearn.metrics  import classification_report,precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score, confusion_matrix,roc_auc_score,f1_score
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import _safe_indexing
from sklearn import metrics
import math
from datetime import datetime
from fuzzy.weight import fuzzy
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
import csv

In [2]:
def svm_lib(X_train, y_train, X_test):
    """Baseline classifier: scikit-learn's linear-kernel ``SVC``.

    Parameters
    ----------
    X_train, y_train : training features and labels handed to ``SVC.fit``.
    X_test : features to label.

    Returns
    -------
    The value of ``SVC.predict(X_test)``.
    """
    # ``fit`` returns the estimator itself, so fitting and predicting can be chained.
    fitted = SVC(probability=True, kernel='linear').fit(X_train, y_train)
    return fitted.predict(X_test)

In [3]:
def wsvm(C, X_train, y_train, X_test, distribution_weight=None):
    """Train the project's weighted SVM (``Wsvm``) and label the test split.

    Parameters
    ----------
    C : first constructor argument of ``Wsvm``.
    X_train, y_train : training features and labels passed to ``fit``.
    X_test : features to label.
    distribution_weight : second constructor argument of ``Wsvm``
        (per-sample weights; ``None`` by default).

    Returns
    -------
    The value of ``Wsvm.predict(X_test)``.
    """
    classifier = Wsvm(C, distribution_weight)
    classifier.fit(X_train, y_train)
    return classifier.predict(X_test)

In [4]:
def svm(C, X_train, y_train, X_test):
    """Train the project's unweighted SVM (``Svm``) and label the test split.

    Parameters
    ----------
    C : constructor argument of ``Svm``.
    X_train, y_train : training features and labels passed to ``fit``.
    X_test : features to label.

    Returns
    -------
    The value of ``Svm.predict(X_test)``.
    """
    classifier = Svm(C)
    classifier.fit(X_train, y_train)
    return classifier.predict(X_test)

In [5]:
def is_tomek_new(X, y, class_type):
    """Find Tomek-link pairs (TLPs) anchored on the classes listed in ``class_type``.

    Two samples form a Tomek link when they carry different labels and each
    one is the other's nearest neighbour.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix handed to ``NearestNeighbors``.
    y : array-like of shape (n_samples,)
        Class labels. Converted with ``np.asarray`` so indexing is always
        positional (a pandas Series with a non-default index would otherwise
        be indexed by label).
    class_type : sequence
        Labels whose samples are examined; samples of every other class are
        only ever reported as the *partner* of a link.

    Returns
    -------
    numpy.ndarray of shape (n_links, 3), integer dtype
        One row per link, ordered by the index of the examined sample:
        column 0 is the index of the opposite-class partner, column 1 the
        index of the examined sample (its label is in ``class_type``),
        column 2 a status flag initialised to 0.
        The array is integer-typed even when no link is found, so it can
        always be used for indexing.
    """
    y = np.asarray(y)

    nn = NearestNeighbors(n_neighbors=2)
    nn.fit(X)
    # Querying the training set itself: column 0 is normally the sample
    # itself, column 1 its nearest *other* sample.
    nearest = nn.kneighbors(X, return_distance=False)[:, 1]

    sample_index = np.arange(len(y))
    examined = np.isin(y, class_type)
    opposite_label = y[nearest] != y
    mutual = nearest[nearest] == sample_index
    is_link = examined & opposite_label & mutual

    anchors = sample_index[is_link]
    partners = nearest[is_link]
    status = np.zeros(len(anchors), dtype=int)
    return np.stack((partners, anchors, status), axis=1)

In [6]:
# Demo cell: load one train/test split of the Co-Author 50/250 data set and
# list the Tomek-link pairs anchored on the negative class (label -1).
from Processing_Data import Co_Author_50_250
X_train, y_train, X_test, y_test = Co_Author_50_250.load_data(test_size=0.2)
arr_tlp = is_tomek_new(X_train,y_train,class_type=[-1])
# Each row is [partner index, anchor index, status flag].
print(arr_tlp)
# Print the label stored at every value of every row.
# NOTE(review): the third column is the status flag (always 0 here), so the
# third label printed per row is y_train[0], not a member of the pair.
for ind, val in enumerate(arr_tlp):
    for i in val:
        print(y_train[i])


[[140  23   0]
 [119  32   0]
 [ 36  50   0]
 [102  95   0]
 [141 218   0]]
1.0
-1.0
-1.0
1.0
-1.0
-1.0
1.0
-1.0
-1.0
1.0
-1.0
-1.0
1.0
-1.0
-1.0


In [7]:
def Gmean(y_test, y_pred):
    """Specificity, sensitivity and their geometric mean for a binary problem.

    The confusion matrix comes from ``sklearn.metrics.confusion_matrix``;
    row/column 0 is treated as the negative class and row/column 1 as the
    positive class (with labels -1/1 that is the order scikit-learn uses,
    since it sorts the labels).

    Returns
    -------
    tuple
        ``(specificity, sensitivity, gmean)`` where
        ``gmean = sqrt(sensitivity * specificity)``.
    """
    matrix = metrics.confusion_matrix(y_test, y_pred)
    false_neg, true_pos = matrix[1, 0], matrix[1, 1]
    true_neg, false_pos = matrix[0, 0], matrix[0, 1]

    sensitivity = true_pos / (false_neg + true_pos)
    specificity = true_neg / (true_neg + false_pos)
    return specificity, sensitivity, math.sqrt(sensitivity * specificity)

In [8]:
def compute_weight(X, y,name_method ="actual_hyper_lin", name_function = "exp", beta = None,C = None, gamma = None, u = None, sigma = None):
    """Compute fuzzy membership weights for every +1 / -1 sample.

    A *method* from ``fuzzy.method()`` turns ``(X, y)`` into distances and a
    *function* from ``fuzzy.function()`` turns those distances into weights.
    Negative-class weights are finally scaled by ``n_pos / n_neg``.

    Parameters
    ----------
    X, y : training features and labels; only labels ``1`` and ``-1`` are used.
    name_method : one of ``"own_class_center"``, ``"estimated_hyper_lin"``,
        ``"own_class_center_opposite"``, ``"own_class_center_divided"``,
        ``"actual_hyper_lin"`` or ``"distance_center_own_opposite_tam"``.
    name_function : one of ``"lin"``, ``"exp"``, ``"lin_center_own"``,
        ``"gau"`` (single-distance methods) or ``"func_own_opp_new"``,
        ``"func_own_opp_new_v1"``, ``"func_own_opp_new_v2"`` (only with
        ``"distance_center_own_opposite_tam"``).
    beta : forwarded to ``function.exp``.
    C, gamma : forwarded to ``method.actual_hyper_lin``.
    u, sigma : forwarded to ``function.gau``.

    Returns
    -------
    numpy.ndarray
        Weights of the positive samples followed by the (rescaled) weights of
        the negative samples.
        NOTE(review): this is positives-first order, not the row order of
        ``X``, unless ``X`` is already sorted that way - confirm that the
        consumer (``Wsvm``) expects this layout.

    Raises
    ------
    ValueError
        Unknown ``name_method`` / ``name_function`` or an incompatible pair.
        Previously these cases printed a generic message and then failed with
        ``UnboundLocalError``; errors raised inside the fuzzy helpers now
        propagate unchanged instead of being swallowed.
    ZeroDivisionError
        If ``y`` contains no ``-1`` label.
    """
    plain_methods = ("own_class_center", "estimated_hyper_lin",
                     "own_class_center_opposite", "own_class_center_divided")
    hyperplane_method = "actual_hyper_lin"
    own_opp_method = "distance_center_own_opposite_tam"
    single_distance_functions = ("lin", "exp", "lin_center_own", "gau")
    own_opp_functions = ("func_own_opp_new", "func_own_opp_new_v1", "func_own_opp_new_v2")

    if name_method not in plain_methods + (hyperplane_method, own_opp_method):
        raise ValueError(f"unknown name_method: {name_method!r}")
    if name_function not in single_distance_functions + own_opp_functions:
        raise ValueError(f"unknown name_function: {name_function!r}")
    # The own/opposite functions need the three distances only the
    # own/opposite method produces, and vice versa.
    if (name_method == own_opp_method) != (name_function in own_opp_functions):
        raise ValueError(
            f"name_function {name_function!r} cannot be combined with name_method {name_method!r}"
        )

    method = fuzzy.method()
    function = fuzzy.function()
    pos_index = np.where(y == 1)[0]
    neg_index = np.where(y == -1)[0]

    if name_method == own_opp_method:
        d_own, d_opp, d_tam = method.distance_center_own_opposite_tam(X, y)
        W = getattr(function, name_function)(d_own, d_opp, pos_index, neg_index, d_tam)
    else:
        if name_method == hyperplane_method:
            d = method.actual_hyper_lin(X, y, C=C, gamma=gamma)
        else:
            d = getattr(method, name_method)(X, y)

        if name_function == "lin":
            W = function.lin(d)
        elif name_function == "exp":
            W = function.exp(d, beta)
        elif name_function == "lin_center_own":
            W = function.lin_center_own(d, pos_index, neg_index)
        else:  # "gau"
            W = function.gau(d, u, sigma)

    # Class re-balancing: positives keep their weight, negatives are scaled
    # by the positive/negative ratio.
    r_pos = 1
    r_neg = len(pos_index)/len(neg_index)
    W = np.array(W)
    m = W[pos_index]*r_pos
    m = np.append(m, W[neg_index]*r_neg)
    return m

In [9]:
def fuzzy_weight(beta_center, beta_estimate, beta_actual, X_train, y_train, namemethod, namefunction):
    """Call ``compute_weight`` with the ``beta`` that belongs to the chosen method.

    ``beta`` is only supplied when ``namefunction`` is ``"exp"`` and the
    method is one of the five listed below; every other combination calls
    ``compute_weight`` without ``beta``.

    Returns
    -------
    Whatever ``compute_weight`` returns for the training data.
    """
    # NOTE(review): "own_class_center" uses beta_estimate while
    # "own_class_center_opposite" uses beta_center - kept as found, confirm
    # this mapping is intentional.
    beta_for_method = {
        "own_class_center_opposite": beta_center,
        "own_class_center": beta_estimate,
        "own_class_center_divided": beta_estimate,
        "estimated_hyper_lin": beta_estimate,
        "actual_hyper_lin": beta_actual,
    }

    if namefunction == "exp" and namemethod in beta_for_method:
        return compute_weight(
            X_train,
            y_train,
            name_method=namemethod,
            name_function=namefunction,
            beta=beta_for_method[namemethod],
        )
    return compute_weight(X_train, y_train, name_method=namemethod, name_function=namefunction)

In [10]:
def data_tomelinks_new1(C,weight,X_test,y_test,X_train,y_train,n_neighbors,arr_tlp,clf=None,namemethod=None,namefunction=None):
    """Adapt per-sample weights from classifier mistakes and Tomek-link pairs (TLPs).

    A ``Wsvm`` is trained with the incoming weights, then:

    * every misclassified negative training sample has its weight halved;
    * for every TLP row ``[positive index, negative index, status]``:

      - case 1 (both predicted +1, i.e. the negative is wrong): the positive
        weight is multiplied by ``1 + ro1`` and the negative by ``1 - ro1``;
      - case 2 (case 1 and none of the negative's other nearest neighbours is
        negative): the negative is treated as noise and additionally
        multiplied by ``ro2``;
      - case 3 (both predicted -1, i.e. the positive is wrong): the positive
        weight is multiplied by ``1 + ro3`` and the negative by ``1 - ro3``;
      - case 4 (case 3 and none of the positive's other nearest neighbours is
        positive): the positive is treated as noise and additionally
        multiplied by ``ro4``.

      The case number is written to the row's status column.

    Parameters
    ----------
    C : constructor argument of ``Wsvm``.
    weight : numpy.ndarray
        Per-sample weights, indexed like the rows of ``X_train`` (the TLP
        indices are applied to it directly). It is modified IN PLACE and also
        returned; callers that invoke this function repeatedly on the same
        array therefore accumulate the updates.
    X_test, y_test : unused; kept for interface compatibility.
    X_train, y_train : numpy arrays with the training data (labels +1 / -1).
    n_neighbors : neighbourhood size for the noise test; the first neighbour
        returned for a training sample is assumed to be the sample itself and
        is ignored.
    arr_tlp : integer array of shape (n_links, 3) as produced by
        ``is_tomek_new``; its status column is updated in place.
    clf, namemethod, namefunction : unused; kept for interface compatibility
        (``clf`` is always replaced by a freshly trained ``Wsvm``).

    Returns
    -------
    numpy.ndarray
        The same object as ``weight`` after the updates.

    Notes
    -----
    Fixes relative to the previous revision: misclassified negatives were
    located by their position inside the negative subset but that position
    was used directly as an index into the full weight vector; and the
    status values 2 / 4 were written to the wrong TLP row.
    """
    ro1 = 0.1  # case 1: step applied to both members of the pair
    ro2 = 0.5  # case 2: extra factor for a noisy negative
    ro3 = 0.1  # case 3: step applied to both members of the pair
    ro4 = 0.5  # case 4: extra factor for a noisy positive

    new_W = weight  # deliberate alias: updates are visible to the caller
    neg_index = np.where(y_train == -1)[0]

    clf = Wsvm(C, new_W)
    clf.fit(X_train, y_train)
    nn2 = NearestNeighbors(n_neighbors=n_neighbors)
    nn2.fit(X_train)

    def has_neighbour_with_label(sample, label):
        # Labels of the sample's nearest neighbours, skipping the first hit
        # (the sample itself).
        neighbours = nn2.kneighbors([X_train[sample]])[1][0]
        return any(y_train[j] == label for j in neighbours[1:])

    # Misclassified negative sample -> reduce the weight of that sample
    # (strong reduction; the Co-Author experiments used 0.9 instead of 0.5).
    neg_pred = np.asarray(clf.predict(X_train[neg_index])).ravel()
    wrong_neg = neg_index[neg_pred != -1.0]
    new_W[wrong_neg] = new_W[wrong_neg]*0.5

    # Raise / lower the weights of the samples that belong to a TLP.
    for row in range(len(arr_tlp)):
        pos_i = int(arr_tlp[row][0])
        neg_i = int(arr_tlp[row][1])
        y_pred_pos = clf.predict([X_train[pos_i]])
        y_pred_neg = clf.predict([X_train[neg_i]])

        if (y_pred_pos == 1) and (y_pred_neg == 1):
            # Positive predicted correctly, negative predicted wrongly.
            new_W[pos_i] = new_W[pos_i]*(1 + ro1)
            new_W[neg_i] = new_W[neg_i]*(1 - ro1)
            arr_tlp[row][2] = 1
            if not has_neighbour_with_label(neg_i, -1):
                # No other negative nearby: negative noise, cut hard.
                new_W[neg_i] = new_W[neg_i]*ro2
                arr_tlp[row][2] = 2
        elif (y_pred_pos == -1) and (y_pred_neg == -1):
            # Negative predicted correctly, positive predicted wrongly.
            new_W[pos_i] = new_W[pos_i]*(1 + ro3)
            new_W[neg_i] = new_W[neg_i]*(1 - ro3)
            arr_tlp[row][2] = 3
            if not has_neighbour_with_label(pos_i, 1):
                # No other positive nearby: positive noise, cut hard.
                new_W[pos_i] = new_W[pos_i]*ro4
                arr_tlp[row][2] = 4

    return new_W

In [11]:
import pandas as pd
def compute_average_result(filepath,filename,time,times,n_folds=5):
    """Average the per-fold metrics of every known method/function pair.

    Reads the per-fold results CSV at ``filepath``, sums the six metric
    columns per (``Name Method``, ``Name Function``) pair and divides each
    sum by ``n_folds * times``. The averages are appended (header row first)
    to ``./Experiment/Data_{filename}_{time}_Average.csv``; the
    ``Experiment`` directory must already exist.

    Parameters
    ----------
    filepath : path of the per-fold CSV. It must contain the columns
        ``Name Method``, ``Name Function``, ``SP``, ``SE``, ``Gmean``,
        ``F1 Score``, ``Accuracy`` and ``AUC``.
    filename, time : inserted into the output file name.
    times : number of repetitions of the cross-validation contained in the file.
    n_folds : number of folds per repetition (default 5, the value that was
        previously hard-coded).

    Notes
    -----
    * Rows whose pair is not in the table below are skipped and ``"end"`` is
      printed once per skipped row (unchanged behaviour).
    * A pair with no rows is reported as all zeros.
    * NOTE(review): rows are matched on method/function only; if the input
      mixes runs that share those names (for example different ``T`` values)
      they are summed together while the divisor stays ``n_folds * times``.

    Raises
    ------
    ZeroDivisionError
        If ``n_folds * times`` is 0.
    """
    metric_columns = ['SP', 'SE', 'Gmean', 'F1 Score', 'Accuracy', 'AUC']
    # Output order of the averaged rows.
    combinations = [
        ('SVM', 'SVM'),
        ('WSVM', 'WSVM'),
        ('own_class_center', 'lin_center_own'),
        ('own_class_center', 'exp'),
        ('estimated_hyper_lin', 'lin_center_own'),
        ('estimated_hyper_lin', 'exp'),
        ('actual_hyper_lin', 'lin_center_own'),
        ('actual_hyper_lin', 'exp'),
        ('distance_center_own_opposite_tam', 'func_own_opp_new'),
        ('distance_center_own_opposite_tam', 'func_own_opp_new_v1'),
        ('distance_center_own_opposite_tam', 'func_own_opp_new_v2'),
    ]

    data = pd.read_csv(filepath)
    totals = {combo: [0] * len(metric_columns) for combo in combinations}

    pair_per_row = zip(data['Name Method'], data['Name Function'])
    metrics_per_row = data[metric_columns].itertuples(index=False, name=None)
    for pair, values in zip(pair_per_row, metrics_per_row):
        running = totals.get(pair)
        if running is None:
            print("end")
            continue
        for position, value in enumerate(values):
            running[position] += float(value)

    divisor = n_folds * times
    header = ['Name Method', 'Name Function'] + metric_columns
    averaged = [
        [method_name, function_name] + [total / divisor for total in totals[(method_name, function_name)]]
        for method_name, function_name in combinations
    ]

    with open(f'./Experiment/Data_{filename}_{time}_Average.csv', 'a', encoding='UTF8', newline='') as f4:
        writer = csv.writer(f4)
        writer.writerow(header)
        writer.writerows(averaged)

In [12]:
# Print every (method, function) combination that the experiment loops keep.
name_method = ["own_class_center", "estimated_hyper_lin", "actual_hyper_lin", "distance_center_own_opposite_tam"]
name_function = ["lin_center_own", "exp", "func_own_opp_new", "func_own_opp_new_v1", "func_own_opp_new_v2"]

# Pairs that are skipped: the own/opposite functions are never combined with
# the three single-distance methods, and the own/opposite method is currently
# only combined with "func_own_opp_new_v2".
skipped_pairs = {
    (single_distance_method, own_opp_function)
    for single_distance_method in ("own_class_center", "estimated_hyper_lin", "actual_hyper_lin")
    for own_opp_function in ("func_own_opp_new", "func_own_opp_new_v1", "func_own_opp_new_v2")
} | {
    ("distance_center_own_opposite_tam", disabled_function)
    for disabled_function in ("lin_center_own", "exp", "func_own_opp_new", "func_own_opp_new_v1")
}

for namemethod in name_method:
    for namefunction in name_function:
        if (namemethod, namefunction) not in skipped_pairs:
            print(namemethod, namefunction)

own_class_center lin_center_own
own_class_center exp
estimated_hyper_lin lin_center_own
estimated_hyper_lin exp
actual_hyper_lin lin_center_own
actual_hyper_lin exp
distance_center_own_opposite_tam func_own_opp_new_v2


In [13]:
# ################################# K-FOLD SCRIPT FOR CO-AUTHOR DATASETS ##############################
# C = 100
# T = 1
# N = 3
# n_neighbor = 5
# new_rate = [1/5]

# from Processing_Data import Co_Author_50_250_ir, Co_Author_250_750, Co_Author_100_900, Co_Author_100_700, Co_Author_100_500, Co_Author_50_350, Co_Author_200_1000, Co_Author_200_1000_1, Co_Author_300_1500
# dataset = Co_Author_50_250_ir
# beta_center, beta_estimate, beta_actual = 0.5, 0.8, 0.1

# name_method =["own_class_center","estimated_hyper_lin","actual_hyper_lin","distance_center_own_opposite_tam"]
# name_function = ["lin_center_own","exp","func_own_opp_new", "func_own_opp_new_v1","func_own_opp_new_v2"]

# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]

# #W.svm
# for n in range(0,N):
#     print("Lan boc: ",n+1)
#     header = ['Times','Rate','Fold','T','Name Method', 'Name Function', 'SP', 'SE', 'Gmean', 'F1 Score','Accuracy','AUC','Ma tran nham lan']
#     data = []
#     for newrate in new_rate:
#         X, y = dataset.load_data(newrate)
#         print(X.shape)
#         kfold_validation = StratifiedKFold(n_splits=5, shuffle=True) 
#         with open(f'./Experiment/KFold_Data_{filename}_{time}.csv', 'a', encoding='UTF8', newline='') as f3:
#             writer = csv.writer(f3)
#             writer.writerow(header)
#             fold = 1
#             for train_index, test_index in kfold_validation.split(X,y):
#                 # X_train, y_train = X.iloc[train_index], y.iloc[train_index]
#                 # X_test, y_test = X.iloc[test_index], y.iloc[test_index]
#                 X_train, y_train = X[train_index], y[train_index]
#                 X_test, y_test = X[test_index], y[test_index]
#                 print(X_test.shape)
                
#                 #Scalling Data
#                 sc_X = StandardScaler()
#                 X_train = sc_X.fit_transform(X_train)
#                 X_test = sc_X.transform(X_test)
#                 y_train = np.array(y_train)

#                 # NORMAL

#                 #Svm library
#                 print("SVM LIBRARY starting...\n")
#                 test_pred = svm_lib(X_train, y_train,X_test)
#                 sp,se,gmean = Gmean(y_test,test_pred)
#                 name1 = 'SVM'
#                 name2 = 'SVM'
#                 data.append([n+1,newrate,fold,"None",name1,name2,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])
                
#                 #Wsvm
#                 print("W.SVM starting...\n")
#                 N, d = X_train.shape
#                 distribution_weight = np.ones(N)
#                 test_pred = wsvm(C,X_train, y_train, X_test, distribution_weight)
#                 sp,se,gmean = Gmean(y_test,test_pred)
#                 name1 = 'WSVM'
#                 name2 = 'WSVM'
#                 data.append([n+1,newrate,fold,"None",name1,name2,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])

#                 #FuzyyWsvm
#                 for namemethod in name_method:
#                     for namefunction in name_function:
#                         if namemethod =="distance_center_own_opposite_tam" and namefunction =="lin_center_own":
#                             continue
#                         elif namemethod =="distance_center_own_opposite_tam" and namefunction =="exp":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         else:
#                             print(f"Fuzzy W.SVM name_method = '{namemethod}',name_function = '{namefunction}' starting...\n")
#                             distribution_weight = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
#                             __ = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
#                             test_pred = wsvm(C,X_train, y_train, X_test, distribution_weight)
#                             sp,se,gmean = Gmean(y_test,test_pred)
#                             data.append([n+1,newrate,fold,"None",namemethod,namefunction,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])

#                 ############ USING AFW-CIL METHOD #########################

#                 arr_tlp = is_tomek_new(X_train, y_train, class_type = [-1.0])
#                 #FuzyyWsvm
#                 for namemethod in name_method:
#                     for namefunction in name_function:
#                         if namemethod =="distance_center_own_opposite_tam" and namefunction =="lin_center_own":
#                             continue
#                         elif namemethod =="distance_center_own_opposite_tam" and namefunction =="exp":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         else:
#                             print("===========USING AFW-CIL=====================")
#                             print(f"Fuzzy W.SVM name_method = '{namemethod}',name_function = '{namefunction}' starting...\n")
#                             distribution_weight = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
#                             __ = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
                            
#                             for i in range(0,T):
#                                 new_W = data_tomelinks_new1(C,distribution_weight,X_test,y_test,X_train,y_train,n_neighbor,arr_tlp,clf=None,namemethod=namemethod,namefunction=namefunction)
#                                 test_pred = wsvm(C,X_train, y_train, X_test, new_W)
#                                 sp,se,gmean = Gmean(y_test,test_pred)
#                                 data.append([n+1,newrate,fold,i+1,namemethod,namefunction,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])
#                 fold = fold + 1
#             writer.writerows(data)

In [14]:
################################# K-FOLD SCRIPT FOR Abalone DATASETS ##############################
C = 100          # passed to Wsvm / wsvm() as its first argument
T = 5            # presumably the number of weight-adaptation rounds - TODO confirm against the code that consumes it
N = 1            # number of times the whole k-fold experiment is repeated
                 # NOTE(review): the fold loop below re-binds N (`N, d = X_train.shape`)
n_neighbor = 5

from Processing_Data import Abanole_KFold, Yeast_KFold
dataset = Abanole_KFold
beta_center, beta_estimate, beta_actual = 0.2, 0.2, 0.5

name_method =["own_class_center","estimated_hyper_lin","actual_hyper_lin","distance_center_own_opposite_tam"]
name_function = ["lin_center_own","exp","func_own_opp_new", "func_own_opp_new_v1","func_own_opp_new_v2"]

time = datetime.now().strftime("%d%m%Y_%H%M%S")
# Name the result files after the data-set module. The module's own name is
# used instead of parsing str(dataset), which only produced the module name
# when the module path contained Windows-style backslashes.
filename = getattr(dataset, "__name__", type(dataset).__name__).rsplit(".", 1)[-1]

#W.svm
for n in range(0,N):
    print("Lan boc: ",n+1)
    header = ['Times','Fold','T','Name Method', 'Name Function', 'SP', 'SE', 'Gmean', 'F1 Score','Accuracy','AUC','Ma tran nham lan']
    data = []
    X, y = dataset.load_data()
    print(X.shape)
    kfold_validation = StratifiedKFold(n_splits=5, shuffle=True) 
    with open(f'./Experiment/KFold_Data_{filename}_{time}.csv', 'a', encoding='UTF8', newline='') as f3:
        writer = csv.writer(f3)
        writer.writerow(header)
        fold = 1
        for train_index, test_index in kfold_validation.split(X,y):
            print("Fold thứ ",fold)
            # X_train, y_train = X.iloc[train_index], y.iloc[train_index]
            # X_test, y_test = X.iloc[test_index], y.iloc[test_index]
            X_train, y_train = X[train_index], y[train_index]
            X_test, y_test = X[test_index], y[test_index]
            print(X_test.shape)
            
            #Scalling Data
            sc_X = StandardScaler()
            X_train = sc_X.fit_transform(X_train)
            X_test = sc_X.transform(X_test)
            y_train = np.array(y_train)

            # NORMAL

            #Svm library
            print("SVM LIBRARY starting...\n")
            test_pred = svm_lib(X_train, y_train,X_test)
            sp,se,gmean = Gmean(y_test,test_pred)
            name1 = 'SVM'
            name2 = 'SVM'
            data.append([n+1,fold,"None",name1,name2,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])
            
            #Wsvm
            print("W.SVM starting...\n")
            N, d = X_train.shape
            distribution_weight = np.ones(N)
            test_pred = wsvm(C,X_train, y_train, X_test, distribution_weight)
            sp,se,gmean = Gmean(y_test,test_pred)
            name1 = 'WSVM'
            name2 = 'WSVM'
            data.append([n+1,fold,"None",name1,name2,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])

            #FuzyyWsvm
            for namemethod in name_method:
                for namefunction in name_function:
                    if namemethod =="distance_center_own_opposite_tam" and namefunction =="lin_center_own":
                        continue
                    elif namemethod =="distance_center_own_opposite_tam" and namefunction =="exp":
                        continue
                    elif namemethod == "own_class_center" and namefunction == "func_own_opp_new":
                        continue
                    elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new":
                        continue
                    elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new":
                        continue
                    elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v1":
                        continue
                    elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v1":
                        continue
                    elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v1":
                        continue
                    elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v2":
                        continue
                    elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v2":
                        continue
                    elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v2":
                        continue
                    elif namemethod == "distance_center_own_opposite_tam" and namefunction == "func_own_opp_new":
                        continue
                    elif namemethod == "distance_center_own_opposite_tam" and namefunction == "func_own_opp_new_v1":
                        continue
                    else:
                        print(f"Fuzzy W.SVM name_method = '{namemethod}',name_function = '{namefunction}' starting...\n")
                        distribution_weight = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
                        __ = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
                        test_pred = wsvm(C,X_train, y_train, X_test, distribution_weight)
                        sp,se,gmean = Gmean(y_test,test_pred)
                        data.append([n+1,fold,"None",namemethod,namefunction,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])

            ############ USING AFW-CIL METHOD #########################

            arr_tlp = is_tomek_new(X_train, y_train, class_type = [-1.0])
            #FuzyyWsvm
            for namemethod in name_method:
                for namefunction in name_function:
                    if namemethod =="distance_center_own_opposite_tam" and namefunction =="lin_center_own":
                        continue
                    elif namemethod =="distance_center_own_opposite_tam" and namefunction =="exp":
                        continue
                    elif namemethod == "own_class_center" and namefunction == "func_own_opp_new":
                        continue
                    elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new":
                        continue
                    elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new":
                        continue
                    elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v1":
                        continue
                    elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v1":
                        continue
                    elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v1":
                        continue
                    elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v2":
                        continue
                    elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v2":
                        continue
                    elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v2":
                        continue
                    elif namemethod == "distance_center_own_opposite_tam" and namefunction == "func_own_opp_new":
                        continue
                    elif namemethod == "distance_center_own_opposite_tam" and namefunction == "func_own_opp_new_v1":
                        continue
                    else:
                        print("===========USING AFW-CIL=====================")
                        print(f"Fuzzy W.SVM name_method = '{namemethod}',name_function = '{namefunction}' starting...\n")
                        distribution_weight = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
                        __ = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)                       
                        for i in range(0,T):
                            print ("T = ",i+1)
                            new_W = data_tomelinks_new1(C,distribution_weight,X_test,y_test,X_train,y_train,n_neighbor,arr_tlp,clf=None,namemethod=namemethod,namefunction=namefunction)
                            test_pred = wsvm(C,X_train, y_train, X_test, new_W)
                            sp,se,gmean = Gmean(y_test,test_pred)
                            data.append([n+1,fold,i+1,namemethod,namefunction,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])
            fold = fold + 1
            writer.writerows(data)

Lan boc:  1


(4177, 8)
Fold thứ  1
(836, 8)
SVM LIBRARY starting...

W.SVM starting...

Fuzzy W.SVM name_method = 'own_class_center',name_function = 'lin_center_own' starting...

Fuzzy W.SVM name_method = 'own_class_center',name_function = 'exp' starting...

Fuzzy W.SVM name_method = 'estimated_hyper_lin',name_function = 'lin_center_own' starting...

Fuzzy W.SVM name_method = 'estimated_hyper_lin',name_function = 'exp' starting...

Fuzzy W.SVM name_method = 'actual_hyper_lin',name_function = 'lin_center_own' starting...

Fuzzy W.SVM name_method = 'actual_hyper_lin',name_function = 'exp' starting...

Fuzzy W.SVM name_method = 'distance_center_own_opposite_tam',name_function = 'func_own_opp_new_v2' starting...

Fuzzy W.SVM name_method = 'own_class_center',name_function = 'lin_center_own' starting...

T =  1
T =  2
T =  3
T =  4
T =  5
Fuzzy W.SVM name_method = 'own_class_center',name_function = 'exp' starting...

T =  1
T =  2
T =  3
T =  4
T =  5
Fuzzy W.SVM name_method = 'estimated_hyper_lin',name_

In [15]:

def compare_arrays(array1, array2):
    """Return the indices i at which array1[i] is strictly less than array2[i].

    Every index of array1 is examined and array2 is read at the same index.
    The number of matching indices is printed before the list is returned.
    """
    smaller_at = [idx for idx in range(len(array1)) if array1[idx] < array2[idx]]
    print(len(smaller_at))
    return smaller_at


In [16]:
# Hard-coded list of 240 floats (60 rows of 4 values).
# NOTE(review): presumably a sample-weight vector pasted from one run for
# debugging -- TODO confirm its origin or load it from a file instead.
arr1 = [6.46107584e-01,7.74539153e-01,8.36154216e-01,5.85846890e-01
,4.78750142e-01,7.24712002e-01,8.60405712e-01,8.94385685e-01
,9.89459875e-08,8.18858628e-01,7.37957592e-01,9.62049006e-01
,9.62049006e-01,8.60405712e-01,8.52836298e-01,9.26963289e-01
,8.60405712e-01,6.74300396e-01,8.58533028e-01,8.69846963e-01
,5.72461391e-01,7.66104954e-01,7.62186907e-01,8.42516675e-01
,7.20887149e-01,7.61739983e-01,8.68827627e-01,8.60850325e-01
,9.89459875e-08,7.33192652e-01,7.62186907e-01,6.05204927e-01
,8.60936954e-01,9.26963289e-01,8.52836298e-01,8.36154216e-01
,2.04112931e-01,8.94385685e-01,8.36154216e-01,7.92118895e-01
,1.54195450e-01,1.46915132e-01,1.56063593e-01,1.65869838e-01
,1.35590141e-01,1.61147244e-01,1.51056811e-01,1.53625707e-01
,1.50272402e-01,1.52892586e-01,1.40457234e-01,1.47339042e-01
,1.47339042e-01,1.47318452e-01,1.57016744e-01,1.53217478e-01
,1.41043443e-01,1.52816490e-01,1.61147244e-01,1.54257432e-01
,1.54934619e-01,1.18734043e-01,3.42781465e-02,7.04821197e-02
,1.54934619e-01,1.44881969e-01,1.51991745e-01,1.52223358e-01
,1.41964780e-01,1.41043443e-01,1.48241631e-01,8.71208735e-02
,1.56063593e-01,1.65102234e-01,1.52319518e-01,1.16344478e-01
,1.33460955e-01,1.07212736e-01,7.24451721e-02,1.49623631e-01
,1.42466149e-01,9.26495129e-02,1.12300888e-01,9.38226285e-02
,1.50043194e-01,1.16685767e-01,1.50043194e-01,1.55046519e-01
,1.53736854e-01,1.43106213e-01,1.50272402e-01,1.54195450e-01
,1.52670348e-01,1.51902407e-01,1.46071743e-01,1.58893474e-01
,1.49803623e-01,1.56649018e-01,1.35424189e-01,1.56649018e-01
,1.65869838e-01,1.56489772e-01,1.55439220e-01,8.90199019e-02
,1.50272402e-01,1.56972271e-01,1.55584158e-01,1.48121655e-01
,1.43106213e-01,1.44910593e-01,1.51941156e-01,1.38118369e-01
,1.49693609e-01,9.38226285e-02,9.38226285e-02,1.52670348e-01
,1.51056811e-01,1.53973687e-01,1.50272402e-01,1.44186234e-01
,1.53033142e-01,1.51407875e-01,1.50043194e-01,1.35142243e-01
,1.61345043e-01,1.45425816e-01,1.46046371e-01,1.50043194e-01
,1.48973700e-01,2.91451296e-08,1.44186234e-01,1.52319518e-01
,1.49477685e-01,9.38226285e-02,1.56649018e-01,1.38118369e-01
,1.51902407e-01,1.09502470e-01,1.38178302e-01,1.50272402e-01
,1.50272402e-01,1.49623631e-01,1.44772367e-01,1.53973687e-01
,1.43594230e-01,1.57180423e-01,1.08710130e-01,1.49622766e-01
,1.51056811e-01,9.38226285e-02,1.09502470e-01,1.57016744e-01
,1.32956361e-01,1.20982058e-01,1.44186234e-01,1.20267427e-01
,1.51056811e-01,1.65869838e-01,1.49923117e-01,1.23735740e-01
,1.52670348e-01,1.46915132e-01,1.35424189e-01,1.55783973e-01
,1.58893474e-01,1.41138398e-01,1.54257432e-01,1.17935517e-01
,1.50043194e-01,1.50043194e-01,1.60001582e-01,9.08062289e-02
,1.65516562e-01,1.12300888e-01,1.55204948e-01,1.16041212e-01
,1.56649018e-01,1.28080382e-01,6.65401319e-02,1.51691536e-01
,1.52319518e-01,1.51941156e-01,1.50272402e-01,1.51056811e-01
,1.46051147e-01,1.41964780e-01,1.34076448e-01,1.17935517e-01
,1.56439632e-01,1.56649018e-01,1.53562648e-01,9.26495129e-02
,1.20267427e-01,1.52892586e-01,1.50272402e-01,5.04617955e-02
,1.40712090e-01,1.44772367e-01,1.07212736e-01,1.50272402e-01
,1.52319518e-01,1.48121655e-01,1.66172209e-01,1.49477685e-01
,1.49693609e-01,1.54455928e-01,1.66172209e-01,1.55783973e-01
,1.51056811e-01,1.60001582e-01,1.17011898e-01,1.58049150e-01
,1.56063593e-01,7.24451721e-02,1.52319518e-01,1.51407875e-01
,1.56393892e-01,1.52670348e-01,1.17433769e-01,1.55439220e-01
,1.52319518e-01,1.49693609e-01,1.54241834e-01,1.52319518e-01
,1.51056811e-01,1.56063593e-01,1.33460955e-01,1.08710130e-01
,1.22392531e-01,1.52658749e-01,1.49623631e-01,1.50272402e-01
,9.38226285e-02,1.05659825e-01,1.56649018e-01,1.12300888e-01
,1.61295501e-01,1.55439220e-01,1.46691502e-01,1.52319518e-01]

# Second hard-coded list of 240 floats (60 rows of 4 values).
# NOTE(review): presumably the same kind of vector captured at a different
# point of the experiment -- TODO confirm its origin.
arr2 =[6.46107584e-01,7.74539153e-01,8.36154216e-01,5.85846890e-01
,4.30875128e-01,7.24712002e-01,8.60405712e-01,8.94385685e-01
,9.89459875e-08,8.18858628e-01,7.37957592e-01,9.62049006e-01
,9.62049006e-01,8.60405712e-01,8.52836298e-01,9.26963289e-01
,8.60405712e-01,6.74300396e-01,8.58533028e-01,8.69846963e-01
,5.72461391e-01,7.66104954e-01,7.62186907e-01,8.42516675e-01
,7.20887149e-01,7.61739983e-01,8.68827627e-01,8.60850325e-01
,9.89459875e-08,7.33192652e-01,7.62186907e-01,6.05204927e-01
,8.60936954e-01,9.26963289e-01,8.52836298e-01,8.36154216e-01
,2.04112931e-01,8.94385685e-01,8.36154216e-01,7.92118895e-01
,1.54195450e-01,1.46915132e-01,1.56063593e-01,1.65869838e-01
,1.35590141e-01,1.61147244e-01,1.51056811e-01,1.53625707e-01
,1.50272402e-01,1.52892586e-01,1.26411511e-01,1.47339042e-01
,1.47339042e-01,1.47318452e-01,1.57016744e-01,1.53217478e-01
,1.41043443e-01,1.52816490e-01,1.61147244e-01,1.54257432e-01
,1.54934619e-01,1.18734043e-01,3.42781465e-02,7.04821197e-02
,1.54934619e-01,1.44881969e-01,1.51991745e-01,1.52223358e-01
,1.41964780e-01,1.41043443e-01,1.48241631e-01,8.71208735e-02
,1.56063593e-01,1.65102234e-01,1.52319518e-01,1.16344478e-01
,1.33460955e-01,1.07212736e-01,7.24451721e-02,1.49623631e-01
,1.42466149e-01,9.26495129e-02,1.12300888e-01,9.38226285e-02
,1.50043194e-01,1.16685767e-01,1.50043194e-01,1.55046519e-01
,1.53736854e-01,1.43106213e-01,1.50272402e-01,1.54195450e-01
,1.52670348e-01,1.51902407e-01,1.46071743e-01,1.58893474e-01
,1.49803623e-01,1.56649018e-01,1.35424189e-01,1.56649018e-01
,1.65869838e-01,1.56489772e-01,1.55439220e-01,8.90199019e-02
,1.50272402e-01,1.56972271e-01,1.55584158e-01,1.48121655e-01
,1.43106213e-01,1.44910593e-01,1.51941156e-01,1.38118369e-01
,1.49693609e-01,9.38226285e-02,9.38226285e-02,1.52670348e-01
,1.51056811e-01,1.53973687e-01,1.50272402e-01,1.44186234e-01
,1.53033142e-01,1.51407875e-01,1.50043194e-01,1.35142243e-01
,1.61345043e-01,1.45425816e-01,1.46046371e-01,1.50043194e-01
,1.48973700e-01,2.91451296e-08,1.44186234e-01,1.52319518e-01
,1.49477685e-01,9.38226285e-02,1.56649018e-01,1.38118369e-01
,1.51902407e-01,1.09502470e-01,1.38178302e-01,1.50272402e-01
,1.50272402e-01,1.49623631e-01,1.44772367e-01,1.53973687e-01
,1.43594230e-01,1.57180423e-01,1.08710130e-01,1.49622766e-01
,1.51056811e-01,9.38226285e-02,1.09502470e-01,1.57016744e-01
,1.32956361e-01,1.20982058e-01,1.44186234e-01,1.20267427e-01
,1.51056811e-01,1.65869838e-01,1.49923117e-01,1.23735740e-01
,1.52670348e-01,1.46915132e-01,1.35424189e-01,1.55783973e-01
,1.58893474e-01,1.41138398e-01,1.54257432e-01,1.17935517e-01
,1.50043194e-01,1.50043194e-01,1.60001582e-01,4.99434259e-02
,1.65516562e-01,1.12300888e-01,1.55204948e-01,1.16041212e-01
,1.56649018e-01,1.28080382e-01,3.65970726e-02,1.51691536e-01
,1.52319518e-01,1.51941156e-01,1.50272402e-01,1.51056811e-01
,1.46051147e-01,1.41964780e-01,1.34076448e-01,1.17935517e-01
,1.56439632e-01,1.56649018e-01,1.53562648e-01,9.26495129e-02
,1.20267427e-01,1.52892586e-01,1.50272402e-01,2.77539875e-02
,1.40712090e-01,1.44772367e-01,1.07212736e-01,1.50272402e-01
,1.52319518e-01,1.48121655e-01,1.66172209e-01,1.49477685e-01
,1.49693609e-01,1.54455928e-01,1.66172209e-01,1.55783973e-01
,1.51056811e-01,1.60001582e-01,1.05310708e-01,1.58049150e-01
,1.56063593e-01,7.24451721e-02,1.52319518e-01,1.51407875e-01
,1.56393892e-01,1.52670348e-01,1.05690392e-01,1.55439220e-01
,1.52319518e-01,1.49693609e-01,1.54241834e-01,1.52319518e-01
,1.51056811e-01,1.56063593e-01,1.33460955e-01,1.08710130e-01
,1.22392531e-01,1.52658749e-01,1.49623631e-01,1.50272402e-01
,9.38226285e-02,1.05659825e-01,1.56649018e-01,1.12300888e-01
,1.77425051e-01,1.55439220e-01,1.46691502e-01,1.52319518e-01]

# Print how many positions have arr1[i] < arr2[i]; the returned list of those
# indices is displayed as the cell result.
compare_arrays(arr1,arr2)
# Disabled: arr3 is not defined anywhere in the visible cells.
# compare_arrays(arr2,arr3)

1


[236]

In [17]:
# ###################### TEST-SIZE SCRIPT FOR CO-AUTHOR DATASETS #################################
# C = 100
# T = 20
# N = 1
# n_neighbor = [4,5,6]
# test_size = [0.2]
# new_rate = [1/5]
# from Processing_Data import CoAuthor_600, CoAuthor_1800
# dataset = CoAuthor_1800
# beta_center, beta_estimate, beta_actual = 0.3, 0.6, 0.7

# name_method =["own_class_center","estimated_hyper_lin","actual_hyper_lin","distance_center_own_opposite_tam"]
# name_function = ["lin_center_own","exp","func_own_opp_new", "func_own_opp_new_v1","func_own_opp_new_v2"]
# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]


# for n in range(0,N):
#     print("Lần bốc thứ: ",n+1)
#     header = ['Lan boc','Test Size','Rate','T','K-Neighbors','Name Method', 'Name Function', 'SP', 'SE', 'Gmean', 'F1 Score','Accuracy','AUC','Ma tran nham lan']
#     data = []
#     for testsize in test_size:
#         for newrate in new_rate:
#             X_train, y_train, X_test, y_test = dataset.load_data(test_size=testsize, new_rate=newrate)
#             with open(f'./Experiment/TestSize_Data_{filename}_{time}.csv', 'a', encoding='UTF8', newline='') as f3:
#                 writer = csv.writer(f3)
#                 writer.writerow(header)
                
#                 #Scalling Data
#                 sc_X = StandardScaler()
#                 X_train = sc_X.fit_transform(X_train)
#                 X_test = sc_X.transform(X_test)
#                 y_train = np.array(y_train)

#                 # NORMAL

#                 #Svm library
#                 print("SVM LIBRARY starting...\n")
#                 test_pred = svm_lib(X_train, y_train,X_test)
#                 sp,se,gmean = Gmean(y_test,test_pred)
#                 name1 = 'SVM'
#                 name2 = 'SVM'
#                 data.append([n+1,testsize,newrate,"None","None",name1,name2,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])
                
#                 #Wsvm
#                 print("W.SVM starting...\n")
#                 N, d = X_train.shape
#                 distribution_weight = np.ones(N)
#                 test_pred = wsvm(C,X_train, y_train, X_test, distribution_weight)
#                 sp,se,gmean = Gmean(y_test,test_pred)
#                 name1 = 'WSVM'
#                 name2 = 'WSVM'
#                 data.append([n+1,testsize,newrate,"None","None",name1,name2,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])

#                 #FuzyyWsvm
#                 for namemethod in name_method:
#                     for namefunction in name_function:
#                         if namemethod =="distance_center_own_opposite_tam" and namefunction =="lin_center_own":
#                             continue
#                         elif namemethod =="distance_center_own_opposite_tam" and namefunction =="exp":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         else:
#                             print(f"Fuzzy W.SVM name_method = '{namemethod}',name_function = '{namefunction}' starting...\n")
#                             distribution_weight = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
#                             test_pred = wsvm(C,X_train, y_train, X_test, distribution_weight)
#                             sp,se,gmean = Gmean(y_test,test_pred)
#                             data.append([n+1,testsize,newrate,"None","None",namemethod,namefunction,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])

#                 ############ USING AFW-CIL METHOD #########################
#                 arr_tlp = is_tomek_new(X_train, y_train, class_type = [-1.0])
#                 print(arr_tlp)
#                 #FuzyyWsvm
#                 for namemethod in name_method:
#                     for namefunction in name_function:
#                         if namemethod =="distance_center_own_opposite_tam" and namefunction =="lin_center_own":
#                             continue
#                         elif namemethod =="distance_center_own_opposite_tam" and namefunction =="exp":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                             continue
#                         elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                             continue
#                         else:
#                             print("===========USING AFW-CIL=====================")
#                             print(f"Fuzzy W.SVM name_method = '{namemethod}',name_function = '{namefunction}' starting...\n")
#                             distribution_weight = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)                  
                            
#                             for i in range(0,T):
#                                 print("T = ",i+1)
#                                 for nb in n_neighbor:
#                                     print("K-neighbors = ",nb)
#                                     new_W = data_tomelinks_new1(C,distribution_weight,X_test,y_test,X_train,y_train,nb,arr_tlp,clf=None,namemethod=namemethod,namefunction=namefunction)
#                                     # print(arr_tlp)
#                                     # print(new_W)
#                                     test_pred = wsvm(C,X_train, y_train, X_test, new_W)
#                                     sp,se,gmean = Gmean(y_test,test_pred)
#                                     data.append([n+1,testsize,newrate,i+1,nb,namemethod,namefunction,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])
#                 writer.writerows(data)


In [18]:
# ###################### TEST-SIZE SCRIPT FOR YEAST DATASETS #################################
# from Processing_Data import Yeast
# C = 100
# T = 3
# N = 1
# n_neighbor = [5]
# test_size = [0.3]
# dataset = Yeast

# beta_center, beta_estimate, beta_actual = 0.9, 0.2, 0.3

# name_method =["own_class_center","estimated_hyper_lin","actual_hyper_lin","distance_center_own_opposite_tam"]
# name_function = ["lin_center_own","exp","func_own_opp_new", "func_own_opp_new_v1","func_own_opp_new_v2"]
# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]


# for n in range(0,N):
#     print("Lần bốc thứ: ",n+1)
#     header = ['Lan boc','Test Size','T','K-Neighbors','Name Method', 'Name Function', 'SP', 'SE', 'Gmean', 'F1 Score','Accuracy','AUC','Ma tran nham lan']
#     data = []
#     for testsize in test_size:
#         X_train, y_train, X_test, y_test = dataset.load_data(test_size=testsize)
#         with open(f'./Experiment/TestSize_Data_{filename}_{time}.csv', 'a', encoding='UTF8', newline='') as f3:
#             writer = csv.writer(f3)
#             writer.writerow(header)
            
#             #Scalling Data
#             sc_X = StandardScaler()
#             X_train = sc_X.fit_transform(X_train)
#             X_test = sc_X.transform(X_test)
#             y_train = np.array(y_train)

#             # NORMAL

#             #Svm library
#             print("SVM LIBRARY starting...\n")
#             test_pred = svm_lib(X_train, y_train,X_test)
#             sp,se,gmean = Gmean(y_test,test_pred)
#             name1 = 'SVM'
#             name2 = 'SVM'
#             data.append([n+1,testsize,"None","None",name1,name2,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])
            
#             #Wsvm
#             print("W.SVM starting...\n")
#             N, d = X_train.shape
#             distribution_weight = np.ones(N)
#             test_pred = wsvm(C,X_train, y_train, X_test, distribution_weight)
#             sp,se,gmean = Gmean(y_test,test_pred)
#             name1 = 'WSVM'
#             name2 = 'WSVM'
#             data.append([n+1,testsize,"None","None",name1,name2,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])

#             #FuzyyWsvm
#             for namemethod in name_method:
#                 for namefunction in name_function:
#                     if namemethod =="distance_center_own_opposite_tam" and namefunction =="lin_center_own":
#                         continue
#                     elif namemethod =="distance_center_own_opposite_tam" and namefunction =="exp":
#                         continue
#                     elif namemethod == "own_class_center" and namefunction == "func_own_opp_new":
#                         continue
#                     elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new":
#                         continue
#                     elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new":
#                         continue
#                     elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v1":
#                         continue
#                     elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                         continue
#                     elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                         continue
#                     elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v2":
#                         continue
#                     elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                         continue
#                     elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                         continue
#                     else:
#                         print(f"Fuzzy W.SVM name_method = '{namemethod}',name_function = '{namefunction}' starting...\n")
#                         distribution_weight = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)
#                         test_pred = wsvm(C,X_train, y_train, X_test, distribution_weight)
#                         sp,se,gmean = Gmean(y_test,test_pred)
#                         data.append([n+1,testsize,"None","None",namemethod,namefunction,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])

#             ############ USING AFW-CIL METHOD #########################
#             arr_tlp = is_tomek_new(X_train, y_train, class_type = [-1.0])
#             print(arr_tlp)
#             #FuzyyWsvm
#             for namemethod in name_method:
#                 for namefunction in name_function:
#                     if namemethod =="distance_center_own_opposite_tam" and namefunction =="lin_center_own":
#                         continue
#                     elif namemethod =="distance_center_own_opposite_tam" and namefunction =="exp":
#                         continue
#                     elif namemethod == "own_class_center" and namefunction == "func_own_opp_new":
#                         continue
#                     elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new":
#                         continue
#                     elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new":
#                         continue
#                     elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v1":
#                         continue
#                     elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                         continue
#                     elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v1":
#                         continue
#                     elif namemethod == "own_class_center" and namefunction == "func_own_opp_new_v2":
#                         continue
#                     elif namemethod == "estimated_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                         continue
#                     elif namemethod == "actual_hyper_lin" and namefunction == "func_own_opp_new_v2":
#                         continue
#                     else:
#                         print("===========USING AFW-CIL=====================")
#                         print(f"Fuzzy W.SVM name_method = '{namemethod}',name_function = '{namefunction}' starting...\n")
#                         distribution_weight = fuzzy_weight(beta_center, beta_estimate, beta_actual,X_train, y_train,namemethod,namefunction)                  
                        
#                         for i in range(0,T):
#                             print("T = ",i+1)
#                             for nb in n_neighbor:
#                                 print("K-neighbors = ",nb)
#                                 new_W = data_tomelinks_new1(C,distribution_weight,X_test,y_test,X_train,y_train,nb,arr_tlp,clf=None,namemethod=namemethod,namefunction=namefunction)
#                                 # print(arr_tlp)
#                                 # print(new_W)
#                                 test_pred = wsvm(C,X_train, y_train, X_test, new_W)
#                                 sp,se,gmean = Gmean(y_test,test_pred)
#                                 data.append([n+1,testsize,i+1,nb,namemethod,namefunction,sp,se,gmean,f1_score(y_test, test_pred),accuracy_score(y_test,test_pred),roc_auc_score(y_test, test_pred),str(confusion_matrix(y_test, test_pred))])
#             writer.writerows(data)


In [19]:
# Redundant safety net: the K-fold cell opens f3 inside a `with` block, which
# already closes the file on exit; close() on an already-closed file does nothing.
# NOTE(review): raises NameError if no cell that defines f3 has run in this kernel.
f3.close()