In [1]:
import numpy as np
import pandas as pd
from data import Vertebral_column, Vertebral_column_KF, Ecoli_TestSize, Haberman_TestSize, Transfution_TestSize, Pima_TestSize, Co_Author_TestSize
import trainning_of_adaboost as toa
from sklearn.ensemble import AdaBoostClassifier
import adaboost_svm, ImAda_DecisionTree
from report import report
from sklearn.metrics  import classification_report, precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, f1_score, precision_score
import math
from sklearn.ensemble import AdaBoostClassifier
from datetime import datetime
import csv
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from wsvm.application import Wsvm
from sklearn.svm import SVC

In [2]:
def compute_metrics(y_test,y_pred):
    """Score a set of binary predictions against the true labels.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Hard label predictions for the same samples.

    Returns
    -------
    tuple
        ``(sp, se, gmean, f1s, pre, acc, auc, cm)``: specificity,
        sensitivity, their geometric mean, F1 score, precision, accuracy,
        ROC AUC and the confusion matrix itself.

    Notes
    -----
    The confusion matrix is indexed as 2x2, so the function assumes a binary
    problem in which the class that sorts second is the positive class
    (scikit-learn orders rows/columns by sorted label; rows are true labels).
    The AUC is computed from hard labels, not from scores.
    """
    confusion = confusion_matrix(y_test, y_pred)

    # Row 1 holds the samples whose true label is the positive class.
    true_pos = confusion[1, 1]
    false_neg = confusion[1, 0]
    # Row 0 holds the samples whose true label is the negative class.
    true_neg = confusion[0, 0]
    false_pos = confusion[0, 1]

    sensitivity = true_pos / (false_neg + true_pos)
    specificity = true_neg / (true_neg + false_pos)
    geometric_mean = math.sqrt(sensitivity * specificity)

    f1_value = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred)

    return (
        specificity,
        sensitivity,
        geometric_mean,
        f1_value,
        precision,
        accuracy,
        roc_auc,
        confusion,
    )

In [3]:
# 1. SVM lib
def svm_lib(X_train, y_train,X_test):
    """Fit scikit-learn's linear-kernel SVC and label the test samples.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed straight to ``SVC.fit``.
    X_test
        Features to predict.

    Returns
    -------
    The array of predicted labels returned by ``SVC.predict``.
    """
    linear_svc = SVC(kernel='linear', probability=True)
    # scikit-learn estimators return themselves from fit(), so the calls chain.
    return linear_svc.fit(X_train, y_train).predict(X_test)

In [4]:
# 2. SVM hand
# def svm(C,X_train, y_train,X_test):
#     model = Svm(C)
#     model.fit(X_train, y_train)
#     test_pred = model.predict(X_test)
#     return test_pred

In [5]:
# 3. DecisionTree
from sklearn import tree
def decisiontree(X_train, y_train,X_test):
    """Fit a default scikit-learn decision tree and label the test samples.

    Parameters
    ----------
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    The predicted labels for ``X_test``.
    """
    # No random_state is set, so repeated runs may produce different trees.
    fitted_tree = tree.DecisionTreeClassifier().fit(X_train, y_train)
    return fitted_tree.predict(X_test)

In [6]:
# 4. WSVM
def wsvm(C,X_train, y_train,X_test,distribution_weight=None):
    """Train the project's ``Wsvm`` model and label the test samples.

    Parameters
    ----------
    C
        First constructor argument of ``Wsvm`` (presumably the SVM penalty
        parameter - confirm against ``wsvm.application``).
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.
    distribution_weight : optional
        Second constructor argument of ``Wsvm``; ``None`` is forwarded as-is.

    Returns
    -------
    Whatever ``Wsvm.predict`` returns for ``X_test``.
    """
    weighted_svm = Wsvm(C, distribution_weight)
    weighted_svm.fit(X_train, y_train)
    return weighted_svm.predict(X_test)

In [7]:
# 5. AdaBoost SVM
# def ada_svm(M, C, X_train, y_train, X_test, theta):
#     w, b, a = adaboost_svm.fit(X_train, y_train, M, C, instance_categorization=False, proposed = False, theta=theta)
#     y_pred = adaboost_svm.predict(X_test, w, b, a, M)
#     return y_pred

In [8]:
# 5. AdaBoost SVM
def ada_svm(X_train, y_train, X_test):
    """Boost linear-kernel SVCs with scikit-learn's AdaBoost (SAMME).

    Parameters
    ----------
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    The predicted labels for ``X_test``.
    """
    base_svc = SVC(probability=True, kernel='linear')
    booster = AdaBoostClassifier(
        base_svc,
        n_estimators=100,
        learning_rate=1.0,
        algorithm='SAMME',
    )
    return booster.fit(X_train, y_train).predict(X_test)


In [9]:
# 6. AdaBoost DecisionTree
def ada_decisiontree(X_train, y_train,X_test):
    """Run scikit-learn's AdaBoost with its default base estimator.

    The ensemble uses 100 estimators; no base estimator is supplied, so the
    library default (a decision tree) is boosted.

    Parameters
    ----------
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    The predicted labels for ``X_test``.
    """
    booster = AdaBoostClassifier(n_estimators=100)
    return booster.fit(X_train, y_train).predict(X_test)

In [10]:
# 7. AdaBoost WSVM
def ada_wsvm(M, C, theta, X_train, y_train,X_test):
    """AdaBoost + WSVM: ``toa.fit`` with instance categorization only.

    Both proposed variants (preprocessing and alpha) are switched off.

    Parameters
    ----------
    M, C, theta
        Forwarded to ``toa.fit`` (``M`` is also forwarded to ``toa.predict``).
        Presumably the number of boosting rounds, the SVM penalty and the
        preprocessing parameter - confirm in ``trainning_of_adaboost``.
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the third and fourth values
        returned by ``toa.fit``.
    """
    fit_options = dict(
        instance_categorization=True,
        proposed_preprocessing=False,
        proposed_alpha=False,
        test_something=False,
        theta=theta,
    )
    weights, biases, alphas, errors = toa.fit(X_train, y_train, M, C, **fit_options)
    predictions = toa.predict(X_test, weights, biases, alphas, M)
    return predictions, alphas, errors

In [11]:
# 8. IM.AdaBoost-1 WSVM
def imada1_wsvm(M, C, theta, X_train, y_train,X_test):
    """IM.AdaBoost-1 + WSVM: instance categorization and proposed preprocessing.

    The proposed alpha is switched off.

    Parameters
    ----------
    M, C, theta
        Forwarded to ``toa.fit`` (``M`` is also forwarded to ``toa.predict``).
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the third and fourth values
        returned by ``toa.fit``.
    """
    fit_options = dict(
        instance_categorization=True,
        proposed_preprocessing=True,
        proposed_alpha=False,
        test_something=False,
        theta=theta,
    )
    weights, biases, alphas, errors = toa.fit(X_train, y_train, M, C, **fit_options)
    predictions = toa.predict(X_test, weights, biases, alphas, M)
    return predictions, alphas, errors

In [12]:
#9. IM.AdaBoost-2 WSVM
def imada2_wsvm(M, C, X_train, y_train,X_test):
    """IM.AdaBoost-2 + WSVM: instance categorization and proposed alpha.

    The proposed preprocessing is switched off and no ``theta`` is passed, so
    ``toa.fit`` uses its own default for it.

    Parameters
    ----------
    M, C
        Forwarded to ``toa.fit`` (``M`` is also forwarded to ``toa.predict``).
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the third and fourth values
        returned by ``toa.fit``.
    """
    fit_options = dict(
        instance_categorization=True,
        proposed_preprocessing=False,
        proposed_alpha=True,
        test_something=False,
    )
    weights, biases, alphas, errors = toa.fit(X_train, y_train, M, C, **fit_options)
    predictions = toa.predict(X_test, weights, biases, alphas, M)
    return predictions, alphas, errors

In [13]:
#10. IM.AdaBoost-12 WSVM
def imada_12_wsvm(M, C, theta, X_train, y_train,X_test):
    """IM.AdaBoost-12 + WSVM: categorization, preprocessing and alpha all on.

    Parameters
    ----------
    M, C, theta
        Forwarded to ``toa.fit`` (``M`` is also forwarded to ``toa.predict``).
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the third and fourth values
        returned by ``toa.fit``.
    """
    fit_options = dict(
        instance_categorization=True,
        proposed_preprocessing=True,
        proposed_alpha=True,
        test_something=False,
        theta=theta,
    )
    weights, biases, alphas, errors = toa.fit(X_train, y_train, M, C, **fit_options)
    predictions = toa.predict(X_test, weights, biases, alphas, M)
    return predictions, alphas, errors

In [14]:
#11. IM.AdaBoost1 + SVM
def imada1_svm(M, C, theta, X_train, y_train,X_test):
    """IM.AdaBoost-1 + SVM: proposed preprocessing without categorization.

    Instance categorization and the proposed alpha are both switched off.

    Parameters
    ----------
    M, C, theta
        Forwarded to ``toa.fit`` (``M`` is also forwarded to ``toa.predict``).
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the third and fourth values
        returned by ``toa.fit``.
    """
    fit_options = dict(
        instance_categorization=False,
        proposed_preprocessing=True,
        proposed_alpha=False,
        test_something=False,
        theta=theta,
    )
    weights, biases, alphas, errors = toa.fit(X_train, y_train, M, C, **fit_options)
    predictions = toa.predict(X_test, weights, biases, alphas, M)
    return predictions, alphas, errors


In [15]:
#12. IM.AdaBoost2 + SVM
def imada2_svm(M, C, X_train, y_train,X_test):
    """IM.AdaBoost-2 + SVM: proposed alpha without categorization.

    Instance categorization and the proposed preprocessing are switched off;
    no ``theta`` is passed, so ``toa.fit`` uses its own default for it.

    Parameters
    ----------
    M, C
        Forwarded to ``toa.fit`` (``M`` is also forwarded to ``toa.predict``).
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the third and fourth values
        returned by ``toa.fit``.
    """
    fit_options = dict(
        instance_categorization=False,
        proposed_preprocessing=False,
        proposed_alpha=True,
        test_something=False,
    )
    weights, biases, alphas, errors = toa.fit(X_train, y_train, M, C, **fit_options)
    predictions = toa.predict(X_test, weights, biases, alphas, M)
    return predictions, alphas, errors

In [16]:
#13. IM.AdaBoost12 + SVM
def imada_12_svm(M, C, theta, X_train, y_train,X_test):
    """IM.AdaBoost-12 + SVM: preprocessing and alpha on, categorization off.

    Parameters
    ----------
    M, C, theta
        Forwarded to ``toa.fit`` (``M`` is also forwarded to ``toa.predict``).
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the third and fourth values
        returned by ``toa.fit``.
    """
    fit_options = dict(
        instance_categorization=False,
        proposed_preprocessing=True,
        proposed_alpha=True,
        test_something=False,
        theta=theta,
    )
    weights, biases, alphas, errors = toa.fit(X_train, y_train, M, C, **fit_options)
    predictions = toa.predict(X_test, weights, biases, alphas, M)
    return predictions, alphas, errors

In [17]:
def imada_1_decisiontree(M,theta,X_train, y_train,X_test):
    """IM.AdaBoost-1 + decision tree: proposed preprocessing only.

    Parameters
    ----------
    M, theta
        Forwarded to ``ImAda_DecisionTree.fit``.
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the second and third values
        returned by ``ImAda_DecisionTree.fit``.
    """
    fit_options = dict(proposed_preprocessing=True, proposed_alpha=False, theta=theta)
    learners, alphas, errors = ImAda_DecisionTree.fit(X_train, y_train, M, **fit_options)
    predictions = ImAda_DecisionTree.predict(X_test, alphas, learners)
    return predictions, alphas, errors

In [18]:
def imada_2_decisiontree(M, X_train, y_train,X_test):
    """IM.AdaBoost-2 + decision tree: proposed alpha only.

    No ``theta`` is passed, so ``ImAda_DecisionTree.fit`` uses its default.

    Parameters
    ----------
    M
        Forwarded to ``ImAda_DecisionTree.fit``.
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the second and third values
        returned by ``ImAda_DecisionTree.fit``.
    """
    fit_options = dict(proposed_preprocessing=False, proposed_alpha=True)
    learners, alphas, errors = ImAda_DecisionTree.fit(X_train, y_train, M, **fit_options)
    predictions = ImAda_DecisionTree.predict(X_test, alphas, learners)
    return predictions, alphas, errors

In [19]:
def imada_12_decisiontree(M,theta,X_train, y_train,X_test):
    """IM.AdaBoost-12 + decision tree: proposed preprocessing and alpha.

    Parameters
    ----------
    M, theta
        Forwarded to ``ImAda_DecisionTree.fit``.
    X_train, y_train
        Training features and labels.
    X_test
        Features to predict.

    Returns
    -------
    tuple
        ``(y_pred, a, d)`` - the predictions plus the second and third values
        returned by ``ImAda_DecisionTree.fit``.
    """
    fit_options = dict(proposed_preprocessing=True, proposed_alpha=True, theta=theta)
    learners, alphas, errors = ImAda_DecisionTree.fit(X_train, y_train, M, **fit_options)
    predictions = ImAda_DecisionTree.predict(X_test, alphas, learners)
    return predictions, alphas, errors

In [20]:
# M = 5
# from data import Ecoli_TestSize, Haberman_TestSize, Co_Author_TestSize, Transfution_TestSize
# import numpy as np
# from sklearn import tree
# import methods
# X_train, y_train, X_test, y_test = Ecoli_TestSize.load_data(test_size=0.2)
# # weights = np.ones(len(y)) / len(y)
# weights = methods.intinitialization_weight_adjustment(X_train, y_train, proposed=True, theta=1)
# # weak_clf = tree.DecisionTreeClassifier()
# # weak_clf.fit(X, y, sample_weight=weights)

# # pred_i = weak_clf.predict(X)
# alpha = []
# D = []
# clfs = []
# for i in range(M):
#     #train weak classifier with sample weight
#     # weak_clf = DecisionTree(criterion='gini', max_depth=5)
#     weak_clf = tree.DecisionTreeClassifier()
#     weak_clf.fit(X_train, y_train, sample_weight=weights)

#     pred_i = weak_clf.predict(X_train)

#     true_index, false_index,false_index_P,false_index_N = methods.find_true_false_index(y_train, pred_i)
#     print(true_index)
#     print(type(false_index))
#     print(len(false_index_P))
#     print(false_index_N)
#     # Compute i-th confident and append to the alpha
#     # alpha_i = methods.confident(W_ada,false_index_P,false_index_N,proposed_alpha) #Gốc
#     alpha_i, D_i = methods.confident(weights,false_index_P,false_index_N,proposed_alpha = True)
#     print(alpha_i)
#     alpha.append(alpha_i)
#     D.append(D_i)
#     clfs.append(weak_clf)
#     # Update weight adjustment and instance categorization
#     weights = methods.update_weight_adjustment(weights, alpha_i,true_index, false_index)
#     # weights = methods.update_weights(weights, pred_i, y_train, alpha_i)

#     print(weights)
          
# def predict(X, alpha, clfs):
#     y_pred = np.zeros(len(X))
#     for alpha, clf in zip(alpha, clfs):
#         y_pred_weak = clf.predict(X)
#         # quantize y_pred_weak to {0, 1}
#         y_pred_weak = np.where(y_pred_weak == 1, 1, -1)
#         y_pred += alpha * y_pred_weak

#     # quantize y_pred to {0, 1}
#     y_pred = np.where(y_pred > 0, 1, 0)
#     return y_pred


# y_pred = predict(X_test,alpha, clfs)
# print(y_pred)


In [21]:
# ####################################### TEST_IM.ADA.DECISIONTREE ################################
# M=10
# C=10000
# theta = 1
# N = 1
# test_size = [0.2]
# dataset = Ecoli_TestSize

# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]
# filepath = f'./Experiment/Data_{filename}_TestSize.csv'
# for n in range(0,N):
#     header = ['Test Size','Method', 'SP', 'SE', 'Gmean', 'F1 Score','Precision','Accuracy','AUC','Ma tran nham lan']
#     data = []
#     print("Lan boc: ", n+1)
#     for testsize in test_size:
#         X_train, y_train, X_test, y_test = dataset.load_data(test_size=testsize)       

#         #No 12
#         print("ImADA_1_DecisionTree starting...\n")
#         y_pred = imada_1_decisiontree(M, theta, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA_1_DecisionTree'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#         #No 13
#         print("ImADA_2_DecisionTree starting...\n")
#         y_pred = imada_2_decisiontree(M, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA_2_DecisionTree'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#         #No 14
#         print("ImADA_12_DecisionTree starting...\n")
#         y_pred = imada_12_decisiontree(M, theta, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA_12_DecisionTree'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#     with open(f'./Experiment/Data_{filename}_{time}_TestSize.csv', 'a', encoding='UTF8', newline='') as f1:
#         writer = csv.writer(f1)
#         writer.writerow(header)
#         writer.writerows(data)
    



In [22]:
# # ####################################### TEST SIZE SCRIPT ################################
# M=2
# C=10000
# theta = 2
# N = 2
# test_size = [0.2, 0.3]
# dataset = Ecoli_TestSize

# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]
# filepath = f'./Experiment/Data_{filename}_TestSize.csv'
# for n in range(0,N):
#     header = ['Test Size','Method', 'SP', 'SE', 'Gmean', 'F1 Score','Precision','Accuracy','AUC','Ma tran nham lan','List of err_w','List of alpha']
#     data = []
#     print("Lan boc: ", n+1)
#     for testsize in test_size:
#         X_train, y_train, X_test, y_test = dataset.load_data(test_size=testsize)
#         with open(f'./Experiment/Data_{filename}_{time}_TestSize.csv', 'a', encoding='UTF8', newline='') as f1:
#             writer = csv.writer(f1)
#             writer.writerow(header)

#             #No 1
#             print("Decision Tree starting...\n")
#             y_pred = decisiontree(X_train, y_train, X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = "Decision Tree"
#             le = "None"
#             la = "None"
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])

#             #No 2
#             print("SVM (lib) starting...\n")
#             y_pred = svm_lib(X_train, y_train, X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = "SVM (lib)"
#             le = "None"
#             la = "None"
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])

#             #No 3
#             print("WSVM starting...\n")
#             N, d = X_train.shape
#             distribution_weight = np.ones(N)
#             y_pred = wsvm(C,X_train, y_train,X_test,distribution_weight)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = "WSVM"
#             le = "None"
#             la = "None"
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])
            
#             #No 4
#             print("ADA_Decision Tree starting...\n")
#             y_pred = ada_decisiontree(X_train, y_train, X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ADA_DSTree'
#             le = "None"
#             la = "None"
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])

#             #No 5
#             print("ADA_SVM starting...\n")
#             y_pred = ada_svm(X_train, y_train, X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ADA_SVM'
#             le = "None"
#             la = "None"
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])

#             #No 6
#             print("ADA_WSVM starting...\n")
#             y_pred, a, d = ada_wsvm(M, C, theta, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ADA_WSVM'
#             le = "None"
#             la = "None"
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#             #No 7
#             print("ImADA1_SVM starting...\n")
#             y_pred, a, d = imada1_svm(M, C, theta, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA1_SVM'
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#             #No 8
#             print("ImADA2_SVM starting...\n")
#             y_pred, a, d = imada2_svm(M, C, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA2_SVM'
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#             #No 9
#             print("ImADA_12_SVM starting...\n")
#             y_pred, a, d = imada_12_svm(M, C, theta, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA_12_SVM'
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#             #No 10
#             print("ImADA1_WSVM starting...\n")
#             y_pred, a, d = imada1_wsvm(M, C, theta, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA1_WSVM'
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#             #No 11
#             print("ImADA2_WSVM starting...\n")
#             y_pred, a, d = imada2_wsvm(M, C, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA2_WSVM'
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM), d,a])

#             #No 12
#             print("ImADA_12_WSVM starting...\n")
#             y_pred, a, d = imada_12_wsvm(M, C, theta, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA_12_WSVM'
#             data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM), d,a])
            
#             writer.writerows(data)
    



In [23]:
# ####################################### TEST SIZE SCRIPT - NEW ################################
# M=20
# C=10000
# theta = 1
# N = 1
# test_size = [0.2, 0.3]
# dataset = Co_Author_TestSize

# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]
# filepath = f'./Experiment/Data_{filename}_TestSize.csv'
# for n in range(0,N):
#     header = ['Test Size','Method', 'SP', 'SE', 'Gmean', 'F1 Score','Precision','Accuracy','AUC','Ma tran nham lan','List of err_w','List of alpha']
#     data = []
#     print("Lan boc: ", n+1)
#     for testsize in test_size:
#         X_train, y_train, X_test, y_test = dataset.load_data(test_size=testsize)       
#         #No 1
#         print("Decision Tree starting...\n")
#         y_pred = decisiontree(X_train, y_train, X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = "Decision Tree"
#         le = "None"
#         la = "None"
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])

#         #No 2
#         print("SVM (lib) starting...\n")
#         y_pred = svm_lib(X_train, y_train, X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = "SVM (lib)"
#         le = "None"
#         la = "None"
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])

#         #No 3
#         print("WSVM starting...\n")
#         N, d = X_train.shape
#         distribution_weight = np.ones(N)
#         y_pred = wsvm(C,X_train, y_train,X_test,distribution_weight)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = "WSVM"
#         le = "None"
#         la = "None"
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])
        
#         #No 4
#         print("ADA_Decision Tree starting...\n")
#         y_pred = ada_decisiontree(X_train, y_train, X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ADA_DSTree'
#         le = "None"
#         la = "None"
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])

#         #No 5
#         print("ADA_SVM starting...\n")
#         y_pred = ada_svm(X_train, y_train, X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ADA_SVM'
#         le = "None"
#         la = "None"
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),le,la])

#         #No 6
#         print("ADA_WSVM starting...\n")
#         y_pred, a, d = ada_wsvm(M, C, theta, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ADA_WSVM'
#         le = "None"
#         la = "None"
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#         #No 7
#         print("ImADA1_SVM starting...\n")
#         y_pred, a, d = imada1_svm(M, C, theta, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA1_SVM'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#         #No 8
#         print("ImADA2_SVM starting...\n")
#         y_pred, a, d = imada2_svm(M, C, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA2_SVM'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#         #No 9
#         print("ImADA_12_SVM starting...\n")
#         y_pred, a, d = imada_12_svm(M, C, theta, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA_12_SVM'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#         #No 10
#         print("ImADA1_WSVM starting...\n")
#         y_pred, a, d = imada1_wsvm(M, C, theta, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA1_WSVM'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM),d,a])

#         #No 11
#         print("ImADA2_WSVM starting...\n")
#         y_pred, a, d = imada2_wsvm(M, C, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA2_WSVM'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM), d,a])

#         #No 12
#         print("ImADA_12_WSVM starting...\n")
#         y_pred, a, d = imada_12_wsvm(M, C, theta, X_train, y_train,X_test)
#         sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#         name = 'ImADA_12_WSVM'
#         data.append([testsize,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM), d,a])

#     with open(f'./Experiment/Data_{filename}_{time}_TestSize.csv', 'a', encoding='UTF8', newline='') as f1:
#         writer = csv.writer(f1)
#         writer.writerow(header)
#         writer.writerows(data)
    



In [24]:
####################################### TEST SIZE SCRIPT - FIND BEST PARAMETERS ################################
# Grid search over every (M, C, theta) combination on one dataset. For each
# test split the parameter-free baselines are scored once, then every boosted
# variant is scored for every grid point. Each scored method becomes one CSV
# row in ./Experiment/ (the directory must already exist; the file is opened
# before any training starts, so a missing directory fails immediately).
#
# Earlier grid, kept for reference:
# M=[10,20,30,40,50]
# C=[0.1,10,100,1000,10000]
# theta = [0.5,1,1.5,2,2.5]
M=[10,15,20,25]
C=[10,100,1000,5000,10000]
theta = [0.3, 0.5, 0.7, 1,1.5,2]
N = 1  # number of times the whole experiment is repeated
test_size = [0.2]
dataset = Haberman_TestSize

time = datetime.now().strftime("%d%m%Y_%H%M%S")
# Use the module name for the file name. Parsing str(dataset) for "\\" only
# works when the module path is a Windows path; it is kept as a fallback for
# objects that have no __name__.
_dataset_name = getattr(dataset, "__name__", None)
if _dataset_name is not None:
    filename = _dataset_name.rsplit(".", 1)[-1]
else:
    filename = (str(dataset).split("\\")[-1]).split(".")[0]
# Path of the CSV that is actually written (one file per notebook run).
filepath = f'./Experiment/Data_{filename}_{time}_TestSize.csv'

header = ['Test Size','Method', 'M','C','theta','SP', 'SE', 'Gmean', 'F1 Score','Precision','Accuracy','AUC','Ma tran nham lan','List of err_w','List of alpha']

# Baselines that take no grid parameter: (progress label, CSV method name, runner).
_BASELINES = [
    ("Decision Tree", "Decision Tree", decisiontree),
    ("SVM (lib)", "SVM (lib)", svm_lib),
    ("ADA_Decision Tree", 'ADA_DSTree', ada_decisiontree),
    ("ADA_SVM", 'ADA_SVM', ada_svm),
]

# Boosted variants evaluated at every grid point: (CSV method name, runner).
# Every runner takes (m, c, t, X_train, y_train, X_test) and returns
# (y_pred, a, d). Some variants ignore c and/or t; they are still re-run for
# every combination so the CSV keeps one row per method per grid point.
_GRID_METHODS = [
    ('ADA_WSVM', lambda m, c, t, X_tr, y_tr, X_te: ada_wsvm(m, c, t, X_tr, y_tr, X_te)),
    ('ImADA_1_DecisionTree', lambda m, c, t, X_tr, y_tr, X_te: imada_1_decisiontree(m, t, X_tr, y_tr, X_te)),
    ('ImADA_2_DecisionTree', lambda m, c, t, X_tr, y_tr, X_te: imada_2_decisiontree(m, X_tr, y_tr, X_te)),
    ('ImADA_12_DecisionTree', lambda m, c, t, X_tr, y_tr, X_te: imada_12_decisiontree(m, t, X_tr, y_tr, X_te)),
    ('ImADA1_SVM', lambda m, c, t, X_tr, y_tr, X_te: imada1_svm(m, c, t, X_tr, y_tr, X_te)),
    ('ImADA2_SVM', lambda m, c, t, X_tr, y_tr, X_te: imada2_svm(m, c, X_tr, y_tr, X_te)),
    ('ImADA_12_SVM', lambda m, c, t, X_tr, y_tr, X_te: imada_12_svm(m, c, t, X_tr, y_tr, X_te)),
    ('ImADA1_WSVM', lambda m, c, t, X_tr, y_tr, X_te: imada1_wsvm(m, c, t, X_tr, y_tr, X_te)),
    ('ImADA2_WSVM', lambda m, c, t, X_tr, y_tr, X_te: imada2_wsvm(m, c, X_tr, y_tr, X_te)),
    ('ImADA_12_WSVM', lambda m, c, t, X_tr, y_tr, X_te: imada_12_wsvm(m, c, t, X_tr, y_tr, X_te)),
]


def _record_result(writer, out_file, rows, testsize, name, m, c, t, y_test, y_pred, err_w="None", alpha="None"):
    """Score one method and store its CSV row both in memory and on disk.

    The row is flushed immediately so that results already computed survive
    a failure later in the (long) grid search.
    """
    sp, se, gmean, f1s, pre, acc, auc, cm = compute_metrics(y_test, y_pred)
    row = [testsize, name, m, c, t, sp, se, gmean, f1s, pre, acc, auc, str(cm), err_w, alpha]
    rows.append(row)
    writer.writerow(row)
    out_file.flush()


for n in range(0,N):
    data = []
    print("Lan boc: ", n+1)
    # Append mode: every repetition adds its own header and rows to the same file.
    with open(filepath, 'a', encoding='UTF8', newline='') as f1:
        writer = csv.writer(f1)
        writer.writerow(header)

        for testsize in test_size:
            X_train, y_train, X_test, y_test = dataset.load_data(test_size=testsize)

            # Baselines have no M / C / theta, so those columns hold placeholders.
            for label, name, run in _BASELINES:
                print(f"{label} starting...\n")
                y_pred = run(X_train, y_train, X_test)
                _record_result(writer, f1, data, testsize, name, "None", "none", "none", y_test, y_pred)

            for m in M:
                for c in C:
                    for t in theta:
                        print(m,c,t)

                        # Plain WSVM with uniform sample weights (uses only c).
                        print("WSVM starting...\n")
                        # A dedicated name: the repetition count N and the
                        # err_w list d must not be overwritten by the shape.
                        n_samples = X_train.shape[0]
                        distribution_weight = np.ones(n_samples)
                        y_pred = wsvm(c,X_train, y_train,X_test,distribution_weight)
                        _record_result(writer, f1, data, testsize, "WSVM", m, c, t, y_test, y_pred)

                        for name, run in _GRID_METHODS:
                            print(f"{name} starting...\n")
                            y_pred, a, d = run(m, c, t, X_train, y_train, X_test)
                            # d fills the 'List of err_w' column, a the 'List of alpha' column.
                            _record_result(writer, f1, data, testsize, name, m, c, t, y_test, y_pred, err_w=d, alpha=a)
    



Lan boc:  1
Decision Tree starting...

SVM (lib) starting...

ADA_Decision Tree starting...

ADA_SVM starting...

10 10 0.3
WSVM starting...

ADA_WSVM starting...

eps 0.2295081967213116
alpha 0.6055451360473997
eps 0.41109422492401193
alpha 0.17972187525343317
eps 0.5000000000000001
alpha -2.2204460492503136e-16
eps 0.5000000000000001
alpha -2.2204460492503136e-16
eps 0.49999999999999994
alpha 1.1102230246251564e-16
eps 0.49999999999999994
alpha 1.1102230246251564e-16
eps 0.5
alpha 0.0
eps 0.5000000000000001
alpha -2.2204460492503136e-16
eps 0.4999999999999999
alpha 2.2204460492503126e-16
eps 0.5
alpha 0.0
ImADA_1_DecisionTree starting...

0.36312849162011174
0.0035795276359978003 0.009857468412978559
eps 0.0005188330197399048
alpha 3.7817047467555485
eps 0.006981536786171327
alpha 2.478740097380348
eps 0.2517576553319
alpha 0.544629988047158
eps 0.33411634850857475
alpha 0.34481283833664284
eps 0.3754409639582797
alpha 0.2544723096826672
eps 0.4002824161898766
alpha 0.202144256150984

In [25]:
# ####################################### TEST SIZE SCRIPT ################################
# M=10
# C=10000
# theta = 2
# N = 5
# test_size = [0.3]
# new_rate = [1/5]

# dataset = Co_Author

# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]
# filepath = f'./Experiment/Data_{filename}_TestSize.csv'
# for n in range(0,N):
#     header = ['Test Size','IR','Method', 'SP', 'SE', 'Gmean', 'F1 Score','Precision','Accuracy','AUC','Ma tran nham lan']
#     data = []
#     print("Lan boc: ", n+1)
#     for testsize in test_size:
#         for newrate in new_rate:
#             X_train, y_train, X_test, y_test = dataset.load_data(test_size=testsize, new_rate=newrate)
#             with open(f'./Experiment/Data_{filename}_{time}_TestSize.csv', 'a', encoding='UTF8', newline='') as f1:
#                 writer = csv.writer(f1)
#                 writer.writerow(header)

#                 print("ADA_Decision Tree starting...\n")
#                 y_pred = ada_decisiontree(X_train, y_train, X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ADA_DSTree'
#                 data.append([testsize,newrate,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#                 print("ADA_WSVM starting...\n")
#                 y_pred = ada_wsvm(M, C, theta, X_train, y_train,X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ADA_WSVM'
#                 data.append([testsize,newrate,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#                 print("ImADA1_WSVM starting...\n")
#                 y_pred = imada1_wsvm(M, C, theta, X_train, y_train,X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ImADA1_WSVM'
#                 data.append([testsize,newrate,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#                 print("ImADA2_WSVM starting...\n")
#                 y_pred = imada2_wsvm(M, C, X_train, y_train,X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ImADA2_WSVM'
#                 data.append([testsize,newrate,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#                 print("ImADA_12_WSVM starting...\n")
#                 y_pred = imada_12_wsvm(M, C, X_train, y_train,X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ImADA_12_WSVM'
#                 data.append([testsize,newrate,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])
                
#                 writer.writerows(data)


In [26]:
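# ######################## K-FOLD SCRIPT WITHOUT CHANGING THE RATE #########################
# NOTE: this whole cell is commented out (disabled). As written, it repeats a
# 5-split stratified K-fold evaluation N times on `dataset` and writes one CSV row
# per method and fold to ./Experiment/Data_{filename}_{time}_KFold.csv.
# NOTE(review): the CSV is opened in append mode inside the `for n` loop and the
# header row is written on every repetition, so the file ends up with N header rows.
# NOTE(review): `filepath` is assigned below but not used anywhere in this cell.
# NOTE(review): StratifiedKFold is created with shuffle=True and no random_state,
# so the folds are presumably different on every run - confirm whether that is intended.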

# M=100
# C=10000
# theta = 2
# N = 5
# from data import Ecoli_Kfold, Haberman_KFold, Transfution_Kfold
# dataset = Transfution_Kfold


# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]
# filepath = f'./Experiment/Data_{filename}_Full.csv'

# for n in range(0,N):
#     print("Lan boc: ",n+1)
#     X, y = dataset.load_data()
#     print(X.shape)
#     kfold_validation = StratifiedKFold(n_splits=5, shuffle=True)
#     header = ['Time','Fold','Method', 'SP', 'SE', 'Gmean', 'F1 Score','Precision','Accuracy','AUC','Ma tran nham lan']
#     data = []
#     with open(f'./Experiment/Data_{filename}_{time}_KFold.csv', 'a', encoding='UTF8', newline='') as f1:
#         writer = csv.writer(f1)
#         writer.writerow(header)
#         fold = 1
#         for train_index, test_index in kfold_validation.split(X,y):   
#             print("\nFold thu ",fold)            
#             # X_train, y_train = X.iloc[train_index], y.iloc[train_index]
#             # X_test, y_test = X.iloc[test_index], y.iloc[test_index]
#             X_train, y_train = X[train_index], y[train_index]
#             X_test, y_test = X[test_index], y[test_index]
#             print(X_test.shape)
            
#             # Scale the data: fit the scaler on the training fold, then apply it to the test fold
#             sc_X = StandardScaler()
#             X_train = sc_X.fit_transform(X_train)
#             X_test = sc_X.transform(X_test)
#             y_train = np.array(y_train)

#             print("ADA_Decision Tree starting...\n")
#             y_pred = ada_decisiontree(X_train, y_train, X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ADA_DSTree'
#             data.append([n+1,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#             print("ADA_WSVM starting...\n")
#             y_pred = ada_wsvm(M, C, theta, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ADA_WSVM'
#             data.append([n+1,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#             print("ImADA1_WSVM starting...\n")
#             y_pred = imada1_wsvm(M, C, theta, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA1_WSVM'
#             data.append([n+1,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#             print("ImADA2_WSVM starting...\n")
#             y_pred = imada2_wsvm(M, C, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA2_WSVM'
#             data.append([n+1,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#             print("ImADA_12_WSVM starting...\n")
#             y_pred = imada_12_wsvm(M, C, X_train, y_train,X_test)
#             sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#             name = 'ImADA_12_WSVM'
#             data.append([n+1,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])
                
#             fold = fold + 1
#         writer.writerows(data)


In [27]:
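# ######################## K-FOLD SCRIPT WITH CHANGING RATE #########################
# NOTE: this whole cell is commented out (disabled). As written, for each of N
# repetitions and each value `ir` in `new_rate` it reloads the data with
# dataset.load_data(ir), runs a 5-split stratified K-fold evaluation, and writes one
# CSV row per method and fold to ./Experiment/Data_{filename}_{time}_KFold.csv.
# NOTE(review): the CSV is opened in append mode inside the `for ir` loop and the
# header row is written each time, so the file ends up with N * len(new_rate) header rows.
# NOTE(review): `filepath` is assigned below but not used anywhere in this cell.
# NOTE(review): StratifiedKFold is created with shuffle=True and no random_state,
# so the folds are presumably different on every run - confirm whether that is intended.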

# M=10
# C=10000
# theta = 0.2
# N = 5
# new_rate = [1/5,1/7,1/9]
# from data import Co_Author_KF

# dataset = Co_Author_KF

# time = datetime.now().strftime("%d%m%Y_%H%M%S")
# filename = (str(dataset).split("\\")[-1]).split(".")[0]
# filepath = f'./Experiment/Data_{filename}_Full.csv'

# for n in range(0,N):
#     print("Lan boc: ",n+1)
#     for ir in new_rate:
#         X, y = dataset.load_data(ir)
#         print("IR = ", ir)
#         print(X.shape)
#         kfold_validation = StratifiedKFold(n_splits=5, shuffle=True)
#         header = ['Time','IR','Fold','Method', 'SP', 'SE', 'Gmean', 'F1 Score','Precision','Accuracy','AUC','Ma tran nham lan']
#         data = []
#         with open(f'./Experiment/Data_{filename}_{time}_KFold.csv', 'a', encoding='UTF8', newline='') as f1:
#             writer = csv.writer(f1)
#             writer.writerow(header)
#             fold = 1
#             for train_index, test_index in kfold_validation.split(X,y):   
#                 print("\nFold thu ",fold)            
#                 # X_train, y_train = X.iloc[train_index], y.iloc[train_index]
#                 # X_test, y_test = X.iloc[test_index], y.iloc[test_index]
#                 X_train, y_train = X[train_index], y[train_index]
#                 X_test, y_test = X[test_index], y[test_index]
#                 print(X_test.shape)
                
#                 # Scale the data: fit the scaler on the training fold, then apply it to the test fold
#                 sc_X = StandardScaler()
#                 X_train = sc_X.fit_transform(X_train)
#                 X_test = sc_X.transform(X_test)
#                 y_train = np.array(y_train)

#                 print("ADA_Decision Tree starting...\n")
#                 y_pred = ada_decisiontree(X_train, y_train, X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ADA_DSTree'
#                 data.append([n+1,ir,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#                 print("ADA_WSVM starting...\n")
#                 y_pred = ada_wsvm(M, C, theta, X_train, y_train,X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ADA_WSVM'
#                 data.append([n+1,ir,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#                 print("ImADA1_WSVM starting...\n")
#                 y_pred = imada1_wsvm(M, C, theta, X_train, y_train,X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ImADA1_WSVM'
#                 data.append([n+1,ir,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#                 print("ImADA2_WSVM starting...\n")
#                 y_pred = imada2_wsvm(M, C, X_train, y_train,X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ImADA2_WSVM'
#                 data.append([n+1,ir,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])

#                 print("ImADA_12_WSVM starting...\n")
#                 y_pred = imada_12_wsvm(M, C, X_train, y_train,X_test)
#                 sp, se, gmean, f1s, pre, acc, auc, cm_WSVM = compute_metrics(y_test, y_pred)
#                 name = 'ImADA_12_WSVM'
#                 data.append([n+1,ir,fold,name,sp, se, gmean, f1s, pre, acc, auc, str(cm_WSVM)])
                    
#                 fold = fold + 1
#             writer.writerows(data)
