In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import roc_auc_score,average_precision_score    
import warnings
import numpy as np
import pandas as pd
import os

warnings.simplefilter("ignore")

class CancelOut(keras.layers.Layer):
    '''
    CancelOut layer, keras implementation.

    Scales every input feature by ``activation(w)``, where ``w`` is a trainable
    vector with one entry per input feature, initialised to 1.

    Parameters
    ----------
    activation : {'sigmoid', 'softmax'}
        Function applied to ``w`` before the element-wise product.
    cancelout_loss : bool
        When True, ``lambda_1 * ||w||_1 + lambda_2 * ||w||_2`` is added to the
        model loss on every call.
    lambda_1, lambda_2 : float
        Coefficients of the L1 and L2 norm terms of that penalty.
    **kwargs
        Forwarded to ``keras.layers.Layer`` (e.g. ``name``), which is what
        allows the layer to be rebuilt from its config.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    '''
    # Supported activation names and the functions they stand for.
    _ACTIVATIONS = {'sigmoid': tf.sigmoid, 'softmax': tf.nn.softmax}

    def __init__(self, activation='sigmoid', cancelout_loss=True, lambda_1=0.002, lambda_2=0.001, **kwargs):
        super(CancelOut, self).__init__(**kwargs)
        if activation not in self._ACTIVATIONS:
            # Fail here instead of with an AttributeError inside call().
            raise ValueError(
                "activation must be one of %s, got %r" % (sorted(self._ACTIVATIONS), activation)
            )
        self.lambda_1 = lambda_1
        self.lambda_2 = lambda_2
        self.cancelout_loss = cancelout_loss
        # Keep the name next to the callable so get_config() stays serializable.
        self.activation_name = activation
        self.activation = self._ACTIVATIONS[activation]

    def build(self, input_shape):
        # One weight per feature (last input axis), starting at 1.
        self.w = self.add_weight(
            shape=(input_shape[-1],),
            initializer=tf.keras.initializers.Constant(1),
            trainable=True,
        )

    def call(self, inputs):
        if self.cancelout_loss:
            self.add_loss( self.lambda_1 * tf.norm(self.w, ord=1) + self.lambda_2 * tf.norm(self.w, ord=2))
        return tf.math.multiply(inputs, self.activation(self.w))

    def get_config(self):
        # Return constructor arguments only (the activation by name, not the
        # function object) so that CancelOut(**config) rebuilds the layer.
        config = super(CancelOut, self).get_config()
        config.update({
            "activation": self.activation_name,
            "cancelout_loss": self.cancelout_loss,
            "lambda_1": self.lambda_1,
            "lambda_2": self.lambda_2,
        })
        return config
    

    
def define_model(p, co_activation,co_loss,s,n_layer):
    """Build the CancelOut network used in the experiments.

    p             -- number of input features
    co_activation -- activation name forwarded to the CancelOut layer
    co_loss       -- whether the CancelOut layer adds its penalty to the loss
    s             -- units of every hidden Dense layer
    n_layer       -- number of hidden Dense + LeakyReLU(0.2) pairs

    Returns an uncompiled keras.Model with a single sigmoid output unit.
    """
    net_input = keras.Input(shape=(p,))

    # Feature gating first, then the stack of hidden layers.
    hidden = CancelOut(activation=co_activation, cancelout_loss=co_loss)(net_input)
    for _ in range(n_layer):
        hidden = layers.Dense(s)(hidden)
        hidden = layers.LeakyReLU(0.2)(hidden)

    net_output = layers.Dense(1, activation='sigmoid')(hidden)
    return keras.Model(inputs=net_input, outputs=net_output)

In [None]:
def set_data(path,data_name,k_feat):
    """Load ``<path><data_name><k_feat>feat.csv`` and split it into features and target.

    The file is read as comma-separated numbers; every column except the last
    goes into X, the last column is y.
    """
    csv_file = "".join([path, data_name, str(k_feat), 'feat.csv'])
    table = np.genfromtxt(csv_file, delimiter=',')
    return table[:, :-1], table[:, -1]

def train_val_test_split(X,y,tr_idx,te_idx):
    """Split one cross-validation fold into train, validation and test parts.

    The test part is ``te_idx``. The validation part has the size of the test
    part and is carved out of ``tr_idx``: the block right after the test fold,
    or, when the test fold ends on the last row of X, the first rows of the
    training indices. Everything else is training data.

    The offset arithmetic treats ``te_idx`` as a contiguous ascending run of
    row numbers and ``tr_idx`` as the remaining rows in ascending order.

    Returns X_tr, X_val, X_te, y_tr, y_val, y_te.
    """
    n_test = len(te_idx)
    X_te, y_te = X[te_idx], y[te_idx]

    # Row number of the first test sample. Since the rows before it are all
    # training rows, it is also the position in tr_idx of the first training
    # row that follows the test fold.
    st = te_idx[-1] + 1 - n_test

    if te_idx[-1] == len(X) - 1:
        # Last fold (detected from the data size, not a fixed 999): nothing
        # follows it, so validate on the first training rows.
        val_id = tr_idx[:n_test]
    else:
        # As many rows as the test fold (the old [st:end] slice was one short);
        # slicing simply truncates if fewer rows remain.
        val_id = tr_idx[st:st + n_test]

    X_val = X[val_id]
    y_val = y[val_id]

    # Training data is whatever is neither validation nor test.
    no_train = np.concatenate((val_id,te_idx))
    X_tr = np.delete(X, no_train, 0)
    y_tr = np.delete(y,no_train,0)

    return X_tr, X_val, X_te, y_tr, y_val, y_te

def cv_training_CancelOut(X,y,n,model_name,siz,lr,n_layer,epochs=None,data_dir=None):
    """Train and evaluate the CancelOut network with K-fold cross-validation.

    Parameters
    ----------
    X, y : arrays with the features and the binary target
    n : number of folds for KFold cross-val
    model_name : identifies the model in the saved files. It must end with
        "sigmoid" or "softmax"; the suffix selects the CancelOut activation
        (the CancelOut penalty is enabled for sigmoid only).
    siz : neurons of hidden layer(s)
    lr : learning rate
    n_layer : number of hidden layers
    epochs : training epochs; when None the notebook-level ``num_epochs``
        variable is used, as before.
    data_dir : directory under which the 'CancelOut' output folder is created;
        defaults to the path this notebook has always used.

    For each fold the first weight array of the model (the CancelOut weights)
    is appended as one CSV row to ``<model_name>_fold_<i>_importance.csv``.
    After the last fold the per-fold accuracy, Cohen's kappa, AUC and AUPRC
    lists are appended to their own files.

    Returns
    -------
    mean AUC, mean AUPRC, variance of AUC, variance of AUPRC over the folds.

    Raises
    ------
    ValueError
        If ``model_name`` does not end with "sigmoid" or "softmax".
    """
    from sklearn.model_selection import KFold
    from sklearn.metrics import cohen_kappa_score

    suffix = model_name[-7:]
    if suffix == 'sigmoid':
        co_activation, co_loss = 'sigmoid', True
    elif suffix == 'softmax':
        co_activation, co_loss = 'softmax', False
    else:
        raise ValueError("model_name must end with 'sigmoid' or 'softmax', got %r" % (model_name,))

    if epochs is None:
        # Backward compatibility: callers set this global in their own cell.
        epochs = num_epochs
    if data_dir is None:
        data_dir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features'

    outputDir = os.path.join(data_dir, 'CancelOut')
    # Create the folder up front so a bad path fails before any training.
    os.makedirs(outputDir, exist_ok=True)

    kf = KFold(n_splits=n)

    accuracy = []
    cohen = []
    aucc = []
    auprc = []

    for i, (tr_idx, te_idx) in enumerate(kf.split(X)):

        X_tr,X_val,X_te,y_tr,y_val,y_te = train_val_test_split(X,y,tr_idx,te_idx)

        model = define_model(X.shape[1],co_activation,co_loss,siz,n_layer)
        opt = keras.optimizers.Adam(learning_rate=lr)
        model.compile(loss='binary_crossentropy', optimizer=opt,metrics=['acc'])

        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.33,patience=3, min_lr=0.00001)
        model.fit(X_tr,y_tr, epochs=epochs,validation_data=(X_val,y_val), batch_size = 64,callbacks = [reduce_lr])

        # Flatten the (n_samples, 1) prediction column to a score vector.
        y_pred = model.predict(X_te).ravel()
        # Hard labels for the threshold-based metrics; 0.5 is the cut-off chosen here.
        y_hat = (y_pred >= 0.5).astype(y_te.dtype)

        accuracy.append(float(np.mean(y_hat == y_te)))
        cohen.append(cohen_kappa_score(y_te, y_hat))
        aucc.append(roc_auc_score(y_te,y_pred))
        auprc.append(average_precision_score(y_te, y_pred))
        print("lr",lr," - size:",siz)
        print("Auc:",aucc)
        print("Auprc:",auprc)

        # Append mode: repeated runs add one row each to the same file.
        with open(os.path.join(outputDir, model_name + '_fold_'+ str(i) +'_importance.csv'), "a+") as myfile:
            myfile.write(','.join([str(x) for x in model.get_weights()[0].flatten()]) + '\n')

    with open(os.path.join(outputDir, 'cv_cancelout_' + model_name + '_accuracy.csv'), "a+") as myfile:
        myfile.write(str(accuracy))
    with open(os.path.join(outputDir, 'cv_cancelout_' + model_name + '_cohen.csv'), "a+") as myfile:
        myfile.write(str(cohen))
    with open(os.path.join(outputDir, 'cv_cancelout_' + model_name + '_auc.csv'), "a+") as myfile:
        myfile.write(str(aucc))
    with open(os.path.join(outputDir, 'cv_cancelout_' + model_name + '_auprc.csv'), "a+") as myfile:
        myfile.write(str(auprc))

    return np.mean(aucc), np.mean(auprc) , np.var(aucc), np.var(auprc)


def get_features(model_names,n_split,k,base_dir=None):
    """Get the best k features of every fold from the model "model_names".

    Parameters
    ----------
    model_names : pair ``[folder, file_prefix]``; fold ``i`` is read from
        ``<base_dir>/<folder>/<file_prefix>_fold_<i>_importance.csv``.
    n_split : number of folds to read.
    k : number of top-ranked features to keep per fold.
    base_dir : root directory of the result folders; defaults to the path
        this notebook has always used.

    Only the last row of each importance file is used. The k highest values
    of that row are ranked from most to least important.

    Returns
    -------
    Float array of shape (n_split, k) holding the selected feature indices.
    """
    if base_dir is None:
        base_dir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features/'

    best_feat = np.zeros((n_split,k))

    for i in range(n_split):

        file = os.path.join(base_dir, model_names[0], model_names[1] + '_fold_'+ str(i) +'_importance.csv')
        # genfromtxt returns a 1-D array for a one-row file; force 2-D so that
        # [-1] is always the last *row* and not the last scalar of that row.
        # (A file with one column and several rows is not handled.)
        imp = np.atleast_2d(np.genfromtxt(file,delimiter=','))[-1]

        # Indices of the k largest importances, largest first.
        top_k = imp.argsort()[-k:][::-1]
        print(top_k)

        best_feat[i] = top_k

    return best_feat

def matches(best_feat,non_zero):
    """Collect the entries of ``best_feat`` that also occur in ``non_zero``.

    Every (selected, relevant) pair that compares equal contributes one entry,
    in the order of ``best_feat``; repeated values are therefore counted once
    per matching pair. Returns a list.
    """
    return [
        selected
        for selected in best_feat
        for relevant in non_zero
        if selected == relevant
    ]

In [None]:
# RING+XOR+SUM SOFTMAX
# Cross-validated training on every feature-count variant of the dataset.

dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features'

path = './data/'
data_name = 'ring-xor-sum_1000samples-'
h_size = 64
num_epochs = 1000  # read by cv_training_CancelOut as a global

# One entry per feature count, in loop order.
cv_auc, var_auc = [], []
cv_auprc, var_auprc = [], []

for i in [6, 8, 16, 32, 64, 128, 256, 512]:
    X, y = set_data(path, data_name, i)
    auccc, auprc, vauccc, vauprc = cv_training_CancelOut(
        X, y, 6, 'ring-xor-sum_' + str(i) + '_softmax', h_size, 0.005, 3
    )
    cv_auc.append(auccc)
    var_auc.append(vauccc)
    cv_auprc.append(auprc)
    var_auprc.append(vauprc)

In [None]:
splits = 6
K_feat = 6
# cv_training_CancelOut writes into a folder named 'CancelOut'; the former
# 'Cancelout' spelling only resolved on case-insensitive file systems.
name0 = 'CancelOut'
name1 = '_softmax'
dataset = 'ring-xor-sum_'
tot_feats = [6,8,16,32,64,128,256,512]

# Best K features: one (splits, K_feat) array of top-ranked indices per dataset
feat_res = [get_features([name0, dataset + str(i) + name1], splits, K_feat) for i in tot_feats]

# m[j, k]: how many features selected for dataset j, fold k lie in np.arange(K_feat)
m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res[j][k], np.arange(K_feat)))

# best K feat: mean and variance over the 6 folds
cv_feat = np.mean(m, axis=1)
var_feat = np.var(m, axis=1)

# Best 2K features. A dataset with fewer than 2*K_feat columns cannot yield 2K
# indices, so it keeps K_feat (the same datasets as the former `i == 6 or i == 8`).
feat_res_2k = [
    get_features([name0, dataset + str(i) + name1], splits,
                 int(2 * K_feat) if i >= 2 * K_feat else int(K_feat))
    for i in tot_feats
]

m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res_2k[j][k], np.arange(int(K_feat))))

# best 2K feat: mean and variance over the 6 folds
cv_feat_2k = np.mean(m, axis=1)
var_feat_2k = np.var(m, axis=1)

In [None]:
# Save results
# Each list holds one value per dataset, so after the transpose every row is a
# dataset and the columns follow the order of the lists below.
df = pd.DataFrame([
    cv_auc, var_auc,
    cv_auprc, var_auprc,
    cv_feat, var_feat,
    cv_feat_2k, var_feat_2k,
])
df1 = df.T
df1.to_excel("Auc-Auprc-KFeat-2KFeat_CanceloutSoftmax_ring-xor-sum.xlsx")

In [None]:
# RING+XOR+SUM SIGMOID
# Cross-validated training on every feature-count variant of the dataset.

dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features'

path = './data/'
data_name = 'ring-xor-sum_1000samples-'
h_size = 64
num_epochs = 1000  # read by cv_training_CancelOut as a global

# One entry per feature count, in loop order.
cv_auc, var_auc = [], []
cv_auprc, var_auprc = [], []

for i in [6, 8, 16, 32, 64, 128, 256, 512]:
    X, y = set_data(path, data_name, i)
    auccc, auprc, vauccc, vauprc = cv_training_CancelOut(
        X, y, 6, 'ring-xor-sum_' + str(i) + '_sigmoid', h_size, 0.005, 3
    )
    cv_auc.append(auccc)
    var_auc.append(vauccc)
    cv_auprc.append(auprc)
    var_auprc.append(vauprc)

In [None]:
splits = 6
K_feat = 6
# cv_training_CancelOut writes into a folder named 'CancelOut'; the former
# 'Cancelout' spelling only resolved on case-insensitive file systems.
name0 = 'CancelOut'
name1 = '_sigmoid'
dataset = 'ring-xor-sum_'
tot_feats = [6,8,16,32,64,128,256,512]

# Best K features: one (splits, K_feat) array of top-ranked indices per dataset
feat_res = [get_features([name0, dataset + str(i) + name1], splits, K_feat) for i in tot_feats]

# m[j, k]: how many features selected for dataset j, fold k lie in np.arange(K_feat)
m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res[j][k], np.arange(K_feat)))

# best K feat: mean and variance over the 6 folds
cv_feat = np.mean(m, axis=1)
var_feat = np.var(m, axis=1)

# Best 2K features. A dataset with fewer than 2*K_feat columns cannot yield 2K
# indices, so it keeps K_feat (the same datasets as the former `i == 6 or i == 8`).
# Iterates tot_feats rather than a second copy of the same list.
feat_res_2k = [
    get_features([name0, dataset + str(i) + name1], splits,
                 int(2 * K_feat) if i >= 2 * K_feat else int(K_feat))
    for i in tot_feats
]

m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res_2k[j][k], np.arange(int(K_feat))))

# best 2K feat: mean and variance over the 6 folds
cv_feat_2k = np.mean(m, axis=1)
var_feat_2k = np.var(m, axis=1)

In [None]:
# Save results
# Each list holds one value per dataset, so after the transpose every row is a
# dataset and the columns follow the order of the lists below.
df = pd.DataFrame([
    cv_auc, var_auc,
    cv_auprc, var_auprc,
    cv_feat, var_feat,
    cv_feat_2k, var_feat_2k,
])
df1 = df.T
df1.to_excel("Auc-Auprc-KFeat-2KFeat_CanceloutSigmoid_ring-xor-sum.xlsx")

In [None]:
# RING SOFTMAX
# Cross-validated training on every feature-count variant of the dataset.

dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features'

path = './data/'
data_name = 'ring_1000samples-'
h_size = 32
num_epochs = 700  # read by cv_training_CancelOut as a global

# One entry per feature count, in loop order.
cv_auc, var_auc = [], []
cv_auprc, var_auprc = [], []

for i in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
    X, y = set_data(path, data_name, i)
    auccc, auprc, vauccc, vauprc = cv_training_CancelOut(
        X, y, 6, 'ring_' + str(i) + '_softmax', h_size, 0.005, 1
    )
    cv_auc.append(auccc)
    var_auc.append(vauccc)
    cv_auprc.append(auprc)
    var_auprc.append(vauprc)

In [None]:
splits = 6
K_feat = 2
# cv_training_CancelOut writes into a folder named 'CancelOut'; the former
# 'Cancelout' spelling only resolved on case-insensitive file systems.
name0 = 'CancelOut'
name1 = '_softmax'
dataset = 'ring_'
tot_feats = [2,4,8,16,32,64,128,256,512]

# Best K features: one (splits, K_feat) array of top-ranked indices per dataset
feat_res = [get_features([name0, dataset + str(i) + name1], splits, K_feat) for i in tot_feats]

# m[j, k]: how many features selected for dataset j, fold k lie in np.arange(K_feat)
m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res[j][k], np.arange(K_feat)))

# best K feat: mean and variance over the 6 folds
cv_feat = np.mean(m, axis=1)
var_feat = np.var(m, axis=1)

# Best 2K features. A dataset with fewer than 2*K_feat columns cannot yield 2K
# indices, so it keeps K_feat (the same dataset as the former `i == 2`).
feat_res_2k = [
    get_features([name0, dataset + str(i) + name1], splits,
                 int(2 * K_feat) if i >= 2 * K_feat else int(K_feat))
    for i in tot_feats
]

m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res_2k[j][k], np.arange(int(K_feat))))

# best 2K feat: mean and variance over the 6 folds
cv_feat_2k = np.mean(m, axis=1)
var_feat_2k = np.var(m, axis=1)

In [None]:
# Save results
# Each list holds one value per dataset, so after the transpose every row is a
# dataset and the columns follow the order of the lists below.
df = pd.DataFrame([
    cv_auc, var_auc,
    cv_auprc, var_auprc,
    cv_feat, var_feat,
    cv_feat_2k, var_feat_2k,
])
df1 = df.T
df1.to_excel("Auc-Auprc-KFeat-2KFeat_CanceloutSoftmax_ring.xlsx")

In [None]:
# RING SIGMOID
# Cross-validated training on every feature-count variant of the dataset.

dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features'

path = './data/'
data_name = 'ring_1000samples-'
h_size = 32
num_epochs = 700  # read by cv_training_CancelOut as a global

# One entry per feature count, in loop order.
cv_auc, var_auc = [], []
cv_auprc, var_auprc = [], []

for i in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
    X, y = set_data(path, data_name, i)
    auccc, auprc, vauccc, vauprc = cv_training_CancelOut(
        X, y, 6, 'ring_' + str(i) + '_sigmoid', h_size, 0.005, 1
    )
    cv_auc.append(auccc)
    var_auc.append(vauccc)
    cv_auprc.append(auprc)
    var_auprc.append(vauprc)

In [None]:
splits = 6
K_feat = 2
# cv_training_CancelOut writes into a folder named 'CancelOut'; the former
# 'Cancelout' spelling only resolved on case-insensitive file systems.
name0 = 'CancelOut'
name1 = '_sigmoid'
dataset = 'ring_'
tot_feats = [2,4,8,16,32,64,128,256,512]

# Best K features: one (splits, K_feat) array of top-ranked indices per dataset
feat_res = [get_features([name0, dataset + str(i) + name1], splits, K_feat) for i in tot_feats]

# m[j, k]: how many features selected for dataset j, fold k lie in np.arange(K_feat)
m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res[j][k], np.arange(K_feat)))

# best K feat: mean and variance over the 6 folds
cv_feat = np.mean(m, axis=1)
var_feat = np.var(m, axis=1)

# Best 2K features. A dataset with fewer than 2*K_feat columns cannot yield 2K
# indices, so it keeps K_feat (the same dataset as the former `i == 2`).
feat_res_2k = [
    get_features([name0, dataset + str(i) + name1], splits,
                 int(2 * K_feat) if i >= 2 * K_feat else int(K_feat))
    for i in tot_feats
]

m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res_2k[j][k], np.arange(int(K_feat))))

# best 2K feat: mean and variance over the 6 folds
cv_feat_2k = np.mean(m, axis=1)
var_feat_2k = np.var(m, axis=1)

In [None]:
# Save results
# Each list holds one value per dataset, so after the transpose every row is a
# dataset and the columns follow the order of the lists below.
df = pd.DataFrame([
    cv_auc, var_auc,
    cv_auprc, var_auprc,
    cv_feat, var_feat,
    cv_feat_2k, var_feat_2k,
])
df1 = df.T
df1.to_excel("Auc-Auprc-KFeat-2KFeat_CanceloutSigmoid_ring.xlsx")

In [None]:
# XOR SOFTMAX
# Cross-validated training on every feature-count variant of the dataset.

dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features'

path = './data/'
data_name = 'xor_1000samples-'
h_size = 16
num_epochs = 1000  # read by cv_training_CancelOut as a global

# One entry per feature count, in loop order.
cv_auc, var_auc = [], []
cv_auprc, var_auprc = [], []

for i in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
    X, y = set_data(path, data_name, i)
    auccc, auprc, vauccc, vauprc = cv_training_CancelOut(
        X, y, 6, 'xor_' + str(i) + '_softmax', h_size, 0.005, 1
    )
    cv_auc.append(auccc)
    var_auc.append(vauccc)
    cv_auprc.append(auprc)
    var_auprc.append(vauprc)


In [None]:
splits = 6
K_feat = 2
# cv_training_CancelOut writes into a folder named 'CancelOut'; the former
# 'Cancelout' spelling only resolved on case-insensitive file systems.
name0 = 'CancelOut'
name1 = '_softmax'
dataset = 'xor_'
tot_feats = [2,4,8,16,32,64,128,256,512]

# Best K features: one (splits, K_feat) array of top-ranked indices per dataset
feat_res = [get_features([name0, dataset + str(i) + name1], splits, K_feat) for i in tot_feats]

# m[j, k]: how many features selected for dataset j, fold k lie in np.arange(K_feat)
m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res[j][k], np.arange(K_feat)))

# best K feat: mean and variance over the 6 folds
cv_feat = np.mean(m, axis=1)
var_feat = np.var(m, axis=1)

# Best 2K features. A dataset with fewer than 2*K_feat columns cannot yield 2K
# indices, so it keeps K_feat (the same dataset as the former `i == 2`).
feat_res_2k = [
    get_features([name0, dataset + str(i) + name1], splits,
                 int(2 * K_feat) if i >= 2 * K_feat else int(K_feat))
    for i in tot_feats
]

m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res_2k[j][k], np.arange(int(K_feat))))

# best 2K feat: mean and variance over the 6 folds
cv_feat_2k = np.mean(m, axis=1)
var_feat_2k = np.var(m, axis=1)

In [None]:
# Save results
# Each list holds one value per dataset, so after the transpose every row is a
# dataset and the columns follow the order of the lists below.
df = pd.DataFrame([
    cv_auc, var_auc,
    cv_auprc, var_auprc,
    cv_feat, var_feat,
    cv_feat_2k, var_feat_2k,
])
df1 = df.T
df1.to_excel("Auc-Auprc-KFeat-2KFeat_CanceloutSoftmax_XOR.xlsx")

In [None]:
# XOR SIGMOID
# Cross-validated training on every feature-count variant of the dataset.

dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features'

path = './data/'
data_name = 'xor_1000samples-'
h_size = 16
num_epochs = 1000  # read by cv_training_CancelOut as a global

# One entry per feature count, in loop order.
cv_auc, var_auc = [], []
cv_auprc, var_auprc = [], []

for i in [2, 4, 8, 16, 32, 64, 128, 256, 512]:
    X, y = set_data(path, data_name, i)
    auccc, auprc, vauccc, vauprc = cv_training_CancelOut(
        X, y, 6, 'xor_' + str(i) + '_sigmoid', h_size, 0.005, 1
    )
    cv_auc.append(auccc)
    var_auc.append(vauccc)
    cv_auprc.append(auprc)
    var_auprc.append(vauprc)


In [None]:
splits = 6
K_feat = 2
# cv_training_CancelOut writes into a folder named 'CancelOut'; the former
# 'Cancelout' spelling only resolved on case-insensitive file systems.
name0 = 'CancelOut'
name1 = '_sigmoid'
dataset = 'xor_'
tot_feats = [2,4,8,16,32,64,128,256,512]

# Best K features: one (splits, K_feat) array of top-ranked indices per dataset
feat_res = [get_features([name0, dataset + str(i) + name1], splits, K_feat) for i in tot_feats]

# m[j, k]: how many features selected for dataset j, fold k lie in np.arange(K_feat)
m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res[j][k], np.arange(K_feat)))

# best K feat: mean and variance over the 6 folds
cv_feat = np.mean(m, axis=1)
var_feat = np.var(m, axis=1)

# Best 2K features. A dataset with fewer than 2*K_feat columns cannot yield 2K
# indices, so it keeps K_feat (the same dataset as the former `i == 2`).
feat_res_2k = [
    get_features([name0, dataset + str(i) + name1], splits,
                 int(2 * K_feat) if i >= 2 * K_feat else int(K_feat))
    for i in tot_feats
]

m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res_2k[j][k], np.arange(int(K_feat))))

# best 2K feat: mean and variance over the 6 folds
cv_feat_2k = np.mean(m, axis=1)
var_feat_2k = np.var(m, axis=1)

In [None]:
# Save results
# Each list holds one value per dataset, so after the transpose every row is a
# dataset and the columns follow the order of the lists below.
df = pd.DataFrame([
    cv_auc, var_auc,
    cv_auprc, var_auprc,
    cv_feat, var_feat,
    cv_feat_2k, var_feat_2k,
])
df1 = df.T
df1.to_excel("Auc-Auprc-KFeat-2KFeat_CanceloutSigmoid_XOR.xlsx")

In [None]:
# RING XOR SOFTMAX
# Cross-validated training on every feature-count variant of the dataset.

dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-64 features'

path = './data/'
data_name = 'ring+xor_1000samples-'
h_size = 64
num_epochs = 2000  # read by cv_training_CancelOut as a global

# One entry per feature count, in loop order.
cv_auc, var_auc = [], []
cv_auprc, var_auprc = [], []

for i in [4, 8, 16, 32, 64, 128, 256, 512]:
    X, y = set_data(path, data_name, i)
    auccc, auprc, vauccc, vauprc = cv_training_CancelOut(
        X, y, 6, 'ring-xor_' + str(i) + '_softmax', h_size, 0.005, 3
    )
    cv_auc.append(auccc)
    var_auc.append(vauccc)
    cv_auprc.append(auprc)
    var_auprc.append(vauprc)


In [None]:
splits = 6
K_feat = 4
# cv_training_CancelOut writes into a folder named 'CancelOut'; the former
# 'Cancelout' spelling only resolved on case-insensitive file systems.
name0 = 'CancelOut'
name1 = '_softmax'
dataset = 'ring-xor_'
tot_feats = [4,8,16,32,64,128,256,512]

# Best K features: one (splits, K_feat) array of top-ranked indices per dataset
feat_res = [get_features([name0, dataset + str(i) + name1], splits, K_feat) for i in tot_feats]

# m[j, k]: how many features selected for dataset j, fold k lie in np.arange(K_feat)
m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res[j][k], np.arange(K_feat)))

# best K feat: mean and variance over the 6 folds
cv_feat = np.mean(m, axis=1)
var_feat = np.var(m, axis=1)

# Best 2K features. A dataset with fewer than 2*K_feat columns cannot yield 2K
# indices, so it keeps K_feat (the same dataset as the former `i == 4`).
feat_res_2k = [
    get_features([name0, dataset + str(i) + name1], splits,
                 int(2 * K_feat) if i >= 2 * K_feat else int(K_feat))
    for i in tot_feats
]

m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res_2k[j][k], np.arange(int(K_feat))))

# best 2K feat: mean and variance over the 6 folds
cv_feat_2k = np.mean(m, axis=1)
var_feat_2k = np.var(m, axis=1)

In [None]:
# Save results
# Each list holds one value per dataset, so after the transpose every row is a
# dataset and the columns follow the order of the lists below.
df = pd.DataFrame([
    cv_auc, var_auc,
    cv_auprc, var_auprc,
    cv_feat, var_feat,
    cv_feat_2k, var_feat_2k,
])
df1 = df.T
df1.to_excel("Auc-Auprc-KFeat-2KFeat_CanceloutSoftmax_RING-XOR.xlsx")

In [None]:
# RING XOR SIGMOID
# Cross-validated training on every feature-count variant of the dataset.

dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-64 features'

path = './data/'
data_name = 'ring+xor_1000samples-'
h_size = 64
num_epochs = 2000  # read by cv_training_CancelOut as a global

# One entry per feature count, in loop order.
cv_auc, var_auc = [], []
cv_auprc, var_auprc = [], []

for i in [4, 8, 16, 32, 64, 128, 256, 512]:
    X, y = set_data(path, data_name, i)
    auccc, auprc, vauccc, vauprc = cv_training_CancelOut(
        X, y, 6, 'ring-xor_' + str(i) + '_sigmoid', h_size, 0.005, 3
    )
    cv_auc.append(auccc)
    var_auc.append(vauccc)
    cv_auprc.append(auprc)
    var_auprc.append(vauprc)


In [None]:
splits = 6
K_feat = 4
# cv_training_CancelOut writes into a folder named 'CancelOut'; the former
# 'Cancelout' spelling only resolved on case-insensitive file systems.
name0 = 'CancelOut'
name1 = '_sigmoid'
dataset = 'ring-xor_'
tot_feats = [4,8,16,32,64,128,256,512]

# Best K features: one (splits, K_feat) array of top-ranked indices per dataset
feat_res = [get_features([name0, dataset + str(i) + name1], splits, K_feat) for i in tot_feats]

# m[j, k]: how many features selected for dataset j, fold k lie in np.arange(K_feat)
m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res[j][k], np.arange(K_feat)))

# best K feat: mean and variance over the 6 folds
cv_feat = np.mean(m, axis=1)
var_feat = np.var(m, axis=1)

# Best 2K features. A dataset with fewer than 2*K_feat columns cannot yield 2K
# indices, so it keeps K_feat (the same dataset as the former `i == 4`).
feat_res_2k = [
    get_features([name0, dataset + str(i) + name1], splits,
                 int(2 * K_feat) if i >= 2 * K_feat else int(K_feat))
    for i in tot_feats
]

m = np.zeros((len(tot_feats), splits))  # rows = datasets in tot_feats order, columns = folds
for j in range(len(tot_feats)):
    for k in range(splits):
        m[j, k] = len(matches(feat_res_2k[j][k], np.arange(int(K_feat))))

# best 2K feat: mean and variance over the 6 folds
cv_feat_2k = np.mean(m, axis=1)
var_feat_2k = np.var(m, axis=1)

In [None]:
# Save results
# Each list holds one value per dataset, so after the transpose every row is a
# dataset and the columns follow the order of the lists below.
df = pd.DataFrame([
    cv_auc, var_auc,
    cv_auprc, var_auprc,
    cv_feat, var_feat,
    cv_feat_2k, var_feat_2k,
])
df1 = df.T
df1.to_excel("Auc-Auprc-KFeat-2KFeat_CanceloutSigmoid_RING-XOR.xlsx")