In [None]:
import numpy as np
import pandas as pd
import os
import keras
from keras.layers import Input,Dense,LeakyReLU
from keras import regularizers
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score,cohen_kappa_score,roc_auc_score,average_precision_score
from keras.callbacks import ReduceLROnPlateau

In [None]:
def set_data(path,data_name,k_feat):
    """Load a comma-separated dataset and split it into features and target.

    The file name is built by plain string concatenation:
    ``path + data_name + str(k_feat) + 'feat.csv'`` (no separator is inserted,
    so ``path`` must already end with one if it is a directory).

    Parameters
    ----------
    path : str
        Leading part of the file location.
    data_name : str
        Dataset name prefix.
    k_feat : int
        Value embedded in the file name right before ``'feat.csv'``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` holds every column but the last and ``y`` is
        the last column of the loaded table.
    """
    csv_file = ''.join([path, data_name, str(k_feat), 'feat.csv'])
    table = np.genfromtxt(csv_file, delimiter=',')

    # Last column is returned as the target, everything before it as features.
    features, target = table[:, :-1], table[:, -1]
    return features, target


# architecture for RING and XOR 

def layer1h_model(h_size,p):
    """Build a feed-forward binary classifier with one hidden layer.

    Parameters
    ----------
    h_size : int
        Number of units in the hidden ``Dense`` layer.
    p : int
        Number of input features (the input shape is ``(p,)``).

    Returns
    -------
    keras.Model
        Uncompiled model: Dense(h_size, L2 kernel penalty 1e-5) ->
        LeakyReLU(0.2) -> Dense(1, sigmoid).
    """
    net_in = Input(shape=(p,))

    # Layers are listed in application order and then chained onto the input.
    stack = [
        Dense(h_size, kernel_regularizer=regularizers.l2(1e-5)),
        LeakyReLU(0.2),
        Dense(1, activation='sigmoid'),
    ]

    tensor = net_in
    for layer in stack:
        tensor = layer(tensor)

    return keras.Model(inputs=net_in, outputs=tensor)


# architecture for RING+XOR and RING+XOR+SUM 

def layer3h_model(h_size,p):
    """Build a feed-forward binary classifier with three hidden layers.

    Each hidden block is a ``Dense(h_size)`` layer with an L2 kernel penalty
    of 1e-5 followed by ``LeakyReLU(0.2)``. The output is a single sigmoid
    unit.

    Parameters
    ----------
    h_size : int
        Number of units in every hidden ``Dense`` layer.
    p : int
        Number of input features (the input shape is ``(p,)``).

    Returns
    -------
    keras.Model
        Uncompiled model mapping the ``(p,)`` input to one sigmoid output.
    """
    inputs = Input(shape=(p,))

    # The first Dense layer must be applied to `inputs`; previously the layer
    # object itself was passed on, so the graph was never connected.
    x = inputs
    for _ in range(3):
        x = Dense(h_size,kernel_regularizer=regularizers.l2(1e-5))(x)
        x = LeakyReLU(0.2)(x)

    outputs = Dense(1,activation='sigmoid')(x)

    return keras.Model(inputs=inputs,outputs=outputs)

def train_val_test_split(X,y,tr_idx,te_idx):
    """Split one cross-validation fold into train, validation and test parts.

    The test part is ``te_idx``. The validation part is carved out of
    ``tr_idx``, and the training part is every row of ``X``/``y`` that is in
    neither the validation nor the test part.

    Parameters
    ----------
    X, y : numpy.ndarray
        Feature matrix and target vector, indexed along axis 0.
    tr_idx, te_idx : numpy.ndarray of int
        Train and test indices for the fold.
        Assumes ``te_idx`` is a contiguous ascending block (e.g. from an
        unshuffled ``KFold``) -- TODO confirm against the caller.

    Returns
    -------
    tuple
        ``(X_tr, X_val, X_te, y_tr, y_val, y_te)``.
    """
    X_te = X[te_idx]
    y_te = y[te_idx]

    n_test = len(te_idx)
    last_test = te_idx[-1]

    if last_test == len(X) - 1:
        # The test block ends at the last sample, so nothing follows it:
        # take the validation samples from the start of the training indices.
        # (Was hard-coded to index 999, which only worked for 1000 samples.)
        val_id = tr_idx[:n_test]
    else:
        st = last_test + 1 - n_test
        end = last_test
        # Positions st.. of tr_idx are the training samples that come right
        # after a contiguous test block.
        # NOTE(review): this slice yields n_test - 1 samples, one fewer than
        # the last-fold branch above -- confirm whether that is intended.
        val_id = tr_idx[st:end]

    X_val = X[val_id]
    y_val = y[val_id]

    # Everything that is neither validation nor test is used for training.
    no_train = np.concatenate((val_id,te_idx))
    X_tr = np.delete(X, no_train, 0)
    y_tr = np.delete(y, no_train, 0)

    return X_tr, X_val, X_te, y_tr, y_val, y_te


def cv_NN(X,y,n,h_size,num_epochs,dataDir,modeln,layers,lr):
    """Cross-validate a feed-forward network and save per-fold results.

    For every fold produced by ``KFold(n_splits=n)`` a fresh model is built,
    compiled with Adam and binary cross-entropy, and trained with a
    ``ReduceLROnPlateau`` callback monitoring ``val_loss``. Each fold is then
    scored on its test part with ROC AUC and average precision.

    Files written under ``os.path.join(dataDir, 'NN')``:

    * ``<model_name>_fold_<k>_ypredproba.csv`` -- predicted probabilities of
      fold ``k`` (one appended line per run);
    * ``cv_NN_<model_name>_auc.csv`` / ``cv_NN_<model_name>_auprc.csv`` --
      the list of per-fold scores (one appended line per run).

    ``<model_name>`` is ``modeln + '_feat' + str(X.shape[1])``.

    Parameters
    ----------
    X, y : numpy.ndarray
        Feature matrix and binary target vector.
    n : int
        Number of folds for KFold cross-validation.
    h_size : int
        Number of neurons per hidden layer.
    num_epochs : int
        Number of training epochs per fold.
    dataDir : str
        Directory under which the ``NN`` output folder is created.
    modeln : str
        Name used for the files holding the results.
    layers : int
        Number of hidden layers; 1 and 3 are supported.
    lr : float
        Initial learning rate for Adam.

    Returns
    -------
    tuple
        ``(mean AUC, mean AUPRC, variance of AUC, variance of AUPRC)`` over
        the folds.

    Raises
    ------
    ValueError
        If ``layers`` is neither 1 nor 3.
    """
    # Validate the architecture up front instead of silently leaving the loop,
    # which previously led to an unrelated failure further down.
    if layers == 1:
        build_model = layer1h_model
    elif layers == 3:
        # Previously the one-hidden-layer builder was used here as well.
        build_model = layer3h_model
    else:
        raise ValueError("layers must be 1 or 3, got %r" % (layers,))

    model_name = modeln+'_feat' + str(X.shape[1])
    outputDir = os.path.join( dataDir, 'NN')
    os.makedirs(outputDir, exist_ok=True)

    aucc = []
    auprc = []

    kf = KFold(n_splits=n)

    for fold, (tr_idx, te_idx) in enumerate(kf.split(X)):

        X_tr,X_val,X_te,y_tr,y_val,y_te = train_val_test_split(X,y,tr_idx,te_idx)

        model = build_model(h_size,X.shape[1])
        opt = keras.optimizers.Adam(learning_rate=lr)

        model.compile(loss='binary_crossentropy', optimizer=opt,metrics=['acc'])
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.33,patience=3, min_lr=0.00001)
        model.fit(X_tr,y_tr, epochs=num_epochs,validation_data=(X_val,y_val), batch_size = 64,callbacks = [reduce_lr])

        y_pred = model.predict(X_te)
        aucc.append(roc_auc_score(y_te,y_pred))
        auprc.append(average_precision_score(y_te, y_pred))

        # Save this fold's predictions now, named by the fold index. This used
        # to run once after the loop, keeping only the last fold and relying
        # on a notebook-global `i` for the file name.
        with open(os.path.join(outputDir, model_name + '_fold_'+ str(fold) +'_ypredproba.csv'), "a+") as myfile:
            myfile.write(','.join(str(v) for v in y_pred.flatten()) + '\n')

    # Files are opened in append mode; the trailing newline keeps the results
    # of repeated runs on separate lines.
    with open(os.path.join(outputDir, 'cv_NN_' + model_name + '_auc.csv'), "a+") as myfile:
        myfile.write(str(aucc) + '\n')
    with open(os.path.join(outputDir, 'cv_NN_' + model_name + '_auprc.csv'), "a+") as myfile:
        myfile.write(str(auprc) + '\n')

    return np.mean(aucc), np.mean(auprc) ,np.var(aucc), np.var(auprc)

In [None]:
# Directory passed to cv_NN, which writes its results into an 'NN' subfolder.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
dataDir = '/Users/utente/Documents/università/tesi - confronto FS/analisi 2-512 features'

# set_data loads the file path + data_name + '<number of features>feat.csv'.
path='./data/'
data_name='ring-xor-sum_1000samples-'
h_size = 64  # neurons per hidden layer
num_epochs = 1000  # training epochs per fold
lr = 0.005  # initial learning rate
n_layers = 3  # number of hidden layers requested from cv_NN
    

# Cross-validation summaries, one entry per dataset in the loop below:
# mean and variance of the per-fold AUC and AUPRC returned by cv_NN.
cv_auc = []
var_auc = []
cv_auprc = []
var_auprc = []

# `i` is the feature count used to select the dataset file.
for i in [6,8,16,32,64,128,256,512]:
    
    X,y = set_data(path,data_name,i)

    # 6-fold cross-validation on the dataset with i features.
    auccc,auprc,vauccc,vauprc = cv_NN(X,y,6,h_size,num_epochs,dataDir,'ring-xor-sum_'+str(i)+'_',n_layers,lr)
    var_auc.append(vauccc)
    cv_auc.append(auccc)
    var_auprc.append(vauprc)
    cv_auprc.append(auprc)

In [None]:
# Collect the cross-validation summaries: one row per metric here, transposed
# below so that each row corresponds to one dataset and the columns are, in
# order, mean AUC, AUC variance, mean AUPRC, AUPRC variance.
# The stray leading comma in the list (a syntax error) has been removed.
# NOTE(review): a first element may have been deleted from the list (e.g. the
# feature counts) -- confirm whether one should be restored.
df = pd.DataFrame([cv_auc,var_auc,cv_auprc,var_auprc])
df1 = df.T
df1.to_excel("Auc-Auprc_NN_RING-XOR-SUM.xlsx")
# Last expression of the cell so the notebook actually renders the table.
df1