In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.backend import epsilon
from sklearn.metrics import matthews_corrcoef
import sklearn as sk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from scipy.special import expit
np.random.seed(5)

# Initialization

In [2]:
def initialization(layers, weights, biases):
    """
    Random initialization of weights and biases.
    """
    for i in range(1,len(layers)):
        w = np.random.rand(layers[i], layers[i-1])
        weights.append(w)
        
        b = np.zeros((layers[i],1))
        biases.append(b)
        
    return weights,biases

# Neural Net Functions

In [3]:
"""
Activation Functions:
"""
def sigmoid(x):
    """
    Sigmoid(x) = 1 / (1 + e^-x)
    """
    #x.astype(np.float64)
    #return 1 / (1 + np.exp(-x))
    # I used expit from sklearn because exp of very small values in numpy is giving NaN. Where as expit sets it to 0.
    return expit(x)

def relu(x):
    """
    Relu(x) = max(0,x)
    """
    return np.vectorize(lambda X : max(0,X))(x)

"""
Derivative of Activation Functions:
"""
def d_sigmoid(a_l):
    """
    d_sigmoid(x) = sigmoid(x) * (1- sigmoid(x))
    """
    return sigmoid(a_l)* sigmoid(1-a_l)

def d_relu(a_l):
    """
    d_relu = 0 if x<0 else 1
    """
    return np.vectorize(lambda x : 0 if x < 0 else 1)(a_l)


def binary_cross_entropy_loss(Y, A_L):
    """
    Binary Cross Entropy Loss:
    binary_cross_entropy_loss(Y_true, Y_pred) = (-1/total_examples)*((Y_true*log(Y_true)) + ((1-Y_true)*(1-log(Y_pred))))

    """
    m = Y.shape[1]
    cost = (-1/m) * (np.dot(Y, np.log(A_L +epsilon() ).T) + np.dot((1-Y), np.log(1-A_L + epsilon()).T))
    cost = np.squeeze(cost)
    return cost 

def d_loss(Y,A_L):
    """
    Derivative of Loss Function
    """
    return - (np.divide(Y, A_L + epsilon()) - np.divide(1 - Y, 1 - A_L +epsilon()))

def forwardProp(a_prev, w_l,b_l, func='relu'):
    """
    Forward Propagation through ONE layer.
    """
    z_l = np.dot(w_l,a_prev) + b_l
    assert(z_l.shape == (w_l.shape[0], a_prev.shape[1]))
    if func=='sigmoid':
        a_l = sigmoid(z_l)
    else:
        a_l = relu(z_l)
    assert (a_l.shape == (w_l.shape[0], a_prev.shape[1]))
    return (a_l,z_l)

def backProp(da_l, z_l, a_prev, w_l, total_exp, func='relu', regularize=False):
    """
    Back Propagation through ONE layer.
    """
    lambd = 0.1
    if func=='relu':
        dz_l = da_l * d_relu(z_l)
    else:
        dz_l = da_l * d_sigmoid(z_l)
        
    dw_l = np.dot(dz_l,a_prev.transpose()) / total_exp 
    if regularize:
        dw_l += ((lambd/total_exp)*w_l)
        
    db_l = np.sum(dz_l,axis = 1, keepdims = True) / total_exp
    da_prev = np.dot(w_l.transpose(), dz_l)
    return (da_prev, dw_l, db_l)

def forward_NN(layers, Zes, activations, weights, biases):
    """
    Forward Propagation through L number of layers.
    """
    for l in range(1,layers-1):#(1,2,----,l-1)
        A_next, Z_next = forwardProp(activations[l-1], weights[l], biases[l])
        activations.append(A_next)
        Zes.append(Z_next)
            
    # for last layer
    A_next, Z_next = forwardProp(activations[l], weights[l+1], biases[l+1], func='sigmoid')
    activations.append(A_next)
    Zes.append(Z_next)
    
    return (activations,Zes)

def backProp_NN(layers, Y, activations, Zes, weights, biases, regularize):
    """
    Back. Propagation through L number of layers.
    """
    total_exp     = Y.shape[1]
    d_weights     = []
    d_biases      = []
    d_activations = []
    
    # for last layer
    dA_L = d_loss(Y,activations[-1])
    d_activations.append(dA_L)
    dA_prev, dW_l, db_l = backProp(d_activations[0], Zes[layers-1], activations[-1], weights[layers-1],total_exp, func='sigmoid')
    d_activations.insert(0,dA_prev)
    d_weights.insert(0,dW_l)
    d_biases.insert(0,db_l)
    
    # Back Propogation
    for l in range(layers-2, 0,-1):#(l-1,l-2,---,1)
        dA_prev, dW_l, db_l = backProp(d_activations[0], Zes[l], activations[l-1], weights[l],total_exp, regularize=regularize)
        d_activations.insert(0,dA_prev)
        d_weights.insert(0,dW_l)
        d_biases.insert(0,db_l)
        
    return (d_weights, d_biases)


def training(epochs, A_0,Y, weights,biases, layers,l_r = 0.01, optimizer='gd', regularize=False):
    """
    Training of Neural Network.
    """
    adam = AdamOptim(layers)
    layers    = len(layers)
    total_exp = A_0.shape[1]
    
    for i in range(epochs):
        Zes         = [[0]]
        activations = []
        
        activations.append(A_0)
        activations, Zes = forward_NN(layers, Zes, activations, weights, biases)
        
        if i%100 == 0:
        # Calculating the loss
            loss = binary_cross_entropy_loss(Y,activations[-1])
            if regularize:
                loss += l2_regulazization(layers, weights, biases,total_exp)
            
            print(f"Epoch {i} / {epochs} ------- loss : {loss}")

            y_pred = np.array(list(map(lambda x: 0 if float(x)<0.5 else 1, activations[-1][0]))).reshape(1,-1)
            print(f"Epoch {i} / {epochs} -------  MCC  :{matthews_corrcoef(Y[0], y_pred[0])}")

        d_weights, d_biases = backProp_NN(layers, Y, activations, Zes,weights, biases, regularize)
        
        # Updating the Weights & Biases
        for l in range(1, layers): #(1,2,---,l)
            if optimizer == 'gd':
                weights[l] = weights[l] - (l_r * d_weights[l-1])
                biases[l]  = biases[l]  - (l_r * d_biases[l-1])
            if optimizer == 'adam':
                weights[l],biases[l] =  adam.update(i+1, weights[l], biases[l], d_weights[l-1], d_biases[l-1],l)
              
    return (weights, biases)

def testing(layers,x_test, y_test, weights, biases):
    """
    Evaluation of Trained Neural Net.
    """
    Zes         = [[0]]
    activations = []
    A_0         = x_test.transpose()

    activations.append(A_0)
    activations, Zes = forward_NN(layers, Zes, activations, weights, biases)
    y_pred           = np.array(list(map(lambda x: 0 if float(x)<0.5 else 1, activations[-1][0]))).reshape(1,-1)
    mcc              = matthews_corrcoef(y_test[0], y_pred[0])
    
    return mcc

def make_prediction(layers,x_test, weights, biases):
    """
    Make prediction using trained Neural Net.
    """
    Zes          = [[0]]
    activations  = []
    A_0          = x_test.transpose()
    
    activations.append(A_0)
    activations, Zes = forward_NN(layers, Zes, activations, weights, biases)
    y_pred           = np.array(list(map(lambda x: -1 if float(x) < 0.5 else 1, activations[-1][0]))).reshape(1,-1)
    
    return y_pred


### Adam Optimiser

In [4]:
class AdamOptim():
    def __init__(self, layers,eta=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.eta = eta
        self.m_dw, self.v_dw = [[0]], [[0]]
        self.m_db, self.v_db = [[0]], [[0]]
        for i in range(1,len(layers)):
            w = np.zeros((layers[i], layers[i-1]))
            self.m_dw.append(w)
            self.v_dw.append(w)
            
            b = np.zeros((layers[i],1))
            self.m_db.append(b)
            self.v_db.append(b)
        
    def update(self,t, w, b, dw, db,layer):

        self.m_dw[layer] = np.multiply(self.beta1,self.m_dw[layer]) + np.multiply((1-self.beta1),dw)
        self.m_db[layer] = np.multiply(self.beta1,self.m_db[layer]) + np.multiply((1-self.beta1),db)

        self.v_dw[layer] = np.multiply(self.beta2,self.v_dw[layer]) + np.multiply((1-self.beta2),(dw**2))
        self.v_db[layer] = np.multiply(self.beta2,self.v_db[layer]) + np.multiply((1-self.beta2),(db**2))

        ## bias correction
        m_dw_corr = np.divide(self.m_dw[layer],(1-self.beta1**t))
        m_db_corr = self.m_db[layer]/(1-self.beta1**t)
        v_dw_corr = self.v_dw[layer]/(1-self.beta2**t)
        v_db_corr = np.divide(self.v_db[layer],(1-self.beta2**t))

        ## update weights and biases
        w = w - self.eta*(m_dw_corr/(np.sqrt(v_dw_corr)+self.epsilon))
        b = b - self.eta*(m_db_corr/(np.sqrt(v_db_corr)+self.epsilon))
        
        return w, b

In [5]:
def l2_regulazization(layers, weights, biases,m):
    lambd = 0.1
    w     = 0
    for l in range(1, layers):
        w += np.sum(np.square(weights[l]))
        
    L2_regularization_cost = lambd/(2*m)*w
    
    return L2_regularization_cost

In [6]:
def spliting(model_input, model_output):
    return train_test_split(model_input, model_output, test_size=0.2, random_state=5, stratify=model_output)

# Data Preparation

In [7]:
train_data  = pd.read_csv('train.dat', delimiter='\t', names=['class','peptides']) 
test_data   = pd.read_csv('test.dat',names=['peptides'])
total_data  = pd.concat([train_data, test_data])
total_data  = total_data.fillna(2)

print("Total data shape : ",total_data.shape)

Total data shape :  (1958, 2)


In [8]:
total_data.tail()

Unnamed: 0,class,peptides
387,2.0,GAYSVQSYEEAFDDGCVVKVAKKVATEATDTRGRDEIRTSCD
388,2.0,VISIPPVVEERPDGSKVEVAPEQNFVLINPEIIKASDQEDVGLEG
389,2.0,TGHALEFAGSAIRGLSMEGRMTLCNMAIEAGARVGMVAVDEK
390,2.0,AVEVKGPKGVLVTPTHTELNYAVEDG
391,2.0,NSIAKAIKARGEVIHCKLRAMKELSE


In [9]:
# Class count
count_class_0, count_class_nan, count_class_1 = total_data['class'].value_counts()
print(f"-1   : {count_class_0}")
print(f"Nan  : {count_class_nan}  [testing data]")
print(f"1    : {count_class_1}")

-1   : 1424
Nan  : 392  [testing data]
1    : 142


In [10]:
df_class_0   = total_data[total_data['class'] == -1]
df_class_1   = total_data[total_data['class'] == 1]
df_class_nan = total_data[(total_data['class'] == 2)]

In [11]:
total_data['class'].value_counts()

-1.0    1424
 2.0     392
 1.0     142
Name: class, dtype: int64

## Baseline data

In [12]:
over_sample = 142
base_sample = 1424
df_train_base = total_data.copy()
df_train_base['peptides'] = df_train_base['peptides'].apply(lambda x : str(x))


### Bag of words

In [13]:
cv      = CountVectorizer(lowercase=False, analyzer='char')
cv_base = cv.fit_transform(df_train_base['peptides'].values)
print(f"Shape : {cv_base.shape}")
print(f"Vocab : {cv.vocabulary_}")

model_input_base  = cv_base.toarray()[:over_sample+base_sample]
model_output_base = df_train_base[:over_sample+base_sample].drop('peptides', axis=1)
model_output_base = model_output_base.apply(lambda x: 0 if int(x)==-1 else 1, axis=1)

print("Input Shape : ",model_input_base.shape)
print("Output Shape : ",model_output_base.shape)

Shape : (1958, 23)
Vocab : {'D': 3, 'V': 18, 'E': 4, 'L': 10, 'I': 8, 'S': 16, 'P': 13, 'N': 12, 'A': 0, 'K': 9, 'F': 5, 'G': 6, 'T': 17, 'H': 7, 'W': 19, 'Y': 21, 'R': 15, 'C': 2, 'M': 11, 'Q': 14, 'X': 20, 'Z': 22, 'B': 1}
Input Shape :  (1566, 23)
Output Shape :  (1566,)


In [14]:
model_input_base[:1]

array([[1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 3, 0, 1, 2, 0, 0, 1, 0, 2, 0, 0, 0,
        0]])

In [15]:
model_output_base.value_counts()

0    1424
1     142
dtype: int64

In [16]:
x_train_base, x_test_base, y_train_base,y_test_base = spliting(model_input_base, model_output_base.values)
print("X_train_shape : ",x_train_base.shape)
print("X_test_shape  : ",x_test_base.shape)
print("Positive Samples : ",len(np.where(y_train_base==1)[0]))
print("Negative Samples : ",len(np.where(y_train_base==0)[0]))

X_train_shape :  (1252, 23)
X_test_shape  :  (314, 23)
Positive Samples :  114
Negative Samples :  1138


In [17]:
# Base Model without modification to dataset
weights = [[0]]
biases  = [[0]]
layers  = [23,8,4,1]

weights_base, biases_base = initialization(layers, weights, biases)

A_0 = x_train_base.transpose()
Y   = y_train_base.reshape(-1,1).transpose()

updated_weights_base, updated_biases_base = training(1000, A_0, Y,weights_base, biases_base, layers, l_r=0.01, optimizer='adm', regularize=True)

Epoch 0 / 5000 ------- loss : 14.65330601502172
Epoch 0 / 5000 -------  MCC  :0.0
Epoch 100 / 5000 ------- loss : 0.2526957461885339
Epoch 100 / 5000 -------  MCC  :0.0
Epoch 200 / 5000 ------- loss : 0.2138421133931617
Epoch 200 / 5000 -------  MCC  :0.0
Epoch 300 / 5000 ------- loss : 0.1785964587564496
Epoch 300 / 5000 -------  MCC  :0.0
Epoch 400 / 5000 ------- loss : 0.15758811600025335
Epoch 400 / 5000 -------  MCC  :0.40255789573834033
Epoch 500 / 5000 ------- loss : 0.12673046543461175
Epoch 500 / 5000 -------  MCC  :0.8361449787271433
Epoch 600 / 5000 ------- loss : 0.1037227940948052
Epoch 600 / 5000 -------  MCC  :0.8489470603532381
Epoch 700 / 5000 ------- loss : 0.09283048236619504
Epoch 700 / 5000 -------  MCC  :0.8597636112396213
Epoch 800 / 5000 ------- loss : 0.08767893837257382
Epoch 800 / 5000 -------  MCC  :0.8645622602994465
Epoch 900 / 5000 ------- loss : 0.08404913350187984
Epoch 900 / 5000 -------  MCC  :0.8699128101956085
Epoch 1000 / 5000 ------- loss : 0.0819

In [18]:
test_mcc_base = testing(len(layers),x_test_base, y_test_base.reshape(1,-1), updated_weights_base, updated_biases_base)
print("Test MCC : ", test_mcc_base)

Test MCC :  0.7255244755244755


In [19]:
over_sample = 142
base_sample = 1424
true_test   = cv_base[over_sample+base_sample:] #df_test_over[1924:].drop('class',axis=1)
print(true_test.shape)

true_pred_base = make_prediction(len(layers), true_test.toarray(), updated_weights_base, updated_biases_base)
df             = pd.Series(true_pred_base[0])

df.to_csv('results_base.dat', index=False, header=None)
print("results_base.dat Created.")

(392, 23)
results_base.dat Created.


### N-gram

In [21]:
cv_n      = CountVectorizer(lowercase=False, analyzer='char', ngram_range=(1,2))
cv_base_n = cv_n.fit_transform(df_train_base['peptides'].values)
print(f"Shape : {cv_base_n.shape}")
#print(f"Vocab : {cv_n.vocabulary_}")

model_input_base_n  = cv_base_n.toarray()[:over_sample+base_sample]
model_output_base_n = df_train_base[:over_sample+base_sample].drop('peptides', axis=1)
model_output_base_n = model_output_base_n.apply(lambda x: 0 if int(x)==-1 else 1, axis=1)

print("Input Shape  : ",model_input_base_n.shape)
print("Output Shape : ",model_output_base_n.shape)

Shape : (1958, 436)
Input Shape  :  (1566, 436)
Output Shape :  (1566,)


In [22]:
x_train_base_n, x_test_base_n, y_train_base_n,y_test_base_n = spliting(model_input_base_n, model_output_base_n.values)
print("X_train_shape : ",x_train_base_n.shape)
print("X_test_shape  : ",x_test_base_n.shape)
print("Positive Samples : ",len(np.where(y_train_base_n==1)[0]))
print("Negative Samples : ",len(np.where(y_train_base_n==0)[0]))

X_train_shape :  (1252, 436)
X_test_shape  :  (314, 436)
Positive Samples :  114
Negative Samples :  1138


In [23]:
weights = [[0]]
biases  = [[0]]
layers  = [436,8,4,1]

weights_base_n, biases_base_n = initialization(layers, weights, biases)

A_0 = x_train_base_n.transpose()
Y   = y_train_base_n.reshape(-1,1).transpose()

updated_weights_base_n, updated_biases_base_n = training(2000, A_0, Y,weights_base_n, biases_base_n, layers, l_r=0.01, optimizer='adam', regularize=True)

Epoch 0 / 2000 ------- loss : 14.744968951593787
Epoch 0 / 2000 -------  MCC  :0.0
Epoch 100 / 2000 ------- loss : 0.2361411346298445
Epoch 100 / 2000 -------  MCC  :0.0
Epoch 200 / 2000 ------- loss : 0.15095104219994138
Epoch 200 / 2000 -------  MCC  :0.12638010058771676
Epoch 300 / 2000 ------- loss : 0.0737426059668658
Epoch 300 / 2000 -------  MCC  :0.9609629762780189
Epoch 400 / 2000 ------- loss : 0.05661490632168562
Epoch 400 / 2000 -------  MCC  :0.9903493355532945
Epoch 500 / 2000 ------- loss : 0.05187052962426105
Epoch 500 / 2000 -------  MCC  :0.9951672263900252
Epoch 600 / 2000 ------- loss : 0.05175624076438463
Epoch 600 / 2000 -------  MCC  :1.0
Epoch 700 / 2000 ------- loss : 0.05431591080865131
Epoch 700 / 2000 -------  MCC  :1.0
Epoch 800 / 2000 ------- loss : 0.056964185227265776
Epoch 800 / 2000 -------  MCC  :1.0
Epoch 900 / 2000 ------- loss : 0.05876601750039093
Epoch 900 / 2000 -------  MCC  :1.0
Epoch 1000 / 2000 ------- loss : 0.059183997833854196
Epoch 1000 

In [24]:
test_mcc_base_n = testing(len(layers),x_test_base_n, y_test_base_n.reshape(1,-1), updated_weights_base_n, updated_biases_base_n)
print("Test MCC : ", test_mcc_base_n)

Test MCC :  0.8039460539460539


In [25]:
over_sample = 142
base_sample = 1424
true_test = cv_base_n[over_sample+base_sample:] #df_test_over[1924:].drop('class',axis=1)
print(true_test.shape)

true_pred_base_n = make_prediction(len(layers), true_test.toarray(), updated_weights_base_n, updated_biases_base_n)

(392, 436)


In [27]:
df = pd.Series(true_pred_base[0])
df.to_csv('results_base_n.dat', index=False, header=None)
print("results_base_n.dat Created.")

results_base_n.dat Created.


# Oversampling the minority class

In [28]:
over_sample = 1424
base_sample = 1424
#df_train_over = total_data.copy()
df_class_1_over = df_class_1.sample(over_sample, replace=True,random_state=1)
df_train_over = pd.concat([df_class_1_over, df_class_0, df_class_nan], axis=0)

print('Random over-sampling:')
print(df_train_over['class'].value_counts())

df_train_over['peptides'] = df_train_over['peptides'].apply(lambda x : str(x))

Random over-sampling:
 1.0    1424
-1.0    1424
 2.0     392
Name: class, dtype: int64


In [29]:
# Bag of words
cv      = CountVectorizer(lowercase=False, analyzer='char')
cv_over = cv.fit_transform(df_train_over['peptides'].values)
print(f"Shape : {cv_over.shape}")
print(f"Vocab : {cv.vocabulary_}")

model_input_over  = cv_over.toarray()[:over_sample+base_sample]
model_output_over = df_train_over[:over_sample+base_sample].drop('peptides', axis=1)
model_output_over = model_output_over.apply(lambda x: 0 if int(x)==-1 else 1, axis=1)

print("Input Shape  : ",model_input_over.shape)
print("Output Shape : ",model_output_over.shape)

model_output_over.value_counts()

Shape : (3240, 23)
Vocab : {'A': 0, 'K': 9, 'R': 15, 'H': 7, 'G': 6, 'Y': 21, 'F': 5, 'I': 8, 'W': 19, 'V': 18, 'Q': 14, 'L': 10, 'C': 2, 'P': 13, 'T': 17, 'N': 12, 'M': 11, 'E': 4, 'S': 16, 'D': 3, 'X': 20, 'Z': 22, 'B': 1}
Input Shape  :  (2848, 23)
Output Shape :  (2848,)


1    1424
0    1424
dtype: int64

In [30]:
x_train_over, x_test_over, y_train_over,y_test_over = spliting(model_input_over, model_output_over.values)
print("X_train_shape : ",x_train_over.shape)
print("X_test_shape  : ",x_test_over.shape)
print("Positive Samples : ",len(np.where(y_train_over==1)[0]))
print("Negative Samples : ",len(np.where(y_train_over==0)[0]))

X_train_shape :  (2278, 23)
X_test_shape  :  (570, 23)
Positive Samples :  1139
Negative Samples :  1139


In [31]:
weights = [[0]]
biases  = [[0]]
layers  = [23,8,4,1]

weights_over, biases_over = initialization(layers, weights, biases)

A_0 = x_train_over.transpose()
Y   = y_train_over.reshape(-1,1).transpose()

updated_weights_over, updated_biases_over = training(3000, A_0, Y,weights_over, biases_over, layers, l_r=0.01, optimizer='adam', regularize=True)

Epoch 0 / 3000 ------- loss : 8.06007856207193
Epoch 0 / 3000 -------  MCC  :0.0
Epoch 100 / 3000 ------- loss : 0.39924770722680025
Epoch 100 / 3000 -------  MCC  :0.727465370849027
Epoch 200 / 3000 ------- loss : 0.22500203435450547
Epoch 200 / 3000 -------  MCC  :0.8482602816780686
Epoch 300 / 3000 ------- loss : 0.18482372012524112
Epoch 300 / 3000 -------  MCC  :0.8490320527523753
Epoch 400 / 3000 ------- loss : 0.16242297849487297
Epoch 400 / 3000 -------  MCC  :0.8645320200944395
Epoch 500 / 3000 ------- loss : 0.15199367352038476
Epoch 500 / 3000 -------  MCC  :0.8724107048180683
Epoch 600 / 3000 ------- loss : 0.14226274405805697
Epoch 600 / 3000 -------  MCC  :0.9096290479826851
Epoch 700 / 3000 ------- loss : 0.14060549062118818
Epoch 700 / 3000 -------  MCC  :0.9105165444726989
Epoch 800 / 3000 ------- loss : 0.14306334903393597
Epoch 800 / 3000 -------  MCC  :0.915856742833843
Epoch 900 / 3000 ------- loss : 0.1483915133715225
Epoch 900 / 3000 -------  MCC  :0.921921332414

In [32]:
test_mcc_over = testing(len(layers),x_test_over, y_test_over.reshape(1,-1), updated_weights_over, updated_biases_over)
print("Test MCC : ", test_mcc_over)

Test MCC :  0.931684625732565


In [33]:
over_sample = 1424
base_sample = 1424
true_test = cv_over[over_sample+base_sample:] #df_test_over[1924:].drop('class',axis=1)
print(true_test.shape)
true_pred_over = make_prediction(len(layers), true_test.toarray(), updated_weights_over, updated_biases_over)

(392, 23)


In [35]:
df = pd.Series(true_pred_over[0])
df.to_csv('results_over.dat', index=False, header=None)
print('results_over.dat Created.')

results_over.dat Created.


### N-gram with Oversampling

In [36]:
cv_n      = CountVectorizer(lowercase=False, analyzer='char', ngram_range=(1,2))
cv_over_n = cv_n.fit_transform(df_train_over['peptides'].values)
print(f"Shape : {cv_over_n.shape}")
#print(f"Vocab : {cv_n.vocabulary_}")

model_input_over_n  = cv_over_n.toarray()[:over_sample+base_sample]
model_output_over_n = df_train_over[:over_sample+base_sample].drop('peptides', axis=1)
model_output_over_n = model_output_over_n.apply(lambda x: 0 if int(x)==-1 else 1, axis=1)

print("Input Shape  : ",model_input_over_n.shape)
print("Output Shape : ",model_output_over_n.shape)

x_train_over_n, x_test_over_n, y_train_over_n,y_test_over_n = spliting(model_input_over_n, model_output_over_n.values)
print("X_train_shape : ",x_train_over_n.shape)
print("X_test_shape  : ",x_test_over_n.shape)
print("Positive Samples : ",len(np.where(y_train_over_n==1)[0]))
print("Negative Samples : ",len(np.where(y_train_over_n==0)[0]))

weights = [[0]]
biases  = [[0]]
layers  = [436,8,4,1]


A_0 = x_train_over_n.transpose()
Y   = y_train_over_n.reshape(-1,1).transpose()

weights_over_n, biases_over_n = initialization(layers, weights, biases)

Shape : (3240, 436)
Input Shape  :  (2848, 436)
Output Shape :  (2848,)
X_train_shape :  (2278, 436)
X_test_shape  :  (570, 436)
Positive Samples :  1139
Negative Samples :  1139


In [37]:
updated_weights_over_n, updated_biases_over_n = training(2000, A_0, Y,weights_over_n, biases_over_n, layers, l_r=0.01, optimizer='adam', regularize=True)

Epoch 0 / 2000 ------- loss : 8.085107167532518
Epoch 0 / 2000 -------  MCC  :0.0
Epoch 100 / 2000 ------- loss : 0.5996664275352553
Epoch 100 / 2000 -------  MCC  :0.15284072595755002
Epoch 200 / 2000 ------- loss : 0.24442242478404214
Epoch 200 / 2000 -------  MCC  :0.8814758901283596
Epoch 300 / 2000 ------- loss : 0.09366487344113174
Epoch 300 / 2000 -------  MCC  :0.9592419874893996
Epoch 400 / 2000 ------- loss : 0.07820186775733187
Epoch 400 / 2000 -------  MCC  :0.9610552028619301
Epoch 500 / 2000 ------- loss : 0.07217985262246172
Epoch 500 / 2000 -------  MCC  :0.9670683586296174
Epoch 600 / 2000 ------- loss : 0.07103989032208799
Epoch 600 / 2000 -------  MCC  :0.9722889787716509
Epoch 700 / 2000 ------- loss : 0.0727405696194217
Epoch 700 / 2000 -------  MCC  :0.9722889787716509
Epoch 800 / 2000 ------- loss : 0.07480489788485994
Epoch 800 / 2000 -------  MCC  :0.9722889787716509
Epoch 900 / 2000 ------- loss : 0.07674135392582387
Epoch 900 / 2000 -------  MCC  :0.972288978

In [38]:
test_mcc_over_n = testing(len(layers),x_test_over_n, y_test_over_n.reshape(1,-1), updated_weights_over_n, updated_biases_over_n)
print("Test MCC : ", test_mcc_over_n)

Test MCC :  0.9334770004369832


In [39]:
true_test = cv_over_n[over_sample+base_sample:] 
print(true_test.shape)
true_pred_over_n = make_prediction(len(layers), true_test.toarray(), updated_weights_over_n, updated_biases_over_n)

(392, 436)


In [41]:
df = pd.Series(true_pred_over_n[0])
df.to_csv('results_over_n.dat', index=False, header=None)
print("results_over_n.dat Created")

results_over_n.dat Created


# Tensorflow Model

In [42]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Input
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras import backend as K
from sklearn.metrics import confusion_matrix, classification_report, matthews_corrcoef

In [43]:
# reference https://github.com/vlainic/matthews-correlation-coefficient/blob/master/binary_mcc_loss.py
def matthews_correlation_coefficient(y_true, y_pred):
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    tn = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1)))
    fp = K.sum(K.round(K.clip((1 - y_true) * y_pred, 0, 1)))
    fn = K.sum(K.round(K.clip(y_true * (1 - y_pred), 0, 1)))
    num = tp * tn - fp * fn
    den = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
    return num / K.sqrt(den + K.epsilon())


In [44]:
def ANN(in_shape,x_train, y_train, x_test,y_test, ep):
    model = Sequential(
        [Input(in_shape,),
        Dense(units=16, activation='relu', kernel_regularizer=l1_l2(l1=0.1, l2=0.01)),
        Dense(units=4, activation='relu'),
        Dense(units = 1, activation='sigmoid'),]
    )
    model.compile(optimizer='adam', loss=binary_crossentropy, metrics=[matthews_correlation_coefficient])
    
    model.fit(x_train, y_train, epochs=ep, verbose=2, )
    
    print(model.evaluate(x_test,y_test))
    pred = model.predict(x_test)
    
    ys = list(map(lambda x: 0 if float(x)<0.5 else 1, pred))
    print("Wrong Pred on X_test: ",len(np.where((ys == y_test)==False)[0]))
    
    return model

In [47]:
model = ANN(23,x_train_base, y_train_base, x_test_base,y_test_base,1000)

Epoch 1/1000


2024-01-31 22:27:22.580114: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-01-31 22:27:22.580142: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-01-31 22:27:22.580149: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-01-31 22:27:22.580189: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-31 22:27:22.580206: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2024-01-31 22:27:22.949352: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


40/40 - 1s - loss: 7.5438 - matthews_correlation_coefficient: 0.1147 - 850ms/epoch - 21ms/step
Epoch 2/1000
40/40 - 0s - loss: 5.8527 - matthews_correlation_coefficient: 0.1893 - 401ms/epoch - 10ms/step
Epoch 3/1000
40/40 - 0s - loss: 4.5691 - matthews_correlation_coefficient: 0.2405 - 314ms/epoch - 8ms/step
Epoch 4/1000
40/40 - 0s - loss: 3.4859 - matthews_correlation_coefficient: 0.1995 - 314ms/epoch - 8ms/step
Epoch 5/1000
40/40 - 0s - loss: 2.5538 - matthews_correlation_coefficient: 0.1338 - 337ms/epoch - 8ms/step
Epoch 6/1000
40/40 - 0s - loss: 1.7887 - matthews_correlation_coefficient: 0.0826 - 314ms/epoch - 8ms/step
Epoch 7/1000
40/40 - 0s - loss: 1.2009 - matthews_correlation_coefficient: 0.0549 - 315ms/epoch - 8ms/step
Epoch 8/1000
40/40 - 0s - loss: 0.7785 - matthews_correlation_coefficient: 0.0244 - 312ms/epoch - 8ms/step
Epoch 9/1000
40/40 - 0s - loss: 0.5030 - matthews_correlation_coefficient: 0.0000e+00 - 312ms/epoch - 8ms/step
Epoch 10/1000
40/40 - 0s - loss: 0.3570 - ma

Epoch 77/1000
40/40 - 0s - loss: 0.1683 - matthews_correlation_coefficient: 0.6942 - 323ms/epoch - 8ms/step
Epoch 78/1000
40/40 - 0s - loss: 0.1693 - matthews_correlation_coefficient: 0.7259 - 327ms/epoch - 8ms/step
Epoch 79/1000
40/40 - 0s - loss: 0.1674 - matthews_correlation_coefficient: 0.6366 - 322ms/epoch - 8ms/step
Epoch 80/1000
40/40 - 0s - loss: 0.1639 - matthews_correlation_coefficient: 0.6507 - 361ms/epoch - 9ms/step
Epoch 81/1000
40/40 - 0s - loss: 0.1649 - matthews_correlation_coefficient: 0.6673 - 322ms/epoch - 8ms/step
Epoch 82/1000
40/40 - 0s - loss: 0.1650 - matthews_correlation_coefficient: 0.7223 - 323ms/epoch - 8ms/step
Epoch 83/1000
40/40 - 0s - loss: 0.1722 - matthews_correlation_coefficient: 0.7051 - 344ms/epoch - 9ms/step
Epoch 84/1000
40/40 - 0s - loss: 0.1614 - matthews_correlation_coefficient: 0.7016 - 346ms/epoch - 9ms/step
Epoch 85/1000
40/40 - 0s - loss: 0.1611 - matthews_correlation_coefficient: 0.6373 - 333ms/epoch - 8ms/step
Epoch 86/1000
40/40 - 0s - l

Epoch 153/1000
40/40 - 0s - loss: 0.1399 - matthews_correlation_coefficient: 0.7629 - 322ms/epoch - 8ms/step
Epoch 154/1000
40/40 - 0s - loss: 0.1364 - matthews_correlation_coefficient: 0.7699 - 322ms/epoch - 8ms/step
Epoch 155/1000
40/40 - 0s - loss: 0.1350 - matthews_correlation_coefficient: 0.7819 - 327ms/epoch - 8ms/step
Epoch 156/1000
40/40 - 0s - loss: 0.1335 - matthews_correlation_coefficient: 0.6778 - 324ms/epoch - 8ms/step
Epoch 157/1000
40/40 - 0s - loss: 0.1336 - matthews_correlation_coefficient: 0.7229 - 324ms/epoch - 8ms/step
Epoch 158/1000
40/40 - 0s - loss: 0.1419 - matthews_correlation_coefficient: 0.7440 - 345ms/epoch - 9ms/step
Epoch 159/1000
40/40 - 0s - loss: 0.1350 - matthews_correlation_coefficient: 0.7144 - 330ms/epoch - 8ms/step
Epoch 160/1000
40/40 - 0s - loss: 0.1373 - matthews_correlation_coefficient: 0.7511 - 326ms/epoch - 8ms/step
Epoch 161/1000
40/40 - 0s - loss: 0.1333 - matthews_correlation_coefficient: 0.8005 - 325ms/epoch - 8ms/step
Epoch 162/1000
40/4

Epoch 229/1000
40/40 - 0s - loss: 0.1400 - matthews_correlation_coefficient: 0.7188 - 327ms/epoch - 8ms/step
Epoch 230/1000
40/40 - 0s - loss: 0.1345 - matthews_correlation_coefficient: 0.7341 - 356ms/epoch - 9ms/step
Epoch 231/1000
40/40 - 0s - loss: 0.1248 - matthews_correlation_coefficient: 0.8093 - 325ms/epoch - 8ms/step
Epoch 232/1000
40/40 - 0s - loss: 0.1271 - matthews_correlation_coefficient: 0.7997 - 331ms/epoch - 8ms/step
Epoch 233/1000
40/40 - 0s - loss: 0.1239 - matthews_correlation_coefficient: 0.7692 - 330ms/epoch - 8ms/step
Epoch 234/1000
40/40 - 0s - loss: 0.1283 - matthews_correlation_coefficient: 0.7745 - 320ms/epoch - 8ms/step
Epoch 235/1000
40/40 - 0s - loss: 0.1268 - matthews_correlation_coefficient: 0.7911 - 323ms/epoch - 8ms/step
Epoch 236/1000
40/40 - 0s - loss: 0.1253 - matthews_correlation_coefficient: 0.7521 - 323ms/epoch - 8ms/step
Epoch 237/1000
40/40 - 0s - loss: 0.1260 - matthews_correlation_coefficient: 0.6863 - 326ms/epoch - 8ms/step
Epoch 238/1000
40/4

Epoch 305/1000
40/40 - 0s - loss: 0.1202 - matthews_correlation_coefficient: 0.8187 - 326ms/epoch - 8ms/step
Epoch 306/1000
40/40 - 0s - loss: 0.1189 - matthews_correlation_coefficient: 0.7500 - 323ms/epoch - 8ms/step
Epoch 307/1000
40/40 - 0s - loss: 0.1201 - matthews_correlation_coefficient: 0.7462 - 324ms/epoch - 8ms/step
Epoch 308/1000
40/40 - 0s - loss: 0.1179 - matthews_correlation_coefficient: 0.7676 - 322ms/epoch - 8ms/step
Epoch 309/1000
40/40 - 0s - loss: 0.1195 - matthews_correlation_coefficient: 0.8020 - 325ms/epoch - 8ms/step
Epoch 310/1000
40/40 - 0s - loss: 0.1221 - matthews_correlation_coefficient: 0.6935 - 324ms/epoch - 8ms/step
Epoch 311/1000
40/40 - 0s - loss: 0.1241 - matthews_correlation_coefficient: 0.8155 - 324ms/epoch - 8ms/step
Epoch 312/1000
40/40 - 0s - loss: 0.1185 - matthews_correlation_coefficient: 0.8350 - 324ms/epoch - 8ms/step
Epoch 313/1000
40/40 - 0s - loss: 0.1252 - matthews_correlation_coefficient: 0.7818 - 327ms/epoch - 8ms/step
Epoch 314/1000
40/4

Epoch 381/1000
40/40 - 0s - loss: 0.1568 - matthews_correlation_coefficient: 0.6601 - 331ms/epoch - 8ms/step
Epoch 382/1000
40/40 - 0s - loss: 0.1226 - matthews_correlation_coefficient: 0.7955 - 322ms/epoch - 8ms/step
Epoch 383/1000
40/40 - 0s - loss: 0.1253 - matthews_correlation_coefficient: 0.7460 - 326ms/epoch - 8ms/step
Epoch 384/1000
40/40 - 0s - loss: 0.1254 - matthews_correlation_coefficient: 0.7634 - 325ms/epoch - 8ms/step
Epoch 385/1000
40/40 - 0s - loss: 0.1176 - matthews_correlation_coefficient: 0.8095 - 326ms/epoch - 8ms/step
Epoch 386/1000
40/40 - 0s - loss: 0.1199 - matthews_correlation_coefficient: 0.7781 - 327ms/epoch - 8ms/step
Epoch 387/1000
40/40 - 0s - loss: 0.1176 - matthews_correlation_coefficient: 0.7586 - 323ms/epoch - 8ms/step
Epoch 388/1000
40/40 - 0s - loss: 0.1223 - matthews_correlation_coefficient: 0.8310 - 322ms/epoch - 8ms/step
Epoch 389/1000
40/40 - 0s - loss: 0.1184 - matthews_correlation_coefficient: 0.7949 - 332ms/epoch - 8ms/step
Epoch 390/1000
40/4

Epoch 457/1000
40/40 - 0s - loss: 0.1177 - matthews_correlation_coefficient: 0.7510 - 321ms/epoch - 8ms/step
Epoch 458/1000
40/40 - 0s - loss: 0.1173 - matthews_correlation_coefficient: 0.8334 - 324ms/epoch - 8ms/step
Epoch 459/1000
40/40 - 0s - loss: 0.1167 - matthews_correlation_coefficient: 0.8025 - 332ms/epoch - 8ms/step
Epoch 460/1000
40/40 - 0s - loss: 0.1206 - matthews_correlation_coefficient: 0.8117 - 344ms/epoch - 9ms/step
Epoch 461/1000
40/40 - 0s - loss: 0.1317 - matthews_correlation_coefficient: 0.7250 - 343ms/epoch - 9ms/step
Epoch 462/1000
40/40 - 0s - loss: 0.1185 - matthews_correlation_coefficient: 0.8352 - 322ms/epoch - 8ms/step
Epoch 463/1000
40/40 - 0s - loss: 0.1159 - matthews_correlation_coefficient: 0.8117 - 324ms/epoch - 8ms/step
Epoch 464/1000
40/40 - 0s - loss: 0.1193 - matthews_correlation_coefficient: 0.7856 - 325ms/epoch - 8ms/step
Epoch 465/1000
40/40 - 0s - loss: 0.1202 - matthews_correlation_coefficient: 0.7574 - 336ms/epoch - 8ms/step
Epoch 466/1000
40/4

Epoch 533/1000
40/40 - 0s - loss: 0.1171 - matthews_correlation_coefficient: 0.7873 - 322ms/epoch - 8ms/step
Epoch 534/1000
40/40 - 0s - loss: 0.1185 - matthews_correlation_coefficient: 0.7774 - 324ms/epoch - 8ms/step
Epoch 535/1000
40/40 - 0s - loss: 0.1163 - matthews_correlation_coefficient: 0.7883 - 332ms/epoch - 8ms/step
Epoch 536/1000
40/40 - 0s - loss: 0.1146 - matthews_correlation_coefficient: 0.7478 - 326ms/epoch - 8ms/step
Epoch 537/1000
40/40 - 0s - loss: 0.1206 - matthews_correlation_coefficient: 0.7732 - 320ms/epoch - 8ms/step
Epoch 538/1000
40/40 - 0s - loss: 0.1360 - matthews_correlation_coefficient: 0.7545 - 324ms/epoch - 8ms/step
Epoch 539/1000
40/40 - 0s - loss: 0.1241 - matthews_correlation_coefficient: 0.7906 - 323ms/epoch - 8ms/step
Epoch 540/1000
40/40 - 0s - loss: 0.1190 - matthews_correlation_coefficient: 0.7326 - 322ms/epoch - 8ms/step
Epoch 541/1000
40/40 - 0s - loss: 0.1179 - matthews_correlation_coefficient: 0.8123 - 325ms/epoch - 8ms/step
Epoch 542/1000
40/4

Epoch 609/1000
40/40 - 0s - loss: 0.1142 - matthews_correlation_coefficient: 0.7185 - 324ms/epoch - 8ms/step
Epoch 610/1000
40/40 - 0s - loss: 0.1305 - matthews_correlation_coefficient: 0.7854 - 323ms/epoch - 8ms/step
Epoch 611/1000
40/40 - 0s - loss: 0.1178 - matthews_correlation_coefficient: 0.7201 - 320ms/epoch - 8ms/step
Epoch 612/1000
40/40 - 0s - loss: 0.1173 - matthews_correlation_coefficient: 0.7523 - 324ms/epoch - 8ms/step
Epoch 613/1000
40/40 - 0s - loss: 0.1146 - matthews_correlation_coefficient: 0.7543 - 319ms/epoch - 8ms/step
Epoch 614/1000
40/40 - 0s - loss: 0.1143 - matthews_correlation_coefficient: 0.8025 - 322ms/epoch - 8ms/step
Epoch 615/1000
40/40 - 0s - loss: 0.1354 - matthews_correlation_coefficient: 0.6903 - 322ms/epoch - 8ms/step
Epoch 616/1000
40/40 - 0s - loss: 0.1147 - matthews_correlation_coefficient: 0.8243 - 320ms/epoch - 8ms/step
Epoch 617/1000
40/40 - 0s - loss: 0.1167 - matthews_correlation_coefficient: 0.7796 - 321ms/epoch - 8ms/step
Epoch 618/1000
40/4

Epoch 685/1000
40/40 - 0s - loss: 0.1153 - matthews_correlation_coefficient: 0.7689 - 331ms/epoch - 8ms/step
Epoch 686/1000
40/40 - 0s - loss: 0.1435 - matthews_correlation_coefficient: 0.7266 - 327ms/epoch - 8ms/step
Epoch 687/1000
40/40 - 0s - loss: 0.1223 - matthews_correlation_coefficient: 0.7835 - 321ms/epoch - 8ms/step
Epoch 688/1000
40/40 - 0s - loss: 0.1158 - matthews_correlation_coefficient: 0.7400 - 325ms/epoch - 8ms/step
Epoch 689/1000
40/40 - 0s - loss: 0.1136 - matthews_correlation_coefficient: 0.8291 - 324ms/epoch - 8ms/step
Epoch 690/1000
40/40 - 0s - loss: 0.1151 - matthews_correlation_coefficient: 0.7619 - 326ms/epoch - 8ms/step
Epoch 691/1000
40/40 - 0s - loss: 0.1182 - matthews_correlation_coefficient: 0.7178 - 330ms/epoch - 8ms/step
Epoch 692/1000
40/40 - 0s - loss: 0.1202 - matthews_correlation_coefficient: 0.7592 - 325ms/epoch - 8ms/step
Epoch 693/1000
40/40 - 0s - loss: 0.1208 - matthews_correlation_coefficient: 0.7379 - 325ms/epoch - 8ms/step
Epoch 694/1000
40/4

Epoch 761/1000
40/40 - 0s - loss: 0.1160 - matthews_correlation_coefficient: 0.8264 - 324ms/epoch - 8ms/step
Epoch 762/1000
40/40 - 0s - loss: 0.1184 - matthews_correlation_coefficient: 0.8033 - 324ms/epoch - 8ms/step
Epoch 763/1000
40/40 - 0s - loss: 0.1151 - matthews_correlation_coefficient: 0.8006 - 331ms/epoch - 8ms/step
Epoch 764/1000
40/40 - 0s - loss: 0.1192 - matthews_correlation_coefficient: 0.7514 - 327ms/epoch - 8ms/step
Epoch 765/1000
40/40 - 0s - loss: 0.1175 - matthews_correlation_coefficient: 0.7748 - 321ms/epoch - 8ms/step
Epoch 766/1000
40/40 - 0s - loss: 0.1240 - matthews_correlation_coefficient: 0.7656 - 325ms/epoch - 8ms/step
Epoch 767/1000
40/40 - 0s - loss: 0.1286 - matthews_correlation_coefficient: 0.7626 - 336ms/epoch - 8ms/step
Epoch 768/1000
40/40 - 0s - loss: 0.1140 - matthews_correlation_coefficient: 0.7909 - 330ms/epoch - 8ms/step
Epoch 769/1000
40/40 - 0s - loss: 0.1152 - matthews_correlation_coefficient: 0.7399 - 326ms/epoch - 8ms/step
Epoch 770/1000
40/4

40/40 - 0s - loss: 0.1196 - matthews_correlation_coefficient: 0.7493 - 369ms/epoch - 9ms/step
Epoch 837/1000
40/40 - 0s - loss: 0.1146 - matthews_correlation_coefficient: 0.7239 - 334ms/epoch - 8ms/step
Epoch 838/1000
40/40 - 0s - loss: 0.1100 - matthews_correlation_coefficient: 0.7663 - 320ms/epoch - 8ms/step
Epoch 839/1000
40/40 - 0s - loss: 0.1125 - matthews_correlation_coefficient: 0.7926 - 323ms/epoch - 8ms/step
Epoch 840/1000
40/40 - 0s - loss: 0.1191 - matthews_correlation_coefficient: 0.8376 - 321ms/epoch - 8ms/step
Epoch 841/1000
40/40 - 0s - loss: 0.1138 - matthews_correlation_coefficient: 0.7982 - 328ms/epoch - 8ms/step
Epoch 842/1000
40/40 - 0s - loss: 0.1118 - matthews_correlation_coefficient: 0.7734 - 320ms/epoch - 8ms/step
Epoch 843/1000
40/40 - 0s - loss: 0.1210 - matthews_correlation_coefficient: 0.7899 - 326ms/epoch - 8ms/step
Epoch 844/1000
40/40 - 0s - loss: 0.1156 - matthews_correlation_coefficient: 0.8402 - 321ms/epoch - 8ms/step
Epoch 845/1000
40/40 - 0s - loss: 

Epoch 912/1000
40/40 - 0s - loss: 0.1161 - matthews_correlation_coefficient: 0.7492 - 319ms/epoch - 8ms/step
Epoch 913/1000
40/40 - 0s - loss: 0.1183 - matthews_correlation_coefficient: 0.7847 - 324ms/epoch - 8ms/step
Epoch 914/1000
40/40 - 0s - loss: 0.1160 - matthews_correlation_coefficient: 0.7804 - 323ms/epoch - 8ms/step
Epoch 915/1000
40/40 - 0s - loss: 0.1096 - matthews_correlation_coefficient: 0.7360 - 322ms/epoch - 8ms/step
Epoch 916/1000
40/40 - 0s - loss: 0.1156 - matthews_correlation_coefficient: 0.7076 - 321ms/epoch - 8ms/step
Epoch 917/1000
40/40 - 0s - loss: 0.1187 - matthews_correlation_coefficient: 0.7739 - 322ms/epoch - 8ms/step
Epoch 918/1000
40/40 - 0s - loss: 0.1158 - matthews_correlation_coefficient: 0.8073 - 326ms/epoch - 8ms/step
Epoch 919/1000
40/40 - 0s - loss: 0.1154 - matthews_correlation_coefficient: 0.7312 - 322ms/epoch - 8ms/step
Epoch 920/1000
40/40 - 0s - loss: 0.1260 - matthews_correlation_coefficient: 0.7726 - 323ms/epoch - 8ms/step
Epoch 921/1000
40/4

Epoch 988/1000
40/40 - 0s - loss: 0.1227 - matthews_correlation_coefficient: 0.7592 - 326ms/epoch - 8ms/step
Epoch 989/1000
40/40 - 0s - loss: 0.1157 - matthews_correlation_coefficient: 0.7724 - 335ms/epoch - 8ms/step
Epoch 990/1000
40/40 - 0s - loss: 0.1182 - matthews_correlation_coefficient: 0.7407 - 324ms/epoch - 8ms/step
Epoch 991/1000
40/40 - 0s - loss: 0.1183 - matthews_correlation_coefficient: 0.8187 - 324ms/epoch - 8ms/step
Epoch 992/1000
40/40 - 0s - loss: 0.1189 - matthews_correlation_coefficient: 0.7649 - 323ms/epoch - 8ms/step
Epoch 993/1000
40/40 - 0s - loss: 0.1207 - matthews_correlation_coefficient: 0.8140 - 327ms/epoch - 8ms/step
Epoch 994/1000
40/40 - 0s - loss: 0.1158 - matthews_correlation_coefficient: 0.7698 - 323ms/epoch - 8ms/step
Epoch 995/1000
40/40 - 0s - loss: 0.1252 - matthews_correlation_coefficient: 0.7565 - 327ms/epoch - 8ms/step
Epoch 996/1000
40/40 - 0s - loss: 0.1190 - matthews_correlation_coefficient: 0.7367 - 327ms/epoch - 8ms/step
Epoch 997/1000
40/4

  ys = list(map(lambda x: 0 if float(x)<0.5 else 1, pred))


In [48]:
test_pred = model.predict(x_test_base)
print(x_test_base.shape)
test_ys = list(map(lambda x: 0 if float(x)<0.5 else 1, test_pred))
print(len(np.where((test_ys == y_test_base)==False)[0]))
# Testing MCC
print(matthews_corrcoef(y_test_base,test_ys))

(314, 23)
15
0.7107136001567437


  test_ys = list(map(lambda x: 0 if float(x)<0.5 else 1, test_pred))


In [49]:
over_sample = 142
base_sample = base_sample
true_test = cv_base[over_sample+base_sample:] 
print(true_test.shape)
true_test_pred = model.predict(true_test.toarray())
results = list(map(lambda x: -1 if float(x)<0.5 else 1, true_test_pred))

(392, 23)


  results = list(map(lambda x: -1 if float(x)<0.5 else 1, true_test_pred))


In [50]:
df = pd.Series(results)
df.to_csv('results_tf_base.dat', index=False, header=None)
print("results_tf_base.dat Created.")

results_tf_base.dat Created.


In [51]:
# Over sample data with word of bag model
model_2 = ANN(23,x_train_over, y_train_over, x_test_over,y_test_over,500)

test_pred = model_2.predict(x_test_over)
print(x_test_over.shape)
test_ys = list(map(lambda x: 0 if float(x)<0.5 else 1, test_pred))
print(len(np.where((test_ys == y_test_over)==False)[0]))
# Testing MCC
print(matthews_corrcoef(y_test_over,test_ys))


Epoch 1/500
72/72 - 1s - loss: 7.1829 - matthews_correlation_coefficient: -1.2883e-02 - 960ms/epoch - 13ms/step
Epoch 2/500
72/72 - 1s - loss: 4.9325 - matthews_correlation_coefficient: 0.1337 - 581ms/epoch - 8ms/step
Epoch 3/500
72/72 - 1s - loss: 3.1591 - matthews_correlation_coefficient: 0.2862 - 578ms/epoch - 8ms/step
Epoch 4/500
72/72 - 1s - loss: 1.8646 - matthews_correlation_coefficient: 0.3278 - 573ms/epoch - 8ms/step
Epoch 5/500
72/72 - 1s - loss: 1.0712 - matthews_correlation_coefficient: 0.3960 - 576ms/epoch - 8ms/step
Epoch 6/500
72/72 - 1s - loss: 0.7486 - matthews_correlation_coefficient: 0.4669 - 582ms/epoch - 8ms/step
Epoch 7/500
72/72 - 1s - loss: 0.7062 - matthews_correlation_coefficient: 0.3486 - 590ms/epoch - 8ms/step
Epoch 8/500
72/72 - 1s - loss: 0.7010 - matthews_correlation_coefficient: 0.2647 - 578ms/epoch - 8ms/step
Epoch 9/500
72/72 - 1s - loss: 0.6948 - matthews_correlation_coefficient: 0.3333 - 583ms/epoch - 8ms/step
Epoch 10/500
72/72 - 1s - loss: 0.6800 -

Epoch 78/500
72/72 - 1s - loss: 0.2584 - matthews_correlation_coefficient: 0.8350 - 633ms/epoch - 9ms/step
Epoch 79/500
72/72 - 1s - loss: 0.2586 - matthews_correlation_coefficient: 0.8392 - 587ms/epoch - 8ms/step
Epoch 80/500
72/72 - 1s - loss: 0.2540 - matthews_correlation_coefficient: 0.8344 - 583ms/epoch - 8ms/step
Epoch 81/500
72/72 - 1s - loss: 0.2534 - matthews_correlation_coefficient: 0.8454 - 591ms/epoch - 8ms/step
Epoch 82/500
72/72 - 1s - loss: 0.2576 - matthews_correlation_coefficient: 0.8392 - 583ms/epoch - 8ms/step
Epoch 83/500
72/72 - 1s - loss: 0.2574 - matthews_correlation_coefficient: 0.8356 - 585ms/epoch - 8ms/step
Epoch 84/500
72/72 - 1s - loss: 0.2569 - matthews_correlation_coefficient: 0.8401 - 588ms/epoch - 8ms/step
Epoch 85/500
72/72 - 1s - loss: 0.2640 - matthews_correlation_coefficient: 0.8330 - 581ms/epoch - 8ms/step
Epoch 86/500
72/72 - 1s - loss: 0.2543 - matthews_correlation_coefficient: 0.8412 - 582ms/epoch - 8ms/step
Epoch 87/500
72/72 - 1s - loss: 0.251

72/72 - 1s - loss: 0.2356 - matthews_correlation_coefficient: 0.8505 - 583ms/epoch - 8ms/step
Epoch 155/500
72/72 - 1s - loss: 0.2335 - matthews_correlation_coefficient: 0.8542 - 582ms/epoch - 8ms/step
Epoch 156/500
72/72 - 1s - loss: 0.2370 - matthews_correlation_coefficient: 0.8392 - 646ms/epoch - 9ms/step
Epoch 157/500
72/72 - 1s - loss: 0.2370 - matthews_correlation_coefficient: 0.8350 - 716ms/epoch - 10ms/step
Epoch 158/500
72/72 - 1s - loss: 0.2356 - matthews_correlation_coefficient: 0.8566 - 591ms/epoch - 8ms/step
Epoch 159/500
72/72 - 1s - loss: 0.2355 - matthews_correlation_coefficient: 0.8496 - 630ms/epoch - 9ms/step
Epoch 160/500
72/72 - 1s - loss: 0.2370 - matthews_correlation_coefficient: 0.8440 - 640ms/epoch - 9ms/step
Epoch 161/500
72/72 - 1s - loss: 0.2372 - matthews_correlation_coefficient: 0.8310 - 596ms/epoch - 8ms/step
Epoch 162/500
72/72 - 1s - loss: 0.2325 - matthews_correlation_coefficient: 0.8646 - 620ms/epoch - 9ms/step
Epoch 163/500
72/72 - 1s - loss: 0.2364 -

Epoch 230/500
72/72 - 1s - loss: 0.2336 - matthews_correlation_coefficient: 0.8399 - 609ms/epoch - 8ms/step
Epoch 231/500
72/72 - 1s - loss: 0.2251 - matthews_correlation_coefficient: 0.8488 - 586ms/epoch - 8ms/step
Epoch 232/500
72/72 - 1s - loss: 0.2239 - matthews_correlation_coefficient: 0.8310 - 622ms/epoch - 9ms/step
Epoch 233/500
72/72 - 1s - loss: 0.2298 - matthews_correlation_coefficient: 0.8494 - 623ms/epoch - 9ms/step
Epoch 234/500
72/72 - 1s - loss: 0.2377 - matthews_correlation_coefficient: 0.8518 - 580ms/epoch - 8ms/step
Epoch 235/500
72/72 - 1s - loss: 0.2280 - matthews_correlation_coefficient: 0.8418 - 582ms/epoch - 8ms/step
Epoch 236/500
72/72 - 1s - loss: 0.2230 - matthews_correlation_coefficient: 0.8455 - 581ms/epoch - 8ms/step
Epoch 237/500
72/72 - 1s - loss: 0.2231 - matthews_correlation_coefficient: 0.8546 - 581ms/epoch - 8ms/step
Epoch 238/500
72/72 - 1s - loss: 0.2266 - matthews_correlation_coefficient: 0.8318 - 598ms/epoch - 8ms/step
Epoch 239/500
72/72 - 1s - l

Epoch 306/500
72/72 - 1s - loss: 0.2239 - matthews_correlation_coefficient: 0.8539 - 580ms/epoch - 8ms/step
Epoch 307/500
72/72 - 1s - loss: 0.2199 - matthews_correlation_coefficient: 0.8605 - 582ms/epoch - 8ms/step
Epoch 308/500
72/72 - 1s - loss: 0.2205 - matthews_correlation_coefficient: 0.8557 - 579ms/epoch - 8ms/step
Epoch 309/500
72/72 - 1s - loss: 0.2285 - matthews_correlation_coefficient: 0.8486 - 579ms/epoch - 8ms/step
Epoch 310/500
72/72 - 1s - loss: 0.2211 - matthews_correlation_coefficient: 0.8649 - 583ms/epoch - 8ms/step
Epoch 311/500
72/72 - 1s - loss: 0.2173 - matthews_correlation_coefficient: 0.8552 - 583ms/epoch - 8ms/step
Epoch 312/500
72/72 - 1s - loss: 0.2204 - matthews_correlation_coefficient: 0.8481 - 580ms/epoch - 8ms/step
Epoch 313/500
72/72 - 1s - loss: 0.2169 - matthews_correlation_coefficient: 0.8591 - 579ms/epoch - 8ms/step
Epoch 314/500
72/72 - 1s - loss: 0.2319 - matthews_correlation_coefficient: 0.8502 - 586ms/epoch - 8ms/step
Epoch 315/500
72/72 - 1s - l

Epoch 382/500
72/72 - 1s - loss: 0.2447 - matthews_correlation_coefficient: 0.8423 - 580ms/epoch - 8ms/step
Epoch 383/500
72/72 - 1s - loss: 0.2187 - matthews_correlation_coefficient: 0.8544 - 586ms/epoch - 8ms/step
Epoch 384/500
72/72 - 1s - loss: 0.2167 - matthews_correlation_coefficient: 0.8482 - 584ms/epoch - 8ms/step
Epoch 385/500
72/72 - 1s - loss: 0.2139 - matthews_correlation_coefficient: 0.8586 - 584ms/epoch - 8ms/step
Epoch 386/500
72/72 - 1s - loss: 0.2184 - matthews_correlation_coefficient: 0.8601 - 590ms/epoch - 8ms/step
Epoch 387/500
72/72 - 1s - loss: 0.2153 - matthews_correlation_coefficient: 0.8625 - 643ms/epoch - 9ms/step
Epoch 388/500
72/72 - 1s - loss: 0.2184 - matthews_correlation_coefficient: 0.8527 - 579ms/epoch - 8ms/step
Epoch 389/500
72/72 - 1s - loss: 0.2206 - matthews_correlation_coefficient: 0.8391 - 580ms/epoch - 8ms/step
Epoch 390/500
72/72 - 1s - loss: 0.2183 - matthews_correlation_coefficient: 0.8568 - 596ms/epoch - 8ms/step
Epoch 391/500
72/72 - 1s - l

Epoch 458/500
72/72 - 1s - loss: 0.2209 - matthews_correlation_coefficient: 0.8409 - 582ms/epoch - 8ms/step
Epoch 459/500
72/72 - 1s - loss: 0.2142 - matthews_correlation_coefficient: 0.8558 - 579ms/epoch - 8ms/step
Epoch 460/500
72/72 - 1s - loss: 0.2154 - matthews_correlation_coefficient: 0.8464 - 596ms/epoch - 8ms/step
Epoch 461/500
72/72 - 1s - loss: 0.2121 - matthews_correlation_coefficient: 0.8596 - 583ms/epoch - 8ms/step
Epoch 462/500
72/72 - 1s - loss: 0.2194 - matthews_correlation_coefficient: 0.8506 - 587ms/epoch - 8ms/step
Epoch 463/500
72/72 - 1s - loss: 0.2168 - matthews_correlation_coefficient: 0.8453 - 586ms/epoch - 8ms/step
Epoch 464/500
72/72 - 1s - loss: 0.2188 - matthews_correlation_coefficient: 0.8626 - 582ms/epoch - 8ms/step
Epoch 465/500
72/72 - 1s - loss: 0.2189 - matthews_correlation_coefficient: 0.8512 - 580ms/epoch - 8ms/step
Epoch 466/500
72/72 - 1s - loss: 0.2164 - matthews_correlation_coefficient: 0.8592 - 581ms/epoch - 8ms/step
Epoch 467/500
72/72 - 1s - l

  ys = list(map(lambda x: 0 if float(x)<0.5 else 1, pred))
  test_ys = list(map(lambda x: 0 if float(x)<0.5 else 1, test_pred))


In [52]:
over_sample = base_sample
base_sample = base_sample
true_test = cv_over[over_sample+base_sample:]
print(true_test.shape)
true_test_pred = model_2.predict(true_test.toarray())
results = list(map(lambda x: -1 if float(x)<0.5 else 1, true_test_pred))

(392, 23)


  results = list(map(lambda x: -1 if float(x)<0.5 else 1, true_test_pred))


In [53]:
df = pd.Series(results)
df.to_csv('results_tf_over.dat', index=False, header=None)
print("results_tf_over.dat created.")

results_tf_over.dat created.


In [54]:
# Over sample data with k mer =2 
model_3 = ANN(436,x_train_over_n, y_train_over_n, x_test_over_n,y_test_over_n,500)

test_pred = model_3.predict(x_test_over_n)
print(x_test_over_n.shape)
test_ys = list(map(lambda x: 0 if float(x)<0.5 else 1, test_pred))
print(len(np.where((test_ys == y_test_over_n)==False)[0]))
# Testing MCC
print(matthews_corrcoef(y_test_over_n,test_ys))

Epoch 1/500
72/72 - 1s - loss: 21.9215 - matthews_correlation_coefficient: 0.1144 - 1s/epoch - 15ms/step
Epoch 2/500
72/72 - 1s - loss: 2.1902 - matthews_correlation_coefficient: 0.2917 - 590ms/epoch - 8ms/step
Epoch 3/500
72/72 - 1s - loss: 0.7856 - matthews_correlation_coefficient: 0.2962 - 583ms/epoch - 8ms/step
Epoch 4/500
72/72 - 1s - loss: 0.7661 - matthews_correlation_coefficient: 0.4978 - 588ms/epoch - 8ms/step
Epoch 5/500
72/72 - 1s - loss: 0.7355 - matthews_correlation_coefficient: 0.5386 - 606ms/epoch - 8ms/step
Epoch 6/500
72/72 - 1s - loss: 0.6840 - matthews_correlation_coefficient: 0.6243 - 584ms/epoch - 8ms/step
Epoch 7/500
72/72 - 1s - loss: 0.6307 - matthews_correlation_coefficient: 0.6619 - 591ms/epoch - 8ms/step
Epoch 8/500
72/72 - 1s - loss: 0.5896 - matthews_correlation_coefficient: 0.7056 - 583ms/epoch - 8ms/step
Epoch 9/500
72/72 - 1s - loss: 0.5589 - matthews_correlation_coefficient: 0.7206 - 587ms/epoch - 8ms/step
Epoch 10/500
72/72 - 1s - loss: 0.5308 - matthe

Epoch 78/500
72/72 - 1s - loss: 0.3350 - matthews_correlation_coefficient: 0.8987 - 591ms/epoch - 8ms/step
Epoch 79/500
72/72 - 1s - loss: 0.3287 - matthews_correlation_coefficient: 0.9037 - 588ms/epoch - 8ms/step
Epoch 80/500
72/72 - 1s - loss: 0.3266 - matthews_correlation_coefficient: 0.8960 - 630ms/epoch - 9ms/step
Epoch 81/500
72/72 - 1s - loss: 0.3256 - matthews_correlation_coefficient: 0.9017 - 614ms/epoch - 9ms/step
Epoch 82/500
72/72 - 1s - loss: 0.3281 - matthews_correlation_coefficient: 0.9029 - 588ms/epoch - 8ms/step
Epoch 83/500
72/72 - 1s - loss: 0.3307 - matthews_correlation_coefficient: 0.9072 - 587ms/epoch - 8ms/step
Epoch 84/500
72/72 - 1s - loss: 0.3274 - matthews_correlation_coefficient: 0.9009 - 600ms/epoch - 8ms/step
Epoch 85/500
72/72 - 1s - loss: 0.3217 - matthews_correlation_coefficient: 0.9083 - 593ms/epoch - 8ms/step
Epoch 86/500
72/72 - 1s - loss: 0.3230 - matthews_correlation_coefficient: 0.9083 - 586ms/epoch - 8ms/step
Epoch 87/500
72/72 - 1s - loss: 0.328

72/72 - 1s - loss: 0.3111 - matthews_correlation_coefficient: 0.9118 - 591ms/epoch - 8ms/step
Epoch 155/500
72/72 - 1s - loss: 0.3276 - matthews_correlation_coefficient: 0.8958 - 599ms/epoch - 8ms/step
Epoch 156/500
72/72 - 1s - loss: 0.3196 - matthews_correlation_coefficient: 0.9010 - 620ms/epoch - 9ms/step
Epoch 157/500
72/72 - 1s - loss: 0.3217 - matthews_correlation_coefficient: 0.9021 - 607ms/epoch - 8ms/step
Epoch 158/500
72/72 - 1s - loss: 0.3218 - matthews_correlation_coefficient: 0.9066 - 592ms/epoch - 8ms/step
Epoch 159/500
72/72 - 1s - loss: 0.3198 - matthews_correlation_coefficient: 0.9020 - 596ms/epoch - 8ms/step
Epoch 160/500
72/72 - 1s - loss: 0.3175 - matthews_correlation_coefficient: 0.8940 - 590ms/epoch - 8ms/step
Epoch 161/500
72/72 - 1s - loss: 0.3228 - matthews_correlation_coefficient: 0.8994 - 587ms/epoch - 8ms/step
Epoch 162/500
72/72 - 1s - loss: 0.3178 - matthews_correlation_coefficient: 0.9038 - 587ms/epoch - 8ms/step
Epoch 163/500
72/72 - 1s - loss: 0.3209 - 

Epoch 230/500
72/72 - 1s - loss: 0.3137 - matthews_correlation_coefficient: 0.9002 - 605ms/epoch - 8ms/step
Epoch 231/500
72/72 - 1s - loss: 0.3247 - matthews_correlation_coefficient: 0.8981 - 598ms/epoch - 8ms/step
Epoch 232/500
72/72 - 1s - loss: 0.3341 - matthews_correlation_coefficient: 0.8990 - 597ms/epoch - 8ms/step
Epoch 233/500
72/72 - 1s - loss: 0.3217 - matthews_correlation_coefficient: 0.8891 - 593ms/epoch - 8ms/step
Epoch 234/500
72/72 - 1s - loss: 0.3227 - matthews_correlation_coefficient: 0.8990 - 591ms/epoch - 8ms/step
Epoch 235/500
72/72 - 1s - loss: 0.3112 - matthews_correlation_coefficient: 0.8938 - 592ms/epoch - 8ms/step
Epoch 236/500
72/72 - 1s - loss: 0.3289 - matthews_correlation_coefficient: 0.8934 - 605ms/epoch - 8ms/step
Epoch 237/500
72/72 - 1s - loss: 0.3211 - matthews_correlation_coefficient: 0.8914 - 590ms/epoch - 8ms/step
Epoch 238/500
72/72 - 1s - loss: 0.3029 - matthews_correlation_coefficient: 0.9027 - 590ms/epoch - 8ms/step
Epoch 239/500
72/72 - 1s - l

Epoch 306/500
72/72 - 1s - loss: 0.3268 - matthews_correlation_coefficient: 0.8905 - 589ms/epoch - 8ms/step
Epoch 307/500
72/72 - 1s - loss: 0.3241 - matthews_correlation_coefficient: 0.8981 - 586ms/epoch - 8ms/step
Epoch 308/500
72/72 - 1s - loss: 0.3262 - matthews_correlation_coefficient: 0.8989 - 601ms/epoch - 8ms/step
Epoch 309/500
72/72 - 1s - loss: 0.3158 - matthews_correlation_coefficient: 0.9085 - 585ms/epoch - 8ms/step
Epoch 310/500
72/72 - 1s - loss: 0.3091 - matthews_correlation_coefficient: 0.9027 - 591ms/epoch - 8ms/step
Epoch 311/500
72/72 - 1s - loss: 0.3290 - matthews_correlation_coefficient: 0.9038 - 654ms/epoch - 9ms/step
Epoch 312/500
72/72 - 1s - loss: 0.3103 - matthews_correlation_coefficient: 0.9019 - 615ms/epoch - 9ms/step
Epoch 313/500
72/72 - 1s - loss: 0.3206 - matthews_correlation_coefficient: 0.9020 - 587ms/epoch - 8ms/step
Epoch 314/500
72/72 - 1s - loss: 0.3284 - matthews_correlation_coefficient: 0.9058 - 587ms/epoch - 8ms/step
Epoch 315/500
72/72 - 1s - l

Epoch 382/500
72/72 - 1s - loss: 0.3178 - matthews_correlation_coefficient: 0.9044 - 588ms/epoch - 8ms/step
Epoch 383/500
72/72 - 1s - loss: 0.3091 - matthews_correlation_coefficient: 0.9066 - 589ms/epoch - 8ms/step
Epoch 384/500
72/72 - 1s - loss: 0.3091 - matthews_correlation_coefficient: 0.9011 - 589ms/epoch - 8ms/step
Epoch 385/500
72/72 - 1s - loss: 0.3093 - matthews_correlation_coefficient: 0.9001 - 590ms/epoch - 8ms/step
Epoch 386/500
72/72 - 1s - loss: 0.3092 - matthews_correlation_coefficient: 0.9094 - 596ms/epoch - 8ms/step
Epoch 387/500
72/72 - 1s - loss: 0.3056 - matthews_correlation_coefficient: 0.8887 - 586ms/epoch - 8ms/step
Epoch 388/500
72/72 - 1s - loss: 0.3047 - matthews_correlation_coefficient: 0.8992 - 588ms/epoch - 8ms/step
Epoch 389/500
72/72 - 1s - loss: 0.3138 - matthews_correlation_coefficient: 0.9079 - 589ms/epoch - 8ms/step
Epoch 390/500
72/72 - 1s - loss: 0.3104 - matthews_correlation_coefficient: 0.9063 - 589ms/epoch - 8ms/step
Epoch 391/500
72/72 - 1s - l

Epoch 458/500
72/72 - 1s - loss: 0.3207 - matthews_correlation_coefficient: 0.8931 - 622ms/epoch - 9ms/step
Epoch 459/500
72/72 - 1s - loss: 0.3157 - matthews_correlation_coefficient: 0.9004 - 591ms/epoch - 8ms/step
Epoch 460/500
72/72 - 1s - loss: 0.3159 - matthews_correlation_coefficient: 0.8979 - 592ms/epoch - 8ms/step
Epoch 461/500
72/72 - 1s - loss: 0.3219 - matthews_correlation_coefficient: 0.8830 - 590ms/epoch - 8ms/step
Epoch 462/500
72/72 - 1s - loss: 0.3164 - matthews_correlation_coefficient: 0.9084 - 591ms/epoch - 8ms/step
Epoch 463/500
72/72 - 1s - loss: 0.3185 - matthews_correlation_coefficient: 0.9065 - 588ms/epoch - 8ms/step
Epoch 464/500
72/72 - 1s - loss: 0.3040 - matthews_correlation_coefficient: 0.9007 - 588ms/epoch - 8ms/step
Epoch 465/500
72/72 - 1s - loss: 0.4455 - matthews_correlation_coefficient: 0.8882 - 604ms/epoch - 8ms/step
Epoch 466/500
72/72 - 1s - loss: 0.3420 - matthews_correlation_coefficient: 0.8861 - 637ms/epoch - 9ms/step
Epoch 467/500
72/72 - 1s - l

  ys = list(map(lambda x: 0 if float(x)<0.5 else 1, pred))
  test_ys = list(map(lambda x: 0 if float(x)<0.5 else 1, test_pred))


In [55]:
over_sample = base_sample
base_sample = base_sample
true_test = cv_over_n[over_sample+base_sample:] 
print(true_test.shape)
true_test_pred = model_3.predict(true_test.toarray())
results = list(map(lambda x: -1 if float(x)<0.5 else 1, true_test_pred))


(392, 436)


  results = list(map(lambda x: -1 if float(x)<0.5 else 1, true_test_pred))


In [56]:
df = pd.Series(results)
df.to_csv('results_tf_over_n.dat', index=False, header=None)
print("results_tf_over_n.dat Created. ")

results_tf_over_n.dat Created. 
