In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
%matplotlib inline

from os.path import join

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.utils import to_categorical

In [2]:
rows = []
drugFeatures = {}

with open('drugFeature.csv', 'r') as csvfile: 
    reader = csv.reader(csvfile, skipinitialspace=True)
    next(reader)
    for row in reader:
        rows.append(row)
        
for i in range(0, len(rows)):
    drugFeatures[rows[i][0]] = rows[i][1:]

In [3]:
rows = []
miRNAFeatures = {}

with open('miRNAFeature.csv', 'r') as csvfile: 
    reader = csv.reader(csvfile, skipinitialspace=True)
    next(reader)
    for row in reader:
        rows.append(row)
        
for i in range(0, len(rows)):
    miRNAFeatures[rows[i][0]] = rows[i][1:]


In [4]:
miRNA_train = []
drug_train = []
correlation_train = []

with open('correlationData.csv', 'r') as csvfile: 
    reader = csv.reader(csvfile, skipinitialspace=True)
    next(reader)
    for miRBase, cid, correlation in reader:
        drug_train.append(drugFeatures[str(cid)])
        correlation_train.append(correlation)
        miRNA_train.append(miRNAFeatures[str(miRBase)])

#for i in range(0, len(miRNA_train)):
  #  miRNA_train[i] = miRNA_train[i] + drug_train[i]

#print(miRNA_train[0])
    
#train_x = np.array(miRNA_train).astype(float)
#print(train_x.shape)

drug_train = np.array(drug_train).astype(np.float32)
print(drug_train.shape)

miRNA_train = np.array(miRNA_train).astype(np.float32)
print(miRNA_train.shape)

#correlation_train = to_categorical(correlation_train)
correlation_train = np.array(correlation_train).astype(int)
print(correlation_train.shape)

(6993, 128)
(6993, 128)
(6993,)


In [5]:
print(miRNA_train[1])
print(drug_train[1])

[0.         1.1678784  0.80110854 0.36714268 0.         0.640995
 0.         0.24220982 0.         0.         0.39026725 0.16112137
 0.9944409  0.         0.         0.29475656 1.1155647  0.38025928
 0.         1.0711297  0.7658136  1.0376648  0.16947185 0.6961138
 0.42930424 1.1351733  0.         0.7261601  1.4695327  0.
 0.38475117 0.2522929  1.4178569  0.67905563 1.2654397  0.
 0.53979    0.7011106  0.         0.08322429 0.6676927  1.0721617
 1.3056295  0.83898646 0.5962862  0.08329256 0.7491788  1.1286315
 1.4878961  0.         0.8486786  0.732323   0.81091565 1.3387327
 1.3216766  1.5067047  0.33290002 0.         0.32317445 0.6860323
 0.41309845 0.37720507 0.3178873  0.69950825 0.         0.
 0.77635896 0.92476046 0.68742806 0.         0.85936135 0.
 1.1558073  0.16101675 0.         0.26994047 0.22144234 0.74781567
 0.6479784  0.         0.61656487 0.5485257  0.         0.8486363
 0.6875442  2.2301002  0.         0.86042196 0.8886398  0.7850777
 0.         1.5999737  0.3223623  0.

In [6]:
miRNAFeatureInput = layers.Input(shape=(128,))
drugFeatureInput = layers.Input(shape=(128,))
#Input_layer = layers.Input(shape=(256,))
Input_layer = layers.Concatenate()([miRNAFeatureInput, drugFeatureInput])
layer1 = layers.Dense(units=128, activation = 'relu')(Input_layer)
layer2 = layers.Dense(units=64, activation = 'relu')(layer1)
layer3 = layers.Dense(units=32,  activation = 'relu')(layer2)
layer4 = layers.Dense(units=16, activation = 'relu')(layer3)
#layer5 = layers.Dense(units=2000, activation = 'relu')(layer4)
#layer6 = layers.Dense(units=2000, activation = 'relu')(layer5)
output = layers.Dense(units=1, activation = 'sigmoid')(layer4)

In [7]:
#predict_model = models.Model(Input_layer, output)
predict_model = models.Model([miRNAFeatureInput, drugFeatureInput], output)

In [8]:
predict_model.compile(optimizer=tf.keras.optimizers.Adam(0.01), loss="binary_crossentropy", metrics=['binary_accuracy'])

In [9]:
predict_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 128)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 128)]        0                                            
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 256)          0           input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
dense (Dense)                   (None, 128)          32896       concatenate[0][0]            

In [10]:
predict_model.fit([miRNA_train, drug_train], correlation_train, verbose = 2, epochs = 100, batch_size = 1024, validation_split = 0.33)

Train on 4685 samples, validate on 2308 samples
Epoch 1/100
4685/4685 - 1s - loss: 0.9657 - binary_accuracy: 0.6493 - val_loss: 1.8023 - val_binary_accuracy: 0.0000e+00
Epoch 2/100
4685/4685 - 0s - loss: 0.5745 - binary_accuracy: 0.7535 - val_loss: 1.2737 - val_binary_accuracy: 0.0000e+00
Epoch 3/100
4685/4685 - 0s - loss: 0.5610 - binary_accuracy: 0.7535 - val_loss: 1.4320 - val_binary_accuracy: 0.0000e+00
Epoch 4/100
4685/4685 - 0s - loss: 0.5598 - binary_accuracy: 0.7535 - val_loss: 1.2066 - val_binary_accuracy: 0.0000e+00
Epoch 5/100
4685/4685 - 0s - loss: 0.5617 - binary_accuracy: 0.7535 - val_loss: 1.5847 - val_binary_accuracy: 0.0000e+00
Epoch 6/100
4685/4685 - 0s - loss: 0.5661 - binary_accuracy: 0.7535 - val_loss: 1.5370 - val_binary_accuracy: 0.0000e+00
Epoch 7/100
4685/4685 - 0s - loss: 0.5641 - binary_accuracy: 0.7535 - val_loss: 1.8768 - val_binary_accuracy: 0.0000e+00
Epoch 8/100
4685/4685 - 0s - loss: 0.5720 - binary_accuracy: 0.7535 - val_loss: 1.5441 - val_binary_accur

Epoch 68/100
4685/4685 - 0s - loss: 0.5566 - binary_accuracy: 0.7535 - val_loss: 1.4416 - val_binary_accuracy: 0.0000e+00
Epoch 69/100
4685/4685 - 0s - loss: 0.5577 - binary_accuracy: 0.7535 - val_loss: 1.3323 - val_binary_accuracy: 0.0000e+00
Epoch 70/100
4685/4685 - 0s - loss: 0.5597 - binary_accuracy: 0.7535 - val_loss: 1.4327 - val_binary_accuracy: 0.0000e+00
Epoch 71/100
4685/4685 - 0s - loss: 0.5596 - binary_accuracy: 0.7535 - val_loss: 1.3683 - val_binary_accuracy: 0.0000e+00
Epoch 72/100
4685/4685 - 0s - loss: 0.5599 - binary_accuracy: 0.7535 - val_loss: 1.3522 - val_binary_accuracy: 0.0000e+00
Epoch 73/100
4685/4685 - 0s - loss: 0.5611 - binary_accuracy: 0.7535 - val_loss: 1.4280 - val_binary_accuracy: 0.0000e+00
Epoch 74/100
4685/4685 - 0s - loss: 0.5599 - binary_accuracy: 0.7535 - val_loss: 1.2988 - val_binary_accuracy: 0.0000e+00
Epoch 75/100
4685/4685 - 0s - loss: 0.5606 - binary_accuracy: 0.7535 - val_loss: 1.4886 - val_binary_accuracy: 0.0000e+00
Epoch 76/100
4685/4685 -

<tensorflow.python.keras.callbacks.History at 0x2463c9b8a48>

In [11]:
sampledata = predict_model.predict([miRNA_train, drug_train])

In [12]:
print(correlation_train[6000])
print(sampledata[6000])

0
[0.7436589]


In [13]:
from sklearn.metrics import accuracy_score

In [14]:
my_accuracy = accuracy_score(correlation_train, sampledata.round())

In [15]:
print(my_accuracy)

0.5047905047905048
