In [1]:
import numpy as np
import csv
from rdkit import Chem
import matplotlib.pyplot as plt
import openpyxl as xl
%matplotlib inline

from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Dense, Flatten, Dropout, BatchNormalization, Reshape, LeakyReLU
from tensorflow.keras.models import Model, Sequential
import tensorflow as tf

In [2]:
sequence = 'AGCU'
# Lookup tables between each alphabet letter and its one-hot column index.
char_to_int = {base: column for column, base in enumerate(sequence)}
int_to_char = {column: base for column, base in enumerate(sequence)}


def sequence_encoder(seq, maxlen = 128):
    """One-hot encode a sequence over the module-level ``sequence`` alphabet.

    Args:
        seq: Iterable of characters, each of which must be a key of
            ``char_to_int``.
        maxlen: Number of rows in the output; positions past the end of
            ``seq`` stay all-zero.

    Returns:
        A float array of shape ``(maxlen, len(sequence))`` with a single 1
        per encoded position.

    Raises:
        KeyError: If a character is not in the alphabet.
        IndexError: If ``seq`` has more than ``maxlen`` characters.
    """
    onehot = np.zeros((maxlen, len(sequence)))
    for position, base in enumerate(seq):
        column = char_to_int[base]
        onehot[position, column] = 1
    return onehot

In [3]:
# Parallel lists filled from miRNA.csv: first column -> mimat, second -> miRNA.
mimat = []
miRNA = []
# newline='' lets the csv module do its own line-ending handling, as its
# documentation requires for file objects passed to csv.reader.
with open('miRNA.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile, skipinitialspace=True)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; skip it
            # instead of failing on the two-value unpacking below.
            continue
        # Still raises ValueError if a non-empty row does not have exactly
        # two fields, so malformed data is not silently accepted.
        num, val = row
        mimat.append(num)
        miRNA.append(val)

In [4]:
# Encode every loaded sequence, then stack the per-sequence matrices into a
# single array with one leading entry per sequence.
temp_list = [sequence_encoder(rna_seq) for rna_seq in miRNA]
onehot_seq_list = np.array(temp_list)
print(onehot_seq_list.shape)

(1517, 128, 4)


In [5]:
# Flatten each one-hot matrix into a single 128*4-wide row for the dense network.
train_x = np.reshape(onehot_seq_list, (-1, 128 * 4))
print(train_x.shape)

(1517, 512)


In [6]:
# Fix the random seeds before any layer is created so that weight
# initialisation (and NumPy-based shuffling, where the backend uses it) is
# repeatable across "Restart & Run All"; otherwise the exported latent
# features change on every run.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# The network consumes the flattened one-hot encoding: 128 positions x 4 letters.
input_shape = (128*4,)
input_tensor = layers.Input(input_shape)

# Encoder: 512 -> 300 -> 200 -> 128. The last layer is named so it can be
# looked up later with get_layer('latent_layer').
encoder1 = layers.Dense(300, activation='relu')(input_tensor)
encoder2 = layers.Dense(200, activation='relu')(encoder1)
encoder3 = layers.Dense(128, activation='relu', name='latent_layer')(encoder2)

latent_tensor = encoder3

# Decoder mirrors the encoder: 128 -> 200 -> 300 -> 512.
decoder1 = layers.Dense(200, activation='relu')(latent_tensor)
decoder2 = layers.Dense(300, activation='relu')(decoder1)

# NOTE(review): the targets are 0/1 one-hot values but 'relu' is unbounded
# above; a 'sigmoid' output may be a better match. Left unchanged so the
# learned features stay comparable with earlier runs.
reconstruction_output = layers.Dense(128*4, activation = 'relu')(decoder2)

In [7]:
# Wrap the input -> reconstruction graph in a trainable model and train it
# to minimise mean squared reconstruction error.
ae = models.Model(inputs=input_tensor, outputs=reconstruction_output)
ae.compile(loss='mse', optimizer='Adam')

In [8]:
# Print the autoencoder's layer-by-layer output shapes and parameter counts.
ae.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 512)]             0         
_________________________________________________________________
dense (Dense)                (None, 300)               153900    
_________________________________________________________________
dense_1 (Dense)              (None, 200)               60200     
_________________________________________________________________
latent_layer (Dense)         (None, 128)               25728     
_________________________________________________________________
dense_2 (Dense)              (None, 200)               25800     
_________________________________________________________________
dense_3 (Dense)              (None, 300)               60300     
_________________________________________________________________
dense_4 (Dense)              (None, 512)               154112

In [9]:
# Training hyper-parameters.
epoch = 100
batch_size = 32

# Autoencoder training: the input is also the target.
ae.fit(
    x=train_x,
    y=train_x,
    epochs=epoch,
    batch_size=batch_size,
    verbose=2,
)

Train on 1517 samples
Epoch 1/100
1517/1517 - 1s - loss: 0.0316
Epoch 2/100
1517/1517 - 0s - loss: 0.0187
Epoch 3/100
1517/1517 - 0s - loss: 0.0111
Epoch 4/100
1517/1517 - 0s - loss: 0.0078
Epoch 5/100
1517/1517 - 0s - loss: 0.0061
Epoch 6/100
1517/1517 - 0s - loss: 0.0051
Epoch 7/100
1517/1517 - 0s - loss: 0.0045
Epoch 8/100
1517/1517 - 0s - loss: 0.0040
Epoch 9/100
1517/1517 - 0s - loss: 0.0036
Epoch 10/100
1517/1517 - 0s - loss: 0.0034
Epoch 11/100
1517/1517 - 0s - loss: 0.0032
Epoch 12/100
1517/1517 - 0s - loss: 0.0030
Epoch 13/100
1517/1517 - 0s - loss: 0.0028
Epoch 14/100
1517/1517 - 0s - loss: 0.0027
Epoch 15/100
1517/1517 - 0s - loss: 0.0027
Epoch 16/100
1517/1517 - 0s - loss: 0.0026
Epoch 17/100
1517/1517 - 0s - loss: 0.0025
Epoch 18/100
1517/1517 - 0s - loss: 0.0026
Epoch 19/100
1517/1517 - 0s - loss: 0.0025
Epoch 20/100
1517/1517 - 0s - loss: 0.0024
Epoch 21/100
1517/1517 - 0s - loss: 0.0024
Epoch 22/100
1517/1517 - 0s - loss: 0.0024
Epoch 23/100
1517/1517 - 0s - loss: 0.002

<tensorflow.python.keras.callbacks.History at 0x13e8200af88>

In [10]:
# Show the first training sample restored to its (position, letter) layout.
first_sample_onehot = np.reshape(train_x[0], (128, 4))
print(first_sample_onehot)

[[0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0.

In [11]:
# Reconstruct the first training sample; the 0:1 slice keeps the leading
# batch axis that predict() expects.
decoded_seq = ae.predict(train_x[0:1])
print(np.reshape(decoded_seq, (128, 4)))

[[0.         0.         0.         1.0184573 ]
 [0.         0.         0.9873216  0.        ]
 [0.92554116 0.         0.         0.        ]
 [0.         0.         1.0971279  0.        ]
 [0.06410183 0.         0.97924143 0.        ]
 [0.         0.93782735 0.         0.        ]
 [0.         0.99037606 0.         0.        ]
 [0.         0.9872795  0.         0.        ]
 [0.9027081  0.         0.         0.        ]
 [0.         0.9602098  0.         0.        ]
 [0.         0.         0.         0.        ]
 [1.0863386  0.         0.         0.        ]
 [0.8855681  0.         0.         0.00181097]
 [0.90355384 0.         0.         0.        ]
 [1.0576706  0.         0.         0.        ]
 [0.         0.         1.0133162  0.        ]
 [0.         0.         0.         0.8871078 ]
 [0.         0.9259661  0.         0.        ]
 [0.         1.0046654  0.         0.        ]
 [0.99545527 0.         0.         0.        ]
 [0.         0.9104397  0.         0.        ]
 [0.         

In [12]:
# Sub-model that shares the autoencoder's input and stops at the layer named
# 'latent_layer', so predict() returns the 128-unit latent activations.
feature_layer_model = models.Model(
    inputs=ae.input,
    outputs=ae.get_layer(name='latent_layer').output,
)
feature_output = feature_layer_model.predict(train_x[0:1])

In [13]:
# Display the contents of feature_output.
print(feature_output)

[[0.         0.97421974 0.5150256  0.6765196  0.         1.1277136
  0.         1.2141285  0.         0.         0.99871135 0.35049543
  1.1663758  0.         0.         1.0215986  1.6046895  0.5821447
  0.         0.5952385  1.0038116  0.8722179  1.006069   0.9074036
  0.75710326 0.96511555 0.         1.5862415  0.5958752  0.
  0.33694345 1.1272573  0.42157188 0.6861091  0.43589354 0.
  0.3449101  1.188418   0.         1.221881   0.7677579  0.5252679
  0.7113294  0.6332697  1.503986   0.39955887 0.53246075 0.8043627
  1.3849689  1.0982062  0.48260584 0.707944   2.2356055  1.7000731
  1.2670677  0.4612414  0.02799289 0.         0.5670428  1.0909815
  0.33295277 0.14131072 0.13745748 0.90027106 0.         0.
  0.98432076 1.1032288  0.7971993  0.         0.22559096 0.
  1.1461229  0.20429596 0.         0.40379515 1.1257646  0.2388787
  0.6950509  0.         0.80939347 0.14636582 0.         1.0016036
  0.7870828  0.2902552  1.1544414  0.82302827 1.020612   1.7983214
  0.         1.1486903

In [14]:
# Run the latent-feature model once over the whole training matrix instead of
# calling predict() separately for every sample; each predict() call carries a
# large fixed overhead, so the per-row loop was needlessly slow.
latent_features = feature_layer_model.predict(train_x)

# Preserve the original nesting: one single-row list per sample, so that
# featureList[k][0] is still the latent vector (a list of floats) of sample k.
featureList = [[latent_row] for latent_row in latent_features.tolist()]

In [15]:
# Print the latent vector of the second sample, then the full list of IDs
# read from the CSV, each on its own line.
print(featureList[1][0], mimat, sep='\n')

[0.0, 0.5610201954841614, 1.3859870433807373, 0.6375765800476074, 0.0, 1.2422518730163574, 0.0, 0.6320744156837463, 0.0, 0.0, 1.7628836631774902, 0.6996763944625854, 0.7254539728164673, 0.0, 0.0, 0.6660261154174805, 1.274143099784851, 0.6311331987380981, 0.0, 0.815101146697998, 0.6890671253204346, 0.3724668622016907, 1.161853313446045, 0.4920099079608917, 0.6946920156478882, 0.0, 0.0, 0.7073050141334534, 0.8693756461143494, 0.0, 0.3481066823005676, 1.2197853326797485, 0.8719751238822937, 0.7025530934333801, 0.7283930778503418, 0.0, 0.7674576044082642, 1.1714082956314087, 0.0, 0.9567451477050781, 0.9007133841514587, 1.1651464700698853, 0.43878668546676636, 1.376755952835083, 1.437488079071045, 0.9954615831375122, 1.0996960401535034, 0.47676706314086914, 1.259968876838684, 0.3151683509349823, 0.9225273728370667, 1.1074427366256714, 1.5408293008804321, 0.9542492032051086, 0.9878146648406982, 0.0, 0.5329945683479309, 0.0, 0.8312473297119141, 0.6560392379760742, 1.0577877759933472, 0.093357

In [16]:
# A new Workbook starts with one worksheet (titled 'Sheet'), which is also
# the active one; write the header row into it.
wb = xl.Workbook()
w1 = wb.active
w1.cell(row=1, column=1, value='MIMAT')
w1.cell(row=1, column=2, value='feature')

In [17]:
# One spreadsheet row per sample, starting at row 2 (row 1 is the header):
# column 1 holds the ID, columns 2..129 hold the 128 latent values.
for sample_idx in range(len(featureList)):
    sheet_row = sample_idx + 2
    w1.cell(sheet_row, 1).value = mimat[sample_idx]
    latent_vector = featureList[sample_idx][0]
    for feature_idx in range(128):
        w1.cell(sheet_row, feature_idx + 2).value = latent_vector[feature_idx]

In [18]:
# Write the workbook to 'miRNAFeature.xlsx' (a relative path, so it is
# resolved against the current working directory).
wb.save('miRNAFeature.xlsx')