In [1]:
# Necessary imports
%load_ext autoreload
%autoreload 2\

import networkx as nx
import numpy as np
import tensorflow as tf

from keras import backend as K
from keras.layers import Dense
from keras.models import Model, Sequential


from TCGAMultiOmics.multiomics import MultiOmicsData
from moge.network.heterogeneous_network import HeterogeneousNetwork


Using TensorFlow backend.


#  Import network from file

In [2]:
import pickle

# WRITE
# with open('moge/data/lncRNA_miRNA_mRNA/miRNA-mRNA_network_test_05_val_01_seed_0.pickle', 'wb') as file:
#     pickle.dump(network, file)

# READ
with open('moge/data/lncRNA_miRNA_mRNA/miRNA-mRNA_network_test_05_val_01_seed_0.pickle', 'rb') as file:
# with open('moge/data/lncRNA_miRNA_mRNA/miRNA-mRNA_network_biogrid.pickle', 'rb') as file:
    network = pickle.load(file)
    network.remove_extra_nodes()
#     network.node_list = network.all_nodes
#     node_list = network.node_list

In [19]:
# READ edgelists
with open('moge/data/lncRNA_miRNA_mRNA/miRNA-mRNA_network_test_05_val_01_seed_0_test_edges.pickle', 'rb') as file:
    test_edges_dict = pickle.load(file)
    
with open('moge/data/lncRNA_miRNA_mRNA/miRNA-mRNA_network_test_05_val_01_seed_0_val_edges.pickle', 'rb') as file:
    val_edges_dict = pickle.load(file)

# Load training data

In [20]:
X, y = network.multi_omics_data.load_data(modalities=["MIR", "GE"])

In [21]:
network.multi_omics_data.external_data_path = "/home/jonny/PycharmProjects/Bioinformatics_ExternalData/"

In [22]:
X["MIR"].shape

(460, 1870)

In [23]:
transcripts = network.multi_omics_data.GE.get_genes_info()["Mature sequence"]

FileNotFoundError: [Errno 2] No such file or directory: '/home/jonny_admin/PycharmProjects/Bioinformatics_ExternalData/GENCODE/gencode.v28.transcripts.fa'

In [11]:
transcripts[transcripts.notna()].map(len).describe()

count    1617.000000
mean       21.403834
std         1.644691
min        16.000000
25%        21.000000
50%        22.000000
75%        22.000000
max        27.000000
Name: Mature sequence, dtype: float64

# Training Source Target Graph Embedding

In [9]:
from keras.layers import Input, Conv1D, Lambda, Dot, Dense, Flatten, MaxPooling1D, Lambda
from keras.layers import LSTM, Dense, TimeDistributed, Dropout
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras import backend as K

from keras.optimizers import SGD, Adam, RMSprop
from keras.losses import binary_crossentropy

from keras.utils import to_categorical


def W_init(shape,name=None):
    """Initialize weights as in paper"""
    values = np.random.normal(loc=0,scale=1e-2,size=shape)
    return K.variable(values,name=name)
#//TODO: figure out how to initialize layer biases in keras.
def b_init(shape,name=None):
    """Initialize bias as in paper"""
    values=np.random.normal(loc=0.5,scale=1e-2,size=shape)
    return K.variable(values,name=name)


In [10]:
K.clear_session()
tf.reset_default_graph()
# sess.close()
sess = tf.InteractiveSession()

In [11]:
# node_features_size = X["MIR"].shape[0]
input_shape = (None, 4)
_d = 128
# n_nodes = len(network.node_list)

In [12]:
# with tf.name_scope('inputs'):
E_ij = Input(batch_shape=(1, ), name="E_ij")
#     input_i = tf.placeholder(tf.float32, shape=input_shape, name="input_i")
#     input_j = tf.placeholder(tf.float32, shape=input_shape, name="input_j")
input_seq_i = Input(batch_shape=(1, None, 4), name="input_i")
input_seq_j = Input(batch_shape=(1, None, 4), name="input_j")
# is_directed = tf.placeholder(tf.bool, name="is_directed")
is_directed = Input(batch_shape=(1, ), dtype=tf.bool, name="is_directed")
# i = tf.Variable(int, name="i", trainable=False)
# j = tf.Variable(int, name="j", trainable=False)

In [13]:
def create_base_network(input_shape):
    """ Base network to be shared (eq. to feature extraction).
    """
    input = Input(shape=input_shape)
#     x = Flatten()(input)
    x = LSTM(128, input_shape=input_shape, return_sequences=False)(input)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.1)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.1)(x)
    x = Dense(128, activation='relu')(x)
    return Model(input, x)

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))

def source_target_emebedding_distance(vects):
    emb_i, emb_j, is_directed = vects
    dot_directed = Dot(axes=1)([emb_i[:, 0:int(_d/2)], emb_j[:, int(_d/2):_d]])
    dot_undirected = Dot(axes=1)([emb_i, emb_j])
    return K.switch(is_directed, K.sigmoid(dot_directed), K.sigmoid(dot_undirected))

def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

def accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))

In [14]:
#build create_base_network to use in each siamese 'leg'
lstm_network = create_base_network(input_shape=(None, 4))

print("lstm_network", lstm_network)

# encode each of the two inputs into a vector with the convnet
encoded_i = lstm_network(input_seq_i)
encoded_j = lstm_network(input_seq_j)
print("encoded_i", encoded_i, "\nencoded_j", encoded_j)

distance = Lambda(source_target_emebedding_distance)([encoded_i, encoded_j, is_directed])
print("distance", distance)

siamese_net = Model(inputs=[input_seq_i, input_seq_j, is_directed], outputs=distance)

lstm_network <keras.engine.training.Model object at 0x7f2a0ed7af28>
encoded_i Tensor("model_1/dense_3/Relu:0", shape=(1, 128), dtype=float32) 
encoded_j Tensor("model_1_1/dense_3/Relu:0", shape=(1, 128), dtype=float32)
distance Tensor("lambda_1/Select:0", shape=(1, 1), dtype=float32)


In [15]:
#//TODO: get layerwise learning rates and momentum annealing scheme described in paperworking
siamese_net.compile(loss=contrastive_loss, 
                    optimizer=RMSprop(lr=0.01),
                    metrics=[accuracy])

siamese_net.count_params()


117632

# Data Generator

In [5]:
from moge.network.data_generator import DataGenerator

In [7]:
generator = DataGenerator(network.node_list, network=network, 
                          batch_size=1, dim=(None, 4), shuffle=True)

In [16]:
siamese_net.fit_generator(generator)

Epoch 1/1
[5370938]
[11159958]
[5814391]
[10436904]
[360256]
[1394466]
[8521522]
[4374912]
[6187676]
[9927030]
[3907835]
[8044924]


StopIteration: 'DataGenerator' object has no attribute 'split_index'

In [32]:
X, y = next(train_generator())
print(X.shape, y.shape)

(1, 88, 4) (1, 88, 1)


In [31]:
def train_generator():
    while True:
        sequence_length = np.random.randint(10, 100)
        x_train = np.random.random((1, sequence_length, 4))
        # y_train will depend on past 5 timesteps of x
        y_train = x_train[:, :, 0]
        for i in range(1, 2):
            y_train[:, i:] += x_train[:, :-i, i]
        y_train = to_categorical(y_train > 2.5)
        yield x_train, y_train

In [126]:
with tf.name_scope('siamese'):
    siamese = Dense(128, activation='relu')(N_i)
    siamese = Dense(_d, activation='relu')(siamese)
    
    emb_c_i = siamese(N_i)
    emb_c_i = siamese(N_j)


In [127]:
with tf.name_scope('embedding'):
    emb_s = tf.Variable(initial_value=tf.random_uniform([n_nodes, int(_d/2)], -1, 1),
                        validate_shape=True, dtype=tf.float32,
                        name="emb_t", trainable=True)

    emb_t = tf.Variable(initial_value=tf.random_uniform([n_nodes, int(_d/2)], -1, 1),
                        validate_shape=True, dtype=tf.float32,
                        name="emb_s", trainable=True)

    emb_c = tf.concat([emb_s, emb_t], axis=1, name="emb_concat")

In [128]:
emb_s = emb_s[i].assign(tf.reshape(siamese[:, 0 : int(_d/2)], [-1]))
emb_t = emb_t[i].assign(tf.reshape(siamese[:, int(_d/2) : _d], [-1]))

# with tf.control_dependencies([emb_s[i].assign(emb_c_i)]):
#     emb_s = tf.identity(emb_s)

In [None]:
sess.as_default()
K.set_session(sess)
session.run(init_op)