<a href="https://colab.research.google.com/github/AvantiShri/colab_notebooks/blob/master/revcomp/CreateSiameseProfileModelFromStandardModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Upload the saved model file (the .h5 loaded further below) to the working
# directory, then list the directory to verify that the upload succeeded
!ls

sample_data


In [4]:
#Specify tensorflow version 1, import keras, load model
%tensorflow_version 1.x
import keras
from keras.utils import CustomObjectScope
from keras.models import load_model
import tensorflow as tf
import tensorflow_probability as tfp

def multinomial_nll(true_counts, logits):
    """Compute the multinomial negative log-likelihood, averaged over the
    first axis of `true_counts`.

    Args:
      true_counts: observed count values
      logits: predicted logit values

    Returns:
      The negated sum of the multinomial log-probabilities of `true_counts`,
      divided by the size of the first axis of `true_counts`.
    """
    # The total count of each multinomial is the sum of the observed counts
    # along the last axis.
    counts_per_example = tf.reduce_sum(true_counts, axis=-1)
    dist = tfp.distributions.Multinomial(total_count=counts_per_example,
                                         logits=logits)
    # tf.to_float is deprecated ("Use `tf.cast` instead"); casting to float32
    # is the equivalent, warning-free form.
    batch_size = tf.cast(tf.shape(true_counts)[0], tf.float32)
    return -tf.reduce_sum(dist.log_prob(true_counts)) / batch_size

#from https://github.com/kundajelab/basepair/blob/cda0875571066343cdf90aed031f7c51714d991a/basepair/losses.py#L87
class MultichannelMultinomialNLL(object):
    """Sum of per-channel multinomial negative log-likelihoods.

    The last axis of the inputs is treated as the channel axis; the loss is
    `multinomial_nll` evaluated on each of the first `n` channels, summed.

    Args:
      n: number of channels (size of the slice taken along the last axis).
    """

    def __init__(self, n):
        # Callable objects have no __name__ by default; set one explicitly.
        # NOTE(review): presumably read by Keras when handling the loss - confirm.
        self.__name__ = "MultichannelMultinomialNLL"
        self.n = n

    def __call__(self, true_counts, logits):
        """Return the summed loss over channels 0..n-1.

        Args:
          true_counts: observed counts; channel is the last axis
          logits: predicted logits; channel is the last axis

        Raises:
          ValueError: if `n` is less than 1, since there is nothing to sum.
        """
        # Previously n < 1 skipped the loop and failed on `return total`
        # with an opaque UnboundLocalError; fail with a clear message instead.
        if self.n < 1:
            raise ValueError(
                "MultichannelMultinomialNLL requires n >= 1, got n=%r"
                % (self.n,))
        total = multinomial_nll(true_counts[..., 0], logits[..., 0])
        for i in range(1, self.n):
            total += multinomial_nll(true_counts[..., i], logits[..., i])
        return total

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return {"n": self.n}

#Make the custom loss class available under its name while the saved model
# is being loaded
custom_objects = {'MultichannelMultinomialNLL': MultichannelMultinomialNLL}
model_path = '1535_TrainProfileModelNANOG_reg_loss_add_profile_only.h5'
with CustomObjectScope(custom_objects):
    model = load_model(model_path)


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use `tf.cast` instead.



In [5]:
#Print the layer-by-layer summary of the loaded model
model.summary()

Model: "model_15"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sequence (InputLayer)           (None, 1346, 4)      0                                            
__________________________________________________________________________________________________
conv1d_113 (Conv1D)             (None, 1326, 64)     5440        sequence[0][0]                   
__________________________________________________________________________________________________
lambda_85 (Lambda)              (None, 1322, 64)     0           conv1d_113[0][0]                 
__________________________________________________________________________________________________
conv1d_114 (Conv1D)             (None, 1322, 64)     12352       conv1d_113[0][0]                 
___________________________________________________________________________________________

In [6]:
#Display the list of layer objects that make up the loaded model
model.layers

[<keras.engine.input_layer.InputLayer at 0x7f0f48a9ff28>,
 <keras.layers.convolutional.Conv1D at 0x7f0f48a9ff60>,
 <keras.layers.core.Lambda at 0x7f0f48a970b8>,
 <keras.layers.convolutional.Conv1D at 0x7f0f48a97198>,
 <keras.layers.merge.Add at 0x7f0f48a97208>,
 <keras.layers.core.Lambda at 0x7f0f48a974a8>,
 <keras.layers.convolutional.Conv1D at 0x7f0f48a97588>,
 <keras.layers.merge.Add at 0x7f0f48a975c0>,
 <keras.layers.core.Lambda at 0x7f0f48a97748>,
 <keras.layers.convolutional.Conv1D at 0x7f0f48a977b8>,
 <keras.layers.merge.Add at 0x7f0f48a977f0>,
 <keras.layers.core.Lambda at 0x7f0f48a97978>,
 <keras.layers.convolutional.Conv1D at 0x7f0f48a979e8>,
 <keras.layers.merge.Add at 0x7f0f48a97a20>,
 <keras.layers.core.Lambda at 0x7f0f48a97ba8>,
 <keras.layers.convolutional.Conv1D at 0x7f0f48a97c18>,
 <keras.layers.merge.Add at 0x7f0f48a97c50>,
 <keras.layers.core.Lambda at 0x7f0f48a97dd8>,
 <keras.layers.convolutional.Conv1D at 0x7f0f48a97e48>,
 <keras.layers.merge.Add at 0x7f0f48a2d048>

In [7]:
#Display the input tensors of the loaded model (names and shapes); the
# siamese wrapper below needs one input per entry
model.inputs

[<tf.Tensor 'sequence:0' shape=(?, 1346, 4) dtype=float32>,
 <tf.Tensor 'patchcap.logcount:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'patchcap.profile:0' shape=(?, 1000, 2) dtype=float32>]

In [8]:
#Display the output tensors of the loaded model (names and shapes)
model.outputs

[<tf.Tensor 'CHIPNexus.NANOG.logcount/BiasAdd:0' shape=(?, 2) dtype=float32>,
 <tf.Tensor 'CHIPNexus.NANOG.profile/add:0' shape=(?, 1000, 2) dtype=float32>]

In [21]:
#Let's create the model
#define the inputs; the per-example shapes are read from the loaded model
# instead of being hard-coded (for this model: (1346,4), (2,), (1000,2))
seq_shape, logcount_shape, profile_shape = [
    keras.backend.int_shape(input_tensor)[1:] for input_tensor in model.inputs]
fwd_sequence_input = keras.layers.Input(shape=seq_shape)
fwd_patchcap_logcount = keras.layers.Input(shape=logcount_shape)
fwd_patchcap_profile = keras.layers.Input(shape=profile_shape)

#revcomp sequence input: reverse both the position axis and the channel axis
# (assumes a channel order where reversing the channels gives the complement,
#  e.g. ACGT - TODO confirm)
rev_sequence_input = keras.layers.Lambda(lambda x: x[:,::-1,::-1])(fwd_sequence_input)
rev_patchcap_logcount = keras.layers.Lambda(lambda x: x[:,::-1])(fwd_patchcap_logcount)
#note that last axis is NOT fwd vs reverse strand, but different smoothing levels
# that's why we flip only the middle axis
rev_patchcap_profile = keras.layers.Lambda(lambda x: x[:,::-1,:])(fwd_patchcap_profile)

#Run the model on the original fwd inputs
fwd_logcount, fwd_profile = model(
    [fwd_sequence_input, fwd_patchcap_logcount, fwd_patchcap_profile])
#Run the original model on the reverse inputs (same model object is called
# twice, so both passes use the same weights)
rev_logcount, rev_profile = model(
    [rev_sequence_input, rev_patchcap_logcount, rev_patchcap_profile])

#Reverse complement rev_logcount and rev_profile to be compatible with fwd
revcompd_rev_logcount = keras.layers.Lambda(lambda x: x[:,::-1])(rev_logcount)
revcompd_rev_profile = keras.layers.Lambda(lambda x: x[:,::-1,::-1])(rev_profile)

#Average the two
avg_logcount = keras.layers.Average()([fwd_logcount, revcompd_rev_logcount])
avg_profile = keras.layers.Average()([fwd_profile, revcompd_rev_profile])

#Create a model that goes from the inputs to the averaged output
siamese_model = keras.models.Model(inputs=[fwd_sequence_input,
                                           fwd_patchcap_logcount,
                                           fwd_patchcap_profile],
                                   outputs=[avg_logcount, avg_profile])

In [22]:
import numpy as np

#Let's test it out.
#Draw random inputs from a seeded generator, in the shapes the model takes
rng = np.random.RandomState(1234)
random_sequences = rng.random((10,1346,4))
random_logcount = rng.random((10,2))
random_profile = rng.random((10,1000,2))

#Build the forward and the reverse-complemented input lists once each
fwd_inputs = [random_sequences, random_logcount, random_profile]
rev_inputs = [random_sequences[:,::-1,::-1],
              random_logcount[:,::-1],
              random_profile[:,::-1]]

#Predictions of the original model on both orientations
fwd_logcount, fwd_profile = model.predict(fwd_inputs)
rev_logcount, rev_profile = model.predict(rev_inputs)
#Predictions of the siamese model on both orientations
fwd_siamese_logcount, fwd_siamese_profile = siamese_model.predict(fwd_inputs)
rev_siamese_logcount, rev_siamese_profile = siamese_model.predict(rev_inputs)

In [23]:
#Check: the siamese logcount for the revcomp input, flipped back along its
# last axis, minus the average of the original model's fwd logcount and its
# flipped rev logcount. Expected to be all zeros.
rev_siamese_logcount[:,::-1] - 0.5*(fwd_logcount + rev_logcount[:,::-1])

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]], dtype=float32)

In [24]:
#Check: the siamese logcount for the fwd input minus the siamese logcount
# for the revcomp input flipped along its last axis. Expected to be all zeros.
fwd_siamese_logcount - rev_siamese_logcount[:,::-1]

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]], dtype=float32)

In [25]:
#Check: the siamese profile for the fwd input minus the average of the
# original model's fwd profile and its rev profile flipped along the last two
# axes. Expected to be all zeros.
fwd_siamese_profile - 0.5*(rev_profile[:,::-1,::-1] + fwd_profile)

array([[[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       ...,

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]]], dtype=float32)

In [26]:
#Check: the siamese profile for the fwd input minus the siamese profile for
# the revcomp input flipped along the last two axes. Expected to be all zeros.
fwd_siamese_profile - rev_siamese_profile[:,::-1,::-1]

array([[[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       ...,

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]]], dtype=float32)