In [None]:
import numpy as np
import h5py
import tensorflow as tf
import keras
from keras.models import Sequential, Model
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Input, Embedding
from keras.layers import Dense, Merge, Dropout
from keras.layers import LSTM, Bidirectional
from keras import metrics
from keras import backend as K
from keras.layers import Lambda
# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving it all up front.
gpu_options = tf.GPUOptions(allow_growth=True)
session_config = tf.ConfigProto(gpu_options=gpu_options)
# Creating the session here happens before any model is built below.
session = tf.InteractiveSession(config=session_config)

In [None]:
# Symbols the input sequences are drawn from.
# NOTE(review): presumably each symbol is encoded as its index in this string
# (N=0, A=1, ...) -- confirm against the data-preparation code.
alphabet = "NACGT"
vocab_size = len(alphabet)  # number of distinct token ids for the Embedding layer
time_steps = 100  # length of every input sequence
embedding_size = 300 # for embedding layer, maybe remove
dense_size = 300  # width of the hidden Dense layers in the base network
category = 10  # width of the base network's final Dense layer (the vector fed to the distance)

# Derived from time_steps so the two can never drift apart.
genome_input_shape = (time_steps,)

def euclidean_distance(vects):
    """Return the row-wise Euclidean distance between two backend tensors.

    Args:
        vects: a pair ``(x, y)`` of tensors that can be subtracted from
            each other.

    Returns:
        A tensor holding ``sqrt(sum((x - y)**2))`` reduced over axis 1, with
        the reduced axis kept.
    """
    left, right = vects
    squared_diff = K.square(left - right)
    summed = K.sum(squared_diff, axis=1, keepdims=True)
    # Clamp to epsilon before the square root so the result (and its
    # gradient) stays finite when the two inputs are identical.
    clamped = K.maximum(summed, K.epsilon())
    return K.sqrt(clamped)

def eucl_dist_output_shape(shapes):
    """Compute the output shape of the distance layer.

    Args:
        shapes: a pair of shapes, one per input to the distance function.

    Returns:
        ``(d0, 1)`` where ``d0`` is the first dimension of the first shape.
    """
    first_shape, _second_shape = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)

def create_base_network(input_shape):
    """Build the sequence encoder: Embedding -> 2x BiLSTM -> 3x Dense.

    Args:
        input_shape: shape of one input sample without the batch axis,
            e.g. ``(sequence_length,)``.
            NOTE(review): assumes inputs are integer token ids in
            ``[0, vocab_size)`` -- confirm against the data preparation.

    Returns:
        A Keras ``Model`` mapping an input of ``input_shape`` to a vector of
        width ``category`` (linear activation).
    """
    sequence_input = Input(shape=input_shape)
    # Take the sequence length from the argument rather than the module-level
    # ``time_steps`` so the Embedding always agrees with the Input it is fed.
    x = Embedding(vocab_size, embedding_size,
                  input_length=input_shape[0])(sequence_input)
    # First recurrent layer keeps the full sequence for the second one.
    x = Bidirectional(LSTM(units=300, return_sequences=True))(x)
    x = Dropout(0.50)(x)
    # Second recurrent layer collapses the sequence to a single vector.
    x = Bidirectional(LSTM(units=300))(x)
    x = Dropout(0.50)(x)

    x = Dense(dense_size, activation='relu')(x)
    x = Dense(dense_size, activation='relu')(x)
    x = Dense(category, activation='linear')(x)
    return Model(sequence_input, x)


# One encoder instance, applied to both inputs below, so both branches
# share the same weights.
model_makes_embedding = create_base_network(genome_input_shape)

# The two sequences to compare.
input_a = Input(shape=genome_input_shape)
input_b = Input(shape=genome_input_shape)

# Encode each sequence with the shared encoder.
processed_a = model_makes_embedding(input_a)
processed_b = model_makes_embedding(input_b)

# The model output is the Euclidean distance between the two encodings.
distance_layer = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)
distance = distance_layer([processed_a, processed_b])

model_twin_sequences = Model([input_a, input_b], distance)

# Regress the predicted distance onto the target with mean squared error.
model_twin_sequences.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_squared_error']
)

model_twin_sequences.summary()

In [None]:
# Load the training arrays fully into memory. The context managers close each
# HDF5 file even if the dataset lookup raises (e.g. a missing key).
with h5py.File('X_train_twin_sequences_a.h5', 'r') as h5f:
    X_train_a = h5f['X_a'][:]
with h5py.File('X_train_twin_sequences_b.h5', 'r') as h5f:
    X_train_b = h5f['X_b'][:]
with h5py.File('Y_train_twin_sequences.h5', 'r') as h5f:
    Y_train = h5f['Y'][:]

# Quick sanity check: shapes plus the first sample of each array.
print(X_train_a.shape)
print(Y_train.shape)
print(X_train_a[:1])
print(X_train_b[:1])
print(Y_train[:1])

In [None]:

# Train on the first rows only to keep this run short.
n_train_samples = 1000
model_twin_sequences.fit(
    [X_train_a[:n_train_samples], X_train_b[:n_train_samples]],
    Y_train[:n_train_samples],
    batch_size=4
)