In [7]:
! pip install tensorflow



In [8]:
# Recursively (-r) download the "model_input" folder from the DNAnexus project
# into the working directory; later cells read their data from model_input/.
!dx download "model_input" -r



In [32]:
import tensorflow as tf
import keras
from keras import layers
from keras.layers import Input, Dense
from tensorflow.keras import regularizers
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load and preprocess data

In [11]:
# Read the tab-separated HERV-K insertion table; its first column becomes the
# row index of the DataFrame.
insertions_data = pd.read_csv(
    'model_input/HERV_K_Insertions.txt',
    sep='\t',
    index_col=0,
)

In [18]:
# Plain NumPy matrix of the insertion table (the index column is not included).
# assumes rows are samples and columns are features — TODO confirm
input1 = insertions_data.values

# Width of one input vector = size of the last axis of the matrix.
input_shape = input1.shape[-1]

# Keras accepts model inputs as a list of arrays; this model has a single input.
X = [input1]

# Model

In [19]:
# AUTOENCODER MODEL DEFINITION
# NOTE(review): no random seed is set in the cells visible here, so the initial
# weights (and therefore the training curve) will differ from run to run.

# Width of the bottleneck, i.e. the number of compressed features per sample.
encoding_dim = 5

# Input placeholder: one vector of `input_shape` features per sample.
# `shape` is passed as a 1-tuple: older Keras releases silently coerce a bare
# int, but newer ones require an iterable shape, and a tuple works with both.
input_layer = keras.Input(shape=(input_shape,), name='input')

# "encoded" is the compressed (bottleneck) representation of the input.
# Strong regularization can be added here if n_samples << n_features.
# (The layer name's spelling is kept as-is in case it is looked up by name.)
encoded = layers.Dense(encoding_dim, activation='relu', name='commpressed_representation')(input_layer)

# "decoded" is the lossy reconstruction of the input; the sigmoid bounds every
# reconstructed value to (0, 1).
# assumes the input features themselves lie in [0, 1] — TODO confirm
decoded = layers.Dense(input_shape, activation='sigmoid', name='reconstructed_output')(encoded)

# End-to-end model: input -> bottleneck -> reconstruction.
autoencoder = keras.Model(input_layer, decoded)

In [20]:
# Render the layer graph (with tensor shapes) to a PNG file.
# Needs the optional `pydot` package and a Graphviz installation; when they are
# missing, an install hint is emitted instead of a picture.
tf.keras.utils.plot_model(
    autoencoder,
    to_file="multi_input_and_output_model.png",
    show_shapes=True,
)


('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


# Training

In [21]:
# Training objective: binary cross-entropy between the input and its sigmoid
# reconstruction, minimised with Adam at its default settings.
autoencoder.compile(
    loss='binary_crossentropy',
    optimizer='adam',
)


In [22]:
# Train the autoencoder to reproduce its own input (targets == inputs).
# validation_split=0.2 holds out the last 20% of the rows for validation (Keras
# takes the split before shuffling); verbose=2 prints one line per epoch.
# The returned History object is bound to `history`; the original misspelled
# name `hisotry` is kept as an alias so any cell already using it keeps working.
history = hisotry = autoencoder.fit(
    X, X,
    epochs=50,
    batch_size=5,
    shuffle=True,
    validation_split=0.2,
    verbose=2,
)

Epoch 1/50
4/4 - 0s - loss: 0.7349 - val_loss: 0.7244
Epoch 2/50
4/4 - 0s - loss: 0.7296 - val_loss: 0.7197
Epoch 3/50
4/4 - 0s - loss: 0.7248 - val_loss: 0.7152
Epoch 4/50
4/4 - 0s - loss: 0.7207 - val_loss: 0.7110
Epoch 5/50
4/4 - 0s - loss: 0.7165 - val_loss: 0.7071
Epoch 6/50
4/4 - 0s - loss: 0.7128 - val_loss: 0.7032
Epoch 7/50
4/4 - 0s - loss: 0.7089 - val_loss: 0.6994
Epoch 8/50
4/4 - 0s - loss: 0.7052 - val_loss: 0.6958
Epoch 9/50
4/4 - 0s - loss: 0.7019 - val_loss: 0.6922
Epoch 10/50
4/4 - 0s - loss: 0.6984 - val_loss: 0.6887
Epoch 11/50
4/4 - 0s - loss: 0.6951 - val_loss: 0.6853
Epoch 12/50
4/4 - 0s - loss: 0.6918 - val_loss: 0.6820
Epoch 13/50
4/4 - 0s - loss: 0.6887 - val_loss: 0.6789
Epoch 14/50
4/4 - 0s - loss: 0.6855 - val_loss: 0.6760
Epoch 15/50
4/4 - 0s - loss: 0.6828 - val_loss: 0.6732
Epoch 16/50
4/4 - 0s - loss: 0.6797 - val_loss: 0.6702
Epoch 17/50
4/4 - 0s - loss: 0.6767 - val_loss: 0.6673
Epoch 18/50
4/4 - 0s - loss: 0.6738 - val_loss: 0.6644
Epoch 19/50
4/4 - 0

# Use encoder to reduce dimensions

In [33]:
# Stand-alone encoder: reuses the input and bottleneck layers of the trained
# autoencoder, so it maps raw features to their compressed representation.
encoder = keras.Model(inputs=input_layer, outputs=encoded)

# Placeholder with the bottleneck's width (not used in the cells shown here).
encoded_input = Input(shape=(encoding_dim,))

In [37]:
# Push every sample through the encoder and name the resulting columns
# feature_0, feature_1, ...
# NOTE(review): X holds all rows (including those held out for validation) and
# the new frame gets a fresh 0..n-1 index instead of the row index of
# `insertions_data` — confirm that is intended.
encoded_train = pd.DataFrame(encoder.predict(X)).add_prefix('feature_')

In [39]:
# Sanity check on the compressed data: (number of rows, number of bottleneck units).
print(encoded_train.shape)
# Bare last expression so the notebook renders the frame as a table.
encoded_train

(20, 5)


Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4
0,0.847623,0.0,0.489327,0.774136,0.653006
1,0.252004,0.201692,0.271582,0.249787,1.408806
2,0.0,0.0,0.571005,0.53657,1.838752
3,1.487037,0.0,1.455211,0.307442,1.026299
4,0.70135,0.0,1.747839,1.317239,1.896496
5,1.28301,0.0,0.872306,0.942102,0.659673
6,0.381713,0.280432,1.133847,0.308492,1.685441
7,0.721037,0.173268,1.506489,0.288442,1.089734
8,0.215552,0.0,1.128298,0.0,0.209109
9,0.568619,0.0,0.0,0.522326,1.825762
