# **To run the Code:**
 

1.   Click on `Runtime`.
2.   Choose `Run All`.



## Mounting Google Drive folders

In [None]:
# from google.colab import drive
# drive.mount("/content/gdrive/")
# !ls "/content/gdrive/My Drive/Data-integration"

## Cloning the GitHub repo

In [None]:
# Shell escape: clone the course repository into the current working
# directory. The datasets are later read from the resulting
# "SVU-MWS-F20-ADE-DataIntegration" folder (see work_dir).
!git clone https://github.com/AliMorie/SVU-MWS-F20-ADE-DataIntegration.git

## Importing required dependencies

In [3]:
import numpy as np
import pandas as pd
from keras.models import Model
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from keras.utils.vis_utils import plot_model
from keras.layers import Input, Dense
from keras.layers.merge import concatenate

## Loading Datasets

In [4]:
# Root folder of the data files. Swap in the commented-out Google Drive path
# when reading from a mounted drive instead of the cloned repository.
# work_dir = "/content/gdrive/My Drive/Data-integration"
work_dir = "SVU-MWS-F20-ADE-DataIntegration"

# Both tables are tab-separated text files.
scRNAseq = pd.read_csv(work_dir + '/scRNAseq.txt', sep='\t')
scProteomics = pd.read_csv(work_dir + '/scProteomics.txt', sep='\t')

# Split each table: every column except the last becomes X, the last column
# becomes Y.
# NOTE(review): presumably the last column holds the cell labels -- confirm
# against the data files.
rna_values = scRNAseq.values
protein_values = scProteomics.values
X_scRNAseq, Y_scRNAseq = rna_values[:, :-1], rna_values[:, -1]
X_scProteomics, Y_scProteomics = protein_values[:, :-1], protein_values[:, -1]

# log(x + 1) transform of both feature matrices.
X_scRNAseq, X_scProteomics = np.log(X_scRNAseq + 1), np.log(X_scProteomics + 1)

## Checking dataset dimensions

In [None]:
# Report the shape of each feature matrix (X) first, then of each label
# vector (Y), with a blank line separating the two groups.
for shape_label, shape_source in (('X_scRNAseq', X_scRNAseq),
                                  ('X_scProteomics', X_scProteomics)):
    print('Shape of ' + shape_label + ' is ', shape_source.shape)

for shape_label, shape_source in (('Y_scRNAseq', Y_scRNAseq),
                                  ('Y_scProteomics', Y_scProteomics)):
    print('Shape of ' + shape_label + ' is ', shape_source.shape)

# Input Layer

In [6]:
# Number of feature columns in each matrix; these set the width of the
# matching input layer (and are reused later for the decoder outputs).
ncol_scRNAseq, ncol_scProteomics = X_scRNAseq.shape[1], X_scProteomics.shape[1]

# One named Keras input per OMIC.
input_dim_scRNAseq = Input(name = "scRNAseq", shape = (ncol_scRNAseq, ))
input_dim_scProteomics = Input(name = "scProteomics", shape = (ncol_scProteomics, ))

# Dimensions of Encoder for each OMIC

In [7]:
# Width of the first (per-OMIC) encoding layer: 50 units for scRNAseq and
# 10 units for scProteomics.
encoding_dim_scRNAseq, encoding_dim_scProteomics = 50, 10

# Encoder layer for each OMIC


In [8]:
# Each OMIC is projected by its own Dense layer with a linear activation;
# the layer is built first and then applied to the matching input tensor.
rna_encoder_layer = Dense(units = encoding_dim_scRNAseq, activation = 'linear',
                          name = "Encoder_scRNAseq")
encoded_scRNAseq = rna_encoder_layer(input_dim_scRNAseq)

protein_encoder_layer = Dense(units = encoding_dim_scProteomics, activation = 'linear',
                              name = "Encoder_scProteomics")
encoded_scProteomics = protein_encoder_layer(input_dim_scProteomics)

# Merging Encoder layers from different OMICs


In [9]:
# Join the two per-OMIC encodings (scRNAseq first, scProteomics second)
# into a single tensor.
encoded_omics = [encoded_scRNAseq, encoded_scProteomics]
merge = concatenate(encoded_omics)

# Bottleneck compression


In [10]:
# Shared 50-unit linear layer applied to the merged encodings; its output is
# the joint representation that is later extracted and fed to tSNE.
bottleneck_layer = Dense(units = 50, activation = 'linear',
                         kernel_initializer = 'uniform', name = "Bottleneck")
bottleneck = bottleneck_layer(merge)

# Inverse merging


In [11]:
# Expand the bottleneck back to the combined width of the two per-OMIC
# encodings (the mirror image of the concatenation step), using ELU.
merged_width = encoding_dim_scRNAseq + encoding_dim_scProteomics
inverse_layer = Dense(units = merged_width, activation = 'elu',
                      name = "Concatenate_Inverse")
merge_inverse = inverse_layer(bottleneck)

# Decoder layer for each OMIC


In [12]:
# One output head per OMIC, both fed by the shared inverse-merge layer and
# sized to the original number of feature columns.
# NOTE(review): sigmoid outputs are confined to (0, 1) while the training
# targets are the log(x + 1)-transformed matrices; if those contain values
# above 1 the reconstruction cannot reach them -- confirm this is intended.
rna_decoder_layer = Dense(units = ncol_scRNAseq, activation = 'sigmoid',
                          name = "Decoder_scRNAseq")
decoded_scRNAseq = rna_decoder_layer(merge_inverse)

protein_decoder_layer = Dense(units = ncol_scProteomics, activation = 'sigmoid',
                              name = "Decoder_scProteomics")
decoded_scProteomics = protein_decoder_layer(merge_inverse)

# Combining Encoder and Decoder into an Autoencoder model


In [13]:
# Two-input / two-output model: both OMIC inputs go in, and each OMIC's
# reconstruction comes out (same order: scRNAseq, then scProteomics).
autoencoder_inputs = [input_dim_scRNAseq, input_dim_scProteomics]
autoencoder_outputs = [decoded_scRNAseq, decoded_scProteomics]
autoencoder = Model(inputs = autoencoder_inputs, outputs = autoencoder_outputs)

# Compile Autoencoder


In [None]:
# Mean squared error for each output head, keyed by the name of the decoder
# layer that produces it; optimised with Adam.
reconstruction_losses = {
    'Decoder_scRNAseq': 'mean_squared_error',
    'Decoder_scProteomics': 'mean_squared_error',
}
autoencoder.compile(loss = reconstruction_losses, optimizer = 'adam')

# Print the layer-by-layer overview of the compiled model.
autoencoder.summary()

## Autoencoder training

In [None]:
# Autoencoder training: the targets are the inputs themselves, so the model
# learns to reconstruct both OMIC matrices. 20% of the samples are held out
# for validation.
training_inputs = [X_scRNAseq, X_scProteomics]
training_targets = [X_scRNAseq, X_scProteomics]
estimator = autoencoder.fit(x = training_inputs,
                            y = training_targets,
                            batch_size = 128, epochs = 100,
                            shuffle = True, validation_split = 0.2,
                            verbose = 1)

# Report the losses recorded for the final epoch.
loss_history = estimator.history
print("Training Loss: ",loss_history['loss'][-1])
print("Validation Loss: ",loss_history['val_loss'][-1])

# Optional: uncomment to plot the training and validation loss curves.
# plt.plot(estimator.history['loss'])
# plt.plot(estimator.history['val_loss'])
# plt.title('Model Loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Train','Validation'], loc = 'upper right')
# plt.show()

# Encoder model


In [16]:
# Sub-model that reuses the autoencoder's inputs and stops at the bottleneck
# layer, so it maps the two OMIC matrices to the joint representation.
encoder_inputs = [input_dim_scRNAseq, input_dim_scProteomics]
encoder = Model(inputs = encoder_inputs, outputs = bottleneck)

# Joint representation of every sample, one row per sample.
omic_matrices = [X_scRNAseq, X_scProteomics]
bottleneck_representation = encoder.predict(omic_matrices)

# tSNE on Autoencoder bottleneck representation


In [None]:
# Embed the bottleneck representation in two dimensions with tSNE
# (fixed random_state for reproducibility).
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter`;
# confirm against the installed version before upgrading.
model_tsne_auto = TSNE(n_components = 2, perplexity = 90, learning_rate = 200,
                       n_iter = 1000, random_state = 123, verbose = 1)
tsne_auto = model_tsne_auto.fit_transform(bottleneck_representation)

# Scatter plot of the embedding, coloured by the scRNAseq labels.
# NOTE(review): assumes the rows of both datasets describe the same cells in
# the same order, so the scRNAseq labels apply to the joint embedding -- confirm.
tsne_first, tsne_second = tsne_auto[:, 0], tsne_auto[:, 1]
plt.scatter(tsne_first, tsne_second, c = Y_scRNAseq, cmap = 'tab20', s = 10)
plt.xlabel("tSNE1")
plt.ylabel("tSNE2")
plt.title('tSNE on Autoencoder: Data Integration, CITEseq')
plt.show()