Dorrit0, dorrit1, jelle0, night0, night1

In [36]:
# Input locations. These are machine-specific absolute Windows paths and must be
# edited before the notebook can run on another machine.
# data_path_old is not referenced in the cells shown here.
data_path_old = r"C:\Users\Fin Amin\Desktop/\StemCellResearch\datasets_Sahil_preprocessed_final-20220525T021621Z-001\datasets_Sahil_preprocessed_final\exp1"
# Regulator-gene matrix file, resolved relative to the working directory; loaded by read_files().
matrix_path = "regulator_gene_matrix.csv"
# Directory whose files read_files() loads as replicate matrices.
data_path = r"C:\Users\Fin Amin\Desktop\StemCellResearch\Fin_preProcessed\synData"
# data_path_inter is not referenced in the cells shown here.
data_path_inter =  r"C:\Users\Fin Amin\Desktop\StemCellResearch\Fin_preProcessed\interpolatedOnly"

In [37]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import pandas as pd

from keras.layers import Input
from keras.layers import Dense
from keras.layers import Conv1D
from keras.layers import Conv1DTranspose
from keras.layers import Flatten, Reshape

import os

In [38]:
def read_files(data_dir=None, matrix_file=None):
    '''
    Load every replicate CSV found in a directory, plus the regulator-gene matrix.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory whose entries are each read as one replicate CSV, using the
        first column as the row index and skipping malformed lines.
        Defaults to the notebook-level ``data_path``.
    matrix_file : str or path-like, optional
        Text file parsed with ``np.loadtxt``. Defaults to the notebook-level
        ``matrix_path``.

    Returns
    -------
    replicates : np.ndarray
        ``np.asarray`` of the per-file value arrays, ordered by file name.
        An empty directory yields an empty array.
    regulator_gene_matrix : np.ndarray
        The loaded matrix cast to ``float32``.
    '''
    if data_dir is None:
        data_dir = data_path
    if matrix_file is None:
        matrix_file = matrix_path

    # os.listdir returns entries in arbitrary order; sort so that the replicate
    # order (axis 0 of the result) is the same on every run and machine.
    replicate_files = sorted(os.listdir(data_dir))

    replicates = [
        pd.read_csv(
            os.path.join(data_dir, file_name),
            index_col=0,
            on_bad_lines='skip',
        ).values
        for file_name in replicate_files
    ]

    regulator_gene_matrix = np.loadtxt(matrix_file)

    return np.asarray(replicates), regulator_gene_matrix.astype(np.float32)

In [39]:
# Load the replicate matrices from data_path and the regulator-gene matrix from matrix_path.
replicates, matrix = read_files()

In [40]:
# Shape of the stacked replicate array returned by read_files(); axis 0 indexes the files read.
replicates.shape

(6, 6, 245)

In [41]:
#%tensorflow_version 2.x
# Stop the notebook early unless TensorFlow reports '/device:GPU:0' as its GPU device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [42]:
class EncoderLinear(keras.layers.Layer):
    '''
    Bias-free linear layer whose initial weights are scaled by a regulator-gene matrix.

    The weight ``w`` has shape ``(input_dim, units)``. Its initial value is a
    random-normal draw multiplied elementwise by ``regulator_gene_matrix``, so
    entries where the matrix is zero start at zero. The product is applied only
    to the initial value: ``w`` is trainable and ``call`` uses it as-is.
    '''

    def __init__(self, regulator_gene_matrix, input_dim=32, units=32):
        super().__init__()

        random_draw = tf.random_normal_initializer()(shape=(input_dim, units), dtype="float32")
        # Elementwise product; the matrix must be compatible with (input_dim, units).
        masked_draw = random_draw * tf.convert_to_tensor(regulator_gene_matrix, dtype="float32")

        self.w = tf.Variable(initial_value=masked_draw, trainable=True)

    def call(self, inputs):
        # Plain matrix product over the last axis of `inputs`; no bias, no activation.
        projected = tf.matmul(inputs, self.w)
        return projected

In [43]:
class DecoderLinear(keras.layers.Layer):
    '''
    Bias-free linear layer whose initial weights are scaled by the transposed regulator-gene matrix.

    The weight ``w`` has shape ``(input_dim, units)``. Its initial value is a
    random-normal draw multiplied elementwise by the transpose of
    ``regulator_gene_matrix``. The product is applied only to the initial
    value: ``w`` is trainable and ``call`` uses it as-is.
    '''

    def __init__(self, regulator_gene_matrix, input_dim=32, units=32):
        super().__init__()

        random_draw = tf.random_normal_initializer()(shape=(input_dim, units), dtype="float32")
        # The matrix is transposed first, so its transpose must be compatible with (input_dim, units).
        transposed_matrix = tf.transpose(tf.convert_to_tensor(regulator_gene_matrix, dtype="float32"))
        masked_draw = random_draw * transposed_matrix

        self.w = tf.Variable(initial_value=masked_draw, trainable=True)

    def call(self, inputs):
        # Plain matrix product over the last axis of `inputs`; no bias, no activation.
        projected = tf.matmul(inputs, self.w)
        return projected

In [44]:
def encoder(parent_child_biological_association):
    '''
    Encoder structure.

    The data is time-series, so a 1-D convolution is used to learn the temporal
    relationship between the intensities for each gene. The convolution output
    is flattened and projected to a 2-unit dense layer, which is returned.
    '''
    # 32 filters, kernel size 3, ReLU; the original note gives the input as 6*245.
    conv_features = Conv1D(32, 3, activation = "relu")(parent_child_biological_association)
    flat_features = Flatten()(conv_features)
    return Dense(2)(flat_features)

def decoder(X, num_protein_gene, time_steps):
    '''
    Decoder structure.

    Expands ``X`` to 128 units, reshapes it to a single step of 128 channels,
    and applies a transposed 1-D convolution with ``num_protein_gene`` filters
    and kernel size ``time_steps`` (ReLU activation).
    '''
    expanded = Dense(128)(X)
    single_step = Reshape((1, 128))(expanded)
    # Presumably yields time_steps output steps from the single input step
    # (default stride and padding) - confirm against the model summary.
    upsample = Conv1DTranspose(num_protein_gene, time_steps, activation = "relu")
    return upsample(single_step)

def model(regulator_gene_matrix, num_protein_gene, time_steps, num_kinase_regulators):
    '''
    Build the encoder-decoder Keras model.

    Pipeline: Input -> EncoderLinear -> encoder -> decoder -> DecoderLinear.

    Parameters
    ----------
    regulator_gene_matrix : array-like
        Matrix multiplied elementwise into the initial weights of both linear
        layers. EncoderLinear uses it directly and DecoderLinear uses its
        transpose, so it is expected to have shape
        (num_kinase_regulators, num_protein_gene).
    num_protein_gene : int
        Output width of EncoderLinear and number of decoder output channels.
    time_steps : int
        Number of time steps per input sample.
    num_kinase_regulators : int
        Number of features per time step in the input; also the output width
        of DecoderLinear.

    Returns
    -------
    tf.keras.Model
        Uncompiled model mapping the input to the DecoderLinear output.
    '''
    inp = Input(shape=(time_steps, num_kinase_regulators))

    # Layer sizes come from the arguments rather than a hard-coded 245, so the
    # model can be built for other regulator/gene counts.
    x = EncoderLinear(regulator_gene_matrix, num_kinase_regulators, num_protein_gene)(inp)
    enc = encoder(x)
    dec = decoder(enc, num_protein_gene, time_steps)
    out = DecoderLinear(regulator_gene_matrix, num_protein_gene, num_kinase_regulators)(dec)

    return tf.keras.Model(inputs=inp, outputs=out)

In [45]:
# Random 0/1 matrix used as the regulator-gene matrix when building the model below.
# No seed is set, so the matrix differs on every run.
# NOTE(review): this looks like a placeholder for the matrix loaded by read_files() - confirm that is intended.
regulator_gene_matrix = np.random.randint(0, 2, size=(245, 245)).astype(np.float32)


In [46]:
# Build the model with num_protein_gene=245, time_steps=6, num_kinase_regulators=245.
enc_dec = model(regulator_gene_matrix, 245, 6, 245) # time_steps can be raised to something higher

In [47]:
# Print the layer-by-layer summary to check output shapes and parameter counts.
enc_dec.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 6, 245)]          0         
                                                                 
 encoder_linear_2 (EncoderLi  (None, 6, 245)           60025     
 near)                                                           
                                                                 
 conv1d_1 (Conv1D)           (None, 4, 32)             23552     
                                                                 
 flatten_1 (Flatten)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 2)                 258       
                                                                 
 dense_3 (Dense)             (None, 128)               384       
                                                           

In [48]:
from keras import losses

In [49]:
# Configure training: Adam optimizer with mean-squared-error loss.
enc_dec.compile(optimizer='adam', loss=losses.MeanSquaredError())

In [50]:
# enc_dec.compile(optimizer='adam',loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [51]:
# Read the replicate data again for training, and show the loaded matrix's row count.
# NOTE(review): this repeats the earlier read_files() call and rebinds regulator_gene_matrix
# after enc_dec was already built from the random matrix, so the loaded matrix is not used by the model.
# NOTE(review): the name suggests normalized values, but no normalization is done in the cells
# shown here - presumably the files are pre-normalized; confirm.
genes_intensities_normalized, regulator_gene_matrix = read_files()
len(regulator_gene_matrix)

741

In [52]:
# Shape of the full training array returned by read_files().
genes_intensities_normalized.shape

(6, 6, 245)

In [53]:
genes_intensities_normalized[0].shape # shape of the first replicate only

(6, 245)

In [54]:
# Train for 10 epochs as an autoencoder: the same array is passed as both input and target.
enc_dec.fit(genes_intensities_normalized,genes_intensities_normalized,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x15248a2d190>

In [55]:
# Run the fitted model on the same array it was trained on.
out = enc_dec.predict(genes_intensities_normalized)



In [56]:
def mymagn(A, B):
    '''
    Return the sum of squared elementwise differences between B and A.

    This is the squared magnitude of ``B - A``: no square root is taken and
    the sum is not averaged.
    '''
    delta = B - A
    squared = delta ** 2
    return np.sum(squared)

In [57]:
# Shape of the model input, for comparison with the prediction shape in the next cell.
genes_intensities_normalized.shape

(6, 6, 245)

In [58]:
# Shape of the model's predictions.
out.shape

(6, 6, 245)

In [59]:
# Total squared difference between the model input and its prediction
# (summed over every element, not averaged).
d1 = mymagn(genes_intensities_normalized, out)
d1

1606.3102494518039