In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.utils import set_random_seed

In [None]:
# Read the three per-omics tables; the first CSV column becomes the row index.
gene = pd.read_csv("/content/ESCA_RNAseq_ori.csv", index_col=0)
# NOTE(review): `methy` is read from the *protein* CSV even though the name
# suggests methylation -- confirm which modality this is meant to be.
methy = pd.read_csv("/content/ESCA_protein_ori.csv", index_col=0)
mirna = pd.read_csv("/content/ESCA_miRNAseq_ori.csv", index_col=0)

# Embeddings: after the index column is consumed, the first remaining column
# is discarded as well (presumably an identifier/label column -- TODO confirm).
embed = pd.read_csv("/content/ESCA_embeddings.csv", index_col=0).iloc[:, 1:]

for table in (gene, methy, mirna, embed):
    print(table.shape)

# One independent scaler per modality; each fused matrix is the omics table
# with the embedding columns appended on the right, then standardised
# column-wise.
# NOTE(review): np.hstack concatenates by row position and ignores the
# DataFrame index labels, so this assumes every file lists the samples in
# the same order -- verify against the input files.
scaler1, scaler2, scaler3 = StandardScaler(), StandardScaler(), StandardScaler()

merged1 = scaler1.fit_transform(np.hstack((gene, embed)))
merged2 = scaler2.fit_transform(np.hstack((methy, embed)))
merged3 = scaler3.fit_transform(np.hstack((mirna, embed)))

print(merged1.shape, merged2.shape, merged3.shape)

In [None]:
def build_autoencoder(input_dim, encoding_dim, output_activation='linear'):
    """Build a single-hidden-layer autoencoder together with its encoder half.

    Parameters
    ----------
    input_dim : int
        Number of input features (width of the data matrix).
    encoding_dim : int
        Number of units in the bottleneck (encoded) layer.
    output_activation : str, optional
        Activation of the reconstruction layer. Defaults to ``'linear'``
        because the training data in this notebook is standardised with
        ``StandardScaler`` and therefore contains negative values and values
        above 1, which a ``'sigmoid'`` output (range 0..1) cannot reproduce.
        Pass ``'sigmoid'`` only for inputs scaled to the [0, 1] interval.

    Returns
    -------
    tuple
        ``(autoencoder, encoder)``: the full model, compiled with the Adam
        optimiser and MSE loss, and the encoder sub-model that shares the
        same input and bottleneck layers (so training the autoencoder also
        trains the encoder).
    """
    input_layer = Input(shape=(input_dim,))
    encoded = Dense(encoding_dim, activation='relu')(input_layer)
    # Unbounded output so that standardised targets are reachable.
    decoded = Dense(input_dim, activation=output_activation)(encoded)
    autoencoder = Model(inputs=input_layer, outputs=decoded)
    encoder = Model(inputs=input_layer, outputs=encoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder, encoder

# Hyperparameters shared by the three per-modality autoencoders.
encoding_dim = 128
epochs = 200  # upper bound; early stopping may end training sooner
batch_size = 32
seed = 42

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable across runs (as far as the backend allows).
set_random_seed(seed)

encoded_features = []
for data in [merged1, merged2, merged3]:
    input_dim = data.shape[1]
    autoencoder, encoder = build_autoencoder(input_dim, encoding_dim)
    # Fresh callback per model: stop once the training loss has not improved
    # for 10 epochs and roll back to the best weights seen. The training loss
    # is monitored because no validation split is held out here.
    early_stop = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
    autoencoder.fit(
        data,
        data,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        verbose=1,
        callbacks=[early_stop],
    )
    # The encoder shares its layers with the trained autoencoder, so this
    # yields the learned bottleneck representation of every sample.
    encoded_features.append(encoder.predict(data))

encoded_features1, encoded_features2, encoded_features3 = encoded_features
print(encoded_features1.shape, encoded_features2.shape, encoded_features3.shape)

In [None]:
def _save_fused(features, sample_index, path):
    """Wrap encoded features in a DataFrame, write it to CSV and return it.

    Parameters
    ----------
    features : 2-D array
        Encoded feature matrix, one row per sample.
    sample_index : pandas.Index
        Row labels to attach; must have one entry per row of ``features``.
    path : str
        Destination CSV file.

    Returns
    -------
    pandas.DataFrame
        The frame that was written, with columns numbered from 1 to the
        number of encoded features.
    """
    col_names = list(range(1, features.shape[1] + 1))
    fused_df = pd.DataFrame(features, index=sample_index, columns=col_names)
    fused_df.to_csv(path)
    return fused_df


# NOTE(review): the outputs are written to the filesystem root ("/..."),
# whereas the inputs are read from "/content/" -- confirm this is intended.
fused_gene_df = _save_fused(encoded_features1, gene.index, "/ESCA_RNAseq_fused.csv")
fused_methy_df = _save_fused(encoded_features2, methy.index, "/ESCA_protein_fused.csv")
fused_mirna_df = _save_fused(encoded_features3, mirna.index, "/ESCA_miRNAseq_fused.csv")