In [6]:
import pickle
import tensorflow as tf
from tensorflow.keras import models

import os
import numpy as np
import time
import pandas as pd

In [2]:
def load_data(filename):
    """Load a tab-separated matrix file and return it transposed.

    The file is expected to have a header row whose first cell is ignored and
    whose remaining cells are column names, followed by one row per feature:
    a row name in the first cell and numeric values in the rest.

    Parameters
    ----------
    filename : str
        Path to the tab-separated text file.

    Returns
    -------
    data : numpy.ndarray (float32)
        The numeric matrix transposed, i.e. one row per header column and one
        column per file row.
    data_labels : list
        Always an empty list; kept so the return signature stays unchanged.
    sample_names : list of str
        Header cells after the first one.
    gene_names : list of str
        Upper-cased first cell of every data row, in file order.

    Raises
    ------
    ValueError
        From numpy, if a value cannot be parsed as a float or rows have
        differing lengths.
    """
    data_labels = []
    gene_names = []
    rows = []

    # A context manager guarantees the handle is closed even if parsing fails.
    with open(filename) as handle:
        header = handle.readline().rstrip('\r\n')
        sample_names = header.split('\t')[1:]

        for line in handle:
            line = line.rstrip('\r\n')
            # Skip blank lines (e.g. a trailing newline at end of file); they
            # would otherwise produce an empty, ragged row.
            if not line.strip():
                continue
            values = line.split('\t')
            gene_names.append(values[0].upper())
            rows.append(values[1:])

    # File rows are features; transpose so rows correspond to header columns.
    data = np.transpose(np.array(rows, dtype='float32'))

    return data, data_labels, sample_names, gene_names

In [7]:
# Switch the working directory to the DeepDEP project root so that the
# relative "prediction/..." and "preprocessing/..." paths used by later cells
# resolve. The root can be overridden with the DEEPDEP_HOME environment
# variable; when it is unset the original hard-coded location is used.
print(os.getcwd())
os.chdir(os.environ.get("DEEPDEP_HOME", "/home/km/temp/DeepDEP/"))
print(os.getcwd())

/home/km/temp/DeepDEP/prediction/code
/home/km/temp/DeepDEP


In [8]:
# model_name selects which saved Keras file (prediction/model/<model_name>.h5)
# is loaded; it is also reused later when naming the results file.
# Per the original notes, the single-omics, 2-omics and full 4-omics DeepDEP
# models from the paper are available in /data/full_results_models_paper/models/.
model_name = "model_custom_full"
with tf.device('/cpu:0'):
    model_saved = models.load_model("prediction/model/{}.h5".format(model_name))

In [14]:
# Load the four omics matrices (mutation, expression, copy number, methylation)
# and the gene fingerprints. All inputs live in the same dated preprocessing folder.
prediction_input_dir = "preprocessing/DATA/2022-07-13/"

data_mut, data_labels_mut, sample_names_mut, gene_names_mut = load_data(
    prediction_input_dir + "predict_mut_prediction.txt")
data_exp, data_labels_exp, sample_names_exp, gene_names_exp = load_data(
    prediction_input_dir + "predict_exp_prediction.txt")
data_cna, data_labels_cna, sample_names_cna, gene_names_cna = load_data(
    prediction_input_dir + "predict_cna_prediction.txt")
data_meth, data_labels_meth, sample_names_meth, gene_names_meth = load_data(
    prediction_input_dir + "predict_meth_prediction.txt")
# For the fingerprint file the header row of the file is bound to
# gene_names_fprint and the first column to function_names_fprint.
(data_fprint_1298DepOIs,
 data_labels_fprint,
 gene_names_fprint,
 function_names_fprint) = load_data(prediction_input_dir + "predict_fingerprint_prediction.txt")
print("\n\nDatasets successfully loaded.\n\n")



Datasets successfully loaded.




In [15]:
batch_size = 64
# Number of leading samples to predict (DEMO ONLY). To predict every loaded
# sample, replace 10 with data_mut.shape[0]. The value is capped at the number
# of rows in data_mut so the prediction loop cannot index past the end when
# fewer than 10 samples were loaded.
first_to_predict = min(10, data_mut.shape[0])
# prediction results of all 8238 TCGA samples can be found in /data/full_results_models_paper/predictions/

In [16]:
with tf.device('/cpu:0'):
    # Start of the timing window; `t` is read again when the elapsed time is reported.
    t = time.time()
    n_fprint_rows = data_fprint_1298DepOIs.shape[0]
    # One row of predictions per sample, one column per fingerprint row.
    data_pred = np.zeros((first_to_predict, n_fprint_rows))
    for z in np.arange(first_to_predict):
        # Repeat sample z's row once per fingerprint row so that every model
        # input has n_fprint_rows rows.
        sample_rows = np.repeat(z, n_fprint_rows)
        model_inputs = [
            data_mut[sample_rows],
            data_exp[sample_rows],
            data_cna[sample_rows],
            data_meth[sample_rows],
            data_fprint_1298DepOIs,
        ]
        data_pred_tmp = model_saved.predict(model_inputs, batch_size=batch_size, verbose=0)
        # presumably predict returns one value per input row (shape (n, 1)); confirm against the model
        data_pred[z] = np.transpose(data_pred_tmp)
        print("TCGA sample %d predicted..." % z)

TCGA sample 0 predicted...
TCGA sample 1 predicted...
TCGA sample 2 predicted...
TCGA sample 3 predicted...
TCGA sample 4 predicted...
TCGA sample 5 predicted...
TCGA sample 6 predicted...
TCGA sample 7 predicted...
TCGA sample 8 predicted...
TCGA sample 9 predicted...


In [17]:
# Assemble the prediction table: one row per entry of gene_names_fprint and
# one column per predicted sample.
# Column labels come from sample_names_mut, the names returned when the
# mutation matrix was loaded. The previously used sample_names_mut_tcga is
# never defined in this notebook and raised NameError on a fresh kernel.
data_pred_df = pd.DataFrame(data=np.transpose(data_pred), index=gene_names_fprint, columns=sample_names_mut[0:first_to_predict])

In [19]:
# Scratch dump of the prediction table to tmp.csv in the current working
# directory, using pandas' default CSV settings.
data_pred_df.to_csv(path_or_buf='tmp.csv')

In [None]:
# Save the predictions as a tab-separated table (4 decimal places, index
# column labelled CRISPR_GENE) and report the elapsed time measured from `t`,
# which is set at the start of the prediction cell.
# The output path is built once so the file written and the message printed
# cannot drift apart.
output_path = "/results/predictions/tcga_predicted_data_%s_demo.txt" % model_name
# Create the target directory when it is missing; to_csv does not create it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
data_pred_df.to_csv(output_path, sep='\t', index_label='CRISPR_GENE', float_format='%.4f')
print("\n\nPrediction completed in %.1f mins.\nResults saved in %s\n\n" % (
    (time.time()-t)/60, output_path))