In [1]:
%run common.ipynb

Tensorflow version: 1.15.0
Keras version: 2.2.4
Numpy version: 1.21.2


Using TensorFlow backend.


In [2]:
from deeplift.layers import NonlinearMxtsMode
import deeplift.conversion.kerasapi_conversion as kc
from collections import OrderedDict
import deeplift
from deeplift.util import compile_func
import sys, os

In [3]:
SIX5_disc1 = 'SIX5_disc1'
MYC_disc1 = 'MYC_disc1'
SRF_disc1 = 'SRF_disc1'
AP1_disc1 = 'AP1_disc1'
GATA_disc1 = 'GATA_disc1'
TAL1_known1 = 'TAL1_known1'
IRF_known1 = 'IRF_known1'
all_motifs = [SRF_disc1, AP1_disc1, GATA_disc1, TAL1_known1, IRF_known1]

In [5]:
def percent(mean, std, data):
    p = 0
    for d in data:
        if d > mean - std and d < mean + std:
            p += 1
    return p/data.shape[0]

In [8]:
output = []
data_bundles = []
outputs = []

# all_runs = [('model/model_' + motif + '.h5', 
#              'model/model_' + motif + '.json', 
#              'data/testing_' + motif + '.npy', 
#              check(motif)) 
#             for motif in all_motifs]
all_runs = []
for i in range(len(all_motifs)):
    for j in range(i + 1, len(all_motifs)):
        all_runs.append(('model/model_and_' + str(i) + '_' + str(j) + '.h5', 
             'model/model_and_' + str(i) + '_' + str(j) + '.json', 
             'data/testing_' + str(i) + '_' + str(j) + '.npy', 
             check_or(all_motifs[i], all_motifs[j])))


for keras_model_weights, keras_model_json, testing_data_path, method in all_runs:
    sys.stdout = open(os.devnull, 'w')
    data = load_data(method, testing_data_path)

    model = kc.convert_model_from_saved_files(
                h5_file=keras_model_weights,
                json_file=keras_model_json,
                nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
      
    score_func = model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-2)

    background = OrderedDict([('A', 0.3), ('C', 0.2), ('G', 0.2), ('T', 0.3)])

    all_scores = np.zeros((3, data.onehot.shape[0], data.onehot.shape[1]))
    for task_idx in [0,1,2]:
        scores = np.array(score_func(
                                task_idx=task_idx,
                                input_data_list=[data.onehot],
                                input_references_list=[
                                    np.array([
                                        background['A'], 
                                        background['C'], 
                                        background['G'], 
                                        background['T']])
                                    [None,None,:]],
                                batch_size=200,
                                progress_update=None))
    scores = np.sum(scores, axis=2)
    all_scores[task_idx] = scores
    
    all_scores = np.average(all_scores, axis=0)
    mask = np.zeros(all_scores.shape)
    
    motif_scores = OrderedDict()
    for motif in all_motifs:
        motif_scores[motif] = []

    for idx in range(len(scores)):
        for embedding in data.embeddings[idx]:
            for motif in all_motifs:
                if motif in embedding.what.getDescription():
                    motif_scores[motif].append(all_scores[idx, embedding.startPos:embedding.startPos+len(embedding.what)])
                    mask[idx, embedding.startPos:embedding.startPos+len(embedding.what)] = 1
    background = scores[np.where(mask == 0)]
    sys.stdout = sys.__stdout__
    print('Background mean: ', np.mean(background), 'Background std: ', np.std(background))
    for motif in all_motifs:
        motif_scores[motif] = np.array(motif_scores[motif])
        
        motif_score = motif_scores[motif]
        if len(motif_score) == 0:
            continue
        motif_score_mean = np.mean(motif_score, axis=1)
        print('Motif: ', motif)
        print(
#               'Motif char mean: ', np.mean(motif_score), 
#               'Motif char std: ', np.std(motif_score), 
#               'Motif mean std: ', np.std(motif_score_mean), 
#               'Motif mean min: ', np.min(motif_score_mean), 
#               'Motif mean max: ', np.max(motif_score_mean), 
              'Percent 1 std: ', "{:.2%}".format(percent(np.mean(background), np.std(background), motif_score_mean)))

nonlinear_mxts_mode is set to: DeepLIFT_GenomicsDefault
For layer 1 the preceding linear layer is 0 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 3 the preceding linear layer is 2 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 7 the preceding linear layer is 6 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
For layer 10 the preceding linear layer is 9 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
Background mean:  -0.04692414 Background std:  0.054060053
Motif:  SRF_disc1
Percent 1 std:  1.32%
Motif:  AP1_disc1
Percent 1 std:  2.29%
