In [1]:
%run common.ipynb

Tensorflow version: 1.15.0
Keras version: 2.2.4
Numpy version: 1.21.2


Using TensorFlow backend.


In [2]:
from deeplift.layers import NonlinearMxtsMode
import deeplift.conversion.kerasapi_conversion as kc
from collections import OrderedDict
import deeplift
from deeplift.util import compile_func
import sys, os

In [3]:
SIX5_disc1 = 'SIX5_disc1'
MYC_disc1 = 'MYC_disc1'
SRF_disc1 = 'SRF_disc1'
AP1_disc1 = 'AP1_disc1'
GATA_disc1 = 'GATA_disc1'
TAL1_known1 = 'TAL1_known1'
IRF_known1 = 'IRF_known1'
all_motifs = [SRF_disc1, AP1_disc1, GATA_disc1, TAL1_known1, IRF_known1]

In [4]:
def percent(mean, std, data):
    p = 0
    for d in data:
        if d > mean - std and d < mean + std:
            p += 1
    return p/data.shape[0]

In [12]:
all_runs = []
for i in range(len(all_motifs)):
    for j in range(i + 1, len(all_motifs)):
        all_runs.append(('model/model_and_' + str(i) + '_' + str(j) + '.h5', 
             'model/model_and_' + str(i) + '_' + str(j) + '.json', 
             'data/testing_' + str(i) + '_' + str(j) + '.npy', 
             check_or(all_motifs[i], all_motifs[j])))

output = []
        
for keras_model_weights, keras_model_json, testing_data_path, method in all_runs:
    data = load_data(method, testing_data_path)

    model = kc.convert_model_from_saved_files(
                h5_file=keras_model_weights,
                json_file=keras_model_json,
                nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
      
    score_func = model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-2)

    background = OrderedDict([('A', 0.3), ('C', 0.2), ('G', 0.2), ('T', 0.3)])

    all_scores = np.zeros((3, data.onehot.shape[0], data.onehot.shape[1]))
    for task_idx in [0,1,2]:
        scores = np.array(score_func(
                                task_idx=task_idx,
                                input_data_list=[data.onehot],
                                input_references_list=[
                                    np.array([
                                        background['A'], 
                                        background['C'], 
                                        background['G'], 
                                        background['T']])
                                    [None,None,:]],
                                batch_size=200,
                                progress_update=None))
    scores = np.sum(scores, axis=2)
    all_scores[task_idx] = scores
    
    all_scores = np.average(all_scores, axis=0)
    mask = np.zeros(all_scores.shape)
    
    motif_scores = OrderedDict()
    for motif in all_motifs:
        motif_scores[motif] = []

    for idx in range(len(scores)):
        for embedding in data.embeddings[idx]:
            for motif in all_motifs:
                if motif in embedding.what.getDescription():
                    motif_scores[motif].append(all_scores[idx, embedding.startPos:embedding.startPos+len(embedding.what)])
                    mask[idx, embedding.startPos:embedding.startPos+len(embedding.what)] = 1
    background = scores[np.where(mask == 0)]
    
    output.append('Background mean: {:.3}, Background std: {:.3}'.format(np.mean(background), np.std(background)))
    for motif in all_motifs:
        motif_scores[motif] = np.array(motif_scores[motif])
        
        motif_score = motif_scores[motif]
        if len(motif_score) == 0:
            continue
        motif_score_mean = np.mean(motif_score, axis=1)
        output.append('Motif: ' + motif)
        output.append('Percent 1 std: {:.2%}'.format(percent(np.mean(background), np.std(background), motif_score_mean)))
#         print(
#               'Motif char mean: ', np.mean(motif_score), 
#               'Motif char std: ', np.std(motif_score), 
#               'Motif mean std: ', np.std(motif_score_mean), 
#               'Motif mean min: ', np.min(motif_score_mean), 
#               'Motif mean max: ', np.max(motif_score_mean))

nonlinear_mxts_mode is set to: DeepLIFT_GenomicsDefault
For layer 1 the preceding linear layer is 0 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 3 the preceding linear layer is 2 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 7 the preceding linear layer is 6 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
For layer 10 the preceding linear layer is 9 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
nonlinear_mxts_mode is set to: DeepLIFT_GenomicsDefault
For layer 1 the preceding linear layer is 0 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 3 the prece

In [14]:
for line in output:
    print(line)

Background mean: -0.0469, Background std: 0.0541
Motif: SRF_disc1
Percent 1 std: 1.32%
Motif: AP1_disc1
Percent 1 std: 2.29%
Background mean: -0.0369, Background std: 0.0465
Motif: SRF_disc1
Percent 1 std: 1.19%
Motif: GATA_disc1
Percent 1 std: 4.33%
Background mean: -0.0567, Background std: 0.0527
Motif: SRF_disc1
Percent 1 std: 0.13%
Motif: TAL1_known1
Percent 1 std: 0.51%
Background mean: -0.0469, Background std: 0.0508
Motif: SRF_disc1
Percent 1 std: 1.31%
Motif: IRF_known1
Percent 1 std: 0.53%
Background mean: -0.0418, Background std: 0.0553
Motif: AP1_disc1
Percent 1 std: 38.53%
Motif: GATA_disc1
Percent 1 std: 56.83%
Background mean: -0.0331, Background std: 0.0459
Motif: AP1_disc1
Percent 1 std: 4.26%
Motif: TAL1_known1
Percent 1 std: 2.03%
Background mean: -0.0384, Background std: 0.0467
Motif: AP1_disc1
Percent 1 std: 21.75%
Motif: IRF_known1
Percent 1 std: 2.14%
Background mean: -0.0843, Background std: 0.0848
Motif: GATA_disc1
Percent 1 std: 9.48%
Motif: TAL1_known1
Percent

In [18]:
all_runs = []
for i in range(len(all_motifs)):
    for j in range(i + 1, len(all_motifs)):
        all_runs.append(('model/model_or_' + str(i) + '_' + str(j) + '.h5', 
             'model/model_or_' + str(i) + '_' + str(j) + '.json', 
             'data/testing_' + str(i) + '_' + str(j) + '.npy', 
             check_or(all_motifs[i], all_motifs[j])))

output = []
        
for keras_model_weights, keras_model_json, testing_data_path, method in all_runs:
    data = load_data(method, testing_data_path)

    model = kc.convert_model_from_saved_files(
                h5_file=keras_model_weights,
                json_file=keras_model_json,
                nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
      
    score_func = model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-2)

    background = OrderedDict([('A', 0.3), ('C', 0.2), ('G', 0.2), ('T', 0.3)])

    all_scores = np.zeros((3, data.onehot.shape[0], data.onehot.shape[1]))
    for task_idx in [0,1,2]:
        scores = np.array(score_func(
                                task_idx=task_idx,
                                input_data_list=[data.onehot],
                                input_references_list=[
                                    np.array([
                                        background['A'], 
                                        background['C'], 
                                        background['G'], 
                                        background['T']])
                                    [None,None,:]],
                                batch_size=200,
                                progress_update=None))
    scores = np.sum(scores, axis=2)
    all_scores[task_idx] = scores
    
    all_scores = np.average(all_scores, axis=0)
    mask = np.zeros(all_scores.shape)
    
    motif_scores = OrderedDict()
    for motif in all_motifs:
        motif_scores[motif] = []

    for idx in range(len(scores)):
        for embedding in data.embeddings[idx]:
            for motif in all_motifs:
                if motif in embedding.what.getDescription():
                    motif_scores[motif].append(all_scores[idx, embedding.startPos:embedding.startPos+len(embedding.what)])
                    mask[idx, embedding.startPos:embedding.startPos+len(embedding.what)] = 1
    background = scores[np.where(mask == 0)]
    
    output.append('Background mean: {:.3}, Background std: {:.3}'.format(np.mean(background), np.std(background)))
    for motif in all_motifs:
        motif_scores[motif] = np.array(motif_scores[motif])
        
        motif_score = motif_scores[motif]
        if len(motif_score) == 0:
            continue
        motif_score_mean = np.mean(motif_score, axis=1)
        output.append('Motif: ' + motif)
        output.append('Percent 1 std: {:.2%}'.format(percent(np.mean(background), np.std(background), motif_score_mean)))

nonlinear_mxts_mode is set to: DeepLIFT_GenomicsDefault
For layer 1 the preceding linear layer is 0 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 3 the preceding linear layer is 2 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 7 the preceding linear layer is 6 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
For layer 10 the preceding linear layer is 9 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
nonlinear_mxts_mode is set to: DeepLIFT_GenomicsDefault
For layer 1 the preceding linear layer is 0 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 3 the prece

In [19]:
for line in output:
    print(line)

Background mean: -0.0194, Background std: 0.0772
Motif: SRF_disc1
Percent 1 std: 1.45%
Motif: AP1_disc1
Percent 1 std: 3.90%
Background mean: -0.0187, Background std: 0.0596
Motif: SRF_disc1
Percent 1 std: 1.32%
Motif: GATA_disc1
Percent 1 std: 6.74%
Background mean: -0.0265, Background std: 0.0758
Motif: SRF_disc1
Percent 1 std: 0.65%
Motif: TAL1_known1
Percent 1 std: 5.34%
Background mean: -0.0241, Background std: 0.0972
Motif: SRF_disc1
Percent 1 std: 2.09%
Motif: IRF_known1
Percent 1 std: 2.94%
Background mean: -0.0233, Background std: 0.0757
Motif: AP1_disc1
Percent 1 std: 6.16%
Motif: GATA_disc1
Percent 1 std: 5.54%
Background mean: -0.00314, Background std: 0.0405
Motif: AP1_disc1
Percent 1 std: 6.77%
Motif: TAL1_known1
Percent 1 std: 2.98%
Background mean: -0.000545, Background std: 0.0715
Motif: AP1_disc1
Percent 1 std: 7.43%
Motif: IRF_known1
Percent 1 std: 3.90%
Background mean: -0.0119, Background std: 0.0514
Motif: GATA_disc1
Percent 1 std: 7.94%
Motif: TAL1_known1
Percent

In [None]:
all_runs = []
for i in all_motifs:
    all_runs.append(('model/model_' + i + '.h5', 
         'model/model_' + i + '.json', 
         'data/testing_' + i + '.npy', 
         check(i)))

output = []
        
for keras_model_weights, keras_model_json, testing_data_path, method in all_runs:
    data = load_data(method, testing_data_path)

    model = kc.convert_model_from_saved_files(
                h5_file=keras_model_weights,
                json_file=keras_model_json,
                nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
      
    score_func = model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-2)

    background = OrderedDict([('A', 0.3), ('C', 0.2), ('G', 0.2), ('T', 0.3)])

    all_scores = np.zeros((3, data.onehot.shape[0], data.onehot.shape[1]))
    for task_idx in [0,1,2]:
        scores = np.array(score_func(
                                task_idx=task_idx,
                                input_data_list=[data.onehot],
                                input_references_list=[
                                    np.array([
                                        background['A'], 
                                        background['C'], 
                                        background['G'], 
                                        background['T']])
                                    [None,None,:]],
                                batch_size=200,
                                progress_update=None))
    scores = np.sum(scores, axis=2)
    all_scores[task_idx] = scores
    
    all_scores = np.average(all_scores, axis=0)
    mask = np.zeros(all_scores.shape)
    
    motif_scores = OrderedDict()
    for motif in all_motifs:
        motif_scores[motif] = []

    for idx in range(len(scores)):
        for embedding in data.embeddings[idx]:
            for motif in all_motifs:
                if motif in embedding.what.getDescription():
                    motif_scores[motif].append(all_scores[idx, embedding.startPos:embedding.startPos+len(embedding.what)])
                    mask[idx, embedding.startPos:embedding.startPos+len(embedding.what)] = 1
    background = scores[np.where(mask == 0)]
    
    output.append('Background mean: {:.3}, Background std: {:.3}'.format(np.mean(background), np.std(background)))
    for motif in all_motifs:
        motif_scores[motif] = np.array(motif_scores[motif])
        
        motif_score = motif_scores[motif]
        if len(motif_score) == 0:
            continue
        motif_score_mean = np.mean(motif_score, axis=1)
        output.append('Motif: ' + motif)
        output.append('Percent 1 std: {:.2%}'.format(percent(np.mean(background), np.std(background), motif_score_mean)))

nonlinear_mxts_mode is set to: DeepLIFT_GenomicsDefault
For layer 1 the preceding linear layer is 0 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 3 the preceding linear layer is 2 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 7 the preceding linear layer is 6 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
For layer 10 the preceding linear layer is 9 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
nonlinear_mxts_mode is set to: DeepLIFT_GenomicsDefault
For layer 1 the preceding linear layer is 0 of type Conv1D;
In accordance with nonlinear_mxts_mode=DeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to Rescale
For layer 3 the prece

In [None]:
for line in output:
    print(line)