# Generate and visualize filter strengths

In [1]:
%load_ext autoreload
%autoreload 2

import sys
# Make the parent directory importable. The entry is a relative path, so what it
# points at depends on the working directory in effect when an import is resolved.
sys.path.append("../")
import os
# Move the working directory up one level so that relative paths such as
# "data/..." used in later cells are resolved from there.
# NOTE(review): this is not idempotent — re-running this cell in the same kernel
# moves up one more level each time. After this call the "../" entry appended
# above refers to the parent of the new working directory; presumably the
# `src` import below is found via the working directory itself — TODO confirm.
os.chdir("..")
import pandas as pd
import matplotlib.pyplot as plt
from src.vis_data import get_vis_data

  from .autonotebook import tqdm as notebook_tqdm


## 1. Generate and save filter strengths

In [2]:
def sum_strength(d):
    """Return the total strength stored in a nested node dictionary.

    A node that has a "strength" key is treated as a leaf and its value is
    returned unchanged (any "children" it may also carry are ignored).
    Otherwise the node must have a "children" iterable of nodes, and the
    result is the sum of ``sum_strength`` over those children (0 when the
    iterable is empty).

    Raises:
        KeyError: if a node has neither "strength" nor "children".
    """
    if "strength" not in d:
        # Internal node: recurse into every child and add up the results.
        return sum(map(sum_strength, d["children"]))
    return d["strength"]

In [3]:
# json_file path -> exon sequence, processed in insertion order.
exon_by_json_file = {
    "data/teaser.json": "GCGGCACCTACTACAATGTCCCCCGCTGCATACACTCGGAGCCAATAGGGCGCCTATAGAGTGTAGTCCT",
    "data/exon_s1.json": "GAGTCCCGCTTACCATTGCATTTAAGAAAGCGGCCATACGCCGCTAAGACCCTACTCTTCAGAATACCAG",
    "data/exon_s1_comp1.json": "CCTTCCACGCCTCTCCCACTCGTTACACTCAGTTGCAGTATGGTTAACACTCCACTAGGCCCCAGGAATC",
    "data/exon_s1_comp2.json": "GTCTGACAGTACTACGCTAATACTACGTAAACCAAAGCCATAATCCAATTGACCTCCTTTTCAGGAATTC",
    "data/exon_s1_34c>a.json": "GAGTCCCGCTTACCATTGCATTTAAGAAAGAGGCCATACGCCGCTAAGACCCTACTCTTCAGAATACCAG",
    "data/exon_s1_34c>a_46g>u.json": "GAGTCCCGCTTACCATTGCATTTAAGAAAGAGGCCATACGCCTCTAAGACCCTACTCTTCAGAATACCAG",
    "data/exon_d1.json": "GACTATGAGCCCCAACGAACAAGCTCCTATCTGGGAACTCTTTTCTGCAGACTTTAACCCTACCCCCAGA",
}

# json_data is rebound on every iteration, so only the result for the last
# entry ("data/exon_d1.json") remains available afterwards.
for json_path, exon_seq in exon_by_json_file.items():
    json_data = get_vis_data(exon=exon_seq, json_file=json_path, threshold=0.001)



In [4]:
# Single run whose result (json_data) is inspected by the cells that follow.
json_data = get_vis_data(
    exon="GCGGCACCTACTACAATGTCCCCCGCTGCATACACTCGGAGCCAATAGGGCGCCTATAGAGTGTAGTCCT",
    json_file="data/exon.json",
    threshold=0.001,
)

In [5]:
# Display the "delta_force" entry of the current json_data result.
json_data["delta_force"]

-16.2321506754854

In [6]:
# Total strength of the first top-level child of "nucleotide_activations" minus
# that of the second, plus the "incl_bias" entry. Shown as the cell output.
nucleotide_children = json_data["nucleotide_activations"]["children"]
sum_strength(nucleotide_children[0]) - sum_strength(nucleotide_children[1]) + json_data["incl_bias"]

-14.922714271500624

In [7]:
# Total strength of the first top-level child of "feature_activations" minus
# that of the second. Shown as the cell output.
# NOTE(review): unlike the nucleotide-level expression, "incl_bias" is not
# added here — confirm this is intended.
feature_children = json_data["feature_activations"]["children"]
sum_strength(feature_children[0]) - sum_strength(feature_children[1])

-15.894813972257097

## 2. New groupings

In [8]:
# (exon sequence, json_file path) pairs, all run with use_new_grouping=True.
new_grouping_runs = [
    ("GCGGCACCTACTACAATGTCCCCCGCTGCATACACTCGGAGCCAATAGGGCGCCTATAGAGTGTAGTCCT",
     "data/teaser.json"),
    ("GAGTCCCGCTTACCATTGCATTTAAGAAAGCGGCCATACGCCGCTAAGACCCTACTCTTCAGAATACCAG",
     "data/exon_s1.json"),
    ("CCTTCCACGCCTCTCCCACTCGTTACACTCAGTTGCAGTATGGTTAACACTCCACTAGGCCCCAGGAATC",
     "data/exon_s1_comp1.json"),
    ("GTCTGACAGTACTACGCTAATACTACGTAAACCAAAGCCATAATCCAATTGACCTCCTTTTCAGGAATTC",
     "data/exon_s1_comp2.json"),
    ("GAGTCCCGCTTACCATTGCATTTAAGAAAGAGGCCATACGCCGCTAAGACCCTACTCTTCAGAATACCAG",
     "data/exon_s1_34c>a.json"),
    ("GAGTCCCGCTTACCATTGCATTTAAGAAAGAGGCCATACGCCTCTAAGACCCTACTCTTCAGAATACCAG",
     "data/exon_s1_34c>a_46g>u.json"),
    ("GACTATGAGCCCCAACGAACAAGCTCCTATCTGGGAACTCTTTTCTGCAGACTTTAACCCTACCCCCAGA",
     "data/exon_d1.json"),
]

# Runs are executed in list order; json_data keeps only the last result.
for exon_seq, json_path in new_grouping_runs:
    json_data = get_vis_data(
        exon=exon_seq,
        json_file=json_path,
        threshold=0.001,
        use_new_grouping=True,
    )

In [9]:
# Options shared by both runs in this cell.
# NOTE(review): these exon strings use U where earlier cells use T — confirm
# get_vis_data accepts both alphabets.
shared_options = dict(threshold=0.001, use_new_grouping=True)

max_incl_class_exon = "CCGCGACCGGAUUAAGAUGAAGGAACGAAGCAAUUGUCGAAUCUACUCUAAUCUGCUCGAAGAUCAGAAC"
json_data = get_vis_data(
    exon=max_incl_class_exon, json_file="data/max_incl_class.json", **shared_options
)

# json_data is overwritten here; only this second result remains afterwards.
max_skip_class_exon = "AGAUGUCGAUCCCCAUUAAUCAACCCCUCUCCUUAUAUUAUCCCCAUAUUCACAAAACUGUUUGCUAAAA"
json_data = get_vis_data(
    exon=max_skip_class_exon, json_file="data/max_skip_class.json", **shared_options
)

In [10]:
# Options shared by both runs in this cell.
# NOTE(review): these exon strings use U where earlier cells use T — confirm
# get_vis_data accepts both alphabets.
shared_options = dict(threshold=0.001, use_new_grouping=True)

max_incl_feature_exon = "CCGCGACCGGAUUAAGAUGAAGGAACGAAGCAAUUGUCGAAUCUACUCUAAUCUGCUCGAAGAUCAGAAC"
json_data = get_vis_data(
    exon=max_incl_feature_exon, json_file="data/max_incl_feature.json", **shared_options
)

# json_data is overwritten here; only this second result remains afterwards.
max_skip_feature_exon = "CCACUCACCGCCGCCGGUGUCCUGGCAUACUCAUUAUCGCAACCCCGACGCGGCCCACUUGGGUCGCGGC"
json_data = get_vis_data(
    exon=max_skip_feature_exon, json_file="data/max_skip_feature.json", **shared_options
)

In [11]:
# Options shared by both runs in this cell.
# NOTE(review): these exon strings use U where earlier cells use T — confirm
# get_vis_data accepts both alphabets.
shared_options = dict(threshold=0.001, use_new_grouping=True)

max_incl_feature_pos_exon = "AGUCUCUUGGAAUCGCGCCCGACAUCUUACCAGUAAAAUCGGUGCUCCAGGGCCACGAUCUUCGACACCA"
json_data = get_vis_data(
    exon=max_incl_feature_pos_exon,
    json_file="data/max_incl_feature_pos.json",
    **shared_options
)

# json_data is overwritten here; only this second result remains afterwards.
max_skip_feature_pos_exon = "GCUCGCAACCAGCCGCCUACCUAUUAAUUGUCUGUGCUCCAAGAAUUACAGCUAGCAAUUUAGGUACCAA"
json_data = get_vis_data(
    exon=max_skip_feature_pos_exon,
    json_file="data/max_skip_feature_pos.json",
    **shared_options
)

## 3. Different exon lengths

In [12]:
# (exon sequence, json_file path) pairs; every run uses dataset_name="ES7".
different_length_runs = [
    ("ATATTGTTGACGGTCAATTATATGGGCTCATATGATAGCACCTGTCGAGATGTTTAAGGCATAAGGAGCTGACGTTGGCCTCAGTATTATAGTGCAGCAACGGATTGTGCATAATCGTGTATGAATCCGAGGATTTGGCG",
     "data/different_length_exon_length_140.json"),
    ("TCAAACAGGCGTAACATATTCATTAAAGCCGTTGCACATGGAGCTGAGATATACACAGGAACGAACCGTTTACGTTAATGGGA",
     "data/different_length_exon_length_83.json"),
    ("ATGAAATCCATTCCACAGCA",
     "data/different_length_exon_length_20.json"),
]

# Runs are executed in list order; json_data keeps only the last result.
for exon_seq, json_path in different_length_runs:
    json_data = get_vis_data(
        exon=exon_seq,
        dataset_name="ES7",
        json_file=json_path,
        threshold=0.001,
        use_new_grouping=True,
    )

## 4. Different datasets

In [3]:
# (dataset_name, exon sequence, json_file path) for each run in this section.
other_dataset_runs = [
    ("WT1_exon_5",
     "AGTTGCTGCTGGGAGCTCCAGCACAGTGAAATGGACAGAAGGGCAGAGCAA",
     "data/other_datasets_wt1_exon.json"),
    ("FAS_exon_6",
     "GATCCAGATCTAACTTGCGGTGGCTGTGTCTCCTGCTTTTCCCGATTCCAGTAATTGTTTGGG",
     "data/other_datasets_fas_exon.json"),
    ("SMN2_exon_7",
     "GGTTTTAGACAAAATCAAAAAGAAGGAAGGTGCTCACATTCCTTAAATTAAGGA",
     "data/other_datasets_smn2_exon.json"),
    ("BRCA2_exon_7",
     "GGTCGTCAGACACCAAAACATATTTCTGAAAGTCTAGGAGCTGAGGTGGATCCTGATATGTCTTGGTCAAGTTCTTTAGCTACACCACCCACCCTTAGTTCTACTGTGCTCATAG",
     "data/other_datasets_brca2_exon.json"),
    ("CFTR_exon_13",
     "AGCAGTATACAAAGATGCTGATTTGTATTTATTAGACTCTCCTTTTGGATACCTAGATGTTTTAACAGAAAAAGAAATATTTGAAAG",
     "data/other_datasets_cftr_exon.json"),
]

# Runs are executed in list order; json_data keeps only the last result.
for dataset, exon_seq, json_path in other_dataset_runs:
    json_data = get_vis_data(
        exon=exon_seq,
        dataset_name=dataset,
        json_file=json_path,
        threshold=0.001,
        use_new_grouping=True,
    )

