In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from src.vis_data import get_vis_data
from joblib import load

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def sum_strength(d):
    """Return the total strength of one node of a nested activation dict.

    A node that has a "strength" entry yields that value directly (any
    "children" it may also carry are not visited). Otherwise the result is
    the sum of the totals of every node listed under "children".
    """
    if "strength" not in d:
        # Inner node: recurse into each child and add up their totals.
        return sum(map(sum_strength, d["children"]))
    return d["strength"]

In [26]:
# Load exons
# Hard-coded example exons written as RNA strings (A/C/G/U), 70 nt each.
# Entries 2 and 5 each differ from entry 1 by a single nucleotide, and
# entry 6 is an exact duplicate of entry 1.
# NOTE(review): these sequences use "U", whereas the sequences decoded from
# xTe further down are built from the alphabet A/C/G/T -- confirm that
# get_vis_data treats both spellings the same way.
exons = [
    "GAGUCCCGCUUACCAUUGCAUUUAAGAAAGAGGCCAUACGCCGCUAAGACCCUACUCUUCAGAAUACCAG",
    "GAGUCCCGCUUACCAUUGCAUUUAAGAAAGCGGCCAUACGCCGCUAAGACCCUACUCUUCAGAAUACCAG",
    "GUCUGACAGUACUACGCUAAUACUACGUAAACCAAAGCCAUAAUCCAAUUGACCUCCUUUUCAGGAAUUC",
    "CCUUCCACGCCUCUCCCACUCGUUACACUCAGUUGCAGUAUGGUUAACACUCCACUAGGCCCCAGGAAUC",
    "GAGUCCCGCUUACCAUUGCAUUUAAGAAAGAGGCCAUACGCCUCUAAGACCCUACUCUUCAGAAUACCAG",
    "GAGUCCCGCUUACCAUUGCAUUUAAGAAAGAGGCCAUACGCCGCUAAGACCCUACUCUUCAGAAUACCAG",
]

In [27]:
# Get vis data
# One get_vis_data result per exon, in the same order as `exons`.
# A comprehension replaces the pre-sized list and hand-maintained index,
# so no stray counter variable is left behind in the kernel namespace.
json_data = [get_vis_data(exon=exon, threshold=0.001) for exon in tqdm(exons)]

  0%|          | 0/6 [00:00<?, ?it/s]

100%|██████████| 6/6 [00:03<00:00,  1.55it/s]


In [32]:
# Value ranges over every entry of json_data.
# All running maxima below start at 0, so a reported maximum is never negative.

# Min and max of delta strength
delta_forces = [exon_data["delta_force"] for exon_data in json_data]
print("Delta strength range:", (min(delta_forces), max(delta_forces)))

# Max of class strength: the larger of the per-exon maxima of the two keys
print("Max class strength:", max(
    max(exon_data[class_key] for exon_data in json_data)
    for class_key in ("incl_strength", "skip_strength")
))

# Max of feature strength and max of feature position strength, collected in
# a single pass over the "feature_activations" tree.
max_feature_strength = 0
max_feature_position_strength = 0
for exon_data in json_data:
    for class_feature_activations in exon_data["feature_activations"]["children"]:
        for feature_strength in class_feature_activations["children"]:
            max_feature_strength = max(
                max_feature_strength, sum_strength(feature_strength)
            )
            # Entries without "children" have no per-position values to inspect.
            for feature_position_strength in feature_strength.get("children", ()):
                max_feature_position_strength = max(
                    max_feature_position_strength, feature_position_strength["strength"]
                )
print("Max feature strength:", max_feature_strength)
print("Max feature position strength:", max_feature_position_strength)

# Max of nucleotide position strength and max of nucleotide position feature
# strength, collected in a single pass over the "nucleotide_activations" tree.
max_nucleotide_position_strength = 0
max_nucleotide_position_feature_strength = 0
for exon_data in json_data:
    for class_nucleotide_activations in exon_data["nucleotide_activations"]["children"]:
        for nucleotide_strength in class_nucleotide_activations["children"]:
            for nucleotide_position_strength in nucleotide_strength["children"]:
                max_nucleotide_position_strength = max(
                    max_nucleotide_position_strength,
                    sum_strength(nucleotide_position_strength),
                )
                for nucleotide_position_feature_strength in nucleotide_position_strength["children"]:
                    max_nucleotide_position_feature_strength = max(
                        max_nucleotide_position_feature_strength,
                        nucleotide_position_feature_strength["strength"],
                    )
print("Max nucleotide position strength:", max_nucleotide_position_strength)
print("Max nucleotide position feature strength:", max_nucleotide_position_feature_strength)

Delta strenth range: (-13.908240915963916, 22.812916158010694)
Max class strength: 86.02241516113281
Max feature strength: 37.52738878960292
Max feature position strength: 11.806382848973044
Max nucleotide position strength: 3.7882057132961697
Max nucleotide position feature strength: 3.7689620256641363


In [33]:
# Load the xTe dataset from the repository's data directory.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only point this at files from a trusted source.
# (The path has no placeholders, so a plain string is used instead of an f-string.)
xTe = load("../data/xTe_ES7_HeLa_ABC.pkl.gz")

In [34]:
# Decode the one-hot rows of xTe[0] into strings and keep a random ~1% subsample.
nts = ["A", "C", "G", "T"]
EXON_WINDOW = slice(10, 80)  # 70 positions per row; NOTE(review): presumably the exon without flanks -- confirm
SAMPLE_FRACTION = 0.01       # expected share of sequences kept
SAMPLE_SEED = 0              # fixed seed so a fresh kernel draws the same subsample

# For each position, look up the letter at the index whose entry equals 1.
# `.item()` raises unless exactly one entry matches, so a position that is not
# a clean one-hot vector fails loudly instead of being silently mis-decoded.
xTe_seqs = np.array([
    "".join([nts[np.where(one_hot == 1)[0].item()] for one_hot in row[EXON_WINDOW]])
    for row in tqdm(xTe[0])
])

# Keep each sequence independently with probability SAMPLE_FRACTION.
rng = np.random.default_rng(SAMPLE_SEED)
exons = xTe_seqs[rng.random(len(xTe_seqs)) < SAMPLE_FRACTION]

100%|██████████| 47962/47962 [00:04<00:00, 11695.16it/s]


In [35]:
# One get_vis_data result per sampled exon, in the same order as `exons`.
# A comprehension replaces the pre-sized list and hand-maintained index,
# so no stray counter variable is left behind in the kernel namespace.
json_data = [get_vis_data(exon=exon, threshold=0.001) for exon in tqdm(exons)]

100%|██████████| 491/491 [04:39<00:00,  1.75it/s]


In [37]:
# Value ranges over every entry of json_data.
# All running maxima below start at 0, so a reported maximum is never negative.

# Min and max of delta strength
delta_forces = [exon_data["delta_force"] for exon_data in json_data]
print("Delta strength range:", (min(delta_forces), max(delta_forces)))

# Max of class strength: the larger of the per-exon maxima of the two keys
print("Max class strength:", max(
    max(exon_data[class_key] for exon_data in json_data)
    for class_key in ("incl_strength", "skip_strength")
))

# Max of feature strength and max of feature position strength, collected in
# a single pass over the "feature_activations" tree.
max_feature_strength = 0
max_feature_position_strength = 0
for exon_data in json_data:
    for class_feature_activations in exon_data["feature_activations"]["children"]:
        for feature_strength in class_feature_activations["children"]:
            max_feature_strength = max(
                max_feature_strength, sum_strength(feature_strength)
            )
            # Entries without "children" have no per-position values to inspect.
            for feature_position_strength in feature_strength.get("children", ()):
                max_feature_position_strength = max(
                    max_feature_position_strength, feature_position_strength["strength"]
                )
print("Max feature strength:", max_feature_strength)
print("Max feature position strength:", max_feature_position_strength)

# Max of nucleotide position strength and max of nucleotide position feature
# strength, collected in a single pass over the "nucleotide_activations" tree.
max_nucleotide_position_strength = 0
max_nucleotide_position_feature_strength = 0
for exon_data in json_data:
    for class_nucleotide_activations in exon_data["nucleotide_activations"]["children"]:
        for nucleotide_strength in class_nucleotide_activations["children"]:
            for nucleotide_position_strength in nucleotide_strength["children"]:
                max_nucleotide_position_strength = max(
                    max_nucleotide_position_strength,
                    sum_strength(nucleotide_position_strength),
                )
                for nucleotide_position_feature_strength in nucleotide_position_strength["children"]:
                    max_nucleotide_position_feature_strength = max(
                        max_nucleotide_position_feature_strength,
                        nucleotide_position_feature_strength["strength"],
                    )
print("Max nucleotide position strength:", max_nucleotide_position_strength)
print("Max nucleotide position feature strength:", max_nucleotide_position_feature_strength)

Delta strenth range: (-71.34475481672075, 101.65916478471968)
Max class strength: 136.1594737751982
Max feature strength: 61.950183892971836
Max feature position strength: 15.874034881907232
Max nucleotide position strength: 7.857531867787869
Max nucleotide position feature strength: 5.3906272675376385


Delta strength range: (-71.34475481672075, 101.65916478471968)      # (-120, 120)
Max class strength: 136.1594737751982                               # 160
Max feature strength: 61.950183892971836                            # 70
Max feature position strength: 15.874034881907232                   # 20
Max nucleotide position strength: 7.857531867787869                 # 10
Max nucleotide position feature strength: 5.3906272675376385        # 6