In [34]:
"""
This notebook plots example predicted and observed tracks used in Fig. 1F-G, 2A-B, 6C-D, Supp. Fig. 2A-B
"""

'\nThis notebook plots example predicted and observed tracks used in Fig. 1F-G, 2A-B, 6C-D, Supp. Fig. 2A-B\n'

In [35]:
import numpy as np
import pyfastx
import os
import h5py
from utils import plot_side

In [36]:
# PRINT COMMANDS TO DOWNLOAD AND EXTRACT DATA
#
# Nothing is downloaded by this cell: it only prints shell commands for the
# reader to copy into a terminal.

# Set SCRATCH to where you want to download data to
SCRATCH = "/Users/adamhe/github/scratch"

URL = "https://zenodo.org/records/10597358/files"
TAR = "example_tracks_and_deepshap.tar.gz"

# wget -P saves the tarball inside SCRATCH.
download_cmd = f"wget {URL}/{TAR} -P {SCRATCH}"

# -x extracts (the -c flag would try to *create* an archive instead), and
# -C unpacks into SCRATCH, which is where the data-loading cell expects to
# find the example_tracks_and_deepshap/ directory.
extract_cmd = f"tar -xvzf {SCRATCH}/{TAR} -C {SCRATCH}"

print(download_cmd)
print(extract_cmd)

wget https://zenodo.org/records/10597358/files/example_tracks_and_deepshap.tar.gz -P /Users/adamhe/github/scratch
tar -xvzf /Users/adamhe/github/scratch/example_tracks_and_deepshap.tar.gz -C /Users/adamhe/github/scratch


In [7]:
# Load data

# Every input lives in the directory unpacked from the example tarball.
data_dir = os.path.join(SCRATCH, "example_tracks_and_deepshap")

# Sequence: only the record names are kept; later cells match them against
# genomic coordinates to pick out rows.
fasta = pyfastx.Fasta(os.path.join(data_dir, "concat_sequence.fna.gz"))
ids = [rec.name for rec in fasta]

# Experimental: keep columns 250-749 and 1250-1749 of the stored array.
# NOTE(review): presumably the central window of each strand — confirm.
# The context manager closes the .npz archive once the slice is in memory.
with np.load(os.path.join(data_dir, "concat_procap.npz")) as procap_npz:
    y = procap_npz["arr_0"][:, np.r_[250:750, 1250:1750]]

# Predicted: open the HDF5 file once, read-only, copy both datasets into
# memory, and close the handle instead of leaking two open files.
with h5py.File(os.path.join(data_dir, "ensemble_examples_prediction.h5"), "r") as pred_file:
    tracks = np.array(pred_file["track"])
    quantity = np.array(pred_file["quantity"])

# Normalise each predicted track so it sums to 1 along axis 1, then scale it
# by the predicted quantity.
y_norm = tracks / tracks.sum(axis=1, keepdims=True)
y_pred_scaled = y_norm * quantity

In [8]:
# Select tracks we want to plot

# Genomic windows of interest, written as "chrom:start-end".
ifnar2_coord = "chr21:33229367-33230366"
ints6_coord = "chr13:51452691-51453690"
ttll9_enh_coord = "chr20:31894784-31895783" # EH38E2106518
EH38E2695789_enh_coord = "chr9:70418561-70419560" # EH38E2695789, EH38E2695790
klf9_enh_coord = "chr9:70411667-70412666" # EH38E2695773
klf9_dt_enh_coord = "chr9:70419676-70420675" # EH38E2695794, EH38E2695793
kif3b_enh_coord = "chr20:32285520-32286519" # KIF3B, EH38E2106824, EH38E2106825
EH38E2107427_enh_coord = "chr20:32971851-32972850" # EH38E2107427
NASP_prom_coord = "chr1:45583513-45584512" # NASP
EH38E3485200_enh_coord = "chr22:43187170-43188169"
irf4_prom_coord = "chr6:391151-392150"
rpl10a_prom_coord = "chr6:35467831-35468830"
rpl35_prom_coord = "chr9:124861481-124862480"
irf1_prom_coord = "chr5:132490401-132491400"
irf7_prom_coord = "chr11:615501-616500"
irf8_prom_coord = "chr16:85898601-85899600"

# The coordinate is the last "_"-separated field of each record name; extract
# it once instead of re-splitting every name for every region.
id_coords = [idx.split("_")[-1] for idx in ids]


def _coord_mask(coord):
    """Return one bool per entry of ``ids``: True where its coordinate equals ``coord``."""
    return [id_coord == coord for id_coord in id_coords]


# Boolean masks (one entry per record) used to index rows of the track arrays.
ifnar2_ids = _coord_mask(ifnar2_coord)
ints6_ids = _coord_mask(ints6_coord)
ttll9_enh_ids = _coord_mask(ttll9_enh_coord)
EH38E2695789_enh_ids = _coord_mask(EH38E2695789_enh_coord)
klf9_enh_ids = _coord_mask(klf9_enh_coord)
klf9_dt_enh_ids = _coord_mask(klf9_dt_enh_coord)
kif3b_enh_ids = _coord_mask(kif3b_enh_coord)
EH38E2107427_enh_ids = _coord_mask(EH38E2107427_enh_coord)
NASP_prom_ids = _coord_mask(NASP_prom_coord)
EH38E3485200_enh_ids = _coord_mask(EH38E3485200_enh_coord)
# Must match irf4_prom_coord: comparing against rpl10a_prom_coord here would
# make the IRF4 mask a duplicate of the RPL10A mask.
# NOTE(review): axis limits in the IRF4 plotting cells may have been tuned
# against RPL10A data — re-check them after re-running.
irf4_prom_ids = _coord_mask(irf4_prom_coord)
rpl10a_prom_ids = _coord_mask(rpl10a_prom_coord)
rpl35_prom_ids = _coord_mask(rpl35_prom_coord)
irf1_prom_ids = _coord_mask(irf1_prom_coord)
irf7_prom_ids = _coord_mask(irf7_prom_coord)
irf8_prom_ids = _coord_mask(irf8_prom_coord)

In [10]:
# Experimental track for the IFNAR2 promoter: mean over the rows of `y`
# selected by `ifnar2_ids`.
plot_side(
    y[ifnar2_ids].mean(axis=0),
    pic_name="img/ensemble_ifnar2_prom_expt.pdf",
    ylim=[-1.5, 15],
    yticks=[0, 15],
)

In [11]:
# Predicted track for the IFNAR2 promoter: mean over the rows of
# `y_pred_scaled` selected by `ifnar2_ids`.
plot_side(
    y_pred_scaled[ifnar2_ids].mean(axis=0),
    pic_name="img/ensemble_ifnar2_prom_pred.pdf",
    ylim=[-1, 10],
    yticks=[0, 10],
)

In [12]:
# Experimental track for the INTS6 promoter: mean over the rows of `y`
# selected by `ints6_ids`. The y-range is mostly negative for this region.
plot_side(
    y[ints6_ids].mean(axis=0),
    pic_name="img/ensemble_ints6_prom_expt.pdf",
    ylim=[-100, 16.7],
    yticks=[-100, 0],
)

In [13]:
# Predicted track for the INTS6 promoter: mean over the rows of
# `y_pred_scaled` selected by `ints6_ids`.
plot_side(
    y_pred_scaled[ints6_ids].mean(axis=0),
    pic_name="img/ensemble_ints6_prom_pred.pdf",
    ylim=[-18, 3],
    yticks=[-18, 0],
)

In [14]:
# Experimental track for the KLF9-DT enhancer: mean over the rows of `y`
# selected by `klf9_dt_enh_ids`.
plot_side(
    y[klf9_dt_enh_ids].mean(axis=0),
    pic_name="img/ensemble_klf9_dt_enh_expt.pdf",
    ylim=[-3.5, 7],
    yticks=[0, 7],
)

In [15]:
# Predicted track for the KLF9-DT enhancer: mean over the rows of
# `y_pred_scaled` selected by `klf9_dt_enh_ids`.
plot_side(
    y_pred_scaled[klf9_dt_enh_ids].mean(axis=0),
    pic_name="img/ensemble_klf9_dt_enh_pred.pdf",
    ylim=[-2.5, 5],
    yticks=[0, 5],
)

In [16]:
# Experimental track for the EH38E2107427 enhancer: mean over the rows of `y`
# selected by `EH38E2107427_enh_ids`.
plot_side(
    y[EH38E2107427_enh_ids].mean(axis=0),
    pic_name="img/ensemble_EH38E2107427_enh_expt.pdf",
    ylim=[-0.8, 0.6],
    yticks=[0, 0.6],
)

In [17]:
# Predicted track for the EH38E2107427 enhancer: mean over the rows of
# `y_pred_scaled` selected by `EH38E2107427_enh_ids`.
plot_side(
    y_pred_scaled[EH38E2107427_enh_ids].mean(axis=0),
    pic_name="img/ensemble_EH38E2107427_enh_pred.pdf",
    ylim=[-1.2, 0.9],
    yticks=[0, 0.9],
)

In [18]:
# Experimental track for the NASP promoter: mean over the rows of `y`
# selected by `NASP_prom_ids`.
plot_side(
    y[NASP_prom_ids].mean(axis=0),
    pic_name="img/ensemble_nasp_prom_expt.pdf",
    ylim=[-5, 50],
    yticks=[0, 50],
)

In [19]:
# Predicted track for the NASP promoter: mean over the rows of
# `y_pred_scaled` selected by `NASP_prom_ids`.
plot_side(
    y_pred_scaled[NASP_prom_ids].mean(axis=0),
    pic_name="img/ensemble_nasp_prom_pred.pdf",
    ylim=[-8, 80],
    yticks=[0, 80],
)

In [20]:
# Experimental track for the EH38E3485200 enhancer: mean over the rows of `y`
# selected by `EH38E3485200_enh_ids`.
plot_side(
    y[EH38E3485200_enh_ids].mean(axis=0),
    pic_name="img/ensemble_EH38E3485200_enh_expt.pdf",
    ylim=[-1.5, 15],
    yticks=[0, 15],
)

In [21]:
# Predicted track for the EH38E3485200 enhancer: mean over the rows of
# `y_pred_scaled` selected by `EH38E3485200_enh_ids`.
plot_side(
    y_pred_scaled[EH38E3485200_enh_ids].mean(axis=0),
    pic_name="img/ensemble_EH38E3485200_enh_pred.pdf",
    ylim=[-0.5, 5],
    yticks=[0, 5],
)

In [22]:
# Experimental track for the IRF4 promoter: mean over the rows of `y`
# selected by `irf4_prom_ids`.
# NOTE(review): the top tick (72) does not coincide with the upper y-limit
# (100), unlike the other plotting cells — confirm this is intentional.
plot_side(
    y[irf4_prom_ids].mean(axis=0),
    pic_name="img/ensemble_irf4_prom_expt.pdf",
    ylim=[-20, 100],
    yticks=[0, 72],
)

In [23]:
# Predicted track for the IRF4 promoter: mean over the rows of
# `y_pred_scaled` selected by `irf4_prom_ids`.
plot_side(
    y_pred_scaled[irf4_prom_ids].mean(axis=0),
    pic_name="img/ensemble_irf4_prom_pred.pdf",
    ylim=[-6, 30],
    yticks=[0, 30],
)

In [24]:
# Experimental track for the RPL10A promoter: mean over the rows of `y`
# selected by `rpl10a_prom_ids`.
plot_side(
    y[rpl10a_prom_ids].mean(axis=0),
    pic_name="img/ensemble_rpl10a_prom_expt.pdf",
    ylim=[-20, 100],
    yticks=[0, 100],
)

In [25]:
# Predicted track for the RPL10A promoter: mean over the rows of
# `y_pred_scaled` selected by `rpl10a_prom_ids`.
plot_side(
    y_pred_scaled[rpl10a_prom_ids].mean(axis=0),
    pic_name="img/ensemble_rpl10a_prom_pred.pdf",
    ylim=[-6, 30],
    yticks=[0, 30],
)

In [26]:
# Experimental track for the RPL35 promoter: mean over the rows of `y`
# selected by `rpl35_prom_ids`. Ticks mark zero and the lower y-limit.
plot_side(
    y[rpl35_prom_ids].mean(axis=0),
    pic_name="img/ensemble_rpl35_prom_expt.pdf",
    ylim=[-24, 16],
    yticks=[0, -24],
)

In [27]:
# Predicted track for the RPL35 promoter: mean over the rows of
# `y_pred_scaled` selected by `rpl35_prom_ids`.
plot_side(
    y_pred_scaled[rpl35_prom_ids].mean(axis=0),
    pic_name="img/ensemble_rpl35_prom_pred.pdf",
    ylim=[-6, 4],
    yticks=[0, -6],
)

In [28]:
# Experimental track for the IRF1 promoter: mean over the rows of `y`
# selected by `irf1_prom_ids`. Ticks mark zero and the lower y-limit.
plot_side(
    y[irf1_prom_ids].mean(axis=0),
    pic_name="img/ensemble_irf1_prom_expt.pdf",
    ylim=[-180, 18],
    yticks=[0, -180],
)

In [29]:
# Predicted track for the IRF1 promoter: mean over the rows of
# `y_pred_scaled` selected by `irf1_prom_ids`.
plot_side(
    y_pred_scaled[irf1_prom_ids].mean(axis=0),
    pic_name="img/ensemble_irf1_prom_pred.pdf",
    ylim=[-40, 4],
    yticks=[0, -40],
)

In [30]:
# Experimental track for the IRF7 promoter: mean over the rows of `y`
# selected by `irf7_prom_ids`. Ticks mark zero and the lower y-limit.
plot_side(
    y[irf7_prom_ids].mean(axis=0),
    pic_name="img/ensemble_irf7_prom_expt.pdf",
    ylim=[-60, 6],
    yticks=[0, -60],
)

In [31]:
# Predicted track for the IRF7 promoter: mean over the rows of
# `y_pred_scaled` selected by `irf7_prom_ids`.
plot_side(
    y_pred_scaled[irf7_prom_ids].mean(axis=0),
    pic_name="img/ensemble_irf7_prom_pred.pdf",
    ylim=[-20, 2],
    yticks=[0, -20],
)

In [32]:
# Experimental track for the IRF8 promoter: mean over the rows of `y`
# selected by `irf8_prom_ids`.
plot_side(
    y[irf8_prom_ids].mean(axis=0),
    pic_name="img/ensemble_irf8_prom_expt.pdf",
    ylim=[-10, 15],
    yticks=[0, 15],
)

In [33]:
# Predicted track for the IRF8 promoter: mean over the rows of
# `y_pred_scaled` selected by `irf8_prom_ids`. Same axis range as the
# experimental IRF8 cell.
plot_side(
    y_pred_scaled[irf8_prom_ids].mean(axis=0),
    pic_name="img/ensemble_irf8_prom_pred.pdf",
    ylim=[-10, 15],
    yticks=[0, 15],
)