In [111]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import pyfastx
import os
import h5py

In [112]:
def plot_side(arr, ylim=[-2, 2.5], xticks=[], yticks=[0, 2], pic_name=None):
    """Draw the two halves of ``arr`` as mirrored bar tracks on the current axes.

    The first half of ``arr`` is drawn as red bars; the second half is negated
    and drawn as blue bars over the same x positions.

    Parameters
    ----------
    arr : array-like exposing ``shape`` and slicing
        Values to plot. ``arr.shape[0]`` must be even.
    ylim : sequence
        Forwarded to ``Axes.set_ylim``.
    xticks, yticks : sequence
        Tick positions forwarded to ``Axes.set_xticks`` / ``Axes.set_yticks``.
    pic_name : str or None
        ``None`` shows the figure with ``plt.show()``. Any other value is used
        as the output path: the figure is saved with a transparent background
        and then closed.

    Notes
    -----
    The list defaults are shared between calls. They are only read here, so
    callers should not mutate them.
    """
    assert arr.shape[0] % 2 == 0, "arr must have even length."

    half = arr.shape[0] // 2
    upper = arr[:half]
    lower = arr[half:]

    ax = plt.gca()
    # Bars are 2 wide on unit-spaced positions, so neighbouring bars overlap.
    ax.bar(range(upper.shape[0]), upper, color="r", width=2)
    ax.bar(range(lower.shape[0]), -lower, color="b", width=2)

    ax.set_ylim(ylim)
    ax.set_yticks(yticks)
    ax.set_xticks(xticks)
    # Only the left spine stays visible.
    ax.spines[["right", "top", "bottom"]].set_visible(False)
    ax.set_xlim(-0.5, upper.shape[0] - 0.5)

    if pic_name is not None:
        plt.savefig(pic_name, transparent=True)
        plt.close()
        return
    plt.show()

In [113]:
# Input file names; they are joined onto `scratch` wherever the files are read.
procap_fp = "dpr_windows_procap_mean.csv.gz"
prediction_fp = "ensemble_dpr_prediction_mean.h5"
# Directory holding the input files. Defaults to the original local path;
# set the DPR_SCRATCH_DIR environment variable to run the notebook on another
# machine without editing this cell.
scratch = os.environ.get("DPR_SCRATCH_DIR", "/Users/adamhe/github/scratch/")

In [114]:
# Open the prediction file once, read-only, and release the handle as soon as
# both datasets have been copied into memory.
with h5py.File(os.path.join(scratch, prediction_fp), "r") as prediction_h5:
    # `[...]` reads the full dataset into a NumPy array, so the values stay
    # usable after the file is closed.
    tracks = prediction_h5["track"][...]
    quantity = prediction_h5["quantity"][...]

# Normalise every row of `tracks` to sum to 1, then rescale by `quantity`.
# NOTE(review): a row summing to 0 would produce NaN/inf here — confirm the
# inputs exclude such rows.
# NOTE(review): assumes `quantity` broadcasts against `tracks` (e.g. shape
# (n_rows, 1)) — TODO confirm.
y_norm = tracks / tracks.sum(axis=1, keepdims=True)
y_pred_scaled = y_norm * quantity

# Measured signal: the first CSV column becomes the index and is dropped by
# `to_numpy()`; of the remaining columns keep 250-749 and 1250-1749, i.e. two
# 500-column blocks placed side by side.
y = pd.read_csv(
    os.path.join(scratch, procap_fp), header=None, index_col=0
).to_numpy()[:, np.r_[250:750, 1250:1750]]

# Tab-separated table without a header row, so columns are labelled 0..N.
dpr_scores = pd.read_csv(
    os.path.join(scratch, "active_dpr_svrh.bed.gz"), sep="\t", header=None
)
# Row labels of `dpr_scores`, ordered from the highest to the lowest value in
# column 10.
# NOTE(review): these labels are used as positional row indices into `y` and
# `y_pred_scaled` elsewhere; that presumes the three inputs share row order —
# TODO confirm.
dpr_desc = dpr_scores.loc[:,10].sort_values(ascending=False).index

In [139]:
# Show the row with the 11th-highest value in column 10. `dpr_desc` holds
# index labels (it is the `.index` of a sorted Series), so the row is looked
# up by label with `.loc` rather than by position.
dpr_scores.loc[dpr_desc[10]]

0                         chr8
1                     99893680
2                     99893681
3                         chr8
4                     99893680
5                     99893681
6     chr8-100905909-m-FI_0.84
7                            -
8          TTAGTCAGGAAGGACGTTG
9                        17.66
10                        9.24
Name: 2088, dtype: object

In [117]:
# Measured signal (row of `y`) for the entry ranked 2nd by column 10 of
# `dpr_scores`; a `pic_name` is given, so the figure is saved rather than shown.
plot_side(
    arr=y[dpr_desc[1]],
    ylim=[-180, 10],
    yticks=[0, -180],
    pic_name="ensemble_irf1_prom.pdf",
)

In [118]:
# Scaled prediction (row of `y_pred_scaled`) for the entry ranked 2nd by
# column 10 of `dpr_scores`; saved to file rather than shown.
plot_side(
    arr=y_pred_scaled[dpr_desc[1]],
    ylim=[-36, 2],
    yticks=[0, -36],
    pic_name="ensemble_irf1_prom_pred.pdf",
)

In [121]:
# Measured signal (row of `y`) for the entry ranked 6th by column 10 of
# `dpr_scores`; saved to file rather than shown.
# NOTE(review): the non-zero tick (-3) is not one of the y-limits (-48, 8),
# unlike the irf1/nolc1/cox6c panels in this notebook — confirm it is intended.
plot_side(
    arr=y[dpr_desc[5]],
    ylim=[-48, 8],
    yticks=[0, -3],
    pic_name="ensemble_ivns1abp_prom.pdf",
)

In [122]:
# Scaled prediction (row of `y_pred_scaled`) for the entry ranked 6th by
# column 10 of `dpr_scores`; saved to file rather than shown.
# NOTE(review): the non-zero tick (-3) is not one of the y-limits (-18, 3),
# unlike the irf1/nolc1/cox6c panels in this notebook — confirm it is intended.
plot_side(
    arr=y_pred_scaled[dpr_desc[5]],
    ylim=[-18, 3],
    yticks=[0, -3],
    pic_name="ensemble_ivns1abp_prom_pred.pdf",
)

In [124]:
# Measured signal (row of `y`) for the entry ranked 7th by column 10 of
# `dpr_scores`; saved to file rather than shown.
plot_side(
    arr=y[dpr_desc[6]],
    ylim=[-3, 24],
    yticks=[0, 24],
    pic_name="ensemble_nolc1_prom.pdf",
)

In [125]:
# Scaled prediction (row of `y_pred_scaled`) for the entry ranked 7th by
# column 10 of `dpr_scores`; saved to file rather than shown.
plot_side(
    arr=y_pred_scaled[dpr_desc[6]],
    ylim=[-1, 8],
    yticks=[0, 8],
    pic_name="ensemble_nolc1_prom_pred.pdf",
)

In [137]:
# Measured signal (row of `y`) for the entry ranked 11th by column 10 of
# `dpr_scores`; saved to file rather than shown.
plot_side(
    arr=y[dpr_desc[10]],
    ylim=[-9, 3],
    yticks=[0, 3],
    pic_name="ensemble_cox6c_prom.pdf",
)

In [127]:
# Scaled prediction (row of `y_pred_scaled`) for the entry ranked 11th by
# column 10 of `dpr_scores`; saved to file rather than shown.
plot_side(
    arr=y_pred_scaled[dpr_desc[10]],
    ylim=[-4.5, 1.5],
    yticks=[0, 1.5],
    pic_name="ensemble_cox6c_prom_pred.pdf",
)