In [1]:
import h5py
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from akita_utils.utils import ut_dense

In [2]:
def get_map(predicted_vector, diagonal_offset=2):

    """
    Turn a flat prediction vector into a dense 2D contact map.

    Parameters
    ------------
    predicted_vector : numpy vector
        One flattened prediction (Akita's output for a single target).
    diagonal_offset : int
        Diagonal offset forwarded unchanged to `ut_dense`
        (a parameter of the model).

    Returns
    ---------
    dense_map : numpy matrix
        2D map of changes in DNA contacts (512x512 for Akita's standard
        output); the singleton third axis returned by `ut_dense` is removed.
    """

    # ut_dense is given a trailing axis of length 1: (length,) -> (length, 1)
    as_column = np.expand_dims(predicted_vector, 1)
    dense_map = ut_dense(as_column, diagonal_offset)

    # drop that singleton axis again so the caller gets a plain 2D array
    return np.squeeze(dense_map, axis=2)

In [3]:
def plot_map(matrix, vmin=-0.6, vmax=0.6, width=5, height=5, palette="RdBu_r"):

    """
    Plots a 2D map representing changes in the DNA contacts as a heatmap.

    Parameters
    ------------
    matrix : numpy array
        2D array with changes in DNA contacts (e.g. the output of `get_map`).
    vmin : float
    vmax : float
        Minimum and maximum in the colormap scale.
    width : int
    height : int
        Width and height of a plotted map (matplotlib figure size).
    palette : str
        Name of the colormap passed to seaborn as `cmap`.
    """

    # Draw on an explicit axes instead of relying on pyplot's "current axes",
    # so the heatmap always lands on the figure created here.
    _, ax = plt.subplots(figsize=(width, height))

    sns.heatmap(
        matrix,
        vmin=vmin,
        vmax=vmax,
        cbar=False,
        cmap=palette,
        square=True,
        xticklabels=False,
        yticklabels=False,
        ax=ax,
    )
    plt.show()

In [4]:
# Directory that contains the per-job subdirectories (job0, job1, ...);
# collect_h5 also writes the merged files directly into it.
out_dir = "TEST"
# Names of the HDF5 files found in every job subdirectory.
stat_file_name = "STATS_OUT.h5"
maps_file_name = "MAPS_OUT.h5"
# Number of job subdirectories to merge (job0 ... job<num_procs - 1>).
num_procs = 2

In [None]:
# Open the stats file of the first job (read-only) and list its datasets.
# The handle stays open because the following cells keep reading from `hf`.
hf = h5py.File(f"{out_dir}/job0/{stat_file_name}", "r")

for name in hf.keys():
    print(name, hf[name].shape)

In [None]:
# Show the entry at index 9 of one dataset of the file currently bound to `hf`.
hf["SCD_h1_m0_t3"][9]

In [None]:
# NOTE(review): this applies three indices to "SCD_h1_m0_t3", while the merged
# stats file lists that dataset as 1-D with shape (20,). If the job0 file has
# the same layout this raises -- TODO confirm which dataset was intended.
get_map(hf["SCD_h1_m0_t3"][0,:,0])

In [None]:
# Rebind `hf` to another job0 output file (read-only) and list its datasets.
hf = h5py.File("TEST/job0/OUT.h5", "r")

for name in hf.keys():
    print(name, hf[name].shape)


In [None]:
# Print every file-level attribute stored on the open HDF5 file.
for attr_name, attr_value in hf.attrs.items():
    print(attr_name, attr_value)

In [None]:
# Plot six slices of "refmap_h1_m0": first axis fixed at 0, last axis = i.
# NOTE(review): the printed label says "background", but `i` indexes the last
# axis; collect_h5 treats the first axis of ref maps as backgrounds and the
# last one as targets -- TODO confirm the label.
# NOTE(review): this expects a 4-D dataset already holding 2D maps; the merged
# MAPS file stores flat 3-D vectors instead -- verify the layout of OUT.h5.
for i in range(6):
    print(f"background {i}")
    plot_map(hf["refmap_h1_m0"][0,:,:,i])

In [None]:
# Show one slice of "map_h1_m0" (first index 0, last index 0).
# NOTE(review): assumes a 4-D dataset -- verify against the layout of the file.
hf["map_h1_m0"][0,:,:,0]

In [5]:
def collect_h5(file_name, out_dir, num_procs):

    """
    Merges the per-job HDF5 files `<out_dir>/job<i>/<file_name>` into one
    file `<out_dir>/<file_name>` (created or truncated).

    Parameters
    ------------
    file_name : str
        Name of the HDF5 file present in every job directory; the merged
        file gets the same name and is written directly into `out_dir`.
    out_dir : str
        Directory containing the `job0` ... `job<num_procs - 1>` directories.
    num_procs : int
        Number of job directories to merge.

    Notes
    ------------
    - The number of variants of a job is the length of its "chrom" dataset,
      so every job file must contain that key.
    - The layout of the merged file is derived from job0:
        * 1-D datasets, and 3-D datasets whose key's first "_"-separated
          token is "map", are concatenated along the first axis in job order;
        * every other 3-D dataset is created with job0's shape and each job
          overwrites its leading rows (the last job wins);
        * datasets of any other rank are not copied.
    - File-level attributes are not copied.
    """

    def job_path(job_index):
        # location of the output of a single job
        return "%s/job%d/%s" % (out_dir, job_index, file_name)

    def is_variant_map(key):
        # "map_*" datasets hold one entry per variant; other 3-D datasets
        # (e.g. "refmap_*") are shared between the jobs
        return key.split("_")[0] == "map"

    # count variants
    num_variants = 0
    for pi in range(num_procs):
        with h5py.File(job_path(pi), "r") as job_h5_open:
            num_variants += len(job_h5_open["chrom"])

    print("num_variants: ", num_variants)

    final_h5_file = "%s/%s" % (out_dir, file_name)

    # `with` guarantees that every handle is closed, also when a job file is
    # missing or malformed and an exception interrupts the merge.
    with h5py.File(final_h5_file, "w") as final_h5_open:

        # initialize final h5 using job0 as the template
        with h5py.File(job_path(0), "r") as job0_h5_open:
            for key in job0_h5_open.keys():
                source = job0_h5_open[key]

                if source.ndim == 1:
                    final_shape = (num_variants,)
                elif source.ndim == 3:
                    if is_variant_map(key):
                        final_shape = (num_variants,) + tuple(source.shape[1:])
                    else:
                        final_shape = tuple(source.shape)
                else:
                    # other ranks are not part of the merged file
                    continue

                final_h5_open.create_dataset(
                    key, shape=final_shape, dtype=source.dtype
                )

        # set values
        vi = 0
        for pi in range(num_procs):
            print("collecting job", pi)

            with h5py.File(job_path(pi), "r") as job_h5_open:

                # The write offset must advance by the number of variants of
                # this job. Take it from "chrom" (as in the counting pass)
                # rather than from whichever dataset happens to be iterated
                # last, which could be a shared (non per-variant) dataset.
                job_variants = len(job_h5_open["chrom"])

                # append to final
                for key in job_h5_open.keys():
                    source = job_h5_open[key]

                    if source.ndim == 1:
                        rows = source.shape[0]
                        final_h5_open[key][vi : vi + rows] = source[...]

                    elif source.ndim == 3:
                        rows = source.shape[0]
                        if is_variant_map(key):
                            final_h5_open[key][vi : vi + rows, :, :] = source[...]
                        else:
                            # shared dataset: the same leading rows are
                            # overwritten by every job
                            final_h5_open[key][:rows, :, :] = source[...]

            vi += job_variants

In [6]:
# Merge the per-job statistics files into <out_dir>/<stat_file_name>.
collect_h5(stat_file_name, out_dir, num_procs)

num_variants:  20
collecting job 0
collecting job 1


In [7]:
# Merge the per-job map files into <out_dir>/<maps_file_name>.
collect_h5(maps_file_name, out_dir, num_procs)

num_variants:  20
collecting job 0
collecting job 1


In [None]:
# `file_name` only exists as a parameter of collect_h5, so it is undefined at
# notebook level; build the path from the configured stats file name instead.
# NOTE(review): stat_file_name is assumed because the next cell opens the
# merged stats file -- switch to maps_file_name if that was intended.
final_h5_file = "%s/%s" % (out_dir, stat_file_name)
print(final_h5_file)

In [8]:
# Open the merged stats file written by collect_h5. The path is built from the
# configuration constants so it cannot drift from what collect_h5 wrote.
hf_collected = h5py.File(f"{out_dir}/{stat_file_name}", "r")

# For every dataset: name, shape, first and last entry (quick sanity check
# that values from the first and the last job both arrived).
for key in hf_collected:
    print(key, hf_collected[key].shape, hf_collected[key][0], hf_collected[key][-1])

SCD_h1_m0_t0 (20,) 40.75 21.64
SCD_h1_m0_t1 (20,) 48.7 17.66
SCD_h1_m0_t2 (20,) 52.94 50.84
SCD_h1_m0_t3 (20,) 51.16 53.38
SCD_h1_m0_t4 (20,) 52.4 42.34
SCD_h1_m0_t5 (20,) 46.94 41.38
background_index (20,) 0 0
chrom (20,) b'chr2' b'chr13'
diffSCD_h1_m0_t0 (20,) 32.56 0.217
diffSCD_h1_m0_t1 (20,) 40.66 0.1921
diffSCD_h1_m0_t2 (20,) 22.83 0.1729
diffSCD_h1_m0_t3 (20,) 21.83 0.1624
diffSCD_h1_m0_t4 (20,) 27.55 0.1615
diffSCD_h1_m0_t5 (20,) 21.39 0.1456
end (20,) 48886271 109771775
experiment_id (20,) 0 19
flank_bp (20,) 0 0
genomic_SCD (20,) 53.7 0.063
orientation (20,) b'<<' b'<<'
spacer_bp (20,) 90 90
start (20,) 48886252 109771756
strand (20,) b'-' b'+'


In [9]:
# Open the merged maps file written by collect_h5. The path is built from the
# configuration constants so it cannot drift from what collect_h5 wrote.
# `hf_collected` now refers to the maps file; later cells rely on that.
hf_collected = h5py.File(f"{out_dir}/{maps_file_name}", "r")

# Only names and shapes are printed: the map datasets are too large to dump.
for key in hf_collected:
    print(key, hf_collected[key].shape)

background_index (20,)
chrom (20,)
end (20,)
experiment_id (20,)
flank_bp (20,)
genomic_SCD (20,)
map_h1_m0 (20, 130305, 6)
orientation (20,)
refmap_h1_m0 (1, 130305, 6)
spacer_bp (20,)
start (20,)
strand (20,)


In [13]:
# plot_map(get_map(hf_collected["map_h1_m0"][0,:,0]))

In [None]:
# NOTE(review): plot_map is already defined in an earlier cell of this
# notebook; this definition shadows it with the same behavior. Consider
# deleting this duplicate cell.
def plot_map(matrix, vmin=-0.6, vmax=0.6, width=5, height=5, palette="RdBu_r"):

    """
    Plots a 2D map representing changes in the DNA contacts as a heatmap.

    Parameters
    ------------
    matrix : numpy array
        2D array with changes in DNA contacts (e.g. the output of `get_map`).
    vmin : float
    vmax : float
        Minimum and maximum in the colormap scale.
    width : int
    height : int
        Width and height of a plotted map (matplotlib figure size).
    palette : str
        Name of the colormap passed to seaborn as `cmap`.
    """

    # Draw on an explicit axes instead of relying on pyplot's "current axes",
    # so the heatmap always lands on the figure created here.
    _, ax = plt.subplots(figsize=(width, height))

    sns.heatmap(
        matrix,
        vmin=vmin,
        vmax=vmax,
        cbar=False,
        cmap=palette,
        square=True,
        xticklabels=False,
        yticklabels=False,
        ax=ax,
    )
    plt.show()

In [None]:
# "refmap_h1_m0" is stored as flat vectors with shape
# (backgrounds, vector_length, targets) -- three axes, so it cannot be indexed
# with four indices. Take the vector of background 0 / target i and let
# get_map expand it into the 2D map that plot_map expects.
for i in range(5):
    mapa = get_map(hf_collected["refmap_h1_m0"][0, :, i])
    print(i)
    plot_map(mapa)

In [None]:
# "map_h1_m0" is stored as flat vectors with shape
# (variants, vector_length, targets); select variant 10 / target 3 and
# convert the vector into a 2D map with get_map before plotting.
plot_map(get_map(hf_collected["map_h1_m0"][10, :, 3]))