In [1]:
import os
import numpy as np
import pandas as pd
import pyBigWig
import re
import h5py
import multiprocess as mp
from functools import partial

# Work from the analysis root so that the relative "output/..." paths used in
# the cells below resolve.
# NOTE(review): absolute, machine-specific path -adjust when running elsewhere.
os.chdir("/media/kyle_storage/kyle_ferchen/grimes_lab_main/analysis/"\
    "2023_06_12_tea_seq_atac_processing/")

In [2]:
# Location of the mm10 genome fasta; its index is expected next to it with an
# extra ".fai" suffix
path_to_mm10 = "/media/kyle_storage/kyle_ferchen/grimes_lab_main/reference/genomes/mm10/mm10.fa"
# Load the header-less index table, name its columns and key the rows by
# sequence name
fai_mm10 = pd.read_table(f"{path_to_mm10}.fai", header=None)
fai_mm10.columns = [
    "NAME",
    "LENGTH",
    "OFFSET",
    "LINEBASES",
    "LINEWIDTH",
]
fai_mm10 = fai_mm10.set_index("NAME")

In [3]:
### Helper functions

# Define helper function to extract sequences from fasta file
def read_seq_from_fasta(input_bed, fai_annotation, fasta):
    """Read the sequence of every interval straight from an indexed fasta.

    Parameters
    ----------
    input_bed : pandas.DataFrame
        The first three columns are sequence name, start and end. One is
        subtracted from each start before use, i.e. the interval is read as
        1-based and inclusive. The frame itself is not modified.
    fai_annotation : pandas.DataFrame
        Fasta index table, indexed by sequence name, with the columns
        "OFFSET", "LINEBASES" and "LINEWIDTH".
    fasta : str
        Path to the fasta file described by `fai_annotation`.

    Returns
    -------
    list of str
        One sequence per row of `input_bed`, in row order, with line breaks
        removed. Case is returned as stored in the file.

    Raises
    ------
    KeyError
        If a sequence name is not present in `fai_annotation`.
    ValueError
        If an interval has a start below 1 or an end below start - 1.
    """
    if input_bed.shape[0] == 0:
        return([])

    seq_names = list(input_bed.iloc[:, 0].values)
    # Correct start values for 0 index
    starts = input_bed.iloc[:, 1].values.astype(np.int64) - 1
    ends = input_bed.iloc[:, 2].values.astype(np.int64)
    # A negative length would make read() consume the rest of the file and a
    # negative start would seek before the sequence, so refuse both up front
    if (starts < 0).any() or (ends < starts).any():
        raise ValueError(
            "Intervals must satisfy start >= 1 and end >= start - 1.")

    # One label-based lookup for all rows instead of one lookup per row
    fai_rows = fai_annotation.loc[
        seq_names, ["OFFSET", "LINEBASES", "LINEWIDTH"]]
    offset = fai_rows["OFFSET"].values.astype(np.int64)
    line_bases = fai_rows["LINEBASES"].values.astype(np.int64)
    line_width = fai_rows["LINEWIDTH"].values.astype(np.int64)

    # Byte position of the first requested base
    bases_before_start = starts % line_bases
    seek_positions = offset + (starts // line_bases) * line_width + \
        bases_before_start

    # Number of bytes to read: the bases plus every line terminator crossed
    # (a terminator directly after the last base may be included; it is
    # stripped below)
    bases_to_read = ends - starts
    bases_to_next_line = line_bases - bases_before_start
    fits_on_first_line = bases_to_read < bases_to_next_line
    line_breaks = np.where(
        fits_on_first_line,
        0,
        1 + (bases_to_read - bases_to_next_line) // line_bases)
    bytes_to_read = bases_to_read + line_breaks * (line_width - line_bases)

    # The index stores byte offsets and byte line widths, so the file is read
    # in binary mode: text mode translates "\r\n" to "\n" (breaking the byte
    # counts) and only supports seeking to offsets obtained from tell().
    output_seqs = []
    with open(fasta, "rb") as f:
        for position, n_bytes in zip(
                seek_positions.tolist(), bytes_to_read.tolist()):
            f.seek(position)
            output_seqs.append(
                f.read(n_bytes).decode("ascii").replace(
                    "\r", "").replace("\n", ""))

    return(output_seqs)

def get_reverse_complement(seq):
    """Return the reverse complement of a DNA sequence.

    A, C, G, T and N are supported in upper and lower case and keep their
    case. Any other character raises a KeyError.
    """
    # Complement lookup, paired position by position
    complement_of = dict(zip("ACGTacgtnN", "TGCAtgcanN"))
    # Walk the sequence back to front, swapping each base for its complement
    return("".join(complement_of[base] for base in reversed(seq)))


def read_bw_instances_from_bed_df(bw_file_path, bed_df, cores=None):
    """Read the bigWig values of every interval in parallel.

    Parameters
    ----------
    bw_file_path : str
        Path to the bigWig file. Every worker opens the file itself.
    bed_df : pandas.DataFrame
        Needs the columns "chr", "start" and "end". Values are requested for
        [start - 1, end) of each row.
    cores : int, optional
        Number of worker processes. Defaults to the CPU count of the machine.

    Returns
    -------
    pandas.Series
        One numpy array of values per row, indexed like `bed_df`.

    Raises
    ------
    ValueError
        If the number of processes is below 1.
    """
    n_rows = bed_df.shape[0]
    # Nothing to read -and nothing to split into chunks
    if n_rows == 0:
        return(pd.Series([], index=bed_df.index.values, dtype=object))

    if cores is None:
        # create as many processes as there are CPUs on your machine
        num_processes = mp.cpu_count()
    else:
        num_processes = cores
    if num_processes < 1:
        raise ValueError("The number of processes must be at least 1.")

    # Split the rows into contiguous, near-equal chunks; never more chunks
    # than rows so that no chunk is empty and any row/process count works
    n_chunks = min(num_processes, n_rows)
    chunk_bounds = [(i * n_rows) // n_chunks for i in range(n_chunks + 1)]
    chunks = [bed_df.iloc[lower:upper] for lower, upper in zip(
        chunk_bounds[:-1], chunk_bounds[1:])]

    # Nested on purpose: it closes over bw_file_path. This relies on the
    # `multiprocess` package being able to ship such a function to workers.
    def _help_read_bigwig(input_bed_df):
        with pyBigWig.open(bw_file_path) as tmp_bw:
            bw_series = pd.Series([\
                np.array(tmp_bw.values(\
                    row["chr"], 
                    row["start"]-1,
                    row["end"])) for i, row in input_bed_df.iterrows()],
                index=input_bed_df.index.values)
            
        return(bw_series)

    # The context manager shuts the workers down once map() has returned, so
    # repeated calls do not pile up idle processes
    with mp.Pool(processes=n_chunks) as pool:
        # Apply function to each chunk
        result = pool.map(_help_read_bigwig, chunks)
    return(pd.concat(result).loc[bed_df.index.values])




In [4]:
# Load the peak set (10 column bed file without a header line)
peaks = pd.read_table(
    "output/correlate_tea_atac_to_cite_rna_across_r7_clusters/"
    "peak_to_gene_correlation_within_tads/sig_peak_to_gene_peaks_10col.bed",
    header=None)
# Label every row "chr:start-end" using the first three columns
peaks.index = (
    peaks[0] + ":" + peaks[1].astype(str) + "-" + peaks[2].astype(str)
).values
# Keep only the first row for every label
peaks = peaks.loc[~peaks.index.duplicated()]
peaks.columns = [
    "chr", "start", "end", "name", "score", "strand",
    "thickStart", "thickEnd", "color", "summit"]
peaks

Unnamed: 0,chr,start,end,name,score,strand,thickStart,thickEnd,color,summit
chr1:4456181-4457181,chr1,4456181,4457181,peak_1,1000,.,4456181,4457181,2550220,500
chr1:4540111-4541111,chr1,4540111,4541111,peak_2,1000,.,4540111,4541111,2550220,500
chr1:4614190-4615190,chr1,4614190,4615190,peak_3,1000,.,4614190,4615190,2550220,500
chr1:4615468-4616468,chr1,4615468,4616468,peak_4,1000,.,4615468,4616468,2550220,500
chr1:4621593-4622593,chr1,4621593,4622593,peak_5,1000,.,4621593,4622593,2550220,500
...,...,...,...,...,...,...,...,...,...,...
chrX:169263066-169264066,chrX,169263066,169264066,peak_116374,1000,.,169263066,169264066,2550220,500
chrX:169266871-169267871,chrX,169266871,169267871,peak_116375,1000,.,169266871,169267871,2550220,500
chrX:169299283-169300283,chrX,169299283,169300283,peak_116376,1000,.,169299283,169300283,2550220,500
chrX:169303654-169304654,chrX,169303654,169304654,peak_116377,1000,.,169303654,169304654,2550220,500


In [5]:
# Read in CWMs to do dot product scoring
# Collects the 'sequence' array of every pattern found under 'pos_patterns'
# and 'neg_patterns' of each modisco h5 file, keyed as
# "<cluster>__pos_<pattern>" / "<cluster>__neg_<pattern>".
print("Reading positive patterns...")
path_to_h5_files = "output/chrombpnet/modisco_merged_results/fold_0/"\
    "modisco_h5_outputs/"
h5_cluster_re = re.compile(r'modisco_fold_0_(.+)_modisco.h5')
pat_dict = {}
# Sorted so that the order of pat_dict does not depend on the file system
for tmp_h5_file in sorted(os.listdir(path_to_h5_files)):
    tmp_match = h5_cluster_re.search(tmp_h5_file)
    if tmp_match is None:
        # Any other file in the directory would otherwise abort the cell
        print(f"Skipping {tmp_h5_file}: not a modisco h5 file name")
        continue
    tmp_cluster = tmp_match.group(1)
    tmp_h5_file_path = os.path.join(path_to_h5_files, tmp_h5_file)
    with h5py.File(tmp_h5_file_path, "r") as tmp_h5:
        # Same handling for both groups; only the key label differs
        for tmp_group, tmp_label in (
                ("pos_patterns", "pos"), ("neg_patterns", "neg")):
            if tmp_group not in tmp_h5:
                continue
            for tmp_pat in tmp_h5[tmp_group]:
                pat_dict[f"{tmp_cluster}__{tmp_label}_{tmp_pat}"] = np.array(
                    tmp_h5[tmp_group][tmp_pat]['sequence'][()]).astype(
                        np.float32)

# Modify patterns -only keep positions where some base has a frequency above
# 0.5; all other positions (rows) are set to zero
print("Mask to positions in pattern above 0.5 frequency for a given base...")
for tmp_pat in pat_dict:
    pat_dict[tmp_pat] = (pat_dict[tmp_pat].T * \
        (pat_dict[tmp_pat] > 0.5).max(axis=1).astype(np.float32)).T
    
# Make reverse complement pat_dict (both axes of each pattern are reversed)
pat_dict_rev_comp = {}
for tmp_pat in pat_dict:
    pat_dict_rev_comp[tmp_pat] = pat_dict[tmp_pat][::-1,::-1]

Reading positive patterns...
Mask to positions in pattern above 0.5 frequency for a given base...


In [6]:
# Load, for every cluster, the contribution scores under each peak
path_to_bigwigs = "output/chrombpnet/browser_visualization/"\
    "count_scores_bw_extended_peak_set/fold_0/"

list_bw = os.listdir(path_to_bigwigs)

# cluster name -> Series holding one array of scores per peak
cs_dict = {}
for tmp_cluster_bw in list_bw:
    # The cluster name is the file name without its last three characters
    # (presumably the ".bw" suffix -verify against the directory content)
    tmp_cluster_name = tmp_cluster_bw[:-len(".bw")]
    print(f"Working on cluster {tmp_cluster_name}...")
    tmp_path_bw = os.path.join(path_to_bigwigs, tmp_cluster_bw)
    cs_dict[tmp_cluster_name] = read_bw_instances_from_bed_df(
        bw_file_path=tmp_path_bw,
        bed_df=peaks[["chr", "start", "end"]],
        cores=8)



Working on cluster BMCP...
Working on cluster CD127_MP...
Working on cluster CLP1_Rrm2...
Working on cluster eHSC...
Working on cluster eHSC_Pcna...
Working on cluster ERP1...
Working on cluster ERP2...
Working on cluster HSCP_ERP1...
Working on cluster HSCP_HPC_Cenpf...
Working on cluster HSCP_HPC_Hist1h2af...
Working on cluster HSCP_HPC_Tk1...
Working on cluster HSCP_MKP...
Working on cluster IG2_MP...
Working on cluster IG2_proNeu1...
Working on cluster LT_HSC_Mllt3...
Working on cluster MDP_Cpa3...
Working on cluster MDP_Irf8...
Working on cluster MEP...
Working on cluster MKP...
Working on cluster ML_cell_cycle...
Working on cluster MPP4_Hlf...
Working on cluster MPP4_Nkx2_3...
Working on cluster MPP5_Egr1...
Working on cluster MPP5_Flt3...
Working on cluster MultiLin_1...
Working on cluster MultiLin_1_MEP...
Working on cluster MultiLin_2_F13a1...
Working on cluster MultiLin_2_Ms4a3...
Working on cluster pre_MultiLin_1...
Working on cluster pre_MultiLin_2...
Working on cluster pro

In [7]:
# Fetch the genomic sequence under every peak, upper-cased and labelled with
# the peak index
print("Reading in observed bp sequences...")
peak_seqs = pd.Series(
    read_seq_from_fasta(peaks[["chr", "start", "end"]], fai_mm10, path_to_mm10),
    index=peaks.index.values,
).str.upper()

Reading in observed bp sequences...


In [8]:
# One-hot encode the peak sequences
# Each base becomes a row of four values in A, C, G, T column order; N becomes
# an all-zero row
ohed_dict = {
    "A": [1.0, 0.0, 0.0, 0.0],
    "C": [0.0, 1.0, 0.0, 0.0],
    "G": [0.0, 0.0, 1.0, 0.0],
    "T": [0.0, 0.0, 0.0, 1.0],
    "N": [0.0, 0.0, 0.0, 0.0],
}
peak_seqs_ohe = peak_seqs.apply(
    lambda tmp_seq: np.array([ohed_dict[tmp_base] for tmp_base in tmp_seq]))

In [51]:
# Score every motif hit of every cluster and keep the hits that score at least
# as high as the weakest modisco hit of the same pattern.
# Per hit file: (1) slice the contribution scores and the one-hot sequence of
# each unique hit position out of the per-peak arrays, (2) weight the observed
# bases with the pattern (reverse-complemented pattern for "-" strand hits),
# (3) take the dot product with the contribution scores, (4) summarise the
# scores of the hits flagged "in_modisco" per pattern and (5) filter all hits
# against the per-pattern minimum.
# Requires cs_dict, peak_seqs_ohe, pat_dict and pat_dict_rev_comp from the
# cells above.
path_save_motif_hits = "output/chrombpnet/modisco_merged_results/fold_0/"\
    "redo_extract_seqlets/cluster_pwm_hits/"

list_motif_hit_files = os.listdir(path_save_motif_hits)


# One entry per hit file; concatenated after the loop
saved_stats = []
saved_hits = []
for tmp_file in list_motif_hit_files:
    # Cluster name is the file name without its last four characters
    # (presumably a ".fea" suffix -verify against the directory content).
    # It must match a key of cs_dict.
    tmp_cluster = tmp_file[:-4]
    print(f"Working on {tmp_cluster}...")
    tmp_hits = pd.read_feather(os.path.join(path_save_motif_hits, tmp_file))
    # Add name for seqlet coordinates
    tmp_hits["seq_name"] = (\
        tmp_hits["chr"] + ":" + \
        tmp_hits["start"].astype(str) + "-" + \
        tmp_hits["end"].astype(str)).values
    # Remove hits with duplicate positions and patterns
    tmp_hits = tmp_hits.loc[\
        ~(tmp_hits["seq_name"]+tmp_hits["pattern"]).duplicated().values]
    # Save the indices with unique seqlet positions
    unique_idx = tmp_hits.loc[~tmp_hits["seq_name"].duplicated()].index.values
    # Save the unique indices with reverse strand patterns
    # (not used again within this cell)
    unique_rev_idx = tmp_hits.loc[unique_idx].loc[\
        tmp_hits.loc[unique_idx, "strand"] == "-", "seq_name"].values
    # Define helper dataframe: for every unique hit position the peak it lies
    # in ("idx") and the slice [start_position, end_position) within that
    # peak's arrays, where end_position = pos + (end - start)
    indexing_df = pd.DataFrame({\
            "idx": tmp_hits.loc[unique_idx, "peak"].values,
            "start_position": tmp_hits.loc[unique_idx, "pos"].values,
            "end_position": (tmp_hits.loc[unique_idx, "end"] - \
                tmp_hits.loc[unique_idx, "start"] + \
                tmp_hits.loc[unique_idx, "pos"]).values},
        index=tmp_hits.loc[unique_idx, "seq_name"].values)
    # Read in the contribution scores for the seqlet positions
    seg_cs = pd.Series([i[j:k] for i,j,k in zip(\
            cs_dict[tmp_cluster].loc[indexing_df["idx"].values].values,
            indexing_df["start_position"].values,
            indexing_df["end_position"].values)],
        index=indexing_df.index.values)
    # Get the one-hot encoded DNA sequences
    seg_ohe = pd.Series([i[j:k] for i,j,k in zip(\
            peak_seqs_ohe.loc[indexing_df["idx"].values].values,
            indexing_df["start_position"].values,
            indexing_df["end_position"].values)],
        index=indexing_df.index.values)

    # Weight the nucleotide frequency of the motif
    # (i*j).sum(axis=1) multiplies the one-hot slice with the pattern and sums
    # over the base axis, giving one value per position of the hit. "+" hits
    # use the pattern as stored, "-" hits the reverse-complemented pattern.
    tmp_fwd_idx = tmp_hits.loc[tmp_hits["strand"] == "+"].index.values
    tmp_rev_idx = tmp_hits.loc[tmp_hits["strand"] == "-"].index.values
    base_scores_fwd = pd.Series([(i*j).sum(axis=1) for i,j in zip(\
        seg_ohe.loc[tmp_hits.loc[tmp_fwd_idx,"seq_name"].values].values,
        pd.Series(pat_dict)[tmp_hits.loc[tmp_fwd_idx,"pattern"].values].values)], 
        index=tmp_fwd_idx)
    base_scores_rev = pd.Series([(i*j).sum(axis=1) for i,j in zip(\
        seg_ohe.loc[tmp_hits.loc[tmp_rev_idx,"seq_name"].values].values,
        pd.Series(pat_dict_rev_comp)[tmp_hits.loc[tmp_rev_idx,"pattern"].values].values)], 
        index=tmp_rev_idx)
    # Put both strands back into the row order of tmp_hits
    base_scores = pd.concat([base_scores_fwd, base_scores_rev]).loc[\
        tmp_hits.index.values]

    # Calculate dot-product scores
    # (per-position weights times per-position contribution scores, summed
    # over the hit)
    tmp_dp_scores = pd.Series(
        [(i*j).sum() for i,j in zip(\
            base_scores.values,
            seg_cs.loc[tmp_hits.loc[\
                base_scores.index.values, 
                "seq_name"].values])],
        index=base_scores.index.values)

    # Invert scores with negative patterns
    # (hits whose pattern name contains "neg_pattern")
    neg_pat_idx = base_scores.index.values[\
        tmp_hits.loc[base_scores.index.values, "pattern"].str.contains(\
            "neg_pattern")]

    if len(neg_pat_idx) > 0:
        tmp_dp_scores.loc[neg_pat_idx] = -1 * tmp_dp_scores.loc[neg_pat_idx]

    # Get the statistics for each pattern from modisco hits
    # (only rows with a true "in_modisco" flag contribute)
    tmp_modisco_hits = tmp_hits.loc[tmp_hits["in_modisco"]]
    list_unique_pats = tmp_modisco_hits["pattern"].unique()
    tmp_min = []
    tmp_max = []
    tmp_pt5 = []
    tmp_pt95 = []
    for tmp_pat in list_unique_pats:
        seg_scores = tmp_dp_scores.loc[tmp_modisco_hits.loc[\
            tmp_modisco_hits["pattern"] == tmp_pat].index.values].values
        tmp_min.append(seg_scores.min())
        tmp_max.append(seg_scores.max())
        tmp_pt5.append(np.percentile(seg_scores, 5))
        tmp_pt95.append(np.percentile(seg_scores, 95))

    tmp_stats = pd.DataFrame({\
            "min": tmp_min,
            "max": tmp_max,
            "pctile_5": tmp_pt5,
            "pctile_95": tmp_pt95},
        index=list_unique_pats)

    # Filter: keep hits scoring at least the minimum observed among the
    # modisco hits of the same pattern.
    # NOTE(review): the lookup in tmp_stats appears to require every pattern
    # in tmp_hits to have at least one "in_modisco" hit -confirm.
    sel_tmp_hits = tmp_hits.loc[\
        tmp_dp_scores[tmp_dp_scores >= tmp_stats.loc[\
            tmp_hits.loc[\
                tmp_dp_scores.index.values, 
                "pattern"].values, "min"].values].index.values]
    sel_tmp_hits["dp_score"] = tmp_dp_scores.loc[\
        sel_tmp_hits.index.values].values

    saved_stats.append(tmp_stats)
    saved_hits.append(sel_tmp_hits)


# From here on both names refer to single dataframes instead of lists; the
# index of saved_hits still holds the per-file row labels at this point
saved_stats = pd.concat(saved_stats)
saved_hits = pd.concat(saved_hits)

Working on BMCP...
Working on CD127_MP...
Working on CLP1_Rrm2...
Working on eHSC...
Working on eHSC_Pcna...
Working on ERP1...
Working on ERP2...
Working on HSCP_ERP1...
Working on HSCP_HPC_Cenpf...
Working on HSCP_HPC_Hist1h2af...
Working on HSCP_HPC_Tk1...
Working on HSCP_MKP...
Working on IG2_MP...
Working on IG2_proNeu1...
Working on LT_HSC_Mllt3...
Working on MDP_Cpa3...
Working on MDP_Irf8...
Working on MEP...
Working on MKP...
Working on ML_cell_cycle...
Working on MPP4_Hlf...
Working on MPP4_Nkx2_3...
Working on MPP5_Egr1...
Working on MPP5_Flt3...
Working on MultiLin_1...
Working on MultiLin_1_MEP...
Working on MultiLin_2_F13a1...
Working on MultiLin_2_Ms4a3...
Working on pre_MultiLin_1...
Working on pre_MultiLin_2...
Working on proNeu_1...
Working on ST_HSC...


In [60]:
# Both tables go to the same output directory
tmp_out_dir = "output/chrombpnet/modisco_merged_results/fold_0/"\
    "redo_extract_seqlets/"

# Save all instances; rows are renumbered 0..n-1 first
saved_hits.index = list(range(len(saved_hits)))
saved_hits.to_feather(
    tmp_out_dir + "all_seqlit_hits_above_modisco_min_anno.fea")

# Save modisco dpscore stats; the pattern names move from the index into a
# column
saved_stats.reset_index().to_feather(
    tmp_out_dir + "modisco_seqlet_hit_stats.fea")

In [61]:
# Show the filtered hits (now renumbered and carrying the dp_score column)
saved_hits

Unnamed: 0,chr,start,end,peak,score,pos,strand,pattern,in_modisco,seq_name,dp_score
0,chr1,4456740,4456770,chr1:4456181-4457181,5.125949,560,+,BMCP__pos_pattern_32,False,chr1:4456740-4456770,0.318700
1,chr1,4456742,4456772,chr1:4456181-4457181,8.421132,562,-,BMCP__pos_pattern_3,True,chr1:4456742-4456772,0.309506
2,chr1,4456791,4456821,chr1:4456181-4457181,5.021894,611,+,BMCP__pos_pattern_5,False,chr1:4456791-4456821,0.153212
3,chr1,4614242,4614272,chr1:4614190-4615190,4.184268,53,+,BMCP__pos_pattern_19,False,chr1:4614242-4614272,0.096635
4,chr1,4614242,4614272,chr1:4614190-4615190,1.546088,53,+,BMCP__pos_pattern_21,False,chr1:4614242-4614272,0.092790
...,...,...,...,...,...,...,...,...,...,...,...
16097284,chrX,169300178,169300208,chrX:169299283-169300283,8.963812,896,-,ST_HSC__pos_pattern_1,False,chrX:169300178-169300208,0.413159
16097285,chrX,169300188,169300218,chrX:169299283-169300283,5.419419,906,+,ST_HSC__pos_pattern_17,False,chrX:169300188-169300218,0.442239
16097286,chrX,169304118,169304148,chrX:169303654-169304654,5.680791,465,-,ST_HSC__pos_pattern_34,False,chrX:169304118-169304148,0.512000
16097287,chrX,169304138,169304168,chrX:169303654-169304654,12.651671,485,+,ST_HSC__pos_pattern_1,False,chrX:169304138-169304168,2.852918


In [66]:
### Extract dp-scores for all seqlets on all clusters
# Scores every saved hit against the contribution scores of every cluster.
# Rows of saved_hits are looked up by index label, so saved_hits needs the
# unique 0..n-1 index assigned before it was saved.
# Save the indices with unique seqlet positions
unique_idx = saved_hits.loc[~saved_hits["seq_name"].duplicated()].index.values
# Save the unique indices with reverse strand patterns
unique_rev_idx = saved_hits.loc[unique_idx].loc[\
    saved_hits.loc[unique_idx, "strand"] == "-", "seq_name"].values

# Everything up to the loop depends only on the hits, the sequences and the
# patterns -not on the cluster- so it is computed once instead of once per
# cluster.
# Define helper dataframe: peak and slice [start_position, end_position) of
# every unique hit position
indexing_df = pd.DataFrame({\
        "idx": saved_hits.loc[unique_idx, "peak"].values,
        "start_position": saved_hits.loc[unique_idx, "pos"].values,
        "end_position": (saved_hits.loc[unique_idx, "end"] - \
            saved_hits.loc[unique_idx, "start"] + \
            saved_hits.loc[unique_idx, "pos"]).values},
    index=saved_hits.loc[unique_idx, "seq_name"].values)
# Get the one-hot encoded DNA sequences
seg_ohe = pd.Series([i[j:k] for i,j,k in zip(\
        peak_seqs_ohe.loc[indexing_df["idx"].values].values,
        indexing_df["start_position"].values,
        indexing_df["end_position"].values)],
    index=indexing_df.index.values)

# Weight the nucleotide frequency of the motif ("-" strand hits use the
# reverse-complemented pattern)
tmp_fwd_idx = saved_hits.loc[saved_hits["strand"] == "+"].index.values
tmp_rev_idx = saved_hits.loc[saved_hits["strand"] == "-"].index.values
base_scores_fwd = pd.Series([(i*j).sum(axis=1) for i,j in zip(\
    seg_ohe.loc[saved_hits.loc[tmp_fwd_idx,"seq_name"].values].values,
    pd.Series(pat_dict)[saved_hits.loc[\
        tmp_fwd_idx,"pattern"].values].values)], 
    index=tmp_fwd_idx)
base_scores_rev = pd.Series([(i*j).sum(axis=1) for i,j in zip(\
    seg_ohe.loc[saved_hits.loc[tmp_rev_idx,"seq_name"].values].values,
    pd.Series(pat_dict_rev_comp)[saved_hits.loc[\
        tmp_rev_idx,"pattern"].values].values)], 
    index=tmp_rev_idx)
# Back into the row order of saved_hits
base_scores = pd.concat([base_scores_fwd, base_scores_rev]).loc[\
    saved_hits.index.values]
# seq_name of every hit, in the row order of base_scores
tmp_hit_seq_names = saved_hits.loc[\
    base_scores.index.values, "seq_name"].values

dp_scores = {}
for tmp_cluster in cs_dict:
    print(f"Working on {tmp_cluster}...")
    # Read in the contribution scores for the seqlet positions
    # (the only cluster specific input)
    seg_cs = pd.Series([i[j:k] for i,j,k in zip(\
            cs_dict[tmp_cluster].loc[indexing_df["idx"].values].values,
            indexing_df["start_position"].values,
            indexing_df["end_position"].values)],
        index=indexing_df.index.values)

    # Calculate dot-product scores
    dp_scores[tmp_cluster] = pd.Series(
        [(i*j).sum() for i,j in zip(\
            base_scores.values,
            seg_cs.loc[tmp_hit_seq_names])],
        index=base_scores.index.values)
    
# One column per cluster, one row per hit
dp_scores = pd.DataFrame(dp_scores)
dp_scores.reset_index().to_feather("output/chrombpnet/modisco_merged_results/"\
    "fold_0/redo_extract_seqlets/"\
    "all_seqlit_hits_above_modisco_min_dp_scores.fea")

Working on BMCP...
Working on CD127_MP...
Working on CLP1_Rrm2...
Working on eHSC...
Working on eHSC_Pcna...
Working on ERP1...
Working on ERP2...
Working on HSCP_ERP1...
Working on HSCP_HPC_Cenpf...
Working on HSCP_HPC_Hist1h2af...
Working on HSCP_HPC_Tk1...
Working on HSCP_MKP...
Working on IG2_MP...
Working on IG2_proNeu1...
Working on LT_HSC_Mllt3...
Working on MDP_Cpa3...
Working on MDP_Irf8...
Working on MEP...
Working on MKP...
Working on ML_cell_cycle...
Working on MPP4_Hlf...
Working on MPP4_Nkx2_3...
Working on MPP5_Egr1...
Working on MPP5_Flt3...
Working on MultiLin_1...
Working on MultiLin_1_MEP...
Working on MultiLin_2_F13a1...
Working on MultiLin_2_Ms4a3...
Working on pre_MultiLin_1...
Working on pre_MultiLin_2...
Working on proNeu_1...
Working on ST_HSC...
