In [1]:
import os
import pandas as pd
import numpy as np
from scipy.io import mmread
from scipy.sparse import vstack
from scipy.io import mmwrite

# Every path below is relative to this project directory. The location can be
# overridden with the TEA_SEQ_ATAC_DIR environment variable so the notebook can
# run on machines where the lab volume is mounted elsewhere; the original
# absolute path remains the default.
os.chdir(os.environ.get(
    "TEA_SEQ_ATAC_DIR",
    "/Volumes/salomonis2/LabFiles/Kyle/Analysis/2023_06_12_tea_seq_atac_processing/"))

In [2]:
# Helper functions
def helper_load_sparse_mtx_from_dir(input_dir, exp_name):
    """Load one experiment's matrix, barcodes and peaks from a directory.

    Reads three files from ``input_dir``:

    * ``pmat.mtx``     -- Matrix Market file, read with ``scipy.io.mmread``
    * ``barcodes.tsv`` -- headerless table; only the first column is used
    * ``peaks.tsv``    -- headerless table, returned unchanged

    Parameters
    ----------
    input_dir : str
        Directory containing the three files listed above.
    exp_name : str
        Experiment label; every barcode is prefixed with ``exp_name + ":"``.

    Returns
    -------
    dict
        ``"sparse_mtx"`` (matrix as returned by ``mmread``), ``"barcodes"``
        (list of prefixed barcode strings) and ``"peaks"`` (DataFrame).

    Raises
    ------
    ValueError
        If the number of barcodes differs from the number of matrix rows, or
        the number of peak rows differs from the number of matrix columns.
    """
    print("Reading sparse matrix for {}...".format(exp_name))
    sparse_mtx = mmread(os.path.join(input_dir, "pmat.mtx"))
    # dtype=str keeps barcodes as text even when they look numeric, so the
    # string concatenation below cannot fail on integer-parsed values.
    barcodes = pd.read_table(
        os.path.join(input_dir, "barcodes.tsv"), header=None, dtype=str
    ).iloc[:, 0].tolist()
    peaks = pd.read_table(os.path.join(input_dir, "peaks.tsv"), header=None)

    # Rows are labelled by barcodes and columns by peaks; a length mismatch
    # would silently mislabel the matrix downstream, so fail loudly here.
    n_rows, n_cols = sparse_mtx.shape
    if len(barcodes) != n_rows:
        raise ValueError(
            "{}: barcodes.tsv has {} entries but pmat.mtx has {} rows".format(
                exp_name, len(barcodes), n_rows))
    if len(peaks) != n_cols:
        raise ValueError(
            "{}: peaks.tsv has {} entries but pmat.mtx has {} columns".format(
                exp_name, len(peaks), n_cols))

    print("Output is dictionary with the following keys: 'sparse_mtx', 'barcodes', 'peaks'")
    return {
        "sparse_mtx": sparse_mtx,
        "barcodes": [exp_name + ":" + barcode for barcode in barcodes],
        "peaks": peaks,
    }

In [3]:
# Parent directory with one sub-directory per experiment
path_pmat_dirs = "output/tea_r7_pmat_macs2_p_0_001_tss_added/"

# Load the experiments in a fixed order; each label is used both as the
# sub-directory name and as the exp_name passed to the loader.
m1, m2, h1, h2 = [
    helper_load_sparse_mtx_from_dir(
        os.path.join(path_pmat_dirs, exp_label),
        exp_name=exp_label)
    for exp_label in ("M1", "M2", "H1", "H2")
]



Reading sparse matrix for M1...
Output is dictionary with the following keys: 'sparse_mtx', 'barcodes', 'peaks'
Reading sparse matrix for M2...
Output is dictionary with the following keys: 'sparse_mtx', 'barcodes', 'peaks'
Reading sparse matrix for H1...
Output is dictionary with the following keys: 'sparse_mtx', 'barcodes', 'peaks'
Reading sparse matrix for H2...
Output is dictionary with the following keys: 'sparse_mtx', 'barcodes', 'peaks'


In [4]:
# Report the dimensions of each loaded matrix, in load order
for exp_label, loaded in (("M1", m1), ("M2", m2), ("H1", h1), ("H2", h2)):
    print("Shape of {}: {}".format(exp_label, loaded["sparse_mtx"].shape))

Shape of M1: (20000, 757540)
Shape of M2: (20000, 757540)
Shape of H1: (12198, 757540)
Shape of H2: (11754, 757540)


In [5]:
# Combine sparse matrices
samples = [m1, m2, h1, h2]

# vstack only requires equal column counts; it cannot tell whether the columns
# refer to the same peaks. Only one peaks table is kept for the combined
# matrix, so every sample must carry an identical peaks table (same rows, same
# order) or the stacked columns would be mislabelled.
for sample in samples[1:]:
    if not sample["peaks"].equals(m1["peaks"]):
        raise ValueError(
            "Peak tables differ between samples; matrices cannot be stacked column-wise")

# Stack rows in the same order in which the barcodes are concatenated below
combined = vstack([sample["sparse_mtx"] for sample in samples])
combined_barcodes = pd.Series(
    [barcode for sample in samples for barcode in sample["barcodes"]])

# Every row of the combined matrix needs exactly one barcode label
if combined.shape[0] != len(combined_barcodes):
    raise ValueError(
        "Combined matrix has {} rows but {} barcodes were collected".format(
            combined.shape[0], len(combined_barcodes)))

In [6]:
# Write out the output files
path_output = "output/tea_r7_pmat_macs2_p_0_001_tss_added/combined/"
# makedirs with exist_ok=True creates missing parent directories and does not
# fail if the directory already exists, so the cell can be re-run safely.
os.makedirs(path_output, exist_ok=True)

# Barcodes: one prefixed barcode per line, no header or index column
combined_barcodes.to_csv(
    os.path.join(path_output, "barcodes.tsv"),
    sep="\t", header=False, index=False)
# Peaks: the M1 peaks table is written as the column annotation
m1["peaks"].to_csv(
    os.path.join(path_output, "peaks.tsv"),
    sep="\t", header=False, index=False)
# PMAT MTX: combined matrix in Matrix Market format
mmwrite(os.path.join(path_output, "pmat.mtx"), combined)