In [35]:
import pandas as pd
import scipy.io as spio
import numpy as np
import scanpy as sc
import mudata as md
# Directory the raw matrix / barcode / feature files are read from.
# The trailing slash is required: later cells build each path by plain
# string concatenation (file_loc + "<file name>").

file_loc = "../Hao2021/Data/GSE164378_RAW/"

In [57]:
# Load the three modalities (RNA, ADT, HTO). Each modality consists of:
#   * a Matrix Market file, converted from COO to a compressed sparse format
#   * a barcode table -- index-only DataFrame, one row per barcode
#   * a feature table -- index-only DataFrame built from the first column
# The matrices are stored features x barcodes (checked by the assertions at
# the end of this cell); they are transposed when the AnnData objects are built.

# --- RNA ---------------------------------------------------------------
RNA_matrix = spio.mmread(file_loc + "GSM5008737_RNA_3P-matrix.mtx.gz").tocsr()

# Fix: the RNA barcodes used to be read from the HTO barcode file
# (GSM5008739_HTO_...), which looks like a copy-paste slip. Read the
# RNA modality's own barcode file instead.
RNA_barcodes = pd.read_csv(
    file_loc + "GSM5008737_RNA_3P-barcodes.tsv.gz",
    sep="\t", header=None, index_col=0, names=["barcode"],
)
RNA_features = pd.read_csv(
    file_loc + "GSM5008737_RNA_3P-features.tsv.gz",
    sep="\t", header=None, usecols=[0], index_col=0, names=["GeneID"],
)

# --- ADT ---------------------------------------------------------------
# Fix: the module is imported as `spio`; the bare name `scipy` is never
# bound in this notebook, so `scipy.io.mmread` raised NameError.
ADT_matrix = spio.mmread(file_loc + "GSM5008738_ADT_3P-matrix.mtx.gz").tocsr()
ADT_barcodes = pd.read_csv(
    file_loc + "GSM5008738_ADT_3P-barcodes.tsv.gz",
    sep="\t", header=None, index_col=0, names=["barcode"],
)
ADT_features = pd.read_csv(
    file_loc + "GSM5008738_ADT_3P-features.tsv.gz",
    sep="\t", header=None, usecols=[0], index_col=0, names=["ProteinID"],
)

# --- HTO ---------------------------------------------------------------
# NOTE(review): HTO is kept in CSC while RNA/ADT use CSR, exactly as before;
# confirm whether the difference is intentional.
HTO_matrix = spio.mmread(file_loc + "GSM5008739_HTO_3P-matrix.mtx.gz").tocsc()
HTO_barcodes = pd.read_csv(
    file_loc + "GSM5008739_HTO_3P-barcodes.tsv.gz",
    sep="\t", header=None, index_col=0, names=["barcode"],
)
# NOTE(review): "BathID" is probably a typo for "BatchID". It is left as-is
# because it becomes the stored index name that downstream code may rely on.
HTO_features = pd.read_csv(
    file_loc + "GSM5008739_HTO_3P-features.tsv.gz",
    sep="\t", header=None, usecols=[0], index_col=0, names=["BathID"],
)

# Fail early if a matrix does not line up with its feature/barcode tables
# (e.g. when a file from the wrong modality was picked up).
for _label, _matrix, _features, _barcodes in (
    ("RNA", RNA_matrix, RNA_features, RNA_barcodes),
    ("ADT", ADT_matrix, ADT_features, ADT_barcodes),
    ("HTO", HTO_matrix, HTO_features, HTO_barcodes),
):
    assert _matrix.shape == (len(_features), len(_barcodes)), (
        f"{_label}: matrix shape {_matrix.shape} does not match "
        f"{len(_features)} features x {len(_barcodes)} barcodes"
    )

In [None]:
def _to_float32_anndata(matrix, barcodes, features):
    """Wrap one modality in an AnnData object (barcodes x features).

    Parameters
    ----------
    matrix : scipy sparse matrix, features x barcodes
        Count matrix as loaded from the Matrix Market file.
    barcodes : pandas.DataFrame
        Used as ``obs``; one row per barcode.
    features : pandas.DataFrame
        Used as ``var``; one row per feature.

    Returns
    -------
    AnnData whose ``X`` is a float32 CSR matrix.
    """
    # The `dtype=` constructor argument is deprecated in anndata, so the cast
    # is done explicitly here; the resulting X is float32 as before.
    # The transpose flips CSR <-> CSC, hence the final tocsr() so that every
    # modality ends up in the same barcode-major layout.
    X = matrix.T.astype("float32").tocsr()
    return sc.AnnData(X=X, obs=barcodes, var=features)


adata = _to_float32_anndata(RNA_matrix, RNA_barcodes, RNA_features)

adata_ADT = _to_float32_anndata(ADT_matrix, ADT_barcodes, ADT_features)

adata_HTO = _to_float32_anndata(HTO_matrix, HTO_barcodes, HTO_features)

In [96]:
# Some ADT (protein) feature names are identical to RNA gene names.
# Give the ADT copies an "_AB" suffix so that variable names do not clash
# between the two modalities.

# Fix: `adata_ADT.var_names_make_unique` was written without parentheses,
# which only looks the method up and never runs it. Call it, so duplicates
# inside the ADT modality itself are resolved before the comparison below.
adata_ADT.var_names_make_unique()

# Names present in both modalities (sorted for a deterministic order).
intersecting_vars = sorted(set(adata.var_names) & set(adata_ADT.var_names))

# Mapping old name -> new name for the clashing ADT features.
new_var_names = {var: f"{var}_AB" for var in intersecting_vars}

# Rename everything in one step through the `var_names` setter instead of
# mutating `adata_ADT.var` in place once per name. The index name is carried
# over explicitly so it is not lost by the reassignment.
adata_ADT.var_names = pd.Index(
    [new_var_names.get(name, name) for name in adata_ADT.var_names],
    name=adata_ADT.var_names.name,
)

# Re-running this cell is harmless: after the rename the intersection is empty.
assert not set(adata.var_names) & set(adata_ADT.var_names), (
    "RNA and ADT still share variable names after renaming"
)

In [97]:
# Bundle the three per-modality AnnData objects into one MuData container,
# keyed by modality name.
mdata = md.MuData(dict(RNA=adata, ADT=adata_ADT, HTO=adata_HTO))

In [98]:
# Persist the combined multimodal object as an .h5mu file
# (`MuData.write` is an alias of `write_h5mu`).
mdata.write_h5mu("../Hao2021/Data/Hao2021_3P.h5mu")