HashSolo assignments for "Benchmarking single-cell hashtag oligo demultiplexing methods", by George Howitt et al., 2022

In [1]:
import numpy as np
import scanpy as sc
import scanpy.external as sce
import pandas as pd
from scipy import io
from scipy import sparse

In [11]:
# Capture names for the BAL data set (two captures per batch).
captures = [
    "batch1_c1",
    "batch1_c2",
    "batch2_c1",
    "batch2_c2",
    "batch3_c1",
    "batch3_c2",
]

In [3]:
def get_hashsolo_calls(capture, priors = [0.01, 0.8, 0.19]):
    """Run HashSolo on one BAL capture and write its classifications to CSV.

    Reads ``data/adata/<capture>_counts.mtx``,
    ``data/adata/<capture>_barcodes.csv`` and
    ``data/adata/<capture[0:6]>_HTOs.csv``, runs ``sce.pp.hashsolo`` and writes
    the resulting ``Classification`` column to
    ``data/adata/<capture>_hashsolo.csv``.

    Parameters
    ----------
    capture : str
        Capture name used to build the input/output file paths. Its first six
        characters (e.g. ``"batch1"`` for ``"batch1_c1"``) select the HTO file.
    priors : list of float
        Prior probabilities forwarded to ``sce.pp.hashsolo``; they must sum
        to 1 within floating-point tolerance. The scanpy documentation gives
        the order as [negative, singlet, doublet].

    Raises
    ------
    AssertionError
        If ``priors`` does not sum to 1.
    """
    # Compare with a tolerance: exact float equality rejects valid priors such
    # as [0.1, 0.7, 0.2], whose binary floating-point sum is not exactly 1.0.
    assert np.isclose(sum(priors), 1.0), "priors must sum to 1"

    #Load in counts
    counts = io.mmread("data/adata/" + capture + "_counts.mtx")
    counts = sparse.csr_matrix(counts)

    #Load in barcodes
    barcodes = pd.read_csv("data/adata/" + capture + "_barcodes.csv")
    HTOs = pd.read_csv("data/adata/" + capture[0:6] + "_HTOs.csv")

    #Create AnnData object
    # The matrix is transposed so that its columns line up with the HTO names.
    adata = sc.AnnData(counts.T)
    adata.var_names = HTOs["HTO"].values

    #HashSolo requires the counts to be in the obs part of the AnnData object
    df = pd.DataFrame.sparse.from_spmatrix(counts.T)
    df.columns = HTOs["HTO"]
    df.index = barcodes["Barcode"]
    adata.obs = df

    #Run HashSolo
    sce.pp.hashsolo(adata, list(HTOs["HTO"]), priors = priors)

    #Write assignments to file
    adata.obs.Classification.to_csv("data/adata/" + capture + "_hashsolo.csv")

In [4]:
# Apply HashSolo to every capture in the BAL data set, one output CSV per capture.
[
    get_hashsolo_calls(capture_name, priors = [0.05, 0.75, 0.2])
    for capture_name in captures
]

  adata = sc.AnnData(counts.T)


Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2
Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2
Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2
Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2
Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2
Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2


[None, None, None, None, None, None]

Cell line data set

In [8]:
# Capture names for the LMO (cell line) data set.
lmo_captures = [
    "lmo_c1",
    "lmo_c2",
    "lmo_c3",
]

In [9]:
def get_hashsolo_calls_LMO(capture, priors = [0.01, 0.8, 0.19]):
    """Run HashSolo on one LMO capture and write its classifications to CSV.

    Reads ``data/adata/<capture>_counts.mtx`` and
    ``data/adata/<capture>_barcodes.csv``, takes the label names from the
    shared ``data/adata/LMO_labels.csv`` file, runs ``sce.pp.hashsolo`` and
    writes the resulting ``Classification`` column to
    ``data/adata/<capture>_hashsolo.csv``.

    Parameters
    ----------
    capture : str
        Capture name used to build the counts, barcodes and output file paths.
    priors : list of float
        Prior probabilities forwarded to ``sce.pp.hashsolo``; they must sum
        to 1 within floating-point tolerance. The scanpy documentation gives
        the order as [negative, singlet, doublet].

    Raises
    ------
    AssertionError
        If ``priors`` does not sum to 1.
    """
    # Compare with a tolerance: exact float equality rejects valid priors such
    # as [0.1, 0.7, 0.2], whose binary floating-point sum is not exactly 1.0.
    assert np.isclose(sum(priors), 1.0), "priors must sum to 1"

    # Load the count matrix for this capture.
    counts = io.mmread("data/adata/" + capture + "_counts.mtx")
    counts = sparse.csr_matrix(counts)

    # Barcodes are per capture; the label names are shared by all LMO captures.
    barcodes = pd.read_csv("data/adata/" + capture + "_barcodes.csv")
    HTOs = pd.read_csv("data/adata/LMO_labels.csv")

    # The matrix is transposed so that its columns line up with the label names.
    adata = sc.AnnData(counts.T)
    adata.var_names = HTOs["HTO"].values

    # HashSolo requires the counts to be in the obs part of the AnnData object.
    df = pd.DataFrame.sparse.from_spmatrix(counts.T)
    df.columns = HTOs["HTO"]
    df.index = barcodes["Barcode"]
    adata.obs = df

    sce.pp.hashsolo(adata, list(HTOs["HTO"]), priors = priors)

    # Write the per-barcode assignments to file.
    adata.obs.Classification.to_csv("data/adata/" + capture + "_hashsolo.csv")

In [10]:
# Apply HashSolo to every LMO capture, one output CSV per capture.
[
    get_hashsolo_calls_LMO(capture_name, priors = [0.05, 0.75, 0.2])
    for capture_name in lmo_captures
]

  adata = sc.AnnData(counts.T)


Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2
Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2
Please cite HashSolo paper:
https://www.cell.com/cell-systems/fulltext/S2405-4712(20)30195-2


[None, None, None]