In [1]:
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from scipy.sparse import coo_matrix

## Assign guides based on a UMI cutoff
A guide is assigned to a cell when its UMI count in that cell is at least the cutoff. This is simpler and faster than previous approaches.

In [2]:
import pandas as pd
from scipy.sparse import coo_matrix
import scanpy as sc
from pathlib import Path

def assign_guides(path: Path, donor: str, umi_thresh: "float | list[float]"):
    """Assign CRISPR guides to cells whose guide UMI count meets a cutoff.

    Reads a 10x h5 matrix, keeps the features whose ``feature_types`` equals
    ``"CRISPR Guide Capture"`` and reports every (cell, guide) pair whose
    stored count is greater than or equal to the cutoff.

    Parameters
    ----------
    path : Path or str
        Location of the h5 file; a leading ``~`` is expanded.
    donor : str
        Label written to the ``donor`` column and prefixed (with ``_``) to
        each barcode to build ``cell_id``.
    umi_thresh : number, or list/tuple of numbers
        Inclusive minimum UMI count. A single number produces a one-element
        result list; a list/tuple produces one DataFrame per threshold, in
        the order given.

    Returns
    -------
    list of pandas.DataFrame
        One frame per threshold with the columns
        ``guide, donor, UMI, cell_barcode, cell_id``. A cell appears once per
        guide that passes the cutoff, so cells can occur on several rows.
    """
    # Read h5 file (gex_only=False keeps the non-expression features too).
    filtered_matrix = sc.read_10x_h5(str(Path(path).expanduser()), gex_only=False)
    filtered_matrix.var_names_make_unique()

    # Get CRISPR counts and convert to COO so (row, col, value) triplets are exposed.
    is_guide = filtered_matrix.var.feature_types == "CRISPR Guide Capture"
    crispr = filtered_matrix[:, is_guide].copy()
    X_coo = coo_matrix(crispr.X)

    # Positional lookup tables, built once and shared by every threshold.
    barcodes = crispr.obs.index.to_numpy()
    guide_ids = crispr.var['gene_ids'].to_numpy()

    # A bare number is treated as a single threshold so existing callers
    # (which index the result with [0]) keep working.
    if isinstance(umi_thresh, (list, tuple)):
        thresholds = list(umi_thresh)
    else:
        thresholds = [umi_thresh]

    dfs = []
    for thresh in thresholds:
        # Boolean mask over the stored entries; keeps the COO entry order.
        keep = X_coo.data >= thresh
        df = pd.DataFrame({
            'cell_barcode': barcodes[X_coo.row[keep]],
            'guide': guide_ids[X_coo.col[keep]],
            'UMI': X_coo.data[keep],
        })
        df['donor'] = donor
        df['cell_id'] = df['donor'] + '_' + df['cell_barcode']
        dfs.append(df[['guide', 'donor', 'UMI', 'cell_barcode', 'cell_id']])

    return dfs

Assign guides for each donor/lane for both CRISPRa and CRISPRi, and save one CSV per modality and UMI threshold.

### Primary Macrophages

In [2]:
# Root directory holding the `data/CellRanger_output/` tree; "~" is expanded
# to the user's home directory when the files are read.
prefix = "~"

In [3]:
# Minimum guide UMI counts to evaluate; one CSV per modality is written for each value.
umi_threshold_list = [3,5,10,15,20,30,40]

In [4]:
# Sample (donor-lane) directories, grouped by CRISPR modality.
all_data = {'CRISPRi':['D1-1i', 'D1-2i', 'D1-3i', 'D1-4i', 'D2-1i', 'D2-2i', 'D2-3i'],
            'CRISPRa':['D1-5a', 'D1-6a', 'D1-7a', 'D1-8a', 'D2-4a', 'D2-5a', 'D2-6a']}

# modality -> assignments for the LAST threshold in `umi_threshold_list`
# (each threshold overwrites the previous one; kept for backward compatibility).
all_assignments = {}
# (modality, threshold) -> assignments, so no threshold is lost.
all_assignments_by_threshold = {}

if umi_threshold_list:
    cellranger_root = Path(prefix + "/data/CellRanger_output")
    # Pairs passing a stricter cutoff are a subset of those passing the
    # loosest one, so each h5 file is read once and filtered afterwards
    # instead of being re-read for every threshold.
    loosest_threshold = min(umi_threshold_list)

    for crispr, donors in all_data.items():
        per_donor = [
            assign_guides(
                cellranger_root / crispr / donor / "filtered_feature_bc_matrix.h5",
                donor,
                loosest_threshold,
            )[0]
            for donor in donors
        ]
        pooled = pd.concat(per_donor, ignore_index=True)

        for umi_threshold in umi_threshold_list:
            passing = pooled[pooled['UMI'] >= umi_threshold]

            # Keep one guide per cell: the one with the highest UMI count.
            # Cells with several passing guides are NOT discarded; use
            # keep=False in drop_duplicates to keep only single-guide cells.
            # Guides tied on UMI within a cell are resolved by sort order.
            result = passing.sort_values(['cell_id', 'UMI'], ascending=False)
            result = result.drop_duplicates(subset=['cell_id'], keep='first')

            # Save to CSV for use in Seurat.
            result.to_csv(crispr + "_" + str(umi_threshold) + "umi.csv", index=False)
            all_assignments[crispr] = result
            all_assignments_by_threshold[(crispr, umi_threshold)] = result

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
 