## Metacell Example Notebook

Modified to accept an annotated data (AnnData) object. For RNA, the kernel is built on `X_pca`; for ATAC, it is built on `X_svd`.

Access a DataFrame of metacell assignments using `graph_model.get_labels()`.

In [32]:
import numpy as np
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
sns.set()

import scanpy as sc
import unidip.dip as dip
import palantir 

from tqdm import tqdm

In [33]:
# Directory holding the input .h5ad files. The trailing slash matters: file
# names are appended to this string by plain concatenation further down.
base_dir = "/Users/sitarapersad/metacells/metacells_data/"

# Files (relative to base_dir) to run through the metacell pipeline. A name
# containing 'atac' is processed on X_svd; anything else on X_pca.
to_process = [
    "cd34_multiome_rna_no_bcells.h5ad",
]

In [34]:
def log_transform(X, ps=0.1):
    """Return the pseudocount-shifted log2 of ``X``.

    Computes ``log2(X + ps) - log2(ps)``, so an input value of 0 maps to 0.

    Parameters
    ----------
    X : scalar or array
        Values to transform; must support ``X + ps`` and ``np.log2``.
    ps : float, default 0.1
        Pseudocount added before taking the logarithm.

    Returns
    -------
    Same shape as ``X``: the shifted log2 values.
    """
    shifted_log = np.log2(X + ps)
    # Subtract the log of the pseudocount so that zeros stay at zero.
    baseline = np.log2(ps)
    return shifted_log - baseline


In [35]:
from importlib import reload
import build_graph # script for building shared NN graph
import metacells_ad as metacells # script for finding metacells

# Re-import the metacells module so edits to the script are picked up
# without restarting the kernel.
reload(metacells)

# Fitted Metacells model for each processed file, keyed by file name.
results_dict = {}

for file in to_process:
    ad = sc.read(base_dir + file)

    # Representation the kernel is built on: X_svd for ATAC, X_pca otherwise.
    build_on = "X_svd" if 'atac' in file else "X_pca"

    # Grab Tanay labels as well.
    # Strip only the final extension: joining the pieces of split('.') would
    # also remove any other dots in the name (and yield '' for a name with no
    # extension at all).
    file_stem = file.rsplit('.', 1)[0]
    tanay_dir = base_dir + file_stem + '_tanay/'

    print('Reading tanay files from ',tanay_dir)

    # Read in the metacells and assignments from Tanay method
    assmts = pd.read_csv(tanay_dir+'metacells.txt', sep=" ")

    label_df = assmts.rename(columns = {'x':'tanay_labels'})
    label_df.index = label_df.index.astype(str)

    # Use the same number of metacells as the Tanay method produced.
    n_metacells = len(label_df['tanay_labels'].unique())

    graph_model = metacells.Metacells(ad, build_on, n_metacells=n_metacells)
    graph_model.fit()

    # Keep every fitted model; `graph_model` alone only retains the last one.
    results_dict[file] = graph_model
    

Reading tanay files from  /Users/sitarapersad/metacells/metacells_data/cd34_multiome_rna_no_bcells_tanay/
Building kernel...
Computing kNN graph...
Computing radius for adaptive bandwidth kernel...


HBox(children=(IntProgress(value=0, max=6881), HTML(value='')))


Making graph symmetric...
Computing RBF kernel...


HBox(children=(IntProgress(value=0, max=6881), HTML(value='')))


Building similarity LIL matrix...


HBox(children=(IntProgress(value=0, max=6881), HTML(value='')))


Constructing CSR matrix...
Initializing residual matrix using greedy column selection
Initializing f and g...


HBox(children=(IntProgress(value=0, max=65), HTML(value='')))


Starting iteration 1 of 8
Completed iteration 1 of 8.
Starting iteration 2 of 8
Completed iteration 2 of 8.
Starting iteration 3 of 8
Completed iteration 3 of 8.
Starting iteration 4 of 8
Completed iteration 4 of 8.
Starting iteration 5 of 8
Completed iteration 5 of 8.
Starting iteration 6 of 8
Completed iteration 6 of 8.
Starting iteration 7 of 8
Completed iteration 7 of 8.
Starting iteration 8 of 8
Completed iteration 8 of 8.


In [36]:
# Show the metacell assignments from the fitted model: one row per cell
# barcode, with the assigned metacell in the `metacell_ID` column.
graph_model.get_labels()

Unnamed: 0,metacell_ID
cd34_multiome_rep1#AAACAGCCACTCGCTC-1,cd34_multiome_rep2#TTGCGTCTCCGGTATG-1
cd34_multiome_rep1#AAACAGCCACTGACCG-1,cd34_multiome_rep2#TGTGATCAGTAACTCA-1
cd34_multiome_rep1#AAACAGCCATAATCAC-1,cd34_multiome_rep2#TTGCTCTCAGACAAAC-1
cd34_multiome_rep1#AAACATGCAAATTCGT-1,cd34_multiome_rep2#TTGCTCTCAGACAAAC-1
cd34_multiome_rep1#AAACATGCAGCATGTC-1,cd34_multiome_rep2#TCTCCTCGTACGATTG-1
...,...
cd34_multiome_rep2#TTTGTCTAGGGCCACT-1,cd34_multiome_rep2#GCCCAAATCAAGCGCC-1
cd34_multiome_rep2#TTTGTGAAGGAAGCAC-1,cd34_multiome_rep2#TTGCTCTCAGACAAAC-1
cd34_multiome_rep2#TTTGTGAAGTAACCAC-1,cd34_multiome_rep2#TTAGCAATCATAACCA-1
cd34_multiome_rep2#TTTGTGGCATACTCCT-1,cd34_multiome_rep2#TTCGTGCTCGTTAGCG-1
