In [6]:
!pip install sctop

Collecting sctop
  Using cached scTOP-0.0.2-py3-none-any.whl (14 kB)
Installing collected packages: sctop
Successfully installed sctop-0.0.2


In [7]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm
import sctop as top

ModuleNotFoundError: No module named 'sctop'

In [8]:
!pip freeze

alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work
anaconda-client==1.7.2
anaconda-navigator==2.1.1
anaconda-project @ file:///tmp/build/80754af9/anaconda-project_1610472525955/work
anndata==0.7.6
anyio @ file:///opt/concourse/worker/volumes/live/64740ac7-3a9c-4fbb-6685-a51c4ff8b4ca/volume/anyio_1617783319350/work/dist
appdirs==1.4.4
applaunchservices==0.2.1
appnope @ file:///opt/concourse/worker/volumes/live/5f13e5b3-5355-4541-5fc3-f08850c73cf9/volume/appnope_1606859448618/work
appscript @ file:///opt/concourse/worker/volumes/live/82e8b4c7-2416-4d10-509e-144ca79d9b1d/volume/appscript_1611426996703/work
argh==0.26.2
argon2-cffi @ file:///opt/concourse/worker/volumes/live/d733ceb5-7f19-407b-7da7-a386540ab855/volume/argon2-cffi_1613037492998/work
asn1crypto @ file:///tmp/build/80754af9/asn1crypto_1596577642040/work
astroid @ file:///opt/concourse/worker/volumes/live/343a8902-287c-47fb-6db8-923a63364302/volume/astroid_1613500849157/work
astropy @ file:///opt/concourse/wor

# Import data
Load the reference database (a processed version of the [Mouse Cell Atlas](http://bis.zju.edu.cn/MCA/index.html)) and load sample data (mouse lung data taken by Michael Herriges, in the [Kotton Lab](https://www.bumc.bu.edu/kottonlab/)).

In [None]:
# Load the reference basis through sctop; the call returns a pair that is
# unpacked into the basis data and its metadata.
# NOTE(review): the meaning of the second argument (0) is not visible in this
# notebook -- confirm against the sctop documentation.
data_MC20, metadata_MC20 = top.load_basis('MC20-KO22', 0)
# Read the raw sample table from a CSV in the working directory.
# NOTE(review): a later cell looks genes up with `data_herriges.loc[gene]`, so
# gene names presumably have to end up as the row index -- verify whether
# `index_col` is needed for this file.
rawdata_herriges = pd.read_csv('./21_01_06_herriges_with_week6_invivo.csv')

Load the corresponding metadata for the Herriges sample.

In [None]:
metadata_herriges = pd.read_csv('./herriges_metadata.csv')

# Michael's labels for each of the clusters
cluster_key = {'2':'AT1 and AT1-like',
               '4':'Ciliated',
               '6':'Basal',
               '7':'Gastric-like',
               '8':'Neuroendocrine',
               '0+':'AT2',
               '1+13':'Secretory',
               '3+14':'AT2-like'}

# Per-cell cluster id -> human-readable label (raises KeyError on an unknown id).
clusters = metadata_herriges['new_clustering.07.12.2021'].values
cluster_labels = [cluster_key[cluster] for cluster in clusters]
identity = metadata_herriges['orig.ident'].values

# Merge 'AT2' and 'AT2-like' into one group *before* building the numpy array.
# Assigning a longer string into an existing fixed-width unicode array silently
# truncates it, so the merged label would be cut short whenever no label of at
# least the same length is present in the data.
merged_labels = {'AT2': 'AT2 and AT2-like',
                 'AT2-like': 'AT2 and AT2-like'}
type_annotations = np.array([merged_labels.get(label, label)
                             for label in cluster_labels])

# One source label per cell, checked in the order Transplant / Endogenous /
# Control. Cells matching none of the keywords get 'Unknown' instead of being
# skipped, so this list always stays aligned with the per-cell arrays above.
source_keywords = ('Transplant', 'Endogenous', 'Control')
source_annotations = [
    next((keyword for keyword in source_keywords if keyword in label), 'Unknown')
    for label in identity
]

# Process the raw sample data and calculate projection scores

In [None]:
# Run sctop's preprocessing on the raw sample table.
data_herriges = top.process(rawdata_herriges)
# Score the processed sample against the MC20 reference basis. Later cells index
# the result by reference cell-type label (e.g. `.loc['Lung AT1 Cell WK6 (KO22)']`).
projections_herriges = top.score(data_MC20, data_herriges)

# Visualize the projection scores

In [None]:
# Scatter the projection onto one reference cell type against another,
# colouring each cell by its expression of a single gene (Sftpc) and
# choosing the marker from the cell's source annotation.
celltype1 = 'Lung AT1 Cell WK6 (KO22)'
celltype2 = 'Lung AT2 Cell WK6 (KO22)'
gene = 'Sftpc'

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))

scatter_options = dict(
    x=projections_herriges.loc[celltype1],
    y=projections_herriges.loc[celltype2],
    hue=data_herriges.loc[gene],
    style=source_annotations,
    alpha=0.5,
    s=40,
    ax=ax,
)
sns.scatterplot(**scatter_options)
plt.show()

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from matplotlib.cm import ScalarMappable

In [None]:
# Define plotting functions

# Create bar plot of the highest projection scores for a particular sample
def plot_highest(projections, n=10, ax=None, **kwargs):
    """Draw a horizontal bar chart of the ``n`` largest first-column values.

    The rows of ``projections`` are sorted in ascending order by its first
    column and the last ``n`` rows (the largest values) are plotted.

    Parameters
    ----------
    projections : DataFrame-like
        Must provide ``columns``, ``sort_values`` and ``iloc``.
    n : int, default 10
        Number of rows to keep from the end of the sorted table.
    ax : optional
        Axes to draw on; the current pyplot axes are used when omitted.
    **kwargs
        Passed through unchanged to ``.plot.barh``.

    Returns
    -------
    Whatever ``.plot.barh`` returns.
    """
    target_ax = ax or plt.gca()

    first_column = projections.columns[0]
    highest = projections.sort_values(by=first_column).iloc[-n:]
    return highest.plot.barh(ax=target_ax, **kwargs)

# Create scatter plot showing projections of each cell in a tSNE plot, for a given cell type
def plot_tSNE(projections, tSNE_data, cell_type, ax=None, **kwargs):
    """Scatter a 2-D embedding, colouring each point by its projection onto ``cell_type``.

    Parameters
    ----------
    projections : DataFrame-like
        Projection scores; the row selected by ``projections.loc[cell_type]``
        supplies the colour value of each point.
    tSNE_data : 2-D array
        Columns 0 and 1 are used as the x and y coordinates.
    cell_type : label
        Row label to look up in ``projections``; also used in the colour-bar title.
    ax : matplotlib Axes, optional
        Axes to draw on; the current pyplot axes are used when omitted.
    **kwargs
        Passed through unchanged to ``sns.scatterplot``.

    Returns
    -------
    None
    """
    if ax is None:
        ax = plt.gca()

    type_projections = np.array(projections.loc[cell_type]).T

    # Make color bar measuring ``cell_type``.
    # 'rocket_r' is a seaborn colormap name; presumably it is resolvable through
    # matplotlib only because seaborn has been imported -- confirm if seaborn
    # usage changes.
    cmap = plt.get_cmap('rocket_r')
    norm = plt.Normalize(type_projections.min(), type_projections.max())
    scalarmap = ScalarMappable(norm=norm, cmap=cmap)
    scalarmap.set_array([])
    plt.colorbar(scalarmap, ax = ax,
                 label='Projection onto {}'.format(cell_type))

    plot = sns.scatterplot(x = tSNE_data[:,0],
                           y = tSNE_data[:,1],
                           hue = type_projections,
                           palette = cmap,
                           alpha = 0.5,
                           ax = ax,
                           **kwargs
                          )

    # The colour bar replaces the legend. Guard the removal so the call does not
    # fail with AttributeError when no legend was drawn (e.g. legend=False).
    legend = plot.get_legend()
    if legend is not None:
        legend.remove()

# NOTE: another function named ``plot_tSNE`` is defined further down in this
# cell and rebinds that name, which would leave the function above unreachable.
# This alias keeps the per-cell-type plot callable under a distinct name.
plot_tSNE_projection = plot_tSNE

    
# Create scatter plot showing top-projection types for each cell
def plot_tSNE(projections, tSNE_data, minimum_cells, ax=None, **kwargs):
    """Scatter a 2-D embedding, colouring each point by its top-scoring type.

    For every column of ``projections`` the row label with the maximum value
    (``projections.idxmax()``) is taken as that point's type. Types assigned to
    fewer than ``minimum_cells`` points are relabelled ``"Other"``.

    Parameters
    ----------
    projections : DataFrame
        Projection scores; ``idxmax()`` is taken per column.
    tSNE_data : 2-D array
        Columns 0 and 1 are used as the x and y coordinates.
    minimum_cells : int
        Types with a count strictly below this threshold are grouped as "Other".
    ax : matplotlib Axes, optional
        Axes to draw on; the current pyplot axes are used when omitted.
    **kwargs
        Passed through unchanged to ``sns.scatterplot``.

    Returns
    -------
    None
    """
    # NOTE(review): this function shares its name with the per-cell-type
    # ``plot_tSNE`` defined earlier in this cell and replaces it. Consider giving
    # this one a distinct name once all callers have been checked.
    if ax is None:
        ax = plt.gca()

    # Work on an explicit object-dtype copy: the array behind ``Series.values``
    # can be read-only (pandas Copy-on-Write), so it must not be edited in place.
    top_types = np.array(projections.idxmax(), dtype=object)

    unique_types, counts = np.unique(top_types, return_counts=True)
    rare_types = unique_types[counts < minimum_cells]
    top_types[np.isin(top_types, rare_types)] = "Other"

    sns.scatterplot(x = tSNE_data[:,0],
                    y = tSNE_data[:,1],
                    hue = top_types,
                    # palette=['tab:blue', 'tab:orange', 'tab:gray',
                    #          'tab:brown', 'tab:purple', 'tab:red', 'tab:pink'],
                    alpha = 0.5,
                    ax = ax,
                    **kwargs
                   )


In [None]:
# Both steps are stochastic (PCA may use a randomized solver and tSNE uses a
# random initialisation/optimisation), so fix the seed to make the embedding
# reproducible under Restart & Run All.
RANDOM_SEED = 0

# Reduce the transposed sample table to 100 principal components first ...
pca = PCA(n_components=100, random_state=RANDOM_SEED)
PCA_data = pca.fit_transform(data_herriges.T)

# ... then embed those components in two dimensions with tSNE.
tsne = TSNE(n_components=2, perplexity=20, random_state=RANDOM_SEED)
tSNE_data = tsne.fit_transform(PCA_data)