In [None]:
import scanpy as sc
#print(sc.__version__) == 1.8.1
import anndata as ad
#print(ad.__version__) 0.7.8
import igraph as ig
#print(ig.__version__) == 0.9.9
import leidenalg as lg
#print(lg.__version__) == 0.8.3

import scanpy.external as sce
import harmonypy as hp

import pandas as pd
import numpy as np

import scvi

import matplotlib.pyplot as plt
from matplotlib import rcParams
import matplotlib.font_manager
from matplotlib.pyplot import rc_context

In [None]:
# Type II dataset: read from a 10x Genomics matrix-market (MTX) directory.
typeII = sc.read_10x_mtx("scRNA-seq-MTX/")
# INP dataset: received from the Cai Lab, processed using the data analysis
# outlined in https://doi.org/10.1016/j.celrep.2021.109039
INP = scvi.data.read_h5ad('INP.h5ad')
    

# Harmonize Type II & INP Data

In [None]:
# Keep an untouched copy of the matrix in the "counts" layer before X is
# normalised in place below.
typeII.layers["counts"] = typeII.X.copy()
# Scale every cell to a total of 1e6; the per-cell factor is stored under
# obs["size_factor"].
sc.pp.normalize_total(typeII, target_sum=1e6, key_added="size_factor", layers=None)
# log(1 + x) transform of the normalised values.
sc.pp.log1p(typeII)
# 50-component PCA followed by a 20-nearest-neighbour graph, both seeded for
# reproducibility.
sc.tl.pca(typeII, svd_solver='arpack', n_comps=50, random_state=0) 
sc.pp.neighbors(typeII, n_neighbors=20, random_state=0)

In [None]:
# PCA on the INP dataset alone, mirroring what was done for typeII.
sc.tl.pca(INP, svd_solver='arpack', n_comps=50, random_state=0)
# Stack both datasets into one AnnData; the `batch` obs column used below is
# presumably the one added by `concatenate` to record dataset of origin.
typeII_complete = typeII.concatenate(INP)
# Select 2000 highly variable genes on the combined object and recompute the
# PCA restricted to them.
sc.pp.highly_variable_genes(typeII_complete, flavor="cell_ranger", n_top_genes=2000)
sc.tl.pca(typeII_complete, svd_solver='arpack', n_comps=50, random_state=0, use_highly_variable = True) 
# Harmony batch correction keyed on `batch`; the corrected embedding is read
# back below as obsm["X_pca_harmony"].
sce.pp.harmony_integrate(typeII_complete, key = 'batch', max_iter_harmony = 20)
# Neighbour graph built on the Harmony-corrected embedding rather than raw PCA.
sc.pp.neighbors(typeII_complete, n_neighbors=20, random_state=0, use_rep = "X_pca_harmony")

# Visualize Marker Genes throughout Lineage

In [None]:
# UMAP embedding of the integrated data (spectral initialisation, fixed seed).
sc.tl.umap(typeII_complete, random_state=0, init_pos="spectral")
# Leiden clustering at resolution 1; later cells read the labels from
# obs['leiden'] (e.g. clusters '14' and '1').
sc.tl.leiden(typeII_complete, resolution=1, random_state=0)

In [None]:
# High-resolution PNG output for every panel saved from this cell.
sc.set_figure_params(dpi=450, dpi_save=450, figsize=(5, 5), format='png')

# Options common to all single-panel UMAPs below: one column, do not display
# inline, only write to disk.
umap_panel_kwargs = dict(ncols=1, show=False)

# Figure 1B: UMAP coloured by the `batch` column.
name = "paperFig/fig1_batch"
sc.pl.umap(typeII_complete, color='batch', legend_loc=None, save=name, **umap_panel_kwargs)

# Figure 1C: one UMAP per marker gene, all on the same 0-8 colour scale so
# panels are directly comparable; missing values are drawn in black.
genes = ['dpn', 'pnt', 'Sp1', 'opa', 'dap', 'Hey', 'nSyb', 'repo']
for gene in genes:
    name = f"paperFig/fig1_{gene}"
    sc.pl.umap(
        typeII_complete,
        color=gene,
        na_color='Black',
        vmin=0,
        vmax=8,
        legend_loc=None,
        save=name,
        **umap_panel_kwargs,
    )

# Figure 1E: Leiden clusters with their labels drawn on the embedding.
name = "paperFig/fig1_leiden"
sc.pl.umap(typeII_complete, color='leiden', legend_loc='on data', save=name, **umap_panel_kwargs)

# Pseudotime Analysis

In [None]:
# Root cell for diffusion pseudotime - chosen as a dpn+, pnt+, DsRed+ cell.
root_barcode = 'CATTCTAAGCAACTTC-1-0'

# Look the root up by barcode and use that position everywhere below, instead
# of a separately hard-coded integer (previously 2038) that would silently
# point at a different cell if upstream filtering or ordering ever changed.
root_position = typeII_complete.obs.index.tolist().index(root_barcode)
print(root_position)

# Per-cell flag column: '1' for the root cell, '0' for every other cell.
# Built as a whole array and assigned once; writing a single element through
# chained indexing (obs['iroot'][i] = ...) is not guaranteed by pandas to
# modify the underlying frame.
iroot_flags = np.full(typeII_complete.n_obs, '0', dtype=object)
iroot_flags[root_position] = '1'
typeII_complete.obs['iroot'] = iroot_flags

# sc.tl.dpt reads the root cell's integer position from uns['iroot'].
typeII_complete.uns['iroot'] = root_position

sc.tl.diffmap(typeII_complete, random_state=0)
sc.tl.dpt(typeII_complete)

# Reclustering of NBs & immINPs

In [None]:
# Work on an explicit copy of Leiden cluster 14 so that the writes below go to
# a real AnnData object rather than to a view of typeII_complete.
recluster_14 = typeII_complete[typeII_complete.obs['leiden'].isin(['14'])].copy()

# Sub-cluster within cluster 14; this overwrites obs['leiden'] on the subset
# only (the parent object keeps its original labels).
sc.tl.leiden(recluster_14, resolution=0.3, random_state=0)

# Sub-cluster '0' is labelled as neuroblasts, every other sub-cluster as
# immature INPs. Assigned in one vectorised step: the previous per-row loop
# iterated over an undefined `NBs_plot` object and wrote through chained
# indexing. The column name (including its spelling) is kept because later
# cells look it up by this exact key.
recluster_14.obs['seperation'] = np.where(
    recluster_14.obs['leiden'] == '0', 'NBs', 'immINP'
)

# Fresh PCA / neighbour graph / UMAP computed on the subset alone, used for
# the zoomed-in figure panels.
sc.tl.pca(recluster_14, svd_solver='arpack', random_state=0)
sc.pp.neighbors(recluster_14, random_state=0)
sc.tl.umap(recluster_14, random_state=0, min_dist=1)

# Calculate QC Metrics

In [None]:
# Flag mitochondrial genes by their "mt:" name prefix.
typeII_complete.var['mt'] = typeII_complete.var_names.str.startswith("mt:")

# Per-cell QC metrics, written into obs/var in place.
# NOTE(review): these are computed from typeII_complete.X, which for the
# typeII cells was normalised and log-transformed earlier - confirm that this
# is the intended input rather than raw counts.
sc.pp.calculate_qc_metrics(
    typeII_complete,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

# One wide violin plot per metric, grouped by Leiden cluster; each is saved
# under paperFig/ using the metric's own name.
for qc_metric in ('n_genes_by_counts', 'pct_counts_mt'):
    with rc_context({'figure.figsize': (10, 5)}):
        sc.pl.violin(
            typeII_complete,
            [qc_metric],
            groupby='leiden',
            stripplot=False,
            inner='box',
            save=f"paperFig/{qc_metric}",
        )
    

# Visualize Marker Genes in NBs & immINPs

In [None]:
sc.set_figure_params(dpi=450, dpi_save=450, figsize=(5, 5), format='png')

# Options shared by every zoomed-in UMAP of the cluster-14 subset.
zoom_panel_kwargs = dict(ncols=1, legend_loc=None, show=False)

# Figure 1D top: diffusion pseudotime on the subset (reversed magma, automatic
# colour limits).
genes = ['dpt_pseudotime']
for gene in genes:
    name = f"paperFig/fig1_zoom_{gene}"
    print(name)
    sc.pl.umap(recluster_14, color=gene, color_map="magma_r", save=name, **zoom_panel_kwargs)

# Figure 1E top, then Figure 2A: marker-gene expression on a fixed 0-8 scale.
# NOTE(review): both gene lists save under the "paperFig/fig1_14_" prefix, so
# the Figure 2A pass rewrites the 'dpn' panel and files 'fru' under a fig1
# name - confirm whether a fig2 prefix was intended.
for genes in (['dpn', 'erm', 'ase', 'ham'], ['dpn', 'fru']):
    for gene in genes:
        name = f"paperFig/fig1_14_{gene}"
        print(name)
        sc.pl.umap(
            recluster_14,
            color=gene,
            color_map="magma",
            vmin=0,
            vmax=8,
            save=name,
            **zoom_panel_kwargs,
        )

# Visualize Marker Genes in INPs

In [None]:
# Subset of cells in Leiden cluster '1'; used for plotting only.
in_cluster_1 = typeII_complete.obs['leiden'].isin(['1'])
recluster_1 = typeII_complete[in_cluster_1]

sc.set_figure_params(dpi=450, dpi_save=450, figsize=(5, 5), format='png')

# Options shared by every UMAP panel of the cluster-1 subset.
inp_panel_kwargs = dict(ncols=1, legend_loc=None, show=False)

# Figure 1D top: diffusion pseudotime (reversed magma, automatic limits).
genes = ['dpt_pseudotime']
for gene in genes:
    name = f"paperFig/fig1_1_{gene}"
    print(name)
    sc.pl.umap(recluster_1, color=gene, color_map="magma_r", save=name, **inp_panel_kwargs)

# Figure 1E bottom: marker genes on a fixed 0-8 colour scale.
genes = ['D', 'hbn', 'ey', 'scro']
for gene in genes:
    name = f"paperFig/fig1_1_{gene}"
    print(name)
    sc.pl.umap(
        recluster_1,
        color=gene,
        color_map="magma",
        vmin=0,
        vmax=8,
        save=name,
        **inp_panel_kwargs,
    )

# Annotate Clusters

In [None]:
# Three rounds of restricted Leiden clustering; each round re-clusters only
# the named group and stores the refined labels under a new obs key, leaving
# all other cells' labels as they were.
# Round 1: split cluster '14' -> obs['sep'].
sc.tl.leiden(typeII_complete, restrict_to = ['leiden', ['14']], resolution=0.2, random_state=0, key_added = 'sep')
# Round 2: split sub-cluster '14,1' again -> obs['sep_2'].
sc.tl.leiden(typeII_complete, restrict_to = ['sep', ['14,1']], resolution=0.295, random_state=0, key_added = 'sep_2')
# Round 3: split cluster '1' -> obs['sep_3'], the labelling used for the
# annotation that follows.
sc.tl.leiden(typeII_complete, restrict_to = ['sep_2', ['1']], resolution=0.5, random_state=0, key_added = 'sep_3')

In [None]:
# Cell-type annotation for each refined cluster label in obs['sep_3'].
# Any label not listed here is annotated as 'neuron'.
sep3_to_label = {
    '14,1,1': 'type II NB',
    '14,1,0': 'Ase- imm INP',
    '14,0': 'Ase+ imm INP',
    '1,0': 'Ey+ INP',
    '1,1': 'D+ INP',
    '1,2': 'Ey+ INP',
    '1,3': 'D+ INP',
    '1,4': 'D+ INP',
    '1,5': 'Ey+ INP',
    '0': 'GMC',
    '20': 'GMC',
    '17': 'low quality',
    '4': 'low quality',
    '24': 'glia',
    '18': 'glia',
}

# Vectorised lookup instead of a per-cell loop writing through chained
# indexing (slow, and not guaranteed by pandas to modify the frame).
# Stored as categorical because `restrict_to` below looks up the column's
# categories.
sep3_labels = typeII_complete.obs['sep_3'].astype(str)
typeII_complete.obs['relabeled_temp'] = (
    sep3_labels.map(sep3_to_label).fillna('neuron').astype('category')
)

# Split the 'neuron' group once more; the refined labels land in obs['sep4'].
sc.tl.leiden(typeII_complete, restrict_to = ['relabeled_temp', ['neuron']], resolution=0.2, random_state=0, key_added = 'sep4')

# Final annotation: neuron sub-cluster 'neuron,0' becomes 'imm neuron', every
# other cell keeps its label from relabeled_temp.
is_imm_neuron = typeII_complete.obs['sep4'].astype(str) == 'neuron,0'
typeII_complete.obs['relabeled'] = (
    typeII_complete.obs['relabeled_temp']
    .astype(str)
    .mask(is_imm_neuron, 'imm neuron')
    .astype('category')
)

# One RGB colour per annotation category.
relabeled_palette = [(0.5529411764705883, 0.8274509803921568, 0.7803921568627451),
                     (0.2196078431372549, 0.4235294117647059, 0.6901960784313725),
                     (0.7019607843137254, 0.8705882352941177, 0.4117647058823529),
                     (0.8, 0.9215686274509803, 0.7725490196078432),
                     (0.5019607843137255, 0.6941176470588235, 0.8274509803921568),
                     (0.9921568627450981, 0.7058823529411765, 0.3843137254901961),
                     (0.984313725490196, 0.5019607843137255, 0.4470588235294118),
                     (0.7372549019607844, 0.5019607843137255, 0.7411764705882353),
                     (0.9882352941176471, 0.803921568627451, 0.8980392156862745),
                     (1.0, 0.9294117647058824, 0.43529411764705883)]

#Figure 1F
# NOTE(review): this save name lacks the "paperFig/" prefix used by the other
# figures and is spelled "annotatad"; left unchanged so the output file name
# stays the same - confirm the intended path.
name = "paperFig1_annotatad"
sc.set_figure_params(dpi=450,dpi_save=450, figsize=(5,5), format='png')
# Plots typeII_complete: the previously referenced `typeII_complete_core` is
# never defined in this notebook and raised NameError on a fresh kernel.
# NOTE(review): if a filtered subset was intended, define it explicitly in
# this cell before plotting.
sc.pl.umap(typeII_complete, color=['relabeled'], palette=relabeled_palette, save = name)

# Get Gene Lists for Figure 3 Data & Visualize

In [None]:
# Differential expression between the NB / immINP groups of the cluster-14
# subset.
groups_to_rank = [str(x) for x in recluster_14.obs["seperation"].unique()]
# `groups` is the documented parameter for choosing which groups to test;
# `groups_to_rank` is not a rank_genes_groups argument and was only swallowed
# by its catch-all keyword arguments.
sc.tl.rank_genes_groups(recluster_14, 'seperation', groups=groups_to_rank, method='t-test_overestim_var')

# Results for the 'NBs' group as a DataFrame.
dedf = sc.get.rank_genes_groups_df(recluster_14, group="NBs")
# sort_values returns a new frame; keep it so the exported table is actually
# ordered by log fold change (ascending).
dedf = dedf.sort_values(by=['logfoldchanges'])
dedf.to_csv('data/supplemental_Table_2.csv')

In [None]:
import csv


def _read_gene_list(path):
    """Return the first row of the CSV file at `path` as a list of gene names.

    The file is opened in a `with` block so the handle is closed as soon as
    the row has been read. Raises StopIteration if the file has no rows.
    """
    with open(path, 'r', newline='') as handle:
        return next(csv.reader(handle))


#Gene lists calculated from annotated peak sets & single cell gene-lists
fru_NB_genes = _read_gene_list('fru_NB_genes.csv')
fru_immINP_genes = _read_gene_list('fru_immINP_genes.csv')
fru_invariant_genes = _read_gene_list('fru_invariant_genes.csv')

# Score name -> gene list; each score is written to obs under its score name.
fru_gene_sets = {
    'fru_NB_genes': fru_NB_genes,
    'fru_immINP_genes': fru_immINP_genes,
    'fru_invariant_genes': fru_invariant_genes,
}

# The control set is sized to match each gene list; fixed seed for
# reproducible control sampling.
for score_name, gene_list in fru_gene_sets.items():
    sc.tl.score_genes(
        typeII_complete,
        gene_list,
        ctrl_size=len(gene_list),
        n_bins=25,
        score_name=score_name,
        random_state=0,
        copy=False,
        use_raw=None,
    )

sc.set_figure_params(dpi=450,dpi_save=450, figsize=(5,5), format='png')

#Figure 3D: one UMAP per gene-set score on a shared 0-3 colour scale.
categorys = ['fru_NB_genes', 'fru_immINP_genes', 'fru_invariant_genes']
for cat in categorys:
    name = "paperFig/fig3_" + cat
    sc.pl.umap(typeII_complete, color = cat, na_color = 'Black', ncols=1, vmin = 0, vmax = 3, legend_loc=None, show=False, save = name)