# !! This notebook should be run from within the SAMap docker container !!

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
from scipy import sparse
import anndata as ad
import gc

from tqdm import tqdm

import scanpy as sc

from samalg import SAM

In [None]:
# Colormap shared by every dot plot below: the 'Reds' map sampled at 100
# positions that are squared (linspace(0, 1, 100) ** 2), so the samples are
# concentrated towards the low end of the original map.
_redsCmap = plt.get_cmap('Reds')
_samplePositions = np.linspace(0, 1, 100) ** 2
custom_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
    'custom', _redsCmap(_samplePositions)
)

In [None]:
# M. lignano marker panel. Each row pairs a gene ID (plotted column) with the
# label drawn on the x axis; keeping both in one table keeps them in lockstep.
_markerTable = [
    ('Mlig455_065963', 'ctsa'),
    ('Mlig455_058455', 'foxf2'),
    ('Mlig455_019041', 'ets2'),
    ('Mlig455_035842', 'hnf4'),
    ('Mlig455_010977', 'apob'),
    ('Mlig455_001873', 'gata4'),
    ('Mlig455_063414', 'nkx2-2'),
    ('Mlig455_024622', 'mat2a'),
    ('Mlig455_055907', 'boll'),
    ('Mlig455_012539', 'dlx1'),
    ('Mlig455_054167', 'cabp7-like'),
    ('Mlig455_001418', 'nanos1'),
    ('Mlig455_058841', 'piwi'),
    ('Mlig455_065488', 'elavl4'),
    ('Mlig455_017161', 'sperm1'),
    ('Mlig455_059117', 'fos'),
    ('Mlig455_061703', 'hes1'),
    ('Mlig455_060907', 'piwi'),
    ('Mlig455_034777', 'dscaml1'),
    ('Mlig455_048486', 'gcm'),
    ('Mlig455_068578', 'chat'),
    ('Mlig455_028180', 'syt11'),
    ('Mlig455_021418', 'pc2'),
    ('Mlig455_010925', 'stnB'),
    ('Mlig455_030543', 'lhx3'),
    ('Mlig455_024541', 'pou4f2'),
    ('Mlig455_034094', 'myh2'),
    ('Mlig455_018605', 'myod1'),
    ('Mlig455_066621', 'nr4a2'),
    ('Mlig455_000356', 'notch2'),
    ('Mlig455_024930', 'sox15'),
    ('Mlig455_020211', 'pax5'),
    ('Mlig455_056728', 'rootletin'),
    ('Mlig455_017088', 'sspo'),
    ('Mlig455_038182', 'ano7'),
    ('Mlig455_029729', 'sspo'),
    ('Mlig455_038673', 'RNA815_15018.1/kremen2-like'),
    ('Mlig455_059462', 'RNA815_17183/wscd2-like'),
    ('Mlig455_025266', 'ascl4'),
    ('Mlig455_042563', 'ascl5'),
    ('Mlig455_024289', 'RNA1310_52977/RNA815_36457/wscd2-like'),
    ('Mlig455_014917', 'zan'),
    ('Mlig455_030495', 'pax6'),
    ('Mlig455_038010', 'ano7'),
    # NOTE(review): this gene was annotated "osr2" next to its ID while the
    # plotted label is 'osr' -- confirm which spelling is intended.
    ('Mlig455_035625', 'osr'),
    ('Mlig455_058893', 'sall3'),
    ('Mlig455_034928', 'eya1'),
    ('Mlig455_020123', 'pou3f2'),
    ('Mlig455_033230', 'macif1'),
    ('Mlig455_027638', 'RNA1310_30724/calmodulin6-like'),
]

markerGenes = np.array([geneId for geneId, _label in _markerTable])
names = np.array([label for _geneId, label in _markerTable])

# Row order of the dot plot (passed as categories_order).
ctOrder = [
    'Cathepsin', 'Intestine-1', 'Intestine-2',
    'GSC', 'Female Germline', 'Male Germline', 'Neoblast',
    'Neural Progenitors', 'Neural-1', 'Neural-2', 'Neural-3', 'Neural-4', 'Neural-5',
    'Muscle',
    'Epidermal Progenitors', 'Epidermal-1', 'Epidermal-2',
    'Parenchymal-1', 'Parenchymal-2', 'Parenchymal-3', 'Parenchymal-4', 'Parenchymal-5',
    'Protonephridia', 'Anchor Cells',
]

In [None]:
# Load the M. lignano SEACells RNA AnnData and make its 'raw' layer the
# active matrix (.X) that SAM is given in the next cell.
adataME = sc.read_h5ad( 'Mlig.SEACells_RNA.h5ad' )
adataME.X = adataME.layers['raw']
# Last expression of the cell: displays the AnnData summary.
adataME

In [None]:
# Wrap the M. lignano RNA object in SAM, preprocess with min_expression=0 and
# gene filtering switched off, then run SAM using 'Sample' as the batch key.
samME = SAM(counts=adataME)
samME.preprocess_data(filter_genes=False, min_expression=0)
samME.run(batch_key='Sample', weight_mode='rms', sparse_pca=True)

# Last expression of the cell: displays the AnnData held by SAM.
samME.adata

In [None]:
# adataME is not used again (later cells read samME.adata), so drop the name
# and ask the garbage collector to reclaim memory.
del adataME
gc.collect()

In [None]:
# M. lignano gene-expression dot plot: marker genes (columns) by 'GroupFigure'
# category (rows). Cells labelled '???-1' are removed before plotting.
_keepCells = samME.adata.obs['GroupFigure'] != '???-1'
dp = sc.pl.dotplot(
    samME.adata[_keepCells].copy(),
    var_names=markerGenes,
    groupby='GroupFigure',
    use_raw=False,
    standard_scale='var',
    categories_order=ctOrder,
    dendrogram=False,
    mean_only_expressed=False,
    dot_min=0,
    dot_max=0.75,
    cmap=custom_cmap,
    figsize=(21, 7),
    show=False,
)
# Swap the gene-ID tick labels for the readable names (same order as markerGenes).
dp['mainplot_ax'].set_xticklabels(names)
plt.savefig('Plots/EDFig3/PanelED3_Mlig_gex.svg', format='svg')
plt.show()

In [None]:
# Load the M. lignano gene-score matrix, attach the final cluster annotations
# (rows re-ordered to match the AnnData's cell names) and keep only the cells
# whose Sample is 'uncut'.
adataMS = sc.read_h5ad('ArchROutputs/Mlig/Mlig.genescores_init.h5ad')
clusterAnnots = pd.read_csv('Metadata/Mlig.final_cluster_annots.csv', index_col=0)
adataMS.obs = clusterAnnots.loc[adataMS.obs_names, :]
adataMS = adataMS[adataMS.obs['Sample'] == 'uncut', :].copy()

# The annotation table is only needed for the join above.
del clusterAnnots

# Last expression of the cell: displays the AnnData summary.
adataMS

In [None]:
# Wrap the M. lignano gene-score object in SAM, preprocess with
# min_expression=0 and gene filtering switched off, then run SAM using
# 'Sample' as the batch key.
samMS = SAM(counts=adataMS)
samMS.preprocess_data(filter_genes=False, min_expression=0)
samMS.run(batch_key='Sample', weight_mode='rms', sparse_pca=True)

# Last expression of the cell: displays the AnnData held by SAM.
samMS.adata

In [None]:
# adataMS is not used again (later cells read samMS.adata), so drop the name
# and ask the garbage collector to reclaim memory.
del adataMS
gc.collect()

In [None]:
# Run SAM's nearest-neighbour dispersion ranking with save_avgs=True. The dot
# plot that follows reads the 'X_knn_avg' layer, presumably stored by this
# call -- confirm against the SAM documentation.
samMS.dispersion_ranking_NN(save_avgs=True)

In [None]:
# M. lignano gene-score dot plot, drawn from the 'X_knn_avg' layer: marker
# genes (columns) by 'GroupFigure' category (rows). Cells labelled '???-1'
# are removed before plotting.
_keepCells = samMS.adata.obs['GroupFigure'] != '???-1'
dp = sc.pl.dotplot(
    samMS.adata[_keepCells].copy(),
    var_names=markerGenes,
    groupby='GroupFigure',
    layer='X_knn_avg',
    standard_scale='var',
    categories_order=ctOrder,
    dendrogram=False,
    mean_only_expressed=False,
    dot_min=0,
    dot_max=0.75,
    cmap=custom_cmap,
    figsize=(21, 7),
    show=False,
)
# Swap the gene-ID tick labels for the readable names (same order as markerGenes).
dp['mainplot_ax'].set_xticklabels(names)
plt.savefig('Plots/EDFig3/PanelED3_Mlig_gs.svg', format='svg')
plt.show()

In [None]:
# Both M. lignano panels are saved; release the two SAM objects before the
# next species is loaded.
del samME, samMS

gc.collect()

In [None]:
# S. mediterranea marker panel. Each row pairs a gene ID (plotted column) with
# the label drawn on the x axis; keeping both in one table keeps them in lockstep.
_markerTable = [
    ('SMESG000029163.1', 'ctsa'),
    ('SMESG000066497.1', 'foxf1'),
    ('SMESG000065612.1', 'ets1'),
    ('SMESG000079512.1', 'nkx2-2'),
    ('SMESG000014588.1', 'hnf4'),
    ('SMESG000053199.1', 'mat'),
    ('SMESG000003408.1', 'porcupine'),
    ('SMESG000039559.1', 'gata4'),
    ('SMESG000076072.1', 'ophis'),
    ('SMESG000000719.1', 'surfactant b'),
    ('SMESG000011334.1', 'onecut'),
    ('SMESG000076962.1', 'msy4'),
    ('SMESG000018169.1', 'nanos'),
    ('SMESG000066384.1', 'zfp1'),
    ('SMESG000078256.1', 'tp53'),
    ('SMESG000036375.1', 'smedwi1'),
    ('SMESG000021009.1', 'bruli'),
    ('SMESG000036444.1', 'gcm'),
    ('SMESG000051357.1', 'chat'),
    ('SMESG000017305.1', 'stnb'),
    ('SMESG000008070.1', 'pc2'),
    ('SMESG000016766.1', 'col4a2'),
    ('SMESG000063104.1', 'nr4a2'),
    ('SMESG000034317.1', 'myod1'),
    ('SMESG000025013.1', 'prog2'),
    ('SMESG000051170.1', 'soxP3'),
    ('SMESG000068935.1', 'dd_357'),
    ('SMESG000030598.1', 'vim'),
    ('SMESG000033673.1', 'vit'),
    ('SMESG000065670.1', 'foxA'),
    ('SMESG000006381.1', 'ano7'),
    ('SMESG000026861.1', 'ascl4'),
    ('SMESG000040790.1', 'zan6'),
    ('SMESG000005975.1', 'six1'),
    ('SMESG000076173.1', 'pou3f3'),
    ('SMESG000042801.1', 'sall1'),
]

markerGenes = np.array([geneId for geneId, _label in _markerTable])
names = np.array([label for _geneId, label in _markerTable])

# Row order of the dot plot (passed as categories_order).
ctOrder = [
    'Cathepsin', 'Intestine-1', 'Intestine-2', 'Ophis',
    'GSC', 'GSC progeny/diff germline',
    'Neoblast-1', 'Neoblast-2',
    'Neural Progenitors', 'Neural-1', 'Neural-2', 'Neural-POU4',
    'Muscle-1', 'Muscle-2', 'Muscle-3',
    'Epidermal-1', 'Epidermal-2', 'Pharynx',
    'Parenchymal', 'Protonephridia',
]

In [None]:
# Load the S. mediterranea raw RNA counts and attach the final cluster
# annotations, with rows re-ordered to match the AnnData's cell names.
adataPE = sc.read_h5ad('GEXCounts/Smed/Smed.raw_RNA_counts.h5ad')
clusterAnnots = pd.read_csv('Metadata/Smed.final_cluster_annots.csv', index_col=0)
adataPE.obs = clusterAnnots.loc[adataPE.obs_names, :]

# The annotation table is only needed for the join above.
del clusterAnnots

# Last expression of the cell: displays the AnnData summary.
adataPE

In [None]:
# Wrap the S. mediterranea RNA object in SAM, preprocess with min_expression=0
# and gene filtering switched off, then run SAM using 'Sample' as the batch key.
samPE = SAM(counts=adataPE)
samPE.preprocess_data(filter_genes=False, min_expression=0)
samPE.run(batch_key='Sample', weight_mode='rms', sparse_pca=True)

# Last expression of the cell: displays the AnnData held by SAM.
samPE.adata

In [None]:
# adataPE is not used again (later cells read samPE.adata), so drop the name
# and ask the garbage collector to reclaim memory.
del adataPE
gc.collect()

In [None]:
# Run SAM's nearest-neighbour dispersion ranking with save_avgs=True. The dot
# plot that follows reads the 'X_knn_avg' layer, presumably stored by this
# call -- confirm against the SAM documentation.
samPE.dispersion_ranking_NN(save_avgs=True)

In [None]:
# S. mediterranea gene-expression dot plot, drawn from the 'X_knn_avg' layer:
# marker genes (columns) by 'GroupFigure' category (rows).
dp = sc.pl.dotplot(
    samPE.adata,
    var_names=markerGenes,
    groupby='GroupFigure',
    layer='X_knn_avg',
    standard_scale='var',
    categories_order=ctOrder,
    dendrogram=False,
    mean_only_expressed=False,
    dot_min=0,
    dot_max=0.75,
    cmap=custom_cmap,
    figsize=(21, 7),
    show=False,
)
# Swap the gene-ID tick labels for the readable names (same order as markerGenes).
dp['mainplot_ax'].set_xticklabels(names)
plt.savefig('Plots/EDFig3/PanelED3_Smed_gex.svg', format='svg')
plt.show()

In [None]:
# Load the S. mediterranea imputed gene scores and attach the final cluster
# annotations, with rows re-ordered to match the AnnData's cell names.
adataPS = sc.read_h5ad('ArchROutputs/Smed/Smed.imputed_genescores.h5ad')
clusterAnnots = pd.read_csv('Metadata/Smed.final_cluster_annots.csv', index_col=0)
adataPS.obs = clusterAnnots.loc[adataPS.obs_names, :]

# The annotation table is only needed for the join above.
del clusterAnnots

# Last expression of the cell: displays the AnnData summary.
adataPS

In [None]:
# Wrap the S. mediterranea gene-score object in SAM, preprocess with
# min_expression=0 and gene filtering switched off, then run SAM using
# 'Sample' as the batch key.
samPS = SAM(counts=adataPS)
samPS.preprocess_data(filter_genes=False, min_expression=0)
samPS.run(batch_key='Sample', weight_mode='rms', sparse_pca=True)

# Last expression of the cell: displays the AnnData held by SAM.
samPS.adata

In [None]:
# adataPS is not used again (later cells read samPS.adata), so drop the name
# and ask the garbage collector to reclaim memory.
del adataPS
gc.collect()

In [None]:
# Run SAM's nearest-neighbour dispersion ranking with save_avgs=True. The dot
# plot that follows reads the 'X_knn_avg' layer, presumably stored by this
# call -- confirm against the SAM documentation.
samPS.dispersion_ranking_NN(save_avgs=True)

In [None]:
# S. mediterranea gene-score dot plot, drawn from the 'X_knn_avg' layer:
# marker genes (columns) by 'GroupFigure' category (rows).
dp = sc.pl.dotplot(
    samPS.adata,
    var_names=markerGenes,
    groupby='GroupFigure',
    layer='X_knn_avg',
    standard_scale='var',
    categories_order=ctOrder,
    dendrogram=False,
    mean_only_expressed=False,
    dot_min=0,
    dot_max=0.75,
    cmap=custom_cmap,
    figsize=(21, 7),
    show=False,
)
# Swap the gene-ID tick labels for the readable names (same order as markerGenes).
dp['mainplot_ax'].set_xticklabels(names)
plt.savefig('Plots/EDFig3/PanelED3_Smed_gs.svg', format='svg')
plt.show()

In [None]:
# Both S. mediterranea panels are saved; release the two SAM objects before
# the next species is loaded.
del samPE, samPS

gc.collect()

In [None]:
# S. mansoni marker panel. Each row pairs a gene ID (plotted column) with the
# label drawn on the x axis. IDs and labels used to live in two separate
# arrays that had drifted apart; a single table keeps them in lockstep.
_markerTable = [
    ('Smp-212710', 'tgfbi'),
    ('Smp-076300', 'foxf1'),
    ('Smp-034410', 'ctsf'),
    ('Smp-174700', 'hnf4'),
    ('Smp-103610', 'cb1.1'),
    ('Smp-347780', 'nkx2-5'),
    ('Smp-248100', 'nr'),
    ('Smp-041540', 'eled'),
    ('Smp-055740', 'nanos1'),
    ('Smp-144860', 'boule'),
    ('Smp-051920', 'nanos2'),
    # Fixed: the tick label was 'fgfrB' although this ID was annotated fgfrA.
    # NOTE(review): confirm against the genome annotation.
    ('Smp-175590', 'fgfrA'),
    ('Smp-139530', 'tp53'),
    ('Smp-145470', 'zfp1'),
    ('Smp-171130', 'gcm'),
    ('Smp-146910', 'chat'),
    ('Smp-175900', 'syt2'),
    ('Smp-342080', 'stnB'),
    ('Smp-077980', 'pc2'),
    ('Smp-340200', 'pou4f2'),
    ('Smp-345610', 'lhx3'),
    ('Smp-194830', 'kk7'),
    ('Smp-167400', 'myod1'),
    ('Smp-018250', 'troponin'),
    ('Smp-139200', 'nr4a2'),
    ('Smp-195190', 'sm13'),
    ('Smp-335630', 'tsp2'),
    ('Smp-346900', 'sm25'),
    ('Smp-045200', 'tal'),
    ('Smp-049580', 'zfp1'),
    ('Smp-241610', 'p48'),
    ('Smp-167830', 'ataxin2'),
    # Fixed: the tick label was 'meg4.1' although this ID was annotated meg4.2.
    # NOTE(review): confirm against the genome annotation.
    ('Smp-085840', 'meg4.2'),
    ('Smp-331700', 'foxA'),
    ('Smp-335600', 'sialidase'),
    ('Smp-344230', 'pou'),
    ('Smp-128060', 'sall1'),
]

markerGenes = np.array([geneId for geneId, _label in _markerTable])
names = np.array([label for _geneId, label in _markerTable])

# Row order of the dot plot (passed as categories_order).
ctOrder = [
    'Cathepsin', 'Intestine', 'S1', 'GSC', 'Neoblast',
    'Neural Progenitors', 'Neural-1', 'Neural-2', 'Neural-POU4',
    'Neural-3', 'Neural-4', 'Neural-5',
    'Muscle Progenitors', 'Muscle-1', 'Muscle-2', 'Muscle-3',
    'Tegument Progenitors', 'Tegument-1', 'Tegument-2',
    'Vitellocytes', 'Oesophageal Gland', 'Protonephridia',
]

In [None]:
# Load the S. mansoni imputed RNA object (cp10k file) and attach the final
# cluster annotations, with rows re-ordered to match the AnnData's cell names.
adataSE = sc.read_h5ad('ArchROutputs/Sman/Sman.imputed_RNA_cp10k.h5ad')
clusterAnnots = pd.read_csv('Metadata/Sman.final_cluster_annots.csv', index_col=0)
adataSE.obs = clusterAnnots.loc[adataSE.obs_names, :]

# The annotation table is only needed for the join above.
del clusterAnnots

# Last expression of the cell: displays the AnnData summary.
adataSE

In [None]:
# Wrap the S. mansoni RNA object in SAM, preprocess with min_expression=0 and
# gene filtering switched off, then run SAM using 'Sample' as the batch key.
samSE = SAM(counts=adataSE)
samSE.preprocess_data(filter_genes=False, min_expression=0)
samSE.run(batch_key='Sample', weight_mode='rms', sparse_pca=True)

# Last expression of the cell: displays the AnnData held by SAM.
samSE.adata

In [None]:
# adataSE is not used again (later cells read samSE.adata), so drop the name
# and ask the garbage collector to reclaim memory.
del adataSE
gc.collect()

In [None]:
# Run SAM's nearest-neighbour dispersion ranking with save_avgs=True. The dot
# plot that follows reads the 'X_knn_avg' layer, presumably stored by this
# call -- confirm against the SAM documentation.
samSE.dispersion_ranking_NN(save_avgs=True)

In [None]:
# S. mansoni gene-expression dot plot, drawn from the 'X_knn_avg' layer:
# marker genes (columns) by 'GroupFigure' category (rows).
dp = sc.pl.dotplot(
    samSE.adata,
    var_names=markerGenes,
    groupby='GroupFigure',
    layer='X_knn_avg',
    standard_scale='var',
    categories_order=ctOrder,
    dendrogram=False,
    mean_only_expressed=False,
    dot_min=0,
    dot_max=0.75,
    cmap=custom_cmap,
    figsize=(21, 7),
    show=False,
)
# Swap the gene-ID tick labels for the readable names (same order as markerGenes).
dp['mainplot_ax'].set_xticklabels(names)
plt.savefig('Plots/EDFig3/PanelED3_Sman_gex.svg', format='svg')
plt.show()

In [None]:
# Load the S. mansoni gene scores and attach the final cluster annotations,
# with rows re-ordered to match the AnnData's cell names.
adataSS = sc.read_h5ad('ArchROutputs/Sman/Sman.genescores.h5ad')
# Fixed: this previously read the Smed annotation table (copy-paste slip).
# S. mansoni cells must be looked up in the Sman table, the same file used
# for the S. mansoni RNA object; otherwise the .loc lookup below fails or
# attaches another species' labels.
clusterAnnots = pd.read_csv('Metadata/Sman.final_cluster_annots.csv', index_col=0)
adataSS.obs = clusterAnnots.loc[adataSS.obs_names, :]

# The annotation table is only needed for the join above.
del clusterAnnots

# Last expression of the cell: displays the AnnData summary.
adataSS

In [None]:
# Wrap the S. mansoni gene-score object in SAM, preprocess with
# min_expression=0 and gene filtering switched off, then run SAM using
# 'Sample' as the batch key.
samSS = SAM(counts=adataSS)
samSS.preprocess_data(filter_genes=False, min_expression=0)
samSS.run(batch_key='Sample', weight_mode='rms', sparse_pca=True)

# Last expression of the cell: displays the AnnData held by SAM.
samSS.adata

In [None]:
# adataSS is not used again (later cells read samSS.adata), so drop the name
# and ask the garbage collector to reclaim memory.
del adataSS
gc.collect()

In [None]:
# Run SAM's nearest-neighbour dispersion ranking with save_avgs=True. The dot
# plot that follows reads the 'X_knn_avg' layer, presumably stored by this
# call -- confirm against the SAM documentation.
samSS.dispersion_ranking_NN(save_avgs=True)

In [None]:
# S. mansoni gene-score dot plot, drawn from the 'X_knn_avg' layer: marker
# genes (columns) by 'GroupFigure' category (rows).
dp = sc.pl.dotplot(
    samSS.adata,
    var_names=markerGenes,
    groupby='GroupFigure',
    layer='X_knn_avg',
    standard_scale='var',
    categories_order=ctOrder,
    dendrogram=False,
    mean_only_expressed=False,
    dot_min=0,
    dot_max=0.75,
    cmap=custom_cmap,
    figsize=(21, 7),
    show=False,
)
# Swap the gene-ID tick labels for the readable names (same order as markerGenes).
dp['mainplot_ax'].set_xticklabels(names)
plt.savefig('Plots/EDFig3/PanelED3_Sman_gs.svg', format='svg')
plt.show()

In [None]:
# Both S. mansoni panels are saved; release the two SAM objects.
del samSE, samSS

gc.collect()