## Analysis of FFPE Adenocarcinoma Prostate with scanpy

In [1]:
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import seaborn as sns
import scanorama
import csv
from sklearn.metrics.pairwise import cosine_distances

import SpatialDE

plt.rcParams['figure.figsize']=(8,8)


%load_ext autoreload
%autoreload 2

In [2]:
#read in data
#this is saved locally so need a different function than downloading from 10X server
ffpe_human_prostate_cancer = sc.read_visium('FFPE_Visium_Human_ProstateCancer',count_file='Visium_FFPE_Human_Prostate_Cancer_filtered_feature_bc_matrix.h5')
ffpe_human_prostate_cancer.var_names_make_unique()

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [3]:
ffpe_human_prostate_cancer

AnnData object with n_obs × n_vars = 4371 × 17943
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

In [4]:
#Calculate some QC metrics for the data
ffpe_human_prostate_cancer.var["mt"] = ffpe_human_prostate_cancer.var_names.str.startswith("MT")
sc.pp.calculate_qc_metrics(ffpe_human_prostate_cancer, qc_vars=["mt"], inplace=True)

In [5]:
#Visualise some QC metrics for the data
%matplotlib notebook

fig, axs = plt.subplots(1, 4, figsize=(15, 4))
sns.distplot(ffpe_human_prostate_cancer.obs["total_counts"], kde=False, ax=axs[0])
sns.distplot(ffpe_human_prostate_cancer.obs["total_counts"][ffpe_human_prostate_cancer.obs["total_counts"] < 20000], kde=False, bins=40, ax=axs[1])
sns.distplot(ffpe_human_prostate_cancer.obs["n_genes_by_counts"], kde=False, bins=60, ax=axs[2])
sns.distplot(ffpe_human_prostate_cancer.obs["n_genes_by_counts"][ffpe_human_prostate_cancer.obs["n_genes_by_counts"] < 4000], kde=False, bins=60, ax=axs[3])
sc.pl.violin(ffpe_human_prostate_cancer,['pct_counts_mt'])

<IPython.core.display.Javascript object>

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'feature_types' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'genome' as categorical


<IPython.core.display.Javascript object>

In [6]:
ffpe_human_prostate_cancer

AnnData object with n_obs × n_vars = 4371 × 17943
    obs: 'in_tissue', 'array_row', 'array_col', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt'
    var: 'gene_ids', 'feature_types', 'genome', 'mt', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts'
    uns: 'spatial'
    obsm: 'spatial'

In [7]:
#Perform some filtering
sc.pp.filter_cells(ffpe_human_prostate_cancer, min_counts=3000)
print(f'Number of cells after min count filter: {ffpe_human_prostate_cancer.n_obs}')
sc.pp.filter_cells(ffpe_human_prostate_cancer, max_counts=35000)
print(f'Number of cells after max count filter: {ffpe_human_prostate_cancer.n_obs}')
ffpe_human_prostate_cancer = ffpe_human_prostate_cancer[ffpe_human_prostate_cancer.obs["pct_counts_mt"] < 4]
ffpe_human_prostate_cancer = ffpe_human_prostate_cancer[ffpe_human_prostate_cancer.obs["pct_counts_mt"] > 0.5]
print(f"#cells after MT filter: {ffpe_human_prostate_cancer.n_obs}")
#Filter out genes that are detected in less than 10 cells
sc.pp.filter_genes(ffpe_human_prostate_cancer, min_cells=10)
print(f'Number of genes after cell filter: {ffpe_human_prostate_cancer.n_vars}')
sc.pp.filter_cells(ffpe_human_prostate_cancer, min_genes = 3000)
print(f'Number of cells after gene filter: {ffpe_human_prostate_cancer.n_obs}')

Number of cells after min count filter: 4216
Number of cells after max count filter: 4213
#cells after MT filter: 4213
Number of genes after cell filter: 15062
Number of cells after gene filter: 3569


In [8]:
##Do normalisation and find highly variable genes
sc.pp.normalize_total(ffpe_human_prostate_cancer, inplace=True)
sc.pp.log1p(ffpe_human_prostate_cancer)
sc.pp.highly_variable_genes(ffpe_human_prostate_cancer, flavor="seurat", n_top_genes=2000)

In [9]:
#Look at distribution after normalisation
fig, axs = plt.subplots(1, 2, figsize=(15, 4))
sns.distplot(ffpe_human_prostate_cancer.obs["total_counts"], kde=False, ax=axs[0])
sns.distplot(ffpe_human_prostate_cancer.obs["n_genes_by_counts"], kde=False, bins=60, ax=axs[1])

<IPython.core.display.Javascript object>



<AxesSubplot:xlabel='n_genes_by_counts'>

In [10]:
#Perform next steps in cluster identification
sc.pp.pca(ffpe_human_prostate_cancer,n_comps=20)
sc.pp.neighbors(ffpe_human_prostate_cancer)
sc.tl.umap(ffpe_human_prostate_cancer)
sc.tl.leiden(ffpe_human_prostate_cancer, key_added='clusters')

In [11]:
#Do some umap visualisations
plt.rcParams["figure.figsize"] = (4, 4)
sc.pl.umap(ffpe_human_prostate_cancer, color=["total_counts", "n_genes_by_counts", "clusters"], wspace=0.4)

<IPython.core.display.Javascript object>

In [12]:
#Visualise in spatial coordinates
plt.rcParams["figure.figsize"] = (8, 8)
sc.pl.spatial(ffpe_human_prostate_cancer, img_key="hires", color=["total_counts", "n_genes_by_counts"])

<IPython.core.display.Javascript object>

In [13]:
#Visualise cell type clusters by spatial organisation
sc.pl.spatial(ffpe_human_prostate_cancer, img_key="hires", color="clusters", size=1.5)

<IPython.core.display.Javascript object>

In [14]:
#Compute marker genes across all clusters and plot heatmap of the top 10 markers in each cluster
sc.tl.rank_genes_groups(ffpe_human_prostate_cancer, "clusters", method="t-test")
sc.pl.rank_genes_groups_heatmap(ffpe_human_prostate_cancer, groups="5", n_genes=10, groupby="clusters")

categories: 0, 1, 2, etc.
var_group_labels: 5


<IPython.core.display.Javascript object>

### Output necessary files

In [15]:
##write this AnnData object to something readable in Seurat - should be h5ad file
ffpe_human_prostate_cancer.write_h5ad(filename='ffpe_human_prostate_cancer.h5ad', compression=None, compression_opts=None, force_dense=None, as_dense=())

In [None]:
#Write out normalised dataframe with expression values to file so it can be read back in for this purpose
ffpe_human_prostate_cancer.write_csvs('preprocessed_output/',skip_data=False)

### Do analysis of SVGs for SpatialDE

In [None]:
%%time
#Now try and find SVGs with SpatialDE
#We need to convert normalised counts into a DF for spatial DE

counts = pd.DataFrame(ffpe_human_prostate_cancer.X.todense(), columns=ffpe_human_prostate_cancer.var_names, index=ffpe_human_prostate_cancer.obs_names)
coord = pd.DataFrame(ffpe_human_prostate_cancer.obsm['spatial'], columns=['x_coord', 'y_coord'], index=ffpe_human_prostate_cancer.obs_names)
results = SpatialDE.run(coord, counts)
results.to_csv('ffpe_prostate_cancer_spatialde_all_svgs.csv')

  R2 = -2. * np.dot(X, X.T) + (Xsq[:, None] + Xsq[None, :])
  R2 = -2. * np.dot(X, X.T) + (Xsq[:, None] + Xsq[None, :])


Models:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

  0%|          | 0/15062 [00:00<?, ?it/s]

In [47]:
#Look at top 10 values
results.sort_values("qval").head(10)

Unnamed: 0,FSV,M,g,l,max_delta,max_ll,max_mu_hat,max_s2_t_hat,model,n,s2_FSV,s2_logdelta,time,BIC,max_ll_null,LLR,pval,qval
2172,0.104277,4,CCL19,1031.970442,8.40891,758.050204,0.046469,0.00435,SE,3569,1.4e-05,0.001396,0.001723,-1483.380246,663.617312,94.432892,0.0,0.0
2559,0.121746,4,CA14,1985.029706,6.700388,2345.02722,0.027718,0.002276,SE,3569,9.3e-05,0.007367,0.001726,-4657.334277,2270.080681,74.946538,0.0,0.0
2560,0.156046,4,ADAMTSL4,1985.029706,5.023417,233.602757,0.094114,0.009896,SE,3569,9.7e-05,0.005288,0.001956,-434.485351,87.441966,146.160791,0.0,0.0
2561,0.201606,4,MCL1,1985.029706,3.678322,-2540.40213,1.599367,0.076049,SE,3569,7.8e-05,0.003004,0.001728,5113.524424,-2931.175636,390.773506,0.0,0.0
2564,0.140088,4,SELENBP1,1985.029706,5.701477,-2761.776624,0.661803,0.048713,SE,3569,4.4e-05,0.002789,0.001754,5556.273411,-3026.513456,264.736831,0.0,0.0
11519,0.039244,4,LDB1,7344.572367,12.150385,-2919.717455,0.932061,0.0253,SE,3569,0.00011,0.060537,0.003714,5872.155072,-3029.441589,109.724135,0.0,0.0
2566,0.096224,4,S100A4,1985.029706,8.723898,-2639.758941,0.49057,0.029742,SE,3569,3.5e-05,0.003985,0.001783,5312.238046,-2772.44671,132.687769,0.0,0.0
6160,0.219527,4,ATP5MF,3818.271117,2.792867,-2715.143153,1.435257,0.097864,SE,3569,0.000401,0.013898,0.001715,5463.006469,-2918.248342,203.105189,0.0,0.0
2570,0.131343,4,RAB25,1985.029706,6.142961,-2766.746129,0.708002,0.045826,SE,3569,4.5e-05,0.003127,0.001744,5566.212421,-2971.291676,204.545547,0.0,0.0
6157,0.121785,4,BAIAP2L1,3818.271117,5.664854,-2433.725199,0.432718,0.040057,SE,3569,0.00038,0.029939,0.001531,4900.170561,-2511.844796,78.119597,0.0,0.0


In [48]:
#Sort results for all qvals >= 0.05 and add them to file
results_filtered = results[results["qval"] <= 0.05]
results_filtered = results_filtered.sort_values(by="qval")
results_filtered.to_csv('ffpe_human_prostate_cancer_svgs_spatialde.csv')
results_filtered

Unnamed: 0,FSV,M,g,l,max_delta,max_ll,max_mu_hat,max_s2_t_hat,model,n,s2_FSV,s2_logdelta,time,BIC,max_ll_null,LLR,pval,qval
12870,0.020846,4,HNRNPUL1,7344.572367,23.312012,-2177.946553,1.923806,0.011410,SE,3569,0.000028,0.051269,0.001713,4388.613268,-2256.284783,78.338230,0.000000,0.000000
5149,0.085704,4,PLXND1,3818.271117,8.380375,-2568.701526,0.451451,0.029393,SE,3569,0.000190,0.026317,0.001708,5170.123216,-2661.724328,93.022801,0.000000,0.000000
5150,0.317836,4,ATP2C1,3818.271117,1.686032,-2837.840594,1.389626,0.169679,SE,3569,0.000505,0.011865,0.001558,5708.401350,-3395.055473,557.214879,0.000000,0.000000
5151,0.410124,4,CPNE4,3818.271117,1.129866,-2777.890392,1.071159,0.240708,SE,3569,0.000451,0.008901,0.001330,5588.500946,-3753.860435,975.970043,0.000000,0.000000
11995,0.020690,4,INTS6,7344.572367,23.491628,-2840.005105,0.737238,0.012604,SE,3569,0.000031,0.057517,0.001745,5712.730374,-2911.354400,71.349295,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10415,0.003925,4,LEKR1,7344.572367,125.951833,2970.784943,0.015071,0.000088,SE,3569,0.000010,0.469854,0.001693,-5908.849723,2966.276139,4.508804,0.033721,0.049650
12453,0.004086,4,CHRNE,7344.572367,120.975915,4192.962070,0.009270,0.000046,SE,3569,0.000011,0.467872,0.001890,-8353.203978,4188.452913,4.509157,0.033714,0.049650
12274,0.003479,4,JMJD8,7344.572367,142.147374,-2269.284618,0.406198,0.001568,SE,3569,0.000010,0.603754,0.001764,4571.289400,-2273.793425,4.508807,0.033721,0.049650
7386,0.004624,4,SLC4A8,3818.271117,169.106588,1271.401495,0.039286,0.000171,SE,3569,0.000007,0.228562,0.003923,-2510.082828,1266.898901,4.502594,0.033843,0.049825
