# Tutorial 'QC, topographical analysis and segmentation-free analysis of spot-based transcriptomics data'

This is the fast lane of the tutorial. Just start a session and click 'run the whole notebook' at the top to start executing.

UMAP calculation takes a few minutes, which we can use in the main notebook to get familiar with the general exploratory workflow.

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell execution, so edits to imported code are picked up
# without restarting the kernel.

%load_ext autoreload
%autoreload 2

# widens the screen: inject a CSS rule that sets the '.container' element
# to 95% width.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

import os
import sys

# Make modules that live two directory levels above the current working
# directory importable. The path is built from the absolute working
# directory plus the literal '../..' suffix (it is not normalised).
_two_levels_up = os.path.join(os.path.abspath('.'), '../..')
sys.path.append(_two_levels_up)

In [2]:
# imports, define a handy figure function:

import plankton.plankton as pl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc

def figure(width=8,height=8):
    """Open a new matplotlib figure with the given size.

    Parameters
    ----------
    width, height : number, default 8
        Forwarded together as ``figsize=(width, height)`` to ``plt.figure``.

    Returns
    -------
    None
        The new figure becomes pyplot's current figure; it is not returned.
    """
    size = (width, height)
    plt.figure(figsize=size)


In [3]:
# load the spot coordinates and the background stain:

# Scale factor applied to the spot coordinates further down
# (the name suggests micrometres per pixel — TODO confirm).
um_p_px = 0.325

coordinates = pd.read_csv('./data/in_situ_sequencing/coordinates.csv')

# Average over the last image axis and flip the sign (inverting the
# intensities), then rescale in place so the values span [0, 1].
bg = -plt.imread('background.jpg').mean(-1)
bg -= bg.min()
bg /= bg.max()

# NOTE(review): the origin of the 0.504 factor is not visible here — confirm.
bg_map = pl.PixelMap(pixel_data=bg, cmap='Greys', px_p_um=0.504 / um_p_px)

# The array has been handed to PixelMap; drop this name from the namespace.
del bg

In [4]:
# Optionally append uniformly distributed random "noise" spots to the
# measured spots. The number of noise spots is 0 here, so x, y and g
# currently contain exactly the measured data.
n_noise_spots = 0

# Random positions inside the bounding box [0, max_x] x [0, max_y].
rands = np.random.rand(n_noise_spots, 2) * np.array([coordinates.Global_x_pos.values.max(),
                                                     coordinates.Global_y_pos.values.max()])

x = np.hstack([coordinates.Global_x_pos.values, rands[:, 0]])
y = np.hstack([coordinates.Global_y_pos.values, rands[:, 1]])

# Draw one gene label per noise spot, uniformly from the set of unique genes.
# (Indexing the per-spot `Gene.values` array with numbers below the unique
# gene count would only ever pick from the first few rows of the table.)
unique_genes = coordinates.Gene.unique()
rand_genes = unique_genes[np.random.randint(len(unique_genes), size=rands.shape[0])]
g = np.hstack([coordinates.Gene.values, rand_genes])

In [5]:
# Assemble the spatial data set: coordinates are scaled by um_p_px, `g`
# holds one gene label per spot, and the background map is registered
# under the key 'DAPI'.
sdata = pl.SpatialData(
    x_coordinates=um_p_px * x,
    y_coordinates=um_p_px * y,
    genes=g,
    pixel_maps={'DAPI': bg_map},
)

In [6]:
# Disabled: flag the appended random spots in a 'noise' column.
# sdata['noise']=False
# sdata.loc[len(sdata)-rands.shape[0]:,'noise']=True

# Keep the spots selected by progressive_sample(1.05), then crop spatially.
# NOTE(review): the meaning of 1.05 and the axis order/units of the crop
# window [100:2800, 1000:] are not visible here — confirm against plankton.
# This reassigns `sdata` from itself, so re-running the cell samples and
# crops the already reduced data again.
keep = sdata.stats.progressive_sample(1.05)
sdata = sdata[keep].spatial[100:2800, 1000:]

In [None]:
# Parameterization for data cleaning/artefact removal:

# (original author's note) bw 100: segmentation

import time

# Number of neighbours for the spatial knn graph.
knn_neighbors = 150

# Settings forwarded to run_umap below; n_neighbors, cutoff and bandwidth
# are also read by the plotting cell that labels the figure.
bandwidth = 15
n_neighbors = 25
metric = 'euclidean'
min_dist = 0.02
random_state = 42
zero_weight = 0.0
cutoff = 6

_ = sdata.graph.update_knn(n_neighbors=knn_neighbors)

# Time the embedding; the elapsed seconds remain available in `t`.
t_start = time.perf_counter()
sdata.graph.run_umap(
    bandwidth=bandwidth,
    n_neighbors=n_neighbors,
    min_dist=min_dist,
    metric=metric,
    random_state=random_state,
    zero_weight=zero_weight,
    cutoff=cutoff,
)
t = time.perf_counter() - t_start
print(t)

Reducing dimensions with FastICA




In [None]:
# Always raises AssertionError, which stops a "run all" execution here.
# NOTE(review): presumably a deliberate break point — the cells below read
# columns (e.g. bg_noise, duct_1) that no code above creates. Confirm.
assert False

In [None]:
# Write the current data set to 'tutorial-umap.pl'. The commented-out line
# below restores it from that file instead of recomputing.
sdata.save('tutorial-umap.pl')
# sdata = pl.load('tutorial-umap.pl')

In [None]:
# 15x15 canvas for the combined plot, coloured by the first UMAP coordinate.
figure(15, 15)

sdata.graph.map_and_umap(c=sdata.graph.umap_1, alpha=0.3)

# Put the run parameters and the runtime (truncated to whole seconds)
# into the figure title.
run_label = f"nbrs:{n_neighbors}-cutoff:{cutoff}-bw:{bandwidth}-t:{int(t)}"
plt.suptitle(run_label)

In [None]:
# Open the UMAP view provided by plankton's graph accessor.
# NOTE(review): presumably the interactive tool in which the tissue columns
# used further down are annotated — confirm.
sdata.graph.umap_js()

In [None]:
# Scatter plot of every spot that is not flagged in the `bg_noise` column.
# NOTE(review): `bg_noise` is not created by any code above this cell;
# presumably it comes from an interactive annotation step — confirm.
figure()
not_background = ~sdata.bg_noise
sdata[not_background].scatter()

In [None]:
# Write the current data set to 'tissue_clusters.pl'.
sdata.save('tissue_clusters.pl')

In [None]:
# Names of the per-tissue columns that are merged into a single 'tissues'
# column. NOTE(review): these columns are not created by any code above;
# presumably they come from an interactive annotation step — confirm.
# (The former figure(12,12) call was dropped: nothing in this cell draws,
# so it only produced an empty figure.)
tissues = ['epithel', 'endo_1', 'endo_2',
           'duct_1', 'duct_2', 'mucus_1', 'mucus_2']
sdata['tissues'] = sdata.unite_columns(tissues)

# Display the updated data set as the cell output.
sdata

In [None]:
# Colour map for the tissue classes. plt.get_cmap is used because
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9,
# where the former `from matplotlib.cm import get_cmap` raises ImportError.
accent = plt.get_cmap('nipy_spectral')

figure(9,9)
out = sdata.scatter(c=sdata.tissues.cat.codes,cmap=accent)

# Empty scatter artists serve as legend handles, one per tissue name.
# NOTE(review): the handle colours are sampled at linspace(0.2, 1, n), while
# the scatter colours come from matplotlib's normalisation of the category
# codes — verify that legend and plot colours actually agree.
handlers = [plt.scatter([],[],color=accent(f)) for f in np.linspace(0.2,1,len(tissues))]

plt.legend(handlers,tissues)


## Analyse DEGs

### ducts:

In [None]:
from plankton.stats import mor_normalize

# Spots flagged as either duct class versus all remaining spots.
mask_ducts = sdata.duct_1 | sdata.duct_2
c1, c2 = mor_normalize(sdata[mask_ducts], sdata[~mask_ducts])

# NOTE(review): a later cell passes `.stats` objects to mor_normalize
# instead of the data sets themselves — confirm which form is intended.

figure(25, 5)

# log2 ratio of the two normalised results, sorted ascending, as a bar chart.
lfc_ducts = np.log2(c1 / c2).sort_values()
lfc_ducts.plot.bar()

In [None]:
# degs for ducts:
# plot the genes at the last two positions of the ascending-sorted lfc_ducts.

figure(8, 8)
top_duct_genes = lfc_ducts[-2:].index
sdata[sdata.g.isin(top_duct_genes)].scatter(legend=True)

### mucus

In [None]:
# Spots flagged as either mucus class versus all remaining spots.
mask_mucus = sdata.mucus_1 | sdata.mucus_2
in_mucus = sdata[mask_mucus]
not_in_mucus = sdata[~mask_mucus]

c1, c2 = mor_normalize(in_mucus, not_in_mucus)

# Natural-log ratio, sorted ascending, as a bar chart.
# NOTE(review): the duct analysis uses np.log2 for its fold change —
# confirm whether the differing log base here is intended.
lfc_mucus = np.log(c1 / c2).sort_values()
lfc_mucus.plot.bar()

In [None]:
from plankton.stats import mor_normalize

# Compare the "_1" classes (mucus_1, duct_1) with the "_2" classes
# (mucus_2, duct_2), passing the `.stats` objects of both subsets.
stats_type_1 = sdata[sdata.mucus_1 | sdata.duct_1].stats
stats_type_2 = sdata[sdata.mucus_2 | sdata.duct_2].stats
c1, c2 = mor_normalize(stats_type_1, stats_type_2)

figure(25, 5)
np.log(c1 / c2).sort_values().plot.bar()

# Plain (non-log) ratio, sorted ascending; read by the next plotting cell.
degs = (c1 / c2).sort_values()

In [None]:
figure(8, 8)

# Genes at the last three positions of the ascending-sorted ratio (blue)
# and at the first six positions (lime), drawn in that order.
upper_genes = degs[-3:].index
lower_genes = degs[:6].index

sdata[sdata.g.isin(upper_genes)].scatter(color='blue', alpha=0.1)
sdata[sdata.g.isin(lower_genes)].scatter(color='lime', alpha=0.1)


# unsupervised approach using SSAM-denovo

In [None]:
from plankton.utils import localmax_sampling,ssam

# create a signature matrix through local-max sampling; the columns are
# labelled with the data set's genes
sampled = localmax_sampling(sdata, n_clusters=8, bandwidth=5)
signatures = pd.DataFrame(sampled, columns=sdata.genes)

# run ssam on the data set with the sampled signatures
ct_map = ssam(
    sdata,
    signatures=signatures,
    kernel_bandwidth=10,
    threshold_exp=0.8,
)

In [None]:
# Draw the map returned by ssam on a 9x9 figure using the
# 'nipy_spectral' colour map.
figure(9,9)
ct_map.imshow(cmap='nipy_spectral')

In [None]:
# Subset for the second embedding: spots that are not flagged as background
# noise and are flagged in either inner-duct column.
# NOTE(review): inner_ducts_1/inner_ducts_2 are not among the tissue column
# names listed earlier (duct_1, duct_2, ...) — confirm these columns exist.
is_signal = ~sdata.bg_noise
in_inner_ducts = sdata.inner_ducts_1 | sdata.inner_ducts_2
sdata_epi = sdata[is_signal & in_inner_ducts]

In [None]:
from plankton.utils import hbar_compare

figure()

# Compare the statistics of spots flagged as noise with those of the rest.
# NOTE(review): the only assignment to a 'noise' column in this notebook is
# commented out, so `sdata.noise` may not exist on a fresh run — confirm.
is_noise = sdata.noise
hbar_compare(sdata[is_noise].stats, sdata[~is_noise].stats, ('noise','signal'))

In [None]:
# Parameterization for data cleaning/artefact removal:

import time

# Number of neighbours for the spatial knn graph of the subset.
knn_neighbors = 250

# Settings forwarded to run_umap below. These reuse (and overwrite) the
# variable names of the first UMAP run.
bandwidth = 25
n_neighbors = 100
metric = 'cosine'
min_dist = 0.2
random_state = 42
zero_weight = 0.0
cutoff = 30

_ = sdata_epi.graph.update_knn(n_neighbors=knn_neighbors)

# Time the embedding of the subset and print the elapsed seconds.
t = time.perf_counter()
sdata_epi.graph.run_umap(
    bandwidth=bandwidth,
    n_neighbors=n_neighbors,
    min_dist=min_dist,
    metric=metric,
    random_state=random_state,
    zero_weight=zero_weight,
    cutoff=cutoff,
)
elapsed = time.perf_counter() - t
print(elapsed)


In [None]:
# Plot the subset with map_and_umap using its default arguments.
sdata_epi.graph.map_and_umap()

In [None]:
# Open the umap_js view for the subset.
sdata_epi.graph.umap_js()