In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import anndata as ad
import scanpy as sc

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from umap import UMAP

from scroutines import powerplots
from scroutines.miscu import is_in_polygon

In [None]:
# Seed NumPy's legacy global random state once, before any analysis cell runs.
# NOTE(review): presumably the UMAP calls below (constructed without random_state)
# draw from this global state, so results depend on cell execution order — confirm.
np.random.seed(0)

In [None]:
# functions (the teacher wrote for you to use later)
def rot2d(x, y, theta, unit='degree'):
    """Rotate 2-D points about the origin.

    `x` and `y` hold the point coordinates. `theta` is the rotation angle: it
    is read as degrees when `unit == 'degree'` and used as-is (radians) for any
    other `unit` value. Positive angles turn counter-clockwise.

    Returns the rotated x and y coordinates as two arrays.
    """
    # any unit other than 'degree' means theta is already in radians
    angle = theta*np.pi/180 if unit == 'degree' else theta
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    rotation = np.array([[cos_t, -sin_t],
                         [sin_t, cos_t]])

    points = np.vstack([x, y]).T  # one (x, y) pair per row
    rotated = points @ rotation.T
    return rotated[:, 0], rotated[:, 1]

def st_scatter(x, y, gexp=None, vmax_p=98, title='', s=1, cbar_label='', output='', cmap='rocket_r', axis_off=True):
    """Scatter plot of points, optionally colored by a per-point value.

    Parameters
    ----------
    x, y : array-like
        Point coordinates.
    gexp : array-like or None
        One value per point used for coloring (e.g. gene expression). When
        None, the points are drawn in matplotlib's default color and no
        colorbar is added.
    vmax_p : float
        Percentile of `gexp` used as the upper limit of the color scale.
    title : str
        Axes title.
    s : float
        Marker size.
    cbar_label : str
        Colorbar label (only used when `gexp` is given).
    output : str
        If non-empty, the figure is handed to
        `powerplots.savefig_autodate(fig, output)`.
    cmap : str or Colormap
        Colormap (only used when `gexp` is given).
    axis_off : bool
        Hide the axes frame and ticks when True.

    Returns
    -------
    None
    """
    fig, ax = plt.subplots(figsize=(10,8))
    if gexp is not None:
        # clip the color scale at a high percentile so a few extreme values
        # do not wash out the rest of the points
        vmax = np.percentile(gexp, vmax_p)
        g = ax.scatter(x, y, c=gexp, s=s, edgecolor='none', vmax=vmax, cmap=cmap, rasterized=True)
        fig.colorbar(g, label=cbar_label, shrink=0.3)
    else:
        # no color data: `cmap` must not be passed here, otherwise matplotlib
        # warns "No data for colormapping provided via 'c'" and ignores it
        ax.scatter(x, y, s=s, edgecolor='none', rasterized=True)

    if axis_off:
        ax.axis('off')
    ax.set_title(title)
    ax.set_aspect('equal')

    if output:
        powerplots.savefig_autodate(fig, output)

    return

# visualize clusters
def plot_cluster(clsts, x, y, ux, uy, s=1, axis_off=True):
    """Plot cluster labels side by side in spatial (XY) and UMAP coordinates.

    Each point is colored by the rank of its label among the sorted unique
    labels, and the colorbar ticks show the original label values. Labels
    therefore do not have to be contiguous integers starting from 0; for
    labels 0..n-1 the result is the same as coloring by the label itself.

    Parameters
    ----------
    clsts : array-like
        One cluster label per point.
    x, y : array-like
        Spatial coordinates (left panel).
    ux, uy : array-like
        Embedding coordinates (right panel).
    s : float
        Marker size.
    axis_off : bool
        Hide the axes frame and ticks of both panels when True.

    Returns
    -------
    fig, axs
        The figure and the array of its two axes.
    """
    from matplotlib.colors import BoundaryNorm

    # codes[i] is the position of clsts[i] within the sorted unique labels
    unq_clsts, codes = np.unique(np.asarray(clsts).ravel(), return_inverse=True)
    codes = codes.ravel()
    n_unq = len(unq_clsts)

    # one discrete color bin per cluster, centered on the codes 0..n_unq-1
    cmap = plt.cm.jet
    norm = BoundaryNorm(np.arange(-0.5, n_unq, 1), cmap.N)

    fig, axs = plt.subplots(1, 2, figsize=(8*2,6))

    ax = axs[0]
    g = ax.scatter(x, y, norm=norm, cmap=cmap, c=codes, s=s, edgecolor='none')
    ax.set_title('XY (spatial distribution)')
    ax.set_aspect('equal')
    if axis_off:
        ax.axis('off')

    ax = axs[1]
    ax.scatter(ux, uy, norm=norm, cmap=cmap, c=codes, s=s, edgecolor='none')
    ax.set_title('UMAP (molecular similarity)')
    ax.set_aspect('equal')
    if axis_off:
        ax.axis('off')

    # the colorbar is attached to the right-hand panel; ticks sit on the codes
    # and are relabeled with the actual cluster labels
    cbar = fig.colorbar(g, ax=ax, label='clusters', ticks=np.arange(n_unq), shrink=0.7)
    cbar.set_ticklabels([str(c) for c in unq_clsts])
    return fig, axs

In [None]:
# Output locations. `outdatadir` is used by the save cell near the end of the notebook;
# `outdir` is presumably meant for figures (it is only created here) — confirm.
outdir = "/data/qlyu/v1/results_merfish/plots_230717"
outdatadir = "/data/qlyu/v1/results_merfish"
# IPython shell escape: create `outdir` (and any missing parents) if it does not exist.
!mkdir -p $outdir

In [None]:
# Load the input AnnData object from disk.
# `ad.read` is a deprecated alias; `ad.read_h5ad` is the supported reader for .h5ad files.
adata = ad.read_h5ad('/data/qlyu/v1/results_merfish/pos_reg1_230719.h5ad')
adata

In [None]:
# Spatial map of one gene, colored by log10(raw count + 1).
gn = 'Slc17a7'
xr, yr = adata.obs['x'], adata.obs['y']
g = np.log10(adata[:, gn].X + 1)
st_scatter(
    xr, yr,
    gexp=g,
    title=gn,
    cbar_label='log10(cnt+1)',
    axis_off=True,
)

# norm

In [None]:
# Per-cell coverage: counts summed across axis 1 of the count matrix, plus the median over cells.
cnts = adata.X
cov = cnts.sum(axis=1)
medcov = np.median(cov)

In [None]:
# Histogram of per-cell coverage; the dashed line and the text label mark the median.
fig, ax = plt.subplots()
sns.histplot(cov, ax=ax)
ax.axvline(medcov, color='k', linestyle='--')
ax.text(medcov, 0, int(medcov))
sns.despine(ax=ax)
plt.show()

In [None]:
# Equalize the total counts of every cell to `scaling`:
# divide each row by that cell's coverage, then multiply by the target total.
# NOTE(review): a cell with zero coverage would produce NaN/inf here — confirm such cells were filtered upstream.
scaling = 100
normcnts = cnts/cov.reshape(-1,1)*scaling
adata.layers['norm'] = normcnts

In [None]:
# Same spatial map as before, now colored by log10(normalized count + 1).
gn = 'Slc17a7'
xr, yr = adata.obs['x'], adata.obs['y']
g = np.log10(adata[:, gn].layers['norm'] + 1)
st_scatter(
    xr, yr,
    gexp=g,
    title=gn,
    cbar_label='log10(cnt+1)',
    axis_off=True,
)

# PCA, UMAP, clustering

In [None]:
# Reduce the normalized matrix of all cells to 20 principal components.
pca = PCA(n_components=20)
pcs = pca.fit_transform(adata.layers['norm'])
print(pcs.shape)

# 2-D UMAP embedding computed on the principal components (30 neighbors).
ucs = UMAP(n_components=2, n_neighbors=30).fit_transform(pcs)
print(ucs.shape)

In [None]:
# Store both embeddings on the AnnData object so later steps can refer to them by key.
adata.obsm['pca'] = pcs
adata.obsm['umap'] = ucs

In [None]:
# Build the 30-nearest-neighbor graph from the stored 'pca' representation (input to Leiden below).
sc.pp.neighbors(adata, n_neighbors=30, use_rep='pca')

In [None]:
# Leiden clustering at resolution r; labels are stored in adata.obs under 'leiden_r0.1'.
r = 0.1
sc.tl.leiden(adata, resolution=r, key_added=f'leiden_r{r}')

In [None]:
# Display the object summary (the fields added by the cells above should now be listed).
adata

In [None]:
# First two principal components, colored by `g` (the normalized Slc17a7 values computed in an earlier cell).
st_scatter(pcs[:,0], pcs[:,1], gexp=g)

In [None]:
# UMAP embedding, colored by the same `g` values.
st_scatter(ucs[:,0], ucs[:,1], gexp=g)

In [None]:
# First-pass Leiden clusters of all cells, shown in space and on the UMAP.
clsts = adata.obs['leiden_r0.1'].astype(int)
x, y = adata.obs['x'], adata.obs['y']
ux, uy = adata.obsm['umap'][:, 0], adata.obsm['umap'][:, 1]
plot_cluster(clsts, x, y, ux, uy, axis_off=False)

# Iteration 1: get Glut. neurons 
- pick cluster 0, keeping only cells with x > 5500

In [None]:
# Keep cells that are both at x > 5500 and in first-pass Leiden cluster '0'.
# The result is copied so that `adatasub` is a standalone AnnData rather than a view of
# `adata`; the cells below write to its .obsm/.obs, which on a view would trigger an
# implicit copy and a warning.
adatasub = adata[(adata.obs['x'] > 5500) & (adata.obs['leiden_r0.1'] == '0')].copy()
adatasub

In [None]:
# Recompute the 20 principal components on the subset only.
pca = PCA(n_components=20)
pcs = pca.fit_transform(adatasub.layers['norm'])
print(pcs.shape)

# 2-D UMAP embedding of the subset, computed on its principal components (30 neighbors).
ucs = UMAP(n_components=2, n_neighbors=30).fit_transform(pcs)
print(ucs.shape)

In [None]:
# Replace the embeddings inherited from `adata` with the ones recomputed on the subset.
adatasub.obsm['pca'] = pcs
adatasub.obsm['umap'] = ucs

In [None]:
# 30-nearest-neighbor graph on the subset's own 'pca' representation.
sc.pp.neighbors(adatasub, n_neighbors=30, use_rep='pca')

In [None]:
# Re-cluster the subset; this uses the same key 'leiden_r0.1' that the subset carried
# over from `adata`, so the earlier labels are replaced.
r = 0.1
sc.tl.leiden(adatasub, resolution=r, key_added=f'leiden_r{r}')

In [None]:
# Clusters of the Iteration 1 subset, in space and on the subset's UMAP.
clsts = adatasub.obs['leiden_r0.1'].astype(int)
x, y = adatasub.obs['x'], adatasub.obs['y']
ux, uy = adatasub.obsm['umap'][:, 0], adatasub.obsm['umap'][:, 1]
plot_cluster(clsts, x, y, ux, uy, s=2)

In [None]:
# Expression of `gn` (set in an earlier cell) on the subset: spatial view first, then UMAP view.
g = np.log10(adatasub[:, gn].layers['norm'] + 1)
for px, py in ((x, y), (ux, uy)):
    st_scatter(px, py, gexp=g, s=2, title=gn)

# Iteration 2: remove hippocampal Glut. neurons 
- pick cluster 0; ditch cluster 1

In [None]:
# Keep only cluster '0' of the Iteration 1 re-clustering, as a standalone copy.
# (Alternative that was tried: a spatial cut instead,
#  adatasub2 = adatasub[adatasub.obs['x']<4500].copy())
adatasub2 = adatasub[adatasub.obs['leiden_r0.1'] == '0'].copy()
adatasub2

In [None]:
# Recompute the 20 principal components on the Iteration 2 subset.
pca = PCA(n_components=20)
pcs = pca.fit_transform(adatasub2.layers['norm'])
print(pcs.shape)

# 2-D UMAP embedding of this subset, computed on its principal components (30 neighbors).
ucs = UMAP(n_components=2, n_neighbors=30).fit_transform(pcs)
print(ucs.shape)

In [None]:
# Replace the inherited embeddings with the ones recomputed on this subset.
adatasub2.obsm['pca'] = pcs
adatasub2.obsm['umap'] = ucs

In [None]:
# 30-nearest-neighbor graph on this subset's own 'pca' representation.
sc.pp.neighbors(adatasub2, n_neighbors=30, use_rep='pca')

In [None]:
# Leiden clustering at resolution 0.2; labels are stored under the new key 'leiden_r0.2'.
r = 0.2
sc.tl.leiden(adatasub2, resolution=r, key_added=f'leiden_r{r}')

In [None]:
# Clusters of the Iteration 2 subset; axes are kept visible so coordinates can be read off.
clsts = adatasub2.obs['leiden_r0.2'].astype(int)
x, y = adatasub2.obs['x'], adatasub2.obs['y']
ux, uy = adatasub2.obsm['umap'][:, 0], adatasub2.obsm['umap'][:, 1]
fig, axs = plot_cluster(clsts, x, y, ux, uy, s=2, axis_off=False)
# optional guide line on the spatial panel (disabled):
# axs[0].plot([2000,4500], [-3000,-1500])

# Iteration 3: remove other sub-cortical cells by anatomical location 
- those cells are hard to distinguish using global transcriptome signatures, but easy to remove based on anatomical location

In [None]:
# Keep only cluster '0' of the resolution-0.2 clustering, as a standalone copy.
adatasub3 = adatasub2[adatasub2.obs['leiden_r0.2']=='0'].copy()
adatasub3

In [None]:
# Cluster the cells by position only: the neighbor graph is built on the raw
# (x, y) coordinates instead of the expression PCs.
r = 0.02
xy = adatasub3.obs[['x', 'y']].to_numpy()
adatasub3.obsm['xy'] = xy
sc.pp.neighbors(adatasub3, n_neighbors=50, use_rep='xy')
sc.tl.leiden(adatasub3, resolution=r, key_added=f'leiden_spatial_r{r}')

In [None]:
# Spatial clusters shown in space and on the UMAP carried over from Iteration 2
# (adatasub3.obsm['umap'] was not recomputed for this subset).
clsts = adatasub3.obs[f'leiden_spatial_r{r}'].astype(int)
x, y = adatasub3.obs['x'], adatasub3.obs['y']
ux, uy = adatasub3.obsm['umap'][:, 0], adatasub3.obsm['umap'][:, 1]
plot_cluster(clsts, x, y, ux, uy, s=2, axis_off=False)

In [None]:
# Alternative selection by a hand-drawn polygon (disabled; kept for reference):
# poly = [
#     [-11000, 10000,],
#     [ -8500, 10000,],
#     [ -7000, 11700,],
#     [ -6000, 12500,],
#     [ -6000, 12000,],
#     [ -4000, 12500,],
#     [ -4000, 14500,],
#     [-11000, 14500,],
# ]
# selected = is_in_polygon(poly, xy)
# adatasub3x = adatasub3[selected]

# Active selection: keep the cells that fall in spatial clusters '0'-'3'.
spatial_key = f'leiden_spatial_r{r}'
keep_clusters = ['0', '1', '2', '3']
adatasub3x = adatasub3[adatasub3.obs[spatial_key].isin(keep_clusters)].copy()

clsts = adatasub3x.obs[spatial_key].astype(int)
x, y = adatasub3x.obs['x'], adatasub3x.obs['y']
ux, uy = adatasub3x.obsm['umap'][:, 0], adatasub3x.obsm['umap'][:, 1]
plot_cluster(clsts, x, y, ux, uy, s=2, axis_off=False)

# Iteration 4: refine the cells -- remove any remaining non-cortical-glut cells

- The remaining cells include cortical Glut. neurons from areas beyond V1. The exact V1 boundaries can be selected based on Rorb expression and other markers.

In [None]:
# Start Iteration 4 from an independent copy of the spatially selected cells.
adatasub4 = adatasub3x.copy()
adatasub4

In [None]:
# Recompute the 20 principal components on the Iteration 4 cells.
pca = PCA(n_components=20)
pcs = pca.fit_transform(adatasub4.layers['norm'])
print(pcs.shape)

# 2-D UMAP embedding of these cells, computed on their principal components (30 neighbors).
ucs = UMAP(n_components=2, n_neighbors=30).fit_transform(pcs)
print(ucs.shape)

In [None]:
# Replace the inherited embeddings with the ones recomputed on these cells.
adatasub4.obsm['pca'] = pcs
adatasub4.obsm['umap'] = ucs

In [None]:
# 30-nearest-neighbor graph on the recomputed 'pca' representation.
sc.pp.neighbors(adatasub4, n_neighbors=30, use_rep='pca')

In [None]:
# Re-cluster at resolution 0.2; this replaces the 'leiden_r0.2' labels inherited from Iteration 2.
r = 0.2
sc.tl.leiden(adatasub4, resolution=r, key_added=f'leiden_r{r}')

In [None]:
# Clusters of the Iteration 4 cells, in space and on their UMAP.
clsts = adatasub4.obs['leiden_r0.2'].astype(int)
x, y = adatasub4.obs['x'], adatasub4.obs['y']
ux, uy = adatasub4.obsm['umap'][:, 0], adatasub4.obsm['umap'][:, 1]
plot_cluster(clsts, x, y, ux, uy, s=2)

In [None]:
# Slc17a7 on the Iteration 4 cells: spatial view first, then UMAP view.
gn = 'Slc17a7'
g = np.log10(adatasub4[:, gn].layers['norm'] + 1)
for px, py in ((x, y), (ux, uy)):
    st_scatter(px, py, gexp=g, s=2, title=gn)

# Iteration 5: get the cells we now care about

In [None]:
# Iteration 5 currently keeps every Iteration 4 cell; the cluster filter at the end of
# the next line is commented out, so it has no effect.
adatasub5 = adatasub4.copy() #[adatasub4.obs['leiden_r0.2'].isin(['0', '1'])].copy()
adatasub5

In [None]:
# Recompute the 20 principal components on the final cell set.
pca = PCA(n_components=20)
pcs = pca.fit_transform(adatasub5.layers['norm'])
print(pcs.shape)

# 2-D UMAP embedding of the final cell set, computed on its principal components (30 neighbors).
ucs = UMAP(n_components=2, n_neighbors=30).fit_transform(pcs)
print(ucs.shape)

In [None]:
# Store the recomputed embeddings; these are the ones written out by the save cell.
adatasub5.obsm['pca'] = pcs
adatasub5.obsm['umap'] = ucs

In [None]:
# 30-nearest-neighbor graph on the recomputed 'pca' representation.
sc.pp.neighbors(adatasub5, n_neighbors=30, use_rep='pca')

In [None]:
# Leiden clustering at resolution 0.5; labels are stored under the new key 'leiden_r0.5'.
r = 0.5
sc.tl.leiden(adatasub5, resolution=r, key_added=f'leiden_r{r}')

In [None]:
# Final clusters in space and on the UMAP. The x, y, ux, uy defined here are reused
# by the gene-expression plots in the cells that follow.
clsts = adatasub5.obs['leiden_r0.5'].astype(int)
x, y = adatasub5.obs['x'], adatasub5.obs['y']
ux, uy = adatasub5.obsm['umap'][:, 0], adatasub5.obsm['umap'][:, 1]
plot_cluster(clsts, x, y, ux, uy, s=2)

In [None]:
# Slc17a7 on the final cell set: spatial view first, then UMAP view.
gn = 'Slc17a7'
g = np.log10(adatasub5[:, gn].layers['norm'] + 1)
for px, py in ((x, y), (ux, uy)):
    st_scatter(px, py, gexp=g, s=2, title=gn)

# save 

In [None]:
# Write the final cell set to an .h5ad file inside `outdatadir`.
output = f'{outdatadir}/pos_reg1_ctxglut_230719.h5ad'
print(output)
adatasub5.write(output)

# explore

In [None]:
# Cux2 on the final cell set: spatial view first, then UMAP view
# (x, y, ux, uy come from the Iteration 5 cluster-plot cell).
gn = 'Cux2'
g = np.log10(adatasub5[:, gn].layers['norm'] + 1)
for px, py in ((x, y), (ux, uy)):
    st_scatter(px, py, gexp=g, s=2, title=gn)

In [None]:
# Rorb on the final cell set: spatial view first, then UMAP view.
gn = 'Rorb'
g = np.log10(adatasub5[:, gn].layers['norm'] + 1)
for px, py in ((x, y), (ux, uy)):
    st_scatter(px, py, gexp=g, s=2, title=gn)

In [None]:
# Cdh13 on the final cell set, spatial view only (the UMAP view is left disabled).
gn = 'Cdh13'
g = np.log10(adatasub5[:, gn].layers['norm'] + 1)
st_scatter(x, y, gexp=g, title=gn, s=2)
# st_scatter(ux, uy, gexp=g, s=2, title=gn)

In [None]:

# One spatial expression map per gene in a short list.
gns = ['Cux2', 'Rorb', 'Whrn']

for gn in gns:
    g = np.log10(adatasub5[:, gn].layers['norm'] + 1)
    st_scatter(x, y, gexp=g, title=gn, s=2)
    plt.show()  # render each figure as soon as it is drawn

In [None]:

# One spatial expression map per gene in a longer list (order preserved).
gns = [
    'Cdh13', 'Adamts2', 'Nr4a3', 'Cntn5', 'Gabrg3', 'Grm8', 'Sorcs3',
    'Chrm2', 'Cdh12', 'Cntnap2', 'Kcnh5', 'Kcnq5', 'Ncam2', 'Gria3',
    'Rorb', 'Kcnip3', 'Baz1a', 'Rfx3', 'Trpc6', 'Egfem1', 'Igfn1',
    'Bdnf', 'Epha3', 'Kcna1', 'Whrn', 'Igsf9b', 'Mdga1',
]

for gn in gns:
    g = np.log10(adatasub5[:, gn].layers['norm'] + 1)
    st_scatter(x, y, gexp=g, title=gn, s=2)
    plt.show()  # render each figure as soon as it is drawn