In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

import anndata as ad
import scanpy as sc

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from umap import UMAP

from scroutines import powerplots
import time

import importlib
importlib.reload(powerplots)
from scroutines import basicu
from scroutines.miscu import is_in_polygon

import utils_merfish

In [2]:
# Fix NumPy's global RNG so the random row shuffles used for plotting are repeatable.
np.random.seed(0)

# Mapping of sample name -> data directory (relative), provided by the project module.
from merfish_datasets import merfish_datasets
directories = merfish_datasets
print(directories)

{'P14NR_ant': 'merfish_06142023/ant/region0', 'P28NR_ant': 'merfish_06142023/ant/region1', 'P14NR_pos': 'merfish_06142023/pos/region0', 'P28NR_pos': 'merfish_06142023/pos/region1', 'P21NR_ant': 'merfish_20231114/region0', 'P21DR_ant': 'merfish_20231114/region2', 'P28DR_ant': 'merfish_20231114/region1', 'P21NR_pos': 'merfish_20231120/region0', 'P21DR_pos': 'merfish_20231120/region1', 'P28DR_pos': 'merfish_20231120/region2'}


In [3]:
# Sample to process; must be a key of `directories`.
name = 'P28DR_pos'
dirc = directories[name]

# Root of the MERFISH data tree. Defined once (it was previously duplicated in
# two literals) and overridable through the MERFISH_DATA_ROOT environment
# variable; the default reproduces the original absolute paths exactly.
data_root = os.environ.get(
    'MERFISH_DATA_ROOT',
    '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/merfish',
)

# Per-sample raw directory (kept with a trailing slash, since the file names
# below are appended by plain string concatenation).
full_dirc = f'{data_root}/{dirc}/'
# Organized AnnData file for this sample (input).
f_main    = f'{data_root}/organized/{name}_ctxglut_240411.h5ad'
# Full table of detected transcripts (input).
f1   = full_dirc + 'detected_transcripts.csv'
# f2   = full_dirc + 'cell_metadata.csv'
# Subset of transcripts written by the last cell of this notebook (output).
fout = full_dirc + 'detected_transcripts_v1l23.csv'

In [4]:
# Load the organized AnnData for this sample.
# `ad.read` is a deprecated alias of `ad.read_h5ad` (and is gone from newer
# anndata releases), so call the explicit reader.
adata = ad.read_h5ad(f_main)

# Left / right / depth limits stored with the dataset; keep only the cells whose
# `width` lies strictly between the left and right limits and whose `depth` is
# strictly below the depth limit.
bound_l, bound_r, bound_d = adata.uns['bound_lrd']
cond = np.all([
    adata.obs['width'] > bound_l,
    adata.obs['width'] < bound_r,
    adata.obs['depth'] < bound_d,
], axis=0)
adatasub = adata[cond]  # a view of `adata`, not a copy

# Rotation and reference line stored with the dataset; reused further down to
# put the transcripts into the same width/depth coordinates as the cells.
rotation_angle = adata.uns['rotation']
ref_line = np.array(adata.uns['ref_line'])

adatasub

View of AnnData object with n_obs × n_vars = 2467 × 500
    obs: 'EntityID', 'fov', 'volume', 'center_x', 'center_y', 'min_x', 'min_y', 'max_x', 'max_y', 'anisotropy', 'transcript_count', 'perimeter_area_ratio', 'solidity', 'DreO_raw', 'DreO_high_pass', 'Cre_raw', 'Cre_high_pass', 'DAPI_raw', 'DAPI_high_pass', 'FlpO_raw', 'FlpO_high_pass', 'Pvalb_raw', 'Pvalb_high_pass', 'Sst_raw', 'Sst_high_pass', 'Fth1_raw', 'Fth1_high_pass', 'PolyT_raw', 'PolyT_high_pass', 'gncov', 'gnnum', 'fpcov', 'x', 'y', 'depth', 'width', 'inside_v1l23'
    uns: 'bound_lrd', 'ref_line', 'rotation'
    obsm: 'blanks', 'pca'
    layers: 'norm'

In [5]:
# Sorted, de-duplicated FOV ids covered by the selected cells
fovs = np.unique(adatasub.obs['fov'])
(fovs.shape, fovs)

((31,),
 array([1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246,
        1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1272,
        1273, 1274, 1275, 1276, 1277, 1278, 1279, 1302, 1303]))

# get transcripts in V1 L2/3

In [6]:
# Go over all transcripts - select only those that are within these FOVs.
# The file is streamed in chunks of 1,000,000 rows and only the matching rows
# of each chunk are kept.
dfsubs = []

# The chunked reader holds an open file handle; the context manager makes sure
# it is closed when the loop finishes or is interrupted.
with pd.read_csv(f1, chunksize=1000000) as chunks:
    ti = time.time()
    for i, chunk in enumerate(chunks):
        # progress: chunk index and seconds elapsed since the scan started
        print(i, time.time()-ti)
        dfsub = chunk[chunk['fov'].isin(fovs)]
        if len(dfsub) > 0:
            # number of transcripts kept from this chunk
            print(len(dfsub))
            dfsubs.append(dfsub)

FileNotFoundError: [Errno 2] No such file or directory: '/u/home/f/f7xiesnm/project-zipursky/v1-bb/v1/data/merfish/merfish_20231120/region2/detected_transcripts.csv'

In [None]:
# Stack the per-chunk selections row-wise into one transcript table
# (original row labels from the CSV are preserved).
df_v1l23 = pd.concat(dfsubs, axis=0)
df_v1l23

In [None]:
# Sanity check: scatter the selected transcripts of a few genes in the raw
# global coordinates.
query_genes = ['Cdh13', 'Sorcs3', 'Chrm2']

abc = df_v1l23[df_v1l23['gene'].isin(query_genes)]

fig, ax = plt.subplots()
# Shuffle the rows (sample 100% without replacement) so the drawing order,
# and therefore which gene ends up on top, is random.
abc_shuffled = abc.sample(frac=1, replace=False)
sns.scatterplot(
    data=abc_shuffled,
    x='global_x',
    y='global_y',
    hue='gene',
    hue_order=query_genes,
    s=4,
    edgecolor='none',
    ax=ax,
)
ax.set_aspect('equal')
ax.axis('off')
ax.legend(bbox_to_anchor=(1,1))
ax.invert_yaxis()

# rotate, calculate depth and width, and visualize

In [None]:
# reference line
# Wrap the reference-line coordinates loaded from `adata.uns['ref_line']` in the
# project's RefLineSegs helper; its `ndist_to_qps` / `tdist_to_qps` methods are
# used below to derive the `depth` and `width` of each transcript.
# NOTE(review): presumably these are the normal and tangential distances of the
# query points to the line - confirm in utils_merfish.
ref_line_obj = utils_merfish.RefLineSegs(ref_line)

In [None]:
%%time
x = df_v1l23['global_x'].values
y = df_v1l23['global_y'].values
xr, yr = utils_merfish.rot2d(x, y, rotation_angle)
XY = np.vstack([xr, yr]).T 

df_v1l23['x'] = xr
df_v1l23['y'] = yr
df_v1l23['depth'] = ref_line_obj.ndist_to_qps(XY)
df_v1l23['width'] = ref_line_obj.tdist_to_qps(XY)

cond_spots = np.all([
    df_v1l23['width'] > bound_l, 
    df_v1l23['width'] < bound_r, 
    df_v1l23['depth'] < bound_d, 
], axis=0)

df_v1l23 = df_v1l23[cond_spots]

In [None]:
# Re-select the query genes from the current transcript table
is_query_gene = df_v1l23['gene'].isin(query_genes)
abc = df_v1l23[is_query_gene]

In [None]:
# Same scatter as the earlier check, but in the width/depth coordinates.
fig, ax = plt.subplots(1,1,figsize=(10,3))
# Shuffle the rows so the drawing order of the genes is random.
abc_shuffled = abc.sample(frac=1, replace=False)
sns.scatterplot(
    data=abc_shuffled,
    x='width',
    y='depth',
    hue='gene',
    hue_order=query_genes,
    s=4,
    edgecolor='none',
    ax=ax,
)
ax.set_aspect('equal')
ax.axis('off')
ax.legend(bbox_to_anchor=(1,1))
# flip the vertical axis so that larger depth values are drawn lower
ax.invert_yaxis()

# Save results

In [None]:
# Display the transcript table that the next cell writes to disk.
df_v1l23

In [None]:
df_v1l23.to_csv(fout, header=True, index=False)
!head $fout