written by Yodai Takei

In [1]:
# Our numerical workhorses
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tifffile as tif
import seaborn as sns
from scipy.ndimage.filters import gaussian_laplace, gaussian_filter1d, gaussian_filter

# This is to enable inline displays for the purposes of the tutorial
%matplotlib inline

# Render inline figures as PNG, including high-resolution "retina" output
%config InlineBackend.figure_formats = {'png', 'retina'}

rc = {'lines.linewidth': 2, 'axes.labelsize': 18,  'axes.titlesize': 18, 'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('white')

# Suppress future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
def image_coloredpoints2(image,x,y,color,z):
    """Stamp a 3x3 patch of ``color`` centred on (x, y) into plane ``z`` of ``image``.

    Parameters
    ----------
    image : numpy.ndarray
        Array indexed as ``image[z, x, y, ...]``. It is modified in place.
        The callers in this notebook pass a (n_z, 2048, 2048, 3) uint8 stack.
    x, y : number
        Centre of the patch along the second and third axes of ``image``.
        Converted with ``int()``, so pass already-rounded values.
    color : scalar or sequence
        Value written to every pixel of the patch, e.g. ``[255, 255, 255]``.
    z : number
        Plane index along the first axis of ``image`` (converted with ``int()``).
        It is used as a plain index, so an out-of-range value raises IndexError.

    Returns
    -------
    numpy.ndarray
        The same ``image`` object that was passed in.

    Notes
    -----
    The patch is clipped to the image extent. A centre on the border paints
    only the pixels that exist, instead of wrapping around to the opposite
    edge (negative index) or raising IndexError (index past the end).
    """
    x = int(x)
    y = int(y)
    z = int(z)

    n_rows, n_cols = image.shape[1], image.shape[2]

    # Clamp both ends of each range: a negative *stop* would otherwise be
    # interpreted by NumPy as "count from the end" and paint a huge region.
    row_lo = max(x - 1, 0)
    row_hi = min(max(x + 2, 0), n_rows)
    col_lo = max(y - 1, 0)
    col_hi = min(max(y + 2, 0), n_cols)

    # An empty slice (patch entirely outside the image) is a harmless no-op.
    image[z, row_lo:row_hi, col_lo:col_hi] = color

    return image

In [3]:
# Reference annotation table: one row per genomic bin, with the gene (if any)
# assigned to that bin. The CSV has no header row, so column names are set here.
# Alternative location of the annotation folder, kept for reference:
#ref_path = 'I:/OneDrive - California Institute of Technology/Long Cai - 1/100k/LC1-100k-000-reference-analysis/output/'
ref_path = 'I:/OneDrive - California Institute of Technology/Long Cai - 1/100k/annot/'
df_ref = pd.read_csv(
    ref_path+'mm10_25kb_bins_genes_new.csv',
    header=None,
)
df_ref.columns = [
    'name',
    'chrom',
    'start',
    'end',
    'group name',
    'gene',
]
df_ref.head()

Unnamed: 0,name,chrom,start,end,group name,gene
0,chr1-1,chr1,3000000,3025000,chr1-1,.
1,chr1-2,chr1,3025000,3050000,chr1-2,.
2,chr1-3,chr1,3050000,3075000,chr1-3,.
3,chr1-4,chr1,3075000,3100000,chr1-4,.
4,chr1-5,chr1,3100000,3125000,chr1-5,.


Example genes used for the pseudo-dot image reconstruction below.

In [6]:
# Genes whose annotated loci are rendered as pseudo-dot images below
gene_names = [
    'Cntnap5b', 'Dpp10', 'Adgrl3', 'Kcnd2', 'Cadps2',
    'Ptprd', 'Cfap54', 'Grin2b', 'Jazf1', 'Sntg1',
]

In [8]:
# Destination folder for the reconstructed pseudo-dot TIFF stacks.
# NOTE(review): machine-specific absolute path; presumably the folder must
# already exist before the cells below write into it -- confirm.
output_path = 'H:/Yodai/100k/2021-05-16-cerebellum-DNAfull-rep1/dot_reconstruction/'

In [9]:
# Render one pseudo-dot image stack per (field of view, gene):
# every decoded spot belonging to the gene's loci is stamped as a white 3x3
# patch on a blank stack, the stack is Gaussian-blurred, and it is saved as TIFF.

# plot whole z sections (inclusive range of rounded z values to keep)
z_min = 1
z_max = 49

# The gene -> loci lookup does not depend on the field of view, so do it once.
loci_by_gene = {
    gene_name: list(df_ref[df_ref['gene']==gene_name]['name'].unique())
    for gene_name in gene_names
}

for fov_id in range(3):

    df = pd.read_csv('output/LC1-100k-002-003-cerebellum-rep1-IF-intensity-per-finalpoints-pos'+str(fov_id)+'.csv')
    df = df[(df['fov']==fov_id)].reset_index(drop=True)
    df = df[['name','x','y','z','dot_int']]
    print(len(df))

    for gene_name in gene_names:
        print(gene_name)

        loci_names = loci_by_gene[gene_name]

        df_fp = df[(df['name'].isin(loci_names))].reset_index(drop=True)

        # make 8 bit blank images
        image_fp1 = np.zeros([z_max-z_min+1,2048,2048,3],dtype=np.uint8) # blank image with actual image size

        # Round all spot coordinates once and keep only spots whose rounded z
        # lies in [z_min, z_max]; NaN z values fail both comparisons and are dropped.
        xyz = np.round(df_fp[['x','y','z']].to_numpy(dtype=float))
        in_range = (xyz[:,2] >= z_min) & (xyz[:,2] <= z_max)

        # raw dots on the entire blank image based on the rounded DNA seqFISH+ spot location
        # x/y are shifted by 1 as in the original (coordinates treated as 1-based);
        # the plane index is taken relative to z_min so it always fits the stack depth.
        for x, y, z in xyz[in_range]:
            image_fp1 = image_coloredpoints2(image_fp1,y-1,x-1,[255, 255, 255],z-z_min)

        # gaussian blur of the dots with defined sigma
        # (a scalar sigma is applied along every axis of the 4-D stack)
        image_gf1 = gaussian_filter(image_fp1,sigma=1)

        tif.imwrite(output_path+'cerebellum-rep1-pos'+str(fov_id)+'-'+str(gene_name)+'.tif',image_gf1, compression='DEFLATE')

3528974
Cntnap5b
Dpp10
Adgrl3
Kcnd2
Cadps2
Ptprd
Cfap54
Grin2b
Jazf1
Sntg1
4157506
Cntnap5b
Dpp10
Adgrl3
Kcnd2
Cadps2
Ptprd
Cfap54
Grin2b
Jazf1
Sntg1
4448089
Cntnap5b
Dpp10
Adgrl3
Kcnd2
Cadps2
Ptprd
Cfap54
Grin2b
Jazf1
Sntg1


In [10]:
# Display the reference annotation table (the rendered output is truncated to the first and last rows)
df_ref

Unnamed: 0,name,chrom,start,end,group name,gene
0,chr1-1,chr1,3000000,3025000,chr1-1,.
1,chr1-2,chr1,3025000,3050000,chr1-2,.
2,chr1-3,chr1,3050000,3075000,chr1-3,.
3,chr1-4,chr1,3075000,3100000,chr1-4,.
4,chr1-5,chr1,3100000,3125000,chr1-5,.
...,...,...,...,...,...,...
121395,chrX-6575,chrX,170750000,170775000,chrX-6575,.
121396,chrX-6576,chrX,170800000,170825000,chrX-6576,.
121397,chrX-6577,chrX,170825000,170850000,chrX-6577,.
121398,chrX-6578,chrX,170850000,170875000,chrX-6578,.


In [12]:
# Render one pseudo-dot image stack per field of view for the chr6 Hox region:
# every decoded spot belonging to the listed bins is stamped as a white 3x3
# patch on a blank stack, the stack is Gaussian-blurred, and it is saved as TIFF.

# plot whole z sections (inclusive range of rounded z values to keep)
z_min = 1
z_max = 49

gene_name = 'chr6-Hox'

# two 200kb bins for chr6 Hox cluster (16 consecutive bins from the reference table)
loci_names = ['chr6-1957','chr6-1958','chr6-1959','chr6-1960','chr6-1961','chr6-1962','chr6-1963','chr6-1964']+\
             ['chr6-1965','chr6-1966', 'chr6-1967','chr6-1968', 'chr6-1969','chr6-1970','chr6-1971','chr6-1972']

for fov_id in range(3):

    df = pd.read_csv('output/LC1-100k-002-003-cerebellum-rep1-IF-intensity-per-finalpoints-pos'+str(fov_id)+'.csv')
    df = df[(df['fov']==fov_id)].reset_index(drop=True)
    df = df[['name','x','y','z','dot_int']]
    print(len(df))

    print(gene_name)

    df_fp = df[(df['name'].isin(loci_names))].reset_index(drop=True)

    # make 8 bit blank images
    image_fp1 = np.zeros([z_max-z_min+1,2048,2048,3],dtype=np.uint8) # blank image with actual image size

    # Round all spot coordinates once and keep only spots whose rounded z
    # lies in [z_min, z_max]; NaN z values fail both comparisons and are dropped.
    xyz = np.round(df_fp[['x','y','z']].to_numpy(dtype=float))
    in_range = (xyz[:,2] >= z_min) & (xyz[:,2] <= z_max)

    # raw dots on the entire blank image based on the rounded DNA seqFISH+ spot location
    # x/y are shifted by 1 as in the original (coordinates treated as 1-based);
    # the plane index is taken relative to z_min so it always fits the stack depth.
    for x, y, z in xyz[in_range]:
        image_fp1 = image_coloredpoints2(image_fp1,y-1,x-1,[255, 255, 255],z-z_min)

    # gaussian blur of the dots with defined sigma
    # (a scalar sigma is applied along every axis of the 4-D stack)
    image_gf1 = gaussian_filter(image_fp1,sigma=1)

    tif.imwrite(output_path+'cerebellum-rep1-pos'+str(fov_id)+'-'+str(gene_name)+'.tif',image_gf1, compression='DEFLATE')

3528974
chr6-Hox
4157506
chr6-Hox
4448089
chr6-Hox


These pseudo-dot images can then be loaded in ImageJ, where the file format can be changed as needed.