In [1]:
%matplotlib inline
%load_ext memory_profiler

import pandas as pd
import os
import glob
import pickle
import phate
import scprep
import meld
import time
import graphtools as gt
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import scanpy as sc
from sklearn.decomposition import PCA
import math
from scipy import sparse
import sys


# settings
plt.rc('font', size = 9)
plt.rc('font', family='sans serif')
plt.rcParams['pdf.fonttype']=42
plt.rcParams['ps.fonttype']=42
plt.rcParams['text.usetex']=False
plt.rcParams['legend.frameon']=False
plt.rcParams['axes.grid']=False
plt.rcParams['legend.markerscale']=0.5
sc.set_figure_params(dpi=300,dpi_save=600,
                     frameon=False,
                     fontsize=9)
plt.rcParams['savefig.dpi']=600
sc.settings.verbosity=2
sc._settings.ScanpyConfig.n_jobs=-1
sns.set_style("ticks")

In [2]:
# fps
dfp = '/home/cl2292/project/SCA1_snRNAseq/Mouse/data/'
pfp = '/home/cl2292/project/SCA1_snRNAseq/Mouse/results/'
sc.settings.figdir = pfp

In [3]:
# Load Data

if True :
    start = time.time()
    backed=None # None if not
    fname='211105_mouse_PC.h5ad' # for full, can maybe get away with ~300G
    %memit adata = sc.read_h5ad(os.path.join(dfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))


peak memory: 805.37 MiB, increment: 610.45 MiB
loaded @211105.09:59:45
took 0.85-s to load data


In [3]:
# Load Data

if True :
    start = time.time()
    backed=None # None if not
    fname='220328_WT_sampling.h5ad' # for full, can maybe get away with ~300G
    %memit wt = sc.read_h5ad(os.path.join(dfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))


if True :
    start = time.time()
    backed=None # None if not
    fname='220328_SCA1_sampling.h5ad' # for full, can maybe get away with ~300G
    %memit mut = sc.read_h5ad(os.path.join(dfp,fname),backed=backed)
    print('loaded @'+datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('took {:.2f}-s to load data'.format(time.time()-start))


peak memory: 22185.33 MiB, increment: 21986.95 MiB
loaded @220329.16:16:04
took 22.37-s to load data
peak memory: 44301.27 MiB, increment: 22115.92 MiB
loaded @220329.16:16:27
took 22.57-s to load data


In [4]:
# subset by cell type, i 
i = 'PC'
wt = wt[wt.obs['sub4']==i, :]
wt.obs['value'] = 0
mut = mut[mut.obs['sub4']==i, :]
mut.obs['value'] = 0

Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.


In [7]:
    # get list of genes
goi = pd.read_csv(os.path.join(pfp, '220329 PC genes for heatmap.csv'), header=None)[0].to_list()


In [8]:
X = pd.DataFrame(wt[:,goi].layers['imputed'], index=wt.obs.index.to_list(), columns=goi, dtype=float)
X['genotime'] = wt.obs['genotype'].astype(str)  + '_' + wt.obs['timepoint'].astype(str) 

dt = pd.DataFrame(mut[:,goi].layers['imputed'], index=mut.obs.index.to_list(), columns=goi, dtype=float)
dt['genotime'] = mut.obs['genotype'].astype(str) + '_' + mut.obs['timepoint'].astype(str) 

X = X.append(dt) 
X = X.T

In [41]:
adata.obs['sub3'].value_counts()

Z+PC    1334
Z-PC     840
Name: sub3, dtype: int64

In [57]:
X = pd.DataFrame(adata[:,goi].layers['imputed'], index=adata.obs.index.to_list(), columns=goi, dtype=float)
#X['genotime'] = adata.obs['genotype'].astype(str)  + '_' + adata.obs['timepoint'].astype(str) 
#X['genotime_zpc'] = adata.obs['genotype'].astype(str)+ '_' +adata.obs['timepoint'].astype(str)+ '_' +adata.obs['sub3'].astype(str)
X['genozpc'] = adata.obs['genotype'].astype(str)+'_' +adata.obs['sub3'].astype(str) 
X = X.T

In [9]:

# aesthetics
cmap_genotime = {'WT_5wk':sns.cubehelix_palette(8,start=.5,rot=-.75)[1], 
              'SCA1_5wk':sns.cubehelix_palette(8)[1], 
              'WT_12wk':sns.cubehelix_palette(8,start=.5,rot=-.75)[2], 
              'SCA1_12wk':sns.cubehelix_palette(8)[2], 
              'WT_18wk':sns.cubehelix_palette(8,start=.5,rot=-.75)[3], 
              'SCA1_18wk':sns.cubehelix_palette(8)[3], 
              'WT_24wk':sns.cubehelix_palette(8,start=.5,rot=-.75)[4], 
              'SCA1_24wk':sns.cubehelix_palette(8)[4], 
              'WT_30wk':sns.cubehelix_palette(8,start=.5,rot=-.75)[5], 
              'SCA1_30wk':sns.cubehelix_palette(8)[5]}

cmap_genotime_zpc = {'WT_5wk_Z+PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[1], 
                     'WT_5wk_Z-PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[1],
                     'WT_12wk_Z+PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[2],
                     'WT_12wk_Z-PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[2],
                     'WT_18wk_Z+PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[3],
                     'WT_18wk_Z-PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[3],
                     'WT_24wk_Z+PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[4],
                     'WT_24wk_Z-PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[4],
                     'WT_30wk_Z+PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[5],
                     'WT_30wk_Z-PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[5],
                     'SCA1_5wk_Z+PC':sns.cubehelix_palette(8)[1],
                     'SCA1_5wk_Z-PC':sns.cubehelix_palette(8)[1],
                     'SCA1_12wk_Z+PC':sns.cubehelix_palette(8)[2],
                     'SCA1_12wk_Z-PC':sns.cubehelix_palette(8)[2],
                     'SCA1_18wk_Z+PC':sns.cubehelix_palette(8)[3],
                     'SCA1_18wk_Z-PC':sns.cubehelix_palette(8)[3],
                     'SCA1_24wk_Z+PC':sns.cubehelix_palette(8)[4],
                     'SCA1_24wk_Z-PC':sns.cubehelix_palette(8)[4],
                     'SCA1_30wk_Z+PC':sns.cubehelix_palette(8)[5],
                     'SCA1_30wk_Z-PC':sns.cubehelix_palette(8)[5],
                     }

cmap_genozpc = {'WT_Z+PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[1],
                'WT_Z-PC':sns.cubehelix_palette(8,start=.5,rot=-.75)[5],
                'SCA1_Z+PC':sns.cubehelix_palette(8,start=.4,rot=.4)[1],
                'SCA1_Z-PC':sns.cubehelix_palette(8,start=.4,rot=.4)[5]
                }

In [10]:
Y = X.T
y = pd.DataFrame(Y.loc[:, [i for i in Y.columns.to_list() if 'genotime' not in i]], dtype=float)
y['genotime'] = Y['genotime']
y = y.groupby('genotime').mean().reset_index().T
y.columns = y.iloc[0, :]
y = y.drop(y.index[0])
del Y

In [46]:
Y = X.T
y = pd.DataFrame(Y.loc[:, [i for i in Y.columns.to_list() if 'genotime_zpc' not in i]], dtype=float)
y['genotime_zpc'] = Y['genotime_zpc']
y = y.groupby('genotime_zpc').mean().reset_index().T
y.columns = y.iloc[0, :]
y = y.drop(y.index[0])
del Y

In [59]:
Y = X.T
y = pd.DataFrame(Y.loc[:, [i for i in Y.columns.to_list() if 'genozpc' not in i]], dtype=float)
y['genozpc'] = Y['genozpc']
y = y.groupby('genozpc').mean().reset_index().T
y.columns = y.iloc[0, :]
y = y.drop(y.index[0])
del Y

In [33]:
col_col = X.loc[X.index=='genotime', :].T['genotime'].map(cmap_genotime).to_list()
X = X.loc[~(X.index=='genotime'), :]

In [34]:

X.loc[:, :] = np.asarray(X.loc[:, :], dtype='float')

In [66]:

y.loc[:, :] = np.asarray(y.loc[:, :], dtype='float')

In [11]:
pg = sns.clustermap(y, col_colors=list(map(cmap_genotime.get, y.columns.to_list())), cmap='RdYlBu_r', xticklabels=False, yticklabels=False,
                    standard_scale=0)
pg.ax_heatmap.set_xlabel('')
pg.savefig(os.path.join(pfp, '220329_heatmap_dge_PC_genotime.pdf'))



TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

AttributeError: 'NoneType' object has no attribute 'reshape'

<Figure size 3000x3000 with 5 Axes>