In [1]:
import os

import blitzgsea as blitz
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
%matplotlib inline
#plt.rcParams['figure.figsize'] = [6, 4]

### Load DEGs

In [5]:
# Target gene lists: column 'x' of each CSV, kept as plain Python lists.
genesA = list(pd.read_csv('~/data/CRISPRa_gene_list.csv')['x'])
genesI = list(pd.read_csv('~/data/CRISPRi_gene_list.csv')['x'])

In [7]:
# Load one DEG table per target gene, keyed by gene name.
# Each table is read from '<gene>.csv' inside the matching modality folder.
CRISPRa_DEGs = {
    gene: pd.read_csv('~/data/DEGs/CRISPRa/' + gene + '.csv')
    for gene in genesA
}

CRISPRi_DEGs = {
    gene: pd.read_csv('~/data/DEGs/CRISPRi/' + gene + '.csv')
    for gene in genesI
}

In [8]:
# Display the gene names for which a CRISPRa DEG table was loaded.
CRISPRa_DEGs.keys()

In [9]:
# Display the gene names for which a CRISPRi DEG table was loaded.
CRISPRi_DEGs.keys()

In [11]:
# Add a signed significance score ("Zscore") to every DEG table:
# sign(lfc) * -log10(adj_pval).
# NOTE(review): an adj_pval of exactly 0 produces +/-inf here (and NaN when
# lfc is also 0) - confirm whether the upstream tables can contain zeros.
for gene_list, deg_tables in ((genesA, CRISPRa_DEGs), (genesI, CRISPRi_DEGs)):
    for gene in gene_list:
        deg_table = deg_tables[gene]
        direction = np.sign(deg_table['lfc'])
        deg_table['Zscore'] = direction * -np.log10(deg_table['adj_pval'])

### Get number of DEGs per perturbation

In [13]:
# Count, for every target gene, the rows of its DEG table with adj_pval <= 0.05.
# The counts are computed in one pass and the summary frames built directly.
CRISPRa_number_of_DEGs = pd.DataFrame({
    'gene': genesA,
    'num_degs': np.array(
        [(CRISPRa_DEGs[gene]['adj_pval'] <= 0.05).sum() for gene in genesA],
        dtype=np.int64,
    ),
})

CRISPRi_number_of_DEGs = pd.DataFrame({
    'gene': genesI,
    'num_degs': np.array(
        [(CRISPRi_DEGs[gene]['adj_pval'] <= 0.05).sum() for gene in genesI],
        dtype=np.int64,
    ),
})

In [14]:
# Keep only genes with at least one significant DEG, largest counts first.
CRISPRa_number_of_DEGs = (
    CRISPRa_number_of_DEGs
    .query('num_degs != 0')
    .sort_values(by='num_degs', ascending=False)
)

In [15]:
# Bar chart of the DEG count per CRISPRa target gene (one bar per gene).
fig, ax = plt.subplots(figsize=(12, 3))
ax.bar(CRISPRa_number_of_DEGs['gene'], CRISPRa_number_of_DEGs['num_degs'], width=1)
ax.tick_params(axis='x', labelrotation=90)
plt.xticks(fontsize=4)
ax.set_title('Number DEGs for CRISPRa')

In [16]:
# Keep only genes with at least one significant DEG, largest counts first.
CRISPRi_number_of_DEGs = (
    CRISPRi_number_of_DEGs
    .query('num_degs != 0')
    .sort_values(by='num_degs', ascending=False)
)

In [17]:
# Bar chart of the DEG count per CRISPRi target gene (one bar per gene).
fig, ax = plt.subplots(figsize=(12, 3))
ax.bar(CRISPRi_number_of_DEGs['gene'], CRISPRi_number_of_DEGs['num_degs'], width=1)
ax.tick_params(axis='x', labelrotation=90)
plt.xticks(fontsize=4)
ax.set_title('Number DEGs for CRISPRi')

### Create square z score matrix

In [18]:
# Build the gene x perturbation Zscore matrix for CRISPRa.
#   rows    : genes in `genesA` (index named 'gene', same order as the list)
#   columns : one per perturbed gene, in `genesA` order
#   values  : Zscore of the row gene in the column gene's DEG table, NaN when
#             the row gene is absent from that table
# The frame is assembled in a single construction instead of one merge per
# gene, which avoids re-copying the growing frame on every iteration and the
# in-place rename on a `.loc` slice.
# Only the first row per gene name is kept, so a repeated name in a DEG table
# cannot duplicate rows and break the square layout.
CRISPRa_matrix = pd.DataFrame(
    {
        gene: (
            CRISPRa_DEGs[gene]
            .loc[CRISPRa_DEGs[gene]['name'].isin(genesA), ['name', 'Zscore']]
            .drop_duplicates(subset='name')
            .set_index('name')['Zscore']
        )
        for gene in genesA
    },
    index=pd.Index(genesA, name='gene'),
)

In [19]:
# Row labels (genes) of the CRISPRa matrix that contain at least one NaN.
nan_genes = list(CRISPRa_matrix.index[CRISPRa_matrix.isna().any(axis=1)])

In [20]:
# Drop every row containing a NaN, then drop the columns of those same genes,
# so rows and columns keep describing the same gene set.
# Chained (non-inplace) calls avoid mutating the frame returned by `dropna()`,
# which can raise SettingWithCopyWarning on older pandas.
CRISPRa_matrix_nona = CRISPRa_matrix.dropna().drop(columns=nan_genes)

In [21]:
# (rows, columns) of the NaN-free CRISPRa matrix.
CRISPRa_matrix_nona.shape

In [19]:
# Heatmap of the NaN-free CRISPRa score matrix; colour scale clipped to [0, 3],
# tick labels hidden.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    CRISPRa_matrix_nona,
    cmap='YlGnBu',
    vmin=0,
    vmax=3,
    xticklabels=False,
    yticklabels=False,
    ax=ax,
)
ax.set_xlabel('sgRNA', fontsize=14)
ax.set_ylabel('Zscore', fontsize=14)

In [22]:
# Build the gene x perturbation Zscore matrix for CRISPRi.
#   rows    : genes in `genesI` (index named 'gene', same order as the list)
#   columns : one per perturbed gene, in `genesI` order
#   values  : Zscore of the row gene in the column gene's DEG table, NaN when
#             the row gene is absent from that table
# The frame is assembled in a single construction instead of one merge per
# gene, which avoids re-copying the growing frame on every iteration and the
# in-place rename on a `.loc` slice.
# Only the first row per gene name is kept, so a repeated name in a DEG table
# cannot duplicate rows and break the square layout.
CRISPRi_matrix = pd.DataFrame(
    {
        gene: (
            CRISPRi_DEGs[gene]
            .loc[CRISPRi_DEGs[gene]['name'].isin(genesI), ['name', 'Zscore']]
            .drop_duplicates(subset='name')
            .set_index('name')['Zscore']
        )
        for gene in genesI
    },
    index=pd.Index(genesI, name='gene'),
)

In [23]:
# Row labels (genes) of the CRISPRi matrix that contain at least one NaN.
# Note: this rebinds the same `nan_genes` name used for the CRISPRa matrix.
nan_genes = list(CRISPRi_matrix.index[CRISPRi_matrix.isna().any(axis=1)])

In [24]:
# Drop every row containing a NaN, then drop the columns of those same genes,
# so rows and columns keep describing the same gene set.
# Chained (non-inplace) calls avoid mutating the frame returned by `dropna()`,
# which can raise SettingWithCopyWarning on older pandas.
CRISPRi_matrix_nona = CRISPRi_matrix.dropna().drop(columns=nan_genes)

In [25]:
# (rows, columns) of the NaN-free CRISPRi matrix.
CRISPRi_matrix_nona.shape

In [24]:
# Heatmap of the NaN-free CRISPRi score matrix; colour scale clipped to [-3, 0],
# tick labels hidden.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    CRISPRi_matrix_nona,
    cmap='YlGnBu',
    vmin=-3,
    vmax=0,
    xticklabels=False,
    yticklabels=False,
    ax=ax,
)
ax.set_xlabel('sgRNA', fontsize=14)
ax.set_ylabel('Zscore', fontsize=14)

### Order by expression in NT cells

In [26]:
from scipy.io import mmread
from scipy.sparse import csc_matrix
import anndata as ad
import scanpy as sc

In [27]:
# Load the CRISPRi export: count matrix (Matrix Market), gene table and
# per-cell metadata.
# `scipy.io.mmread` opens the path as given and does not expand '~', so the
# home directory is resolved explicitly and reused for all three files.
CRISPRi_seurat_dir = os.path.expanduser("~/data/seurat")

CRISPRi_counts = mmread(os.path.join(CRISPRi_seurat_dir, "CRISPRi_seurat5.mtx"))
# Headerless TSV; the first column becomes the index.
CRISPRi_var_df = pd.read_csv(os.path.join(CRISPRi_seurat_dir, "CRISPRi_seurat5_genes.tsv"),
                             sep = '\t', index_col = 0, header = None)
# CSV with a header row; the first column becomes the index.
CRISPRi_obs_df = pd.read_csv(os.path.join(CRISPRi_seurat_dir, "CRISPRi_seurat5_metadata.csv"),
                             index_col = 0)

In [27]:
# Assemble the AnnData object: the count matrix is transposed and stored as
# CSC, with the metadata table as `obs` and the gene table as `var`.
CRISPRi_adata = ad.AnnData(
    X=csc_matrix(CRISPRi_counts.T),
    var=CRISPRi_var_df,
    obs=CRISPRi_obs_df,
)

In [28]:
# Observation names of the cells whose 'Gene_Targeted' value is 'NT'.
NT_cells_CRISPRi = CRISPRi_adata.obs_names[
    CRISPRi_adata.obs['Gene_Targeted'].eq('NT')
]

In [29]:
# Expression matrix restricted to the NT cells (all genes kept).
NT_expression_CRISPRi = CRISPRi_adata[NT_cells_CRISPRi].X

In [30]:
# Dense cells x genes frame of NT-cell expression, labelled with the cell
# names (rows) and the AnnData variable names (columns).
CRISPRi_NT_df = pd.DataFrame(
    data=NT_expression_CRISPRi.toarray(),
    columns=CRISPRi_adata.var_names,
    index=NT_cells_CRISPRi,
)

In [31]:
# (rows, columns) of the dense NT-cell expression frame.
CRISPRi_NT_df.shape

In [32]:
# Keep only the expression columns for genes present in the NaN-free matrix.
CRISPRi_NT_df = CRISPRi_NT_df.loc[:, CRISPRi_matrix_nona.columns]

In [33]:
# Mean expression of each gene across the NT cells (column-wise mean).
CRISPRi_NT_avgexp = CRISPRi_NT_df.mean(axis=0)

In [34]:
# Gene names sorted from highest to lowest mean NT expression.
CRISPRi_genes_ordered = list(
    CRISPRi_NT_avgexp.sort_values(ascending=False).index
)

In [35]:
# Reorder rows, then columns, of the NaN-free matrix by NT expression rank.
CRISPRi_matrix_nona_reordered = (
    CRISPRi_matrix_nona
    .reindex(CRISPRi_genes_ordered)
    .loc[:, CRISPRi_genes_ordered]
)

In [36]:
# Heatmap of the expression-ordered CRISPRi matrix; colour scale clipped to
# [-3, 0], tick labels hidden.
# The figure is created before `sns.set` and the axes after it, matching the
# original order in which the global seaborn settings take effect.
fig = plt.figure(figsize=(10, 8))
sns.set(font_scale=0.5)
ax = fig.add_subplot(111)
sns.heatmap(
    CRISPRi_matrix_nona_reordered,
    cmap='YlGnBu',
    vmin=-3,
    vmax=0,
    xticklabels=False,
    yticklabels=False,
    ax=ax,
)
ax.set_xlabel('CRISPRi gene', fontsize=14)
ax.set_ylabel('DEGs', fontsize=14)

In [37]:
# Load the CRISPRa export: count matrix (Matrix Market), gene table and
# per-cell metadata.
# `scipy.io.mmread` opens the path as given and does not expand '~', so the
# home directory is resolved explicitly and reused for all three files.
CRISPRa_seurat_dir = os.path.expanduser("~/data/seurat")

CRISPRa_counts = mmread(os.path.join(CRISPRa_seurat_dir, "CRISPRa_seurat5.mtx"))
# Headerless TSV; the first column becomes the index.
CRISPRa_var_df = pd.read_csv(os.path.join(CRISPRa_seurat_dir, "CRISPRa_seurat5_genes.tsv"),
                             sep = '\t', index_col = 0, header = None)
# CSV with a header row; the first column becomes the index.
CRISPRa_obs_df = pd.read_csv(os.path.join(CRISPRa_seurat_dir, "CRISPRa_seurat5_metadata.csv"),
                             index_col = 0)

In [38]:
# Assemble the AnnData object: the count matrix is transposed and stored as
# CSC, with the metadata table as `obs` and the gene table as `var`.
CRISPRa_adata = ad.AnnData(
    X=csc_matrix(CRISPRa_counts.T),
    var=CRISPRa_var_df,
    obs=CRISPRa_obs_df,
)

In [39]:
# Observation names of the cells whose 'Gene_Targeted' value is 'NT'.
NT_cells_CRISPRa = CRISPRa_adata.obs_names[
    CRISPRa_adata.obs['Gene_Targeted'].eq('NT')
]

In [40]:
# Expression matrix restricted to the NT cells (all genes kept).
NT_expression_CRISPRa = CRISPRa_adata[NT_cells_CRISPRa].X

In [41]:
# Dense cells x genes frame of NT-cell expression, labelled with the cell
# names (rows) and the AnnData variable names (columns).
CRISPRa_NT_df = pd.DataFrame(
    data=NT_expression_CRISPRa.toarray(),
    columns=CRISPRa_adata.var_names,
    index=NT_cells_CRISPRa,
)

In [42]:
# (rows, columns) of the dense NT-cell expression frame.
CRISPRa_NT_df.shape

In [43]:
# Keep only the expression columns for genes present in the NaN-free matrix.
CRISPRa_NT_df = CRISPRa_NT_df.loc[:, CRISPRa_matrix_nona.columns]

In [44]:
# Mean expression of each gene across the NT cells (column-wise mean).
CRISPRa_NT_avgexp = CRISPRa_NT_df.mean(axis=0)

In [45]:
# Gene names sorted from highest to lowest mean NT expression.
CRISPRa_genes_ordered = list(
    CRISPRa_NT_avgexp.sort_values(ascending=False).index
)

In [46]:
# Reorder rows, then columns, of the NaN-free matrix by NT expression rank.
CRISPRa_matrix_nona_reordered = (
    CRISPRa_matrix_nona
    .reindex(CRISPRa_genes_ordered)
    .loc[:, CRISPRa_genes_ordered]
)

In [47]:
# Heatmap of the expression-ordered CRISPRa matrix; colour scale clipped to
# [0, 3], tick labels hidden.
# The figure is created before `sns.set` and the axes after it, matching the
# original order in which the global seaborn settings take effect.
fig = plt.figure(figsize=(10, 8))
sns.set(font_scale=0.5)
ax = fig.add_subplot(111)
sns.heatmap(
    CRISPRa_matrix_nona_reordered,
    cmap='YlGnBu',
    vmin=0,
    vmax=3,
    xticklabels=False,
    yticklabels=False,
    ax=ax,
)
ax.set_xlabel('CRISPRa gene', fontsize=14)
ax.set_ylabel('DEGs', fontsize=14)

### Use LFC for QC plots

In [28]:
# Build the gene x perturbation log-fold-change matrix for CRISPRi.
#   rows    : genes in `genesI` (index named 'gene', same order as the list)
#   columns : one per perturbed gene, in `genesI` order
#   values  : 'lfc' of the row gene in the column gene's DEG table, NaN when
#             the row gene is absent from that table
# Rows and columns share the same order, so the diagonal holds each gene's lfc
# in its own perturbation. The frame is assembled in a single construction
# instead of one merge per gene, which avoids re-copying the growing frame and
# the in-place rename on a `.loc` slice.
# Only the first row per gene name is kept, so a repeated name in a DEG table
# cannot duplicate rows and shift the diagonal.
CRISPRi_matrix_lfc = pd.DataFrame(
    {
        gene: (
            CRISPRi_DEGs[gene]
            .loc[CRISPRi_DEGs[gene]['name'].isin(genesI), ['name', 'lfc']]
            .drop_duplicates(subset='name')
            .set_index('name')['lfc']
        )
        for gene in genesI
    },
    index=pd.Index(genesI, name='gene'),
)

In [29]:
# Build the gene x perturbation log-fold-change matrix for CRISPRa.
#   rows    : genes in `genesA` (index named 'gene', same order as the list)
#   columns : one per perturbed gene, in `genesA` order
#   values  : 'lfc' of the row gene in the column gene's DEG table, NaN when
#             the row gene is absent from that table
# Rows and columns share the same order, so the diagonal holds each gene's lfc
# in its own perturbation. The frame is assembled in a single construction
# instead of one merge per gene, which avoids re-copying the growing frame and
# the in-place rename on a `.loc` slice.
# Only the first row per gene name is kept, so a repeated name in a DEG table
# cannot duplicate rows and shift the diagonal.
CRISPRa_matrix_lfc = pd.DataFrame(
    {
        gene: (
            CRISPRa_DEGs[gene]
            .loc[CRISPRa_DEGs[gene]['name'].isin(genesA), ['name', 'lfc']]
            .drop_duplicates(subset='name')
            .set_index('name')['lfc']
        )
        for gene in genesA
    },
    index=pd.Index(genesA, name='gene'),
)

In [30]:
def _non_nan_diagonal(matrix):
    """Return the diagonal of a DataFrame's values as a 1-D array, NaNs removed."""
    diagonal = np.array(np.diagonal(matrix.values))
    return diagonal[np.logical_not(np.isnan(diagonal))]


# Diagonal entries of each lfc matrix (row gene == column gene), NaNs removed.
CRISPRi_lfc_decreases = _non_nan_diagonal(CRISPRi_matrix_lfc)
CRISPRa_lfc_decreases = _non_nan_diagonal(CRISPRa_matrix_lfc)

In [31]:
# Median CRISPRa diagonal lfc mapped through 2**x - 1
# (presumably lfc is log2-scaled, making this a fractional change - confirm).
np.power(2, np.median(CRISPRa_lfc_decreases)) - 1

In [32]:
# Median CRISPRi diagonal lfc mapped through 2**x - 1
# (presumably lfc is log2-scaled, making this a fractional change - confirm).
np.power(2, np.median(CRISPRi_lfc_decreases)) - 1

In [36]:
# Median diagonal lfc per modality.
# Computed directly from the NaN-filtered arrays rather than from
# `combined_data`, which is only created in a later cell; this cell therefore
# works when the notebook is run top to bottom on a fresh kernel.
medians = pd.Series({
    'CRISPRi': np.median(CRISPRi_lfc_decreases),
    'CRISPRa': np.median(CRISPRa_lfc_decreases),
})
medians

In [33]:
# Box plot comparing the diagonal (row gene == column gene) log fold changes
# of the CRISPRi and CRISPRa matrices.
skyblue_color = "#87CEEB"  # Hex code for skyblue
salmon_color = "#FA8072"   # Hex code for salmon
# The palette uses the hex constants above (same colours as the matplotlib
# names 'skyblue' and 'salmon').
my_palette = {"CRISPRi": skyblue_color, "CRISPRa": salmon_color}

# One column per modality; the shorter column is padded with NaN by concat.
combined_data = pd.concat(
    [
        pd.Series(CRISPRi_lfc_decreases, name='CRISPRi'),
        pd.Series(CRISPRa_lfc_decreases, name='CRISPRa'),
    ],
    keys=['CRISPRi', 'CRISPRa'],
    axis=1,
)

# Apply context and style in ONE call: `sns.set_theme` resets the plotting
# context to its default, so a preceding `sns.set_context('talk')` had no
# effect. This also resets any font_scale set by earlier cells.
sns.set_theme(context='talk', style='white')

ax = sns.boxplot(data=combined_data, palette=my_palette, width=0.5, linewidth=2, fliersize=3)
# Dashed reference line at zero log fold change.
ax.axhline(0, linestyle='--', linewidth=2, color='black')
ax.set_ylabel("Log fold change")