In [None]:
import scanpy as sc
import scvi
from tqdm.notebook import tqdm
import os
import numpy as np
import pandas as pd
from scvi.model.utils import mde
import matplotlib.pyplot as plt
import pickle
import glob
import torch

Add the path to the perturb data with guides

In [None]:
# Input AnnData (.h5ad) file; it is read again later in the notebook via the same variable.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
adata_path = '/mnt/sata2/Analysis_Alex_2/perturb1/adata_with_guides.h5ad'

In [None]:
# Load the AnnData object that the rest of the notebook annotates and plots.
adata = sc.read(adata_path)

Run scVI to integrate the batches

In [6]:
# Let PyTorch use reduced internal precision for float32 matrix multiplications
# (a speed/precision trade-off applied before model training below).
torch.set_float32_matmul_precision('medium')

In [None]:
# Register adata with scVI, using "batch" as the batch key and the "raw" layer as input.
# NOTE(review): presumably the "raw" layer holds unnormalized counts — confirm.
scvi.model.SCVI.setup_anndata(adata, batch_key="batch", layer="raw")

In [None]:
# Build an scVI model (2 layers, 30 latent dimensions, "nb" gene likelihood)
# and train it with the default training settings.
# NOTE(review): no random seed is set in the visible cells, while later cells
# hard-code Leiden cluster ids that depend on this model — confirm reproducibility.
scvi_ref = scvi.model.SCVI(adata, n_layers=2, n_latent=30, gene_likelihood="nb")
scvi_ref.train()

In [10]:
# Name of the adata.obsm slot that holds the scVI embedding.
SCVI_LATENT_KEY = "X_scVI"

# Embed the cells with the trained model, store the embedding, and build the
# neighbor graph on that embedding.
latent = scvi_ref.get_latent_representation()
adata.obsm[SCVI_LATENT_KEY] = latent
sc.pp.neighbors(adata, use_rep=SCVI_LATENT_KEY)

In [11]:
# Compute an MDE embedding of the scVI latent space; its first two columns are
# used as plot coordinates further down.
adata.obsm["X_mde"] = mde(adata.obsm["X_scVI"])

Cluster on the scvi latent space

In [26]:
# Leiden clustering with scanpy's default settings; the labels are read from
# adata.obs['leiden'] in the cells below.
sc.tl.leiden(adata)

Identify the P14 clusters

In [None]:
# Count how many cells carry each 'guide_rnas' label within every 'leiden' cluster
leiden_groupby = adata.obs.groupby(['leiden'])
g_counts = leiden_groupby['guide_rnas'].value_counts()

# Pivot to a table with one row per leiden cluster and one column per guide_rnas label (missing pairs -> 0)
g_counts_df = g_counts.unstack(fill_value=0)

# Per-cluster count of "Other cells", and the per-cluster sum over every other guide_rnas label
other_cells_count = g_counts_df.get('Other cells', 0)  # falls back to the scalar 0 when there is no "Other cells" column
all_other_grnas_count = g_counts_df.drop(columns=['Other cells'], errors='ignore').sum(axis=1)

# NOTE: despite the variable name, this is (cells with a guide label) / ("Other cells"),
# so a HIGH value marks a cluster enriched for guide-labelled cells.
# A cluster with zero "Other cells" divides by zero here (inf/NaN in the result).
ratio_other_to_grnas = all_other_grnas_count/other_cells_count

In [None]:

# Bar chart of the per-cluster ratio held in `ratio_other_to_grnas`.
# That series is (cells with any guide_rnas label other than "Other cells")
# divided by ("Other cells"), so the labels below are written in that direction.
fig, ax = plt.subplots(figsize=(10, 6))
ratio_other_to_grnas.plot(kind='bar', color='skyblue', ax=ax)

ax.set_xlabel('Leiden Groups')
ax.set_ylabel('Ratio of guide_rnas cells to "Other cells"')
ax.set_title('Ratio of guide_rnas cells to "Other cells" in Each Leiden Group')

ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

Read in the original adata

In [None]:
# Second, unmodified copy of the input file.
# NOTE(review): adata2 is not referenced by any later cell visible in this notebook.
adata2 = sc.read(adata_path)

Leiden clusters 3 and 4 are the clusters with transferred cells; cells in all other clusters are relabeled as "Other cells"

In [45]:
# Relabel every cell outside Leiden clusters '3' and '4' as 'Other cells'.
outside_selected_clusters = ~adata.obs['leiden'].isin(['3', '4'])

# A categorical column only accepts values that are already categories, so
# register 'Other cells' first when it is missing.
guide_labels = adata.obs['guide_rnas']
if isinstance(guide_labels.dtype, pd.CategoricalDtype) and 'Other cells' not in guide_labels.cat.categories:
    adata.obs['guide_rnas'] = guide_labels.cat.add_categories(['Other cells'])

# Single .loc assignment on the DataFrame itself: chained indexing
# (df[col][mask] = ...) may write to a temporary copy and leave adata.obs unchanged.
adata.obs.loc[outside_selected_clusters, 'guide_rnas'] = 'Other cells'

Plot the sgRNAs on the MDE and save out the adata

In [None]:

# Color for each guide. A guide missing from this mapping gets matplotlib's
# next default cycle color (c=None) instead of raising a KeyError.
bright_colors = {
    'sgCd19': 'red',
    'sgCxcr3': 'blue',
    'sgThy1': 'yellow'
}

for group in np.unique(adata.obs['guide_rnas']):
    # Subset once per group and reuse the coordinates for both axes.
    group_xy = adata[adata.obs['guide_rnas'] == group].obsm["X_mde"]

    if group == 'Other cells':
        # Background population: gray and mostly transparent.
        point_style = dict(c='gray', alpha=0.2)
    else:
        point_style = dict(c=bright_colors.get(group))

    plt.scatter(
        group_xy[:, 0],
        group_xy[:, 1],
        s=1,
        linewidths=0,
        label=group,
        **point_style
    )

# Legend outside the axes, with enlarged markers so the s=1 points stay readable.
plt.legend(
    scatterpoints=1,
    markerscale=5,
    loc='center left',
    bbox_to_anchor=(1, 0.5)
)
plt.grid(False)
plt.show()


In [48]:
# Save the annotated object (embeddings, clusters, relabeled guide_rnas) to disk.
# NOTE(review): absolute, machine-specific output path.
adata.write('/mnt/sata2/Analysis_Alex_2/perturb1/final_filtered_on_leiden.h5ad')

Make a nicer looking plot

In [None]:
# Reload the object written above, so the plotting cell below can run without
# repeating the model training. This rebinds the name `adata`.
adata = sc.read('/mnt/sata2/Analysis_Alex_2/perturb1/final_filtered_on_leiden.h5ad')

In [None]:
import numpy as np
import matplotlib.pyplot as plt

plt.figure(dpi=400)

# Color for each guide shown in the figure and its legend.
bright_colors = {
    'sgCd19': 'lightblue',
    'sgCxcr3': 'orange',
    'sgThy1': 'lightgreen'
}

# Split the embedding into the "Other cells" background and the guide-labelled cells.
other_cells_mask = adata.obs['guide_rnas'] == 'Other cells'
other_cells_data = adata[other_cells_mask].obsm["X_mde"]

non_other_cells_mask = ~other_cells_mask
non_other_cells_data = adata[non_other_cells_mask].obsm["X_mde"]
non_other_cells_groups = adata.obs.loc[non_other_cells_mask, 'guide_rnas']

# Randomize the draw order of the guide-labelled points (later markers cover
# earlier ones). A seeded generator makes the saved figure reproducible.
shuffled_indices = np.random.default_rng(0).permutation(non_other_cells_data.shape[0])
shuffled_data = non_other_cells_data[shuffled_indices]
shuffled_groups = non_other_cells_groups.iloc[shuffled_indices]

# Background first so the guide-labelled points are drawn on top of it.
plt.scatter(
    other_cells_data[:, 0],
    other_cells_data[:, 1],
    s=1,
    c='gray',
    linewidths=0,
    alpha=0.2,
    label='Other cells'
)

# One scatter call with a color per point: markers are drawn in array order, so
# the shuffled interleaving is kept without creating one artist per cell.
point_colors = [bright_colors[group] for group in shuffled_groups]
plt.scatter(
    shuffled_data[:, 0],
    shuffled_data[:, 1],
    s=5,
    c=point_colors,
    linewidths=0
)

# Build the legend by hand, because the guide points share a single scatter call.
handles = [
    plt.Line2D([0], [0], marker='o', color='w', label=group, markerfacecolor=color, markersize=10)
    for group, color in bright_colors.items()
]
handles.append(plt.Line2D([0], [0], marker='o', color='w', label='Other cells', markerfacecolor='gray', markersize=5))
plt.legend(handles=handles, loc='center left', bbox_to_anchor=(1, 0.5))

plt.grid(False)

# Create the output directory if needed, then save before plt.show().
savedir = 'figures/umap'
os.makedirs(savedir, exist_ok=True)
plt.savefig(os.path.join(savedir, 'umap_guides.pdf'), bbox_inches='tight')
plt.show()

