Plotting QC metrics for the uninfected dataset

In [2]:
import os

# important for gpd.sjoin
os.environ["USE_PYGEOS"] = "0"

import scanpy as sc
import squidpy as sq
import numpy as np
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
import igraph
import random
import math
from sklearn.preprocessing import MinMaxScaler


warnings.filterwarnings("ignore")

In [3]:
# Read the cleaned AnnData object from disk (the file name suggests Peyer's
# patches were excluded upstream -- confirm against the cleaning notebook).
adata_path = 'uninfected/analysis/cleaned/final_object_no_peyers.h5ad'
adata = sc.read_h5ad(adata_path)

Median number of transcripts per cell

In [None]:
# Median of the per-cell 'total_transcripts' column, displayed as the cell output.
np.median(adata.obs['total_transcripts'].to_numpy())

Total number of cells

In [None]:
# Row count of the obs table (one row per cell), displayed as the cell output.
adata.obs.shape[0]

In [9]:
# Split the object by segmentation batch: each subset is selected from `adata`
# with a boolean mask over the 'batch' column of obs.
batch_labels = adata.obs['batch']
si1 = adata[batch_labels == 'segmentation_SI1']
si2 = adata[batch_labels == 'segmentation_SI2']

In [17]:
# Column-wise totals of X for the SI1 batch (sum over axis 0, i.e. over rows).
# If X is stored as a scipy sparse matrix, np.sum returns a (1, n) np.matrix;
# coerce to a flat 1-D ndarray so the later min()/max() calls and the plotting
# code see one scalar per column. For a dense X this is a no-op.
si1_counts = np.asarray(np.sum(si1.X, axis=0)).ravel()

In [18]:
# Column-wise totals of X for the SI2 batch (sum over axis 0, i.e. over rows).
# If X is stored as a scipy sparse matrix, np.sum returns a (1, n) np.matrix;
# coerce to a flat 1-D ndarray so the later min()/max() calls and the plotting
# code see one scalar per column. For a dense X this is a no-op.
si2_counts = np.asarray(np.sum(si2.X, axis=0)).ravel()

In [None]:
# Scatter plot comparing the summed counts of the two batches on a log10 scale,
# with the identity line and the Pearson correlation annotated, saved as a PDF.

# Work on flat 1-D float arrays: a column sum over a scipy sparse matrix comes
# back as a (1, n) np.matrix, which breaks min()/max() and the tick ranges.
si1_totals = np.asarray(si1_counts, dtype=float).ravel()
si2_totals = np.asarray(si2_counts, dtype=float).ravel()

# log10(0) is -inf, which makes the correlation NaN and makes np.arange raise
# when building the tick range, so keep only entries that are positive in both.
detected = (si1_totals > 0) & (si2_totals > 0)
if not detected.any():
    raise ValueError('no entries with non-zero counts in both batches; nothing to plot')

# Calculate the log10 of the counts
log_si1_counts = np.log10(si1_totals[detected])
log_si2_counts = np.log10(si2_totals[detected])

# Compute the axis extents once instead of re-scanning the arrays per call.
si1_lo, si1_hi = log_si1_counts.min(), log_si1_counts.max()
si2_lo, si2_hi = log_si2_counts.min(), log_si2_counts.max()

fig, ax = plt.subplots(dpi=400)

# Plot the scatter plot
ax.scatter(log_si1_counts, log_si2_counts)

# Draw the line y = x in black (spanning the SI1 range)
ax.plot([si1_lo, si1_hi], [si1_lo, si1_hi], color='black')

# Add the correlation coefficient to the plot (axes-fraction coordinates)
corr_coef = np.corrcoef(log_si1_counts, log_si2_counts)[0, 1]
ax.text(0.1, 0.9, f'r = {corr_coef:.2f}', transform=ax.transAxes)

# Rename the axes ticks to 10^(current value). The exponent is wrapped in a
# literal brace group ({{ }} in the f-string) so that mathtext superscripts the
# whole exponent; without it only the first character of e.g. "10" or "-1" is raised.
x_ticks = np.arange(np.floor(si1_lo), np.ceil(si1_hi) + 1)
ax.set_xticks(x_ticks)
ax.set_xticklabels([f'$10^{{{int(x)}}}$' for x in x_ticks])

y_ticks = np.arange(np.floor(si2_lo), np.ceil(si2_hi) + 1)
ax.set_yticks(y_ticks)
ax.set_yticklabels([f'$10^{{{int(y)}}}$' for y in y_ticks])

ax.set_xlabel('log10(si1_counts)')
ax.set_ylabel('log10(si2_counts)')
fig.savefig('../figures/qc.pdf')
# Show the plot
plt.show()