In [None]:
import scanpy as sc
import anndata as ann
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import glob
from matplotlib import rcParams
from matplotlib import colors
import logging

import seaborn as sb

# Scanpy logging verbosity (level 3).
sc.settings.verbosity = 3

# Global figure defaults. The default figure size is passed through `figsize`
# because set_figure_params (with its default scanpy=True) resets matplotlib's
# rcParams, including 'figure.figsize'; a value assigned to
# plt.rcParams['figure.figsize'] before this call would be overwritten.
sc.set_figure_params(
    dpi=200,
    dpi_save=300,
    vector_friendly=False,
    format='pdf',
    figsize=(8, 8),  # rescale figures
)

# Print the versions of scanpy and its dependencies into the notebook output.
sc.logging.print_versions()

In [None]:
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

In [None]:
# Use seaborn's 'paper' plotting context to set the size of plot elements
sb.set_context('paper')

In [None]:
# Analysis version tag (used as a prefix in file names)
version = "V1"

# Directory holding the output files, and the figures directory beneath it
output_files_path = "/Sunshine_DeRisi_RSV_files/"
fig_path = output_files_path + "figures/"

In [None]:
# Point scanpy's figure directory setting (settings.figdir) at fig_path
sc.settings.figdir = fig_path

In [None]:
# Read the annotated, filtered human + virus AnnData file (doublets are already removed)
name = "2024_RSV_annotated_filtered_human_virus"
preprocessed_path = f"{output_files_path}{version}_{name}.h5ad"
adata_human_virus = sc.read_h5ad(preprocessed_path)

In [None]:
# Identify read origin from the prefix of each entry in var_names
# Names beginning with 'RSV'
RSV = [feature for feature in adata_human_virus.var_names if feature.startswith('RSV')]
# Names beginning with 'genome_RSV'
RSV_genome = [feature for feature in adata_human_virus.var_names if feature.startswith('genome_RSV')]
# Boolean mask over var_names: True where the name begins with 'GRCh38_'
human_genes = adata_human_virus.var_names.str.startswith('GRCh38_')

# Normalize, log1p

In [None]:
# Per-cell normalization with scanpy's default arguments
sc.pp.normalize_per_cell(adata_human_virus)

# Row sums of the normalized matrix, and their ln(x + 1)
adata_human_virus.obs['n_counts_norm'] = adata_human_virus.X.sum(1)
adata_human_virus.obs['n_counts_norm_log'] = np.log1p(adata_human_virus.obs['n_counts_norm'])

# Post-normalization sums over the human and the viral-transcript features.
# `.A1` flattens the matrix returned by the sum into a 1-D array.
human_totals = np.sum(adata_human_virus[:, human_genes].X, axis=1).A1
viral_totals = np.sum(adata_human_virus[:, RSV].X, axis=1).A1

adata_human_virus.obs['human_n_counts_norm'] = human_totals
adata_human_virus.obs['viral_transcript_n_counts_norm'] = viral_totals

# The same per-cell sums, log transformed (ln + 1)
adata_human_virus.obs['viral_transcript_n_counts_norm_log'] = np.log1p(viral_totals)
adata_human_virus.obs['human_n_counts_norm_log'] = np.log1p(human_totals)

In [None]:
# Log-transform the data; scaling is deliberately NOT applied
sc.pp.log1p(adata_human_virus)
logging.info('Log transforming data')

# Store the log-transformed object in .raw
adata_human_virus.raw = adata_human_virus
logging.info('Saving log(counts)+1 in .raw')
# Scaling step left disabled: sc.pp.scale(adata_human_virus, max_value=10)

# Identify highly variable genes (scanpy default settings)
sc.pp.highly_variable_genes(
    adata_human_virus,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
)
n_highly_variable = np.sum(adata_human_virus.var['highly_variable'])
print('\n', f'Number of highly variable genes: {n_highly_variable:d}')

# Reduce dimensionality

In [None]:
# Seed passed as random_state to the PCA call below
initialization = 1

# First-pass PCA with scanpy's default arguments
sc.tl.pca(adata_human_virus, random_state=initialization)

# Variance ratio of the first 50 PCs on a log scale, drawn on a 4x4 figure
rcParams.update({'figure.figsize': (4, 4)})
sc.pl.pca_variance_ratio(adata_human_virus, log=True, n_pcs=50)

## Take a look at what is driving the PCs

In [None]:
# Each covariate is plotted twice: once on dimensions (0, 1) and once on (2, 3).
# From https://scanpy.readthedocs.io/en/stable/tutorials/basics/clustering.html
sc.pl.pca(
    adata_human_virus,
    color=[
        covariate
        for covariate in (
            "viral_transcript_frac",
            "new_multiseq_id",
            "frac_mito_human",
            "human_n_counts",
            "batch",
        )
        for _ in range(2)
    ],
    dimensions=[(0, 1), (2, 3)] * 5,
    ncols=2,
    size=2,
)

In [None]:
# Loadings for components 1, 2 and 3 on a 10x10 figure
rcParams.update({'figure.figsize': (10, 10)})
sc.pl.pca_loadings(adata_human_virus, components='1,2,3')

In [None]:
# Number of principal components to compute
n_components = 40

# PCA on the highly variable genes, followed by the neighbor graph and UMAP;
# every step receives the same random_state
sc.pp.pca(
    adata_human_virus,
    n_comps=n_components,
    use_highly_variable=True,
    svd_solver='arpack',
    random_state=initialization,
)
sc.pp.neighbors(adata_human_virus, random_state=initialization)
logging.info('KNN complete.')
sc.tl.umap(adata_human_virus, random_state=initialization)
logging.info('UMAP complete.')

In [None]:
# Recompute the UMAP with min_dist=1 and spread=0.5.
# NOTE(review): random_state is not passed here, unlike the earlier PCA/neighbors/UMAP
# calls that pass `initialization`, so scanpy's default applies — confirm this is intended.
sc.tl.umap(adata_human_virus, min_dist=1, spread=0.5)

In [None]:
# This stretched out the heat inactivated RSV population in UMAP space
# --> plots for figure below: one panel per treatment, same colour scale (0 to 0.45)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 3), gridspec_kw={'wspace': 0.4})

for panel_ax, treatment_label, panel_title in (
    (ax1, "Heat_Killed_RSV", 'Heat Inactivated RSV'),
    (ax2, "RSV_infected", 'RSV'),
):
    sc.pl.umap(
        adata_human_virus[adata_human_virus.obs.treatment.isin([treatment_label]), :],
        color="viral_transcript_frac",
        frameon=False,
        vmin=0,
        vmax=.45,
        ax=panel_ax,
        show=False,
        cmap='inferno',
        size=30,
        title=panel_title,
    )

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap


# Build a variant of 'inferno' whose low end starts at dark gray (0.2, 0.2, 0.2).
# plt.get_cmap is used because matplotlib.cm.get_cmap (reached as plt.cm.get_cmap)
# was removed in Matplotlib 3.9.
inferno = plt.get_cmap('inferno', 256)

# 100 RGBA rows ramping linearly from dark gray to inferno(0.4), followed by
# 156 rows sampled from inferno between 0.4 and 1: 256 rows in total.
gray_to_purple = np.concatenate([
    np.linspace([0.2, 0.2, 0.2, 1.0], inferno(0.4), 100),
    inferno(np.linspace(0.4, 1, 156)),
])

# gray_to_purple already supplies all 256 entries, so it is the full color table.
new_colors = gray_to_purple.copy()

custom_inferno = LinearSegmentedColormap.from_list("CustomInferno", new_colors)

# Test plot of custom_inferno on random data
plt.imshow(np.random.rand(10, 10), cmap=custom_inferno)
plt.colorbar()
plt.show()

## Figure 1 C

In [None]:
rcParams.update({'figure.figsize': (3, 2.5)})

# Base layer: vehicle-control cells drawn without a `color` key; the returned
# Axes is reused for the overlays below.
ax = sc.pl.umap(
    adata_human_virus[adata_human_virus.obs.treatment.isin(["Vehicle_Control"]), :],
    size=50,
    show=False,
)

# Keyword arguments shared by both overlay layers
overlay_kwargs = dict(
    color="viral_transcript_frac",
    frameon=False,
    vmin=0,
    vmax=.45,
    show=False,
    ax=ax,
)

# In order for the plot NOT to compress, plot RSV_infected cells with size = 0
sc.pl.umap(
    adata_human_virus[adata_human_virus.obs.treatment.isin(["RSV_infected"]), :],
    size=0,
    color_map='Greys',
    colorbar_loc=None,
    **overlay_kwargs,
)

# Visible layer: heat-inactivated RSV cells coloured with the custom colormap
sc.pl.umap(
    adata_human_virus[adata_human_virus.obs.treatment.isin(["Heat_Killed_RSV"]), :],
    size=30,
    color_map=custom_inferno,
    title='Heat Inactivated RSV',
    **overlay_kwargs,
)

# Font settings, applied after plotting and before the figure is written
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'font.size': 12.0,
    'legend.fontsize': 12.0,
})

plt.savefig(fig_path + 'umap_fig1_heatinactivated_overlayed_vc_20250222.pdf')

In [None]:
rcParams.update({'figure.figsize': (3, 2.5)})

# Base layer: vehicle-control cells drawn without a `color` key; the returned
# Axes is reused for the overlays below.
ax = sc.pl.umap(
    adata_human_virus[adata_human_virus.obs.treatment.isin(["Vehicle_Control"]), :],
    size=50,
    show=False,
)

# Keyword arguments shared by both overlay layers
overlay_kwargs = dict(
    color="viral_transcript_frac",
    frameon=False,
    vmin=0,
    vmax=.45,
    show=False,
    ax=ax,
)

# Included for completeness and to ensure the size is the same --> but not required:
# heat-inactivated cells drawn with size = 0
sc.pl.umap(
    adata_human_virus[adata_human_virus.obs.treatment.isin(["Heat_Killed_RSV"]), :],
    size=0,
    color_map='Greys',
    colorbar_loc=None,
    **overlay_kwargs,
)

# Visible layer: RSV-infected cells coloured with the custom colormap
sc.pl.umap(
    adata_human_virus[adata_human_virus.obs.treatment.isin(["RSV_infected"]), :],
    color_map=custom_inferno,
    size=30,
    title='RSV',
    **overlay_kwargs,
)

# Font settings, applied after plotting
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'font.size': 12.0,
    'legend.fontsize': 12.0,
})

#plt.savefig(fig_path+'umap_fig1_rsvinfected_overlayed_vc_20250222.pdf')