In [None]:
import scanpy as sc
import anndata as ann
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import glob
from matplotlib import rcParams
from matplotlib import colors
import logging

import seaborn as sb

# Notebook-wide logging / plotting configuration.
sc.settings.verbosity = 3

# Default matplotlib figure size (rescale figures).
# NOTE(review): sc.set_figure_params() below applies scanpy's own rcParams
# afterwards -- confirm whether the (8, 8) size is still in effect after it.
plt.rcParams['figure.figsize'] = (8, 8)
sc.set_figure_params(dpi=200, dpi_save=300)

# Record the package versions used for this run.
sc.logging.print_versions()

In [None]:
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

In [None]:
# Use seaborn's 'paper' plotting context (controls the size of plot elements).
sb.set_context('paper')

In [None]:
# Analysis version tag; it is prefixed to the name of the .h5ad file that is loaded.
version = "V1"

# Root directory for input/output files; figures live in a sub-directory of it.
output_files_path = "/Sunshine_DeRisi_RSV_files/"
fig_path = output_files_path + "figures/"

In [None]:
# Point scanpy's figure directory setting at the notebook's figure folder.
sc.settings.figdir = fig_path

In [None]:
# Load the annotated, filtered human + virus AnnData object (doublets are already removed).
# The file name is "<version>_<name>.h5ad" inside `output_files_path`.
name = "2024_RSV_annotated_filtered_human_virus"
preprocessed_path = f"{output_files_path}{version}_{name}.h5ad"
adata_human_virus = sc.read_h5ad(preprocessed_path)

In [None]:
# Classify features by read origin using their var-name prefixes:
#   'RSV...'         -> RSV features
#   'genome_RSV...'  -> RSV genome features
#   'GRCh38_...'     -> human genes (kept as a boolean mask over var_names)
RSV = adata_human_virus.var_names[adata_human_virus.var_names.str.startswith('RSV')].tolist()
RSV_genome = adata_human_virus.var_names[
    adata_human_virus.var_names.str.startswith('genome_RSV')
].tolist()
human_genes = adata_human_virus.var_names.str.startswith('GRCh38_')

# All viral features (transcripts first, then genome features).
virus_genes = [*RSV, *RSV_genome]

# Subset to 12 hpi

In [None]:
# Keep only the cells whose `batch` label is '0' (the 12 hpi subset named in the
# section heading) and work on an independent copy from here on.
in_batch_0 = adata_human_virus.obs['batch'] == '0'
adata_human_virus = adata_human_virus[in_batch_0, :].copy()
adata_human_virus

## for downstream ISG analysis

In [None]:
# Interferon-stimulated gene (ISG) symbols used for the downstream ISG score.
Hein_isgs = [
    'PSMB8', 'PSMB9', 'PSME1', 'PSME2', 'ISG15', 'ISG20', 'IRF7', 'MX1', 'MX2',
    'GBP1', 'GBP2', 'GBP3', 'IFI6', 'IFI44', 'IFI35', 'IFI16', 'IFI27', 'IFIH1',
    'IFI44L', 'IFIT1', 'IFIT2', 'IFIT3', 'IFIT5', 'IFITM1', 'IFITM2', 'IFITM3',
    'EIF2AK2', 'OAS1', 'OAS2', 'OAS3', 'CNP', 'PLSCR1', 'BST2', 'BTN3A2', 'XAF1',
    'CASP1', 'CASP4', 'CASP7', 'GSDMD',
]
# Feature names in this dataset carry the 'GRCh38_' prefix for human genes.
isg_gene_list = ['GRCh38_' + gene for gene in Hein_isgs]

# Expression thresholds for keeping an ISG in the scoring set: a gene is kept when
# MORE THAN `ISG_MIN_CELLS` cells have at least `ISG_MIN_COUNTS` counts at this time
# point (the cell-count comparison is strict, i.e. 11 or more cells with the default).
# While this should not change the gene score, the score is restricted to
# well-expressed genes for the gene scoring analysis downstream.
ISG_MIN_COUNTS = 5
ISG_MIN_CELLS = 10

# Counts for the ISGs that are present in this object (cells x ISGs).
adata_human_virus_isgsubset = adata_human_virus[:, adata_human_virus.var_names.isin(isg_gene_list)]
cell_ids = adata_human_virus_isgsubset.obs.index
isg_X = adata_human_virus_isgsubset.X
# Densify only this small gene subset; accept an already-dense matrix as well.
count_matrix = isg_X.toarray() if hasattr(isg_X, 'toarray') else np.asarray(isg_X)
counts_df = pd.DataFrame(count_matrix, index=cell_ids, columns=adata_human_virus_isgsubset.var_names)

# Per gene: number of cells reaching the count threshold, then the genes passing the cell threshold.
cells_with_counts = (counts_df >= ISG_MIN_COUNTS).sum(axis=0)
genes_above_10 = cells_with_counts[cells_with_counts > ISG_MIN_CELLS]
isg_subset = genes_above_10.index.tolist()
# Printed explicitly: a bare expression in the middle of a cell is not displayed.
print('ISGs retained for scoring:', isg_subset)

# Drop all viral features so that only non-viral genes enter normalization and embedding.
# NOTE(review): after this line none of the names in `RSV` / `RSV_genome` remain in
# `adata_human_virus`, and masks built from the earlier var_names no longer match its shape.
adata_human_virus = adata_human_virus[:, ~adata_human_virus.var_names.isin(virus_genes)].copy()
adata_human_virus

# Normalize, log1p,scale

In [None]:
# Per-cell normalization with scanpy's default settings.
sc.pp.normalize_per_cell(adata_human_virus)


def _row_sums(matrix):
    """Return the per-row (per-cell) sums of a sparse or dense matrix as a flat 1-D array."""
    return np.asarray(matrix.sum(axis=1)).ravel()


# Rebuild the feature masks from the CURRENT var_names. The earlier `human_genes`
# mask was computed before viral features were removed, so its length no longer
# matches this object, and the names in `RSV` can no longer be indexed directly.
human_genes = adata_human_virus.var_names.str.startswith('GRCh38_')
viral_mask = adata_human_virus.var_names.isin(RSV)
if not viral_mask.any():
    # Viral features were dropped upstream, so the viral sums below are all zero.
    logging.warning('No RSV features present after filtering; viral normalized counts will be 0.')

# Sum the normalized counts per cell once: all features, human genes, viral transcripts.
total_norm = _row_sums(adata_human_virus.X)
human_norm = _row_sums(adata_human_virus[:, human_genes].X)
viral_norm = _row_sums(adata_human_virus[:, viral_mask].X)

adata_human_virus.obs['n_counts_norm'] = total_norm
adata_human_virus.obs['n_counts_norm_log'] = np.log1p(adata_human_virus.obs['n_counts_norm'])

# Number of human and viral transcripts per cell POST NORM
adata_human_virus.obs['human_n_counts_norm'] = human_norm
adata_human_virus.obs['viral_transcript_n_counts_norm'] = viral_norm

# The same per-cell sums, log transformed (ln(x + 1))
adata_human_virus.obs['viral_transcript_n_counts_norm_log'] = np.log1p(viral_norm)
adata_human_virus.obs['human_n_counts_norm_log'] = np.log1p(human_norm)

In [None]:
# Filter out genes that are not present within this time point (min_cells=3),
# then recompute the human-gene mask so it matches the filtered var_names.
sc.pp.filter_genes(adata_human_virus, min_cells=3)
human_genes = adata_human_virus.var_names.str.startswith('GRCh38_')

In [None]:
# Log-transform the normalized data (log1p) and keep that state in .raw.
# Scaling and highly-variable-gene selection happen in the following cells.
sc.pp.log1p(adata_human_virus)
logging.info('Log transforming data')
# .raw is assigned before scaling, so it holds the log-transformed values.
adata_human_virus.raw = adata_human_virus
logging.info('Saving log(counts)+1 in .raw')

In [None]:
# Flag highly variable genes using mean / dispersion cut-offs and report how many were found.
sc.pp.highly_variable_genes(
    adata_human_virus,
    min_mean=0.0125,
    max_mean=10,
    min_disp=0.5,
)
n_highly_variable = np.sum(adata_human_virus.var['highly_variable'])
print('\n', f'Number of highly variable genes: {n_highly_variable:d}')

In [None]:
# Scale the expression matrix in place with scanpy, capping scaled values at max_value=10.
sc.pp.scale(adata_human_virus, max_value=10)

In [None]:
# Random seed reused by PCA, the neighbor graph and UMAP.
initialization = 1

# 50-component PCA with the ARPACK solver. `use_highly_variable` is deliberately
# not passed, so scanpy's default for that option applies.
pca_settings = dict(n_comps=50, svd_solver='arpack', random_state=initialization)
sc.tl.pca(adata_human_virus, **pca_settings)
sc.pl.pca_overview(adata_human_virus)

In [None]:
# Neighbor graph from 21 principal components (the author's inline note lists 30 as an alternative).
sc.pp.neighbors(adata_human_virus, n_neighbors=15, n_pcs=21, random_state=initialization)
logging.info('KNN complete.')

# UMAP embedding, seeded for reproducibility.
sc.tl.umap(adata_human_virus, min_dist=0.5, spread=1, random_state=initialization)
logging.info('UMAP complete.')

# Quick look at the embedding, colored by ISG15.
rcParams['figure.figsize'] = (2, 2)
sc.pl.umap(adata_human_virus, color=['GRCh38_ISG15'], cmap='YlGnBu', size=10, show=True)

## Figure 2A-D - UMAPS

In [None]:
# Density of cells on the UMAP, computed per treatment / infection-status group.
sc.tl.embedding_density(adata_human_virus, basis='umap', groupby='treatment_infectionstatus')

# Groups to display, in panel order.
density_groups = [
    'RSV_infected_infected',
    'RSV_infected_uninfected',
    'Heat_Killed_RSV_uninfected',
    'Vehicle_Control_uninfected',
]

rcParams['figure.figsize'] = (4, 4)
sc.pl.embedding_density(
    adata_human_virus,
    basis='umap',
    key='umap_density_treatment_infectionstatus',
    group=density_groups,
    frameon=False,
    color_map="YlOrRd_r",
)
# To write the figure to disk, add: save='treatement_inf_unf_density.pdf'

In [None]:
# UMAP colored by the per-cell `viral_transcript_frac` annotation.
rcParams['figure.figsize'] = (3, 3)
sc.pl.umap(
    adata_human_virus,
    color=['viral_transcript_frac'],
    cmap='inferno_r',
    size=20,
    show=True,
)

In [None]:
# One row of four UMAP panels, each colored by the expression of a single gene.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(16, 3), gridspec_kw={'wspace': 0.4})

# Options shared by every panel, plus the extra options used only for the two ISG panels.
common_umap_kwargs = dict(show=False, cmap='viridis', size=25, frameon=False)
isg_umap_kwargs = dict(use_raw=True, color_map=None, vmax=6.5)

# (feature name, panel title, target axis, extra keyword arguments)
umap_panels = [
    ('GRCh38_HERPUD1', 'HERPUD1', ax1, {}),
    ('GRCh38_DDIT3', 'DDIT3', ax2, {}),
    ('GRCh38_ISG15', 'ISG15', ax3, isg_umap_kwargs),
    ('GRCh38_IFIT1', 'IFIT1', ax4, isg_umap_kwargs),
]
ax1_dict, ax2_dict, ax3_dict, ax4_dict = [
    sc.pl.umap(adata_human_virus, color=feature, title=panel_title, ax=panel_ax,
               **common_umap_kwargs, **panel_extra)
    for feature, panel_title, panel_ax, panel_extra in umap_panels
]

# Font settings (set after the panels are drawn, as in the original cell order).
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'font.size': 12.0,
    'legend.fontsize': 12.0,
})

#plt.savefig(fig_path+'umap_transcriptional_herpud1_ddit3_isg15_ifit1.pdf')

## Figure 2E - dot plot

In [None]:
# Store a scaled copy of the current .X in the "scaled" layer (copy=True leaves .X untouched).
# NOTE(review): .X was already scaled with max_value=10 earlier in the notebook, so this
# re-scales scaled values -- confirm that is intended.
adata_human_virus.layers["scaled"] = sc.pp.scale(adata_human_virus, copy=True).X

In [None]:
# Gene symbols shown in the Figure 2E dot plot, in plotting order.
_marker_symbols = [
    'PLAU', 'PLAUR', 'DDIT3', 'HERPUD1', 'DNAJB9', 'XBP1',
    'BAG3', 'HSPA1A', 'HSPA1B', 'PLEK2', 'RHOF', 'IFRD1',
    'ISG20', 'ISG15', 'IFI6', 'IFIT1', 'OAS1', 'IRF7',
    'MX1', 'IFIT3', 'OAS3', 'PLSCR1',
]
# Feature names in the AnnData object carry the 'GRCh38_' prefix.
marker_genes = ['GRCh38_' + symbol for symbol in _marker_symbols]

In [None]:
# List the distinct treatment / infection-status labels present in the data.
adata_human_virus.obs['treatment_infectionstatus'].unique()

NOTE: There are so few cells in the buffer group that it doesn't make sense to include them in the plots. They are likely infected cells, but n is just too low here.

In [None]:
# Exclude the 'RSV_infected_buffer' group and keep an independent copy for plotting.
not_buffer = adata_human_virus.obs['treatment_infectionstatus'] != 'RSV_infected_buffer'
adata_human_virus_subset = adata_human_virus[not_buffer, :].copy()
adata_human_virus_subset

In [None]:
# Note: the plot below was reordered afterwards in Affinity to group the genes by function.

In [None]:
# Dot plot of the marker genes per treatment / infection-status group, in a fixed group order.
dotplot_group_order = [
    'RSV_infected_infected',
    'RSV_infected_uninfected',
    'Heat_Killed_RSV_uninfected',
    'Vehicle_Control_uninfected',
]
sc.pl.dotplot(
    adata_human_virus_subset,
    marker_genes,
    groupby="treatment_infectionstatus",
    categories_order=dotplot_group_order,
    cmap='inferno_r',
)
# To write the figure to disk, add: save='treatement_inf_status_dotplot.pdf'

In [None]:
# ISG score: re-score ISG stimulation on the plotting subset using the retained
# ISG list; the result is written to the 'isg_subset' score column.
rcParams['figure.figsize'] = (3, 3)
sc.tl.score_genes(adata_human_virus_subset, gene_list=isg_subset, score_name='isg_subset')

In [None]:
# ISG score per treatment / infection-status group: box plot with the individual
# cells overlaid as a swarm.
plt.rcParams['figure.figsize'] = (4, 4)  # rescale figures

treatment_groups = adata_human_virus_subset.obs['treatment_infectionstatus']
isg_scores = adata_human_virus_subset.obs['isg_subset']

# Outlier markers are hidden (fliersize=0) because every cell is drawn by the swarm plot.
sb.boxplot(x=treatment_groups, y=isg_scores, fliersize=0, color="gray")
sb.swarmplot(x=treatment_groups, y=isg_scores, color=".4", size=0.4)

plt.xticks(rotation=90)
sb.despine()
plt.title('ISG Score')