In [None]:
import warnings
import os

warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import pandas as pd
import numpy as np
import random

import matplotlib.pyplot as plt
import glob
from matplotlib import rcParams
from matplotlib import colors



import rpy2.rinterface_lib.callbacks
import anndata2ri
import logging


from rpy2.robjects import pandas2ri
from rpy2.robjects import r

from adjustText import adjust_text

# Set scanpy's verbosity to its lowest level (0).
sc.settings.verbosity = 0
# Only let rpy2's R-callback logger emit records at ERROR level or above.
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Activate the pandas and AnnData converters for objects crossing the
# Python/R boundary.
pandas2ri.activate()
anndata2ri.activate()

# Load the rpy2 IPython extension.
%load_ext rpy2.ipython

In [None]:
# Tag identifying this analysis version.
version = "V1"

# Root directory for the files this notebook works with; the other paths
# below are derived from it.
output_files_path = "/Sunshine_DeRisi_RSV_files/"

# Directory for figures.
fig_path = f"{output_files_path}figures/"

# Directory holding the human differential-expression result tables (CSV).
de_results_path = f"{output_files_path}de_human/"

## Volcano plots for human DE only

In [None]:
# Volcano plots for every human DE comparison, one panel per result table.
#
# The file order is spelled out by hand instead of taken from a sorted
# directory listing: a plain string sort would put '12hpi' ahead of '4hpi',
# and the panels should run 0 -> 4 -> 8 -> 12 hpi, four comparisons per row.
de_files = [
    'MAST_0hpi_Heat_Killed_RSV-vs-Vehicle_Control.csv',
    'MAST_0hpi_RSV_infected-vs-RSV_uninfected.csv',
    'MAST_0hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_0hpi_RSV_uninfected-vs-VC_uninfected.csv',
    'MAST_4hpi_Heat_Killed_RSV-vs-Vehicle_Control.csv',
    'MAST_4hpi_RSV_infected-vs-RSV_uninfected.csv',
    'MAST_4hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_4hpi_RSV_uninfected-vs-VC_uninfected.csv',
    'MAST_8hpi_Heat_Killed_RSV-vs-Vehicle_Control.csv',
    'MAST_8hpi_RSV_infected-vs-RSV_uninfected.csv',
    'MAST_8hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_8hpi_RSV_uninfected-vs-VC_uninfected.csv',
    'MAST_12hpi_Heat_Killed_RSV-vs-Vehicle_Control.csv',
    'MAST_12hpi_RSV_infected-vs-RSV_uninfected.csv',
    'MAST_12hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_12hpi_RSV_uninfected-vs-VC_uninfected.csv',
]

# Point colour keyed by the boolean 'highlight' column.
palette = {False: 'black', True: 'red'}

# Grid layout: at most four panels per row.
file_number = len(de_files)
row_number = (file_number - 1) // 4 + 1
col_number = min(file_number, 4)

# squeeze=False keeps `axes` a 2-D array even when there is a single panel,
# so .flatten() below works for any number of files.
fig, axes = plt.subplots(row_number, col_number, figsize=(50, 50), squeeze=False)

for ax, de_file in zip(axes.flatten(), de_files):
    df_de = pd.read_csv(os.path.join(de_results_path, de_file))

    # -log10(adjusted p). A p-value of exactly 0 gives +inf; those entries
    # (and any missing ones) are pinned to the largest finite value so they
    # still appear at the top of the plot. Built as a standalone Series and
    # assigned once: in-place replace/fillna on `df_de[col]` does not update
    # the frame under pandas Copy-on-Write.
    neg_log10_padj = -np.log10(df_de['pvals_adj'])
    neg_log10_padj = neg_log10_padj.replace([np.inf, -np.inf], np.nan)
    max_pval_adj = neg_log10_padj.max(skipna=True)
    df_de['pval_adj_-log10'] = neg_log10_padj.fillna(max_pval_adj)

    # Red points: adjusted p < 0.05 and |log fold change| > 1.
    df_de['highlight'] = (df_de['pvals_adj'] < 0.05) & (df_de['logfoldchanges'].abs() > 1)

    # Display label: the second '_'-separated field of 'names'
    # (presumably drops a prefix from the gene identifier - confirm).
    df_de['names_str'] = df_de['names'].str.split('_').str[1]

    # Genes to label: the 50 rows with the largest |log fold change|.
    top_highlight_genes = df_de.assign(abs_lfc=df_de['logfoldchanges'].abs()).nlargest(50, 'abs_lfc')

    # Draw straight onto this panel's Axes. No extra plt.figure() here: it
    # would only create an empty figure and redirect the pyplot-level calls
    # away from the grid.
    # NOTE(review): `size` is seaborn's semantic size mapping, not the marker
    # area (`s`); left as-is to keep the rendered appearance unchanged.
    sns.scatterplot(x='logfoldchanges', y='pval_adj_-log10', data=df_de, hue='highlight',
                    palette=palette, ax=ax, legend=False,
                    alpha=1, linewidth=0, size=1)
    ax.set_xlim(-6, 6)

    # Leave the frame indexed by gene name, as the original cell did.
    df_de = df_de.set_index('names', drop=False)

    # Add labels to the selected subset of dots, nudged slightly up and right.
    # Iterating the subset's own rows avoids a by-name lookup, which would
    # return several rows if a name were duplicated.
    for lfc, neg_log_p, label in zip(top_highlight_genes['logfoldchanges'],
                                     top_highlight_genes['pval_adj_-log10'],
                                     top_highlight_genes['names_str']):
        ax.text(x=lfc + 0.1, y=neg_log_p + 0.1, s=label, size=8)

    sns.despine(ax=ax)
    ax.set_xlabel('Log2 FC')
    ax.set_ylabel('-Log10(padj)')
    ax.set_title(f'DE : {de_file}')

# Hide any grid cells left over when the file count is not a multiple of four.
for unused_ax in axes.flatten()[file_number:]:
    unused_ax.set_visible(False)

# Lay out the grid figure itself (not whichever figure pyplot considers current).
fig.tight_layout()

## Figure S3A

In [None]:
# Figure S3A: volcano plots for every human DE comparison on shared axes.
#
# The file order is spelled out by hand so the panels run
# 0 -> 4 -> 8 -> 12 hpi, four comparisons per row.
de_files = [
    'MAST_0hpi_Heat_Killed_RSV-vs-Vehicle_Control.csv',
    'MAST_0hpi_RSV_infected-vs-RSV_uninfected.csv',
    'MAST_0hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_0hpi_RSV_uninfected-vs-VC_uninfected.csv',
    'MAST_4hpi_Heat_Killed_RSV-vs-Vehicle_Control.csv',
    'MAST_4hpi_RSV_infected-vs-RSV_uninfected.csv',
    'MAST_4hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_4hpi_RSV_uninfected-vs-VC_uninfected.csv',
    'MAST_8hpi_Heat_Killed_RSV-vs-Vehicle_Control.csv',
    'MAST_8hpi_RSV_infected-vs-RSV_uninfected.csv',
    'MAST_8hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_8hpi_RSV_uninfected-vs-VC_uninfected.csv',
    'MAST_12hpi_Heat_Killed_RSV-vs-Vehicle_Control.csv',
    'MAST_12hpi_RSV_infected-vs-RSV_uninfected.csv',
    'MAST_12hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_12hpi_RSV_uninfected-vs-VC_uninfected.csv',
]

# Point colour keyed by the boolean 'highlight' column.
palette = {False: 'black', True: 'red'}

# Font settings are applied before any figure or text is created so that
# every panel (including the first one on a fresh kernel) uses them.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Arial'
plt.rcParams['font.size'] = 12.0
plt.rcParams['legend.fontsize'] = 12.0

# Grid layout: at most four panels per row.
file_number = len(de_files)
row_number = (file_number - 1) // 4 + 1
col_number = min(file_number, 4)

# squeeze=False keeps `axes` a 2-D array even when there is a single panel,
# so .flatten() below works for any number of files.
fig, axes = plt.subplots(row_number, col_number, figsize=(50, 50), squeeze=False)

for ax, de_file in zip(axes.flatten(), de_files):
    df_de = pd.read_csv(os.path.join(de_results_path, de_file))

    # -log10(adjusted p). A p-value of exactly 0 gives +inf; those entries
    # (and any missing ones) are pinned to the largest finite value. Built as
    # a standalone Series and assigned once: in-place replace/fillna on
    # `df_de[col]` does not update the frame under pandas Copy-on-Write.
    neg_log10_padj = -np.log10(df_de['pvals_adj'])
    neg_log10_padj = neg_log10_padj.replace([np.inf, -np.inf], np.nan)
    max_pval_adj = neg_log10_padj.max(skipna=True)
    df_de['pval_adj_-log10'] = neg_log10_padj.fillna(max_pval_adj)

    # Red points: adjusted p < 0.05 and |log fold change| > 1.
    df_de['highlight'] = (df_de['pvals_adj'] < 0.05) & (df_de['logfoldchanges'].abs() > 1)

    # Clean up gene labels into a new column: the second '_'-separated field
    # of 'names' (presumably drops a prefix - confirm).
    df_de['names_str'] = df_de['names'].str.split('_').str[1]

    # Genes to label: -log10(padj) above 75 and |log fold change| > 1.
    top_highlight_genes = df_de[(df_de['pval_adj_-log10'] > 75) & (df_de['logfoldchanges'].abs() > 1)]

    # Draw straight onto this panel's Axes; no extra plt.figure(), which
    # would only leave an empty figure behind on every iteration.
    # NOTE(review): `size` is seaborn's semantic size mapping, not the marker
    # area (`s`); left as-is to keep the rendered appearance unchanged.
    sns.scatterplot(x='logfoldchanges', y='pval_adj_-log10', data=df_de, hue='highlight',
                    palette=palette, ax=ax, legend=False,
                    alpha=1, linewidth=0, size=10)
    # Identical limits on every panel so the comparisons are directly comparable.
    ax.set_xlim(-6, 6.2)
    ax.set_ylim(0, 380)

    # Leave the frame indexed by gene name, as the original cell did.
    df_de = df_de.set_index('names', drop=False)

    # Place one label per selected gene at its data point. Iterating the
    # subset's own rows avoids a by-name lookup, which would return several
    # rows if a name were duplicated.
    for lfc, neg_log_p, label in zip(top_highlight_genes['logfoldchanges'],
                                     top_highlight_genes['pval_adj_-log10'],
                                     top_highlight_genes['names_str']):
        ax.text(x=lfc, y=neg_log_p, s=label, size=10)

    # Let adjustText reposition this panel's labels (arguments unchanged).
    texts = list(ax.texts)
    adjust_text(texts, ax=ax,expand=(1.2, 1.2),arrowprops=dict(color="gray", lw=0.5),only_move={'points':'y', 'texts':'y'})

    sns.despine(ax=ax, right=True, top=True)
    ax.set_xlabel('Log2 FC')
    ax.set_ylabel('-Log10(padj)')
    ax.set_title(f'DE : {de_file}')

# Hide any grid cells left over when the file count is not a multiple of four.
for unused_ax in axes.flatten()[file_number:]:
    unused_ax.set_visible(False)

# Lay out the grid figure itself (not whichever figure pyplot considers current).
fig.tight_layout()


#fig.savefig(fig_path+'volcano_all_vs_VC_axes_norm.pdf')

In [None]:
# Plot the 12 hpi RSV comparisons against VC_uninfected only
# (RSV_infected and RSV_uninfected), one panel each.
de_files = [
    'MAST_12hpi_RSV_infected-vs-VC_uninfected.csv',
    'MAST_12hpi_RSV_uninfected-vs-VC_uninfected.csv',
]

# Point colour keyed by the boolean 'highlight' column.
palette = {False: 'black', True: 'red'}

# Font settings are applied before any figure or text is created so that
# both panels (including the first one on a fresh kernel) use them.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Arial'
plt.rcParams['font.size'] = 12.0
plt.rcParams['legend.fontsize'] = 12.0

# Grid layout: at most four panels per row.
file_number = len(de_files)
row_number = (file_number - 1) // 4 + 1
col_number = min(file_number, 4)

# squeeze=False keeps `axes` a 2-D array even when there is a single panel,
# so .flatten() below works for any number of files.
fig, axes = plt.subplots(row_number, col_number, figsize=(25, 10), squeeze=False)

for ax, de_file in zip(axes.flatten(), de_files):
    df_de = pd.read_csv(os.path.join(de_results_path, de_file))

    # -log10(adjusted p). A p-value of exactly 0 gives +inf; those entries
    # (and any missing ones) are pinned to the largest finite value. Built as
    # a standalone Series and assigned once: in-place replace/fillna on
    # `df_de[col]` does not update the frame under pandas Copy-on-Write.
    neg_log10_padj = -np.log10(df_de['pvals_adj'])
    neg_log10_padj = neg_log10_padj.replace([np.inf, -np.inf], np.nan)
    max_pval_adj = neg_log10_padj.max(skipna=True)
    df_de['pval_adj_-log10'] = neg_log10_padj.fillna(max_pval_adj)

    # Red points: adjusted p < 0.05 and |log fold change| > 1.
    df_de['highlight'] = (df_de['pvals_adj'] < 0.05) & (df_de['logfoldchanges'].abs() > 1)

    # Display label: the second '_'-separated field of 'names'
    # (presumably drops a prefix from the gene identifier - confirm).
    df_de['names_str'] = df_de['names'].str.split('_').str[1]

    # Genes to label: -log10(padj) above 75 and |log fold change| > 1.
    top_highlight_genes = df_de[(df_de['pval_adj_-log10'] > 75) & (df_de['logfoldchanges'].abs() > 1)]

    # Draw straight onto this panel's Axes; no extra plt.figure(), which
    # would only leave an empty figure behind on every iteration.
    # NOTE(review): `size` is seaborn's semantic size mapping, not the marker
    # area (`s`); left as-is to keep the rendered appearance unchanged.
    sns.scatterplot(x='logfoldchanges', y='pval_adj_-log10', data=df_de, hue='highlight',
                    palette=palette, ax=ax, legend=False,
                    alpha=1, linewidth=0, size=10)

    # Leave the frame indexed by gene name, as the original cell did.
    df_de = df_de.set_index('names', drop=False)

    # Add labels to the selected subset of dots. Iterating the subset's own
    # rows avoids a by-name lookup, which would return several rows if a
    # name were duplicated.
    for lfc, neg_log_p, label in zip(top_highlight_genes['logfoldchanges'],
                                     top_highlight_genes['pval_adj_-log10'],
                                     top_highlight_genes['names_str']):
        ax.text(x=lfc, y=neg_log_p, s=label, size=10)

    # Let adjustText reposition this panel's labels (arguments unchanged).
    texts = list(ax.texts)
    adjust_text(texts, ax=ax,expand=(1.2, 1.2),arrowprops=dict(color="gray", lw=0.5),only_move={'points':'y', 'texts':'y'})

    sns.despine(ax=ax, right=True, top=True)
    ax.set_xlabel('Log2 FC')
    ax.set_ylabel('-Log10(padj)')
    ax.set_title(f'DE : {de_file}')

# Hide any grid cells left over when the file count is not a multiple of four.
for unused_ax in axes.flatten()[file_number:]:
    unused_ax.set_visible(False)

# Lay out the grid figure itself (not whichever figure pyplot considers current).
fig.tight_layout()


#fig.savefig(fig_path+'volcano_infected_bystander_vs_VC.pdf')