# Compositional Analysis - Timepoints Comparison (All Cell Types)

### Environment Set Up

In [None]:
# load libraries
import warnings
import matplotlib.pyplot as plt
import mudata as mu
import pertpy as pt
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn
import os
from statannot import add_stat_annotation

In [None]:
# plotting defaults for matplotlib and scanpy (4x4 inch figures, white background)
plt.rcParams.update({'figure.figsize': (4, 4)})
sc.settings.verbosity = 0
sc.settings.set_figure_params(dpi=300, facecolor="white", frameon=False, figsize=(4,4))

In [None]:
# remove warnings
# NOTE: 'ignore' with no category silences every warning for the rest of the
# session, including deprecation warnings raised by the plotting libraries.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# project root and the figure folder for this comparison
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"
fig_dir = os.path.join(work_dir, "figures", "combined", "compositional_analysis", "timepoints_comparison")
# scanpy saves its own figures into the same folder
sc.settings.figdir = fig_dir

In [None]:
# load the integrated, annotated AnnData object from the project output folder
adata = sc.read_h5ad(
    os.path.join(
        work_dir,
        "data", "outputdata", "combined",
        "Combined_SCR_CO2_TCR_full-integrated_annot_22-03-24.h5ad",
    )
)

In [None]:
# display the AnnData summary
# (nothing is created here: the "Condition" column used by the model below is
#  expected to already be present in adata.obs)
adata

## Model Setup & Inference

In [None]:
# initiate scCODA model on ALL annotated cell types
# (to restrict to immune cells, subset adata on obs["Annotation_1.0"] before loading)
sccoda_model = pt.tl.Sccoda()
sccoda_data = sccoda_model.load(
    adata,
    type="cell_level",
    generate_sample_level=True,
    cell_type_identifier="Annotation_1.0",
    sample_identifier="sample",
    covariate_obs=["subproject", "patient", "timepoint", "response", "ICI_status", "Condition"],
)

# inspect the container, the count matrix and the per-sample covariates
for view in (sccoda_data, sccoda_data["coda"].X, sccoda_data["coda"].obs):
    print(view)

In [None]:
# boxplots of the "coda" modality grouped by timepoint (SCR first, then C02),
# without the individual sample dots
pt.pl.coda.boxplots(sccoda_data, 
                    modality_key="coda", 
                    feature_name="timepoint", 
                    add_dots=False,
                    figsize=[6,6],
                    level_order=["SCR", "C02"],
                    cmap=["coral","darkviolet"]
                   )
plt.show()

In [None]:
# prepare the model
# - formula: a single covariate, "Condition" (the author notes it encodes the timepoint)
# - reference_cell_type="automatic": the reference is not picked by hand
sccoda_data = sccoda_model.prepare(
    sccoda_data,
    modality_key="coda",
    formula="Condition", #condition = timepoint
    reference_cell_type="automatic",
)
# display the prepared "coda" modality
sccoda_data["coda"]

In [None]:
# run MCMC (NUTS sampler) on the prepared "coda" modality
sccoda_model.run_nuts(sccoda_data, modality_key="coda")
# display the modality again after sampling
sccoda_data["coda"]

## Result Interpretation

In [None]:
# see most relevant information: model summary for the "coda" modality
sccoda_model.summary(sccoda_data, modality_key="coda")

In [None]:
# which effects the model reports as credible
sccoda_model.credible_effects(sccoda_data, modality_key="coda")

### Adjust FDR

In [None]:
# set the estimated FDR to 0.2, then show the summary again
sccoda_model.set_fdr(sccoda_data, modality_key="coda", est_fdr=0.2)
sccoda_model.summary(sccoda_data, modality_key="coda")

## Save Results

In [None]:
#sccoda_data.write_h5mu(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_immune_scCODA_10-04-24.h5mu"))

In [None]:
# read the intercept table back from sccoda_data through a fresh Sccoda instance
test_model = pt.tl.Sccoda()
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
# intercept table (same call as in the previous cell)
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
# effect table for the "coda" modality
test_model.get_effect_df(sccoda_data, modality_key="coda")

In [None]:
# the intercept table as stored in .varm of the "coda" modality
sccoda_data["coda"].varm["intercept_df"]

## Visualization

In [None]:
# counts: per-cell-type table (its n_cells column is used for the proportions below)
sccoda_data['coda'].var

In [None]:
# proportion of each cell type over all cells, rounded to 3 decimals
# (because of the rounding the values need not add up to exactly 1)
props = (sccoda_data['coda'].var.n_cells / sccoda_data['coda'].var.n_cells.sum()).round(3)
props

In [None]:
# per-sample covariate table of the "coda" modality
sccoda_data['coda'].obs

In [None]:
# one color per cell type, consumed in the same order as the entries of props
palette = ["limegreen", "saddlebrown", "tomato", "red", "gold", "hotpink", "royalblue", "orange", "darkgreen", "dimgray", "skyblue"]

# Convert props to cumulative sums: cf.iloc[i-1] is where segment i starts
cf = props.cumsum()

# Set the figure size and dpi
plt.figure(figsize=(8, 1), dpi=120)  # Adjust the width and height to make the plot thinner

# Create horizontal stacked bar.
# props and cf are label-indexed Series, so positions are accessed with .iloc;
# plain props[i] relies on a deprecated positional fallback.
for i in range(len(props)):
    start = 0 if i == 0 else cf.iloc[i - 1]
    plt.barh(0, props.iloc[i], left=start, edgecolor="white", height=0.2, color=palette[i])

# Remove y-axis ticks and labels
plt.gca().axes.get_yaxis().set_visible(False)

# Set x-axis label and title
plt.xlabel('Proportion')
plt.title('Proportion Bar Plot', pad=20)

# Move the x-axis label and title below the plot
plt.gca().xaxis.set_label_coords(0.5, -0.1)
plt.gca().title.set_position([0.5, -0.2])

In [None]:
# first proportion by position (props is indexed by label, hence .iloc)
props.iloc[0]

In [None]:
# one color per cell type, consumed in the same order as the entries of props
palette=["limegreen", "saddlebrown", "tomato", "red", "gold", "hotpink", "royalblue", "orange", "darkgreen", "dimgray", "skyblue"]

# running total of the proportions: cf.iloc[i-1] is where segment i starts
cf = props.cumsum()

# Create a figure and axis
plt.figure(figsize=(6, .5), dpi=120)
ax = plt.gca()

# Plot each segment with its color; the first one starts at 0, every other one
# at the end of the previous segment. props/cf are label-indexed Series, so
# positions are accessed with .iloc.
for i in range(len(props)):
    start = 0 if i == 0 else cf.iloc[i - 1]
    ax.barh(0, props.iloc[i], left=start, edgecolor="white", height=0.01, color=palette[i])

# Remove y-axis ticks and labels
ax.axes.get_yaxis().set_visible(False)

# Set x-axis label and move it below the plot
ax.set_xlabel('Proportion')
ax.xaxis.set_label_coords(0.5, -1)
ax.title.set_position([0.5, -0.2])
plt.xticks(fontsize=11)
plt.xlim(0, 1.0)

# The PDF is written with format="pdf" so the file content matches its
# extension (it was previously written as PNG data into a .pdf file).
plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/combined/compositional_analysis", "barplot_all_celltypes.pdf"), dpi=600, format="pdf", bbox_inches="tight")
plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/TFM/Fig1", "barplot_all_celltypes.svg"), dpi=600, format="svg", bbox_inches="tight")
plt.show()

In [None]:
# stacked plot of cell type proportions per patient, colored with the custom palette
import matplotlib.colors as mcolors

cmap = mcolors.ListedColormap(palette)
plot = pt.pl.coda.stacked_barplot(sccoda_data, modality_key="coda", feature_name="patient", cmap=cmap, dpi=300)
plot.set_title("Patient")
# replace the five bar labels with the response-based patient labels
plot.set_xticks([0,1,2,3,4], ["PD_01", "PD_02", "PD_03", "SD_01", "PD_04"], ha="center", rotation=30)
plt.savefig(
    os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/ISCO24_poster/Fig1", "Comp_barplot_by_patient.png"),
    dpi=600,
    format="png",
    bbox_inches="tight",
)
plt.show()

# Compositional Analysis - Timepoints Comparison (Immune Cell Types)

### Environment Set Up

In [None]:
# load libraries
import warnings
import matplotlib.pyplot as plt
import mudata as mu
import pertpy as pt
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import os
from statannot import add_stat_annotation

In [None]:
# plotting defaults for matplotlib and scanpy (4x4 inch figures, white background)
plt.rcParams.update({'figure.figsize': (4, 4)})
sc.settings.verbosity = 0
sc.settings.set_figure_params(dpi=300, facecolor="white", frameon=False, figsize=(4,4))

In [None]:
# remove warnings
# NOTE: 'ignore' with no category silences every warning for the rest of the
# session, including deprecation warnings raised by the plotting libraries.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# project root and the figure folder for this comparison
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"
fig_dir = os.path.join(work_dir, "figures", "combined", "compositional_analysis", "timepoints_comparison")
# scanpy saves its own figures into the same folder
sc.settings.figdir = fig_dir

In [None]:
# load the integrated, annotated AnnData object from the project output folder
adata = sc.read_h5ad(
    os.path.join(
        work_dir,
        "data", "outputdata", "combined",
        "Combined_SCR_CO2_TCR_full-integrated_annot_22-03-24.h5ad",
    )
)

In [None]:
# display the AnnData summary
# (nothing is created here: the "Condition" column used by the model below is
#  expected to already be present in adata.obs)
adata

## Model Setup & Inference

In [None]:
# initiate scCODA model on the immune compartment only
immune_mask = adata.obs["Annotation_1.0"].isin(["CD8 T", "CD4 T", "NK", "pDC", "Myeloid", "Plasma", "B Cell"])
sccoda_model = pt.tl.Sccoda()
sccoda_data = sccoda_model.load(
    adata[immune_mask].copy(),
    type="cell_level",
    generate_sample_level=True,
    cell_type_identifier="Annotation_1.0",
    sample_identifier="sample",
    covariate_obs=["subproject", "patient", "timepoint", "response", "ICI_status", "Condition"],
)

# inspect the container, the count matrix and the per-sample covariates
for view in (sccoda_data, sccoda_data["coda"].X, sccoda_data["coda"].obs):
    print(view)

In [None]:
# boxplots of the "coda" modality grouped by timepoint (SCR first, then C02),
# with the individual sample dots overlaid
pt.pl.coda.boxplots(sccoda_data, 
                    modality_key="coda", 
                    feature_name="timepoint", 
                    add_dots=True,
                    figsize=[6,6],
                    level_order=["SCR", "C02"],
                    cmap=["coral","darkviolet"],
                    #args_swarmplot={"palette": ["red"]},
                   )
plt.show()

In [None]:
# prepare the model
# - formula: a single covariate, "Condition" (the author notes it encodes the timepoint)
# - reference_cell_type="automatic": the reference is not picked by hand
sccoda_data = sccoda_model.prepare(
    sccoda_data,
    modality_key="coda",
    formula="Condition", #condition = timepoint
    reference_cell_type="automatic",
)
# display the prepared "coda" modality
sccoda_data["coda"]

In [None]:
# run MCMC (NUTS sampler) on the prepared "coda" modality
sccoda_model.run_nuts(sccoda_data, modality_key="coda")
# display the modality again after sampling
sccoda_data["coda"]

## Result Interpretation

In [None]:
# see most relevant information: model summary for the "coda" modality
sccoda_model.summary(sccoda_data, modality_key="coda")

In [None]:
# which effects the model reports as credible
sccoda_model.credible_effects(sccoda_data, modality_key="coda")

### Adjust FDR

In [None]:
# set the estimated FDR to 0.2, then show the summary again
sccoda_model.set_fdr(sccoda_data, modality_key="coda", est_fdr=0.2)
sccoda_model.summary(sccoda_data, modality_key="coda")

## Save Results

In [None]:
#sccoda_data.write_h5mu(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_immune_scCODA_10-04-24.h5mu"))

In [None]:
# read the intercept table back from sccoda_data through a fresh Sccoda instance
test_model = pt.tl.Sccoda()
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
# intercept table (same call as in the previous cell)
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
# effect table for the "coda" modality
test_model.get_effect_df(sccoda_data, modality_key="coda")

In [None]:
# the intercept table as stored in .varm of the "coda" modality
sccoda_data["coda"].varm["intercept_df"]

## Visualization

In [None]:
# counts: per-cell-type table (its n_cells column is used for the proportions below)
sccoda_data['coda'].var

In [None]:
# proportion of each cell type over all cells, rounded to 3 decimals
# (because of the rounding the values need not add up to exactly 1)
props = (sccoda_data['coda'].var.n_cells / sccoda_data['coda'].var.n_cells.sum()).round(3)
props

In [None]:
# per-sample covariate table of the "coda" modality
sccoda_data['coda'].obs

In [None]:
# shared boxplot styling: black outlines, solid median line, dashed mean line
boxprops = {"linestyle": "-", "linewidth": 1, "edgecolor": "black"}
flierprops = {"marker": "o", "markerfacecolor": "black", "markeredgecolor": "none"}
medianprops = {"linestyle": "-", "linewidth": .8, "color": "black"}
meanlineprops = {"linestyle": "--", "linewidth": .8, "color": "black"}


In [None]:
# cell types to plot = every variable (column) of the "coda" modality
cell_types = list(sccoda_data['coda'].var_names)
print(cell_types)

In [None]:
# cell types to flag with "*" in the plot titles below.
# NOTE(review): nothing visible in this notebook ever adds to this list, so the
# plots that read it label every cell type "ns" — confirm whether it should be
# filled from the credible effects reported by the model.
effect_cell_types = []

In [None]:
# create df for plotting: per-sample proportions in long format,
# one row per (sample, cell type) with the sample's covariates attached
feature_name=["Condition", "response", "patient"]
data=sccoda_data['coda']

# normalize every sample (row) by its total count
sample_sums = np.sum(data.X, axis=1, keepdims=True)
X = data.X/sample_sums
value_name = "Proportion"

count_df = pd.DataFrame(X, columns=data.var.index, index=data.obs.index)
count_df = count_df.merge(data.obs[feature_name], left_index=True, right_index=True)
plot_df = count_df.melt(id_vars=feature_name, var_name="Cell type", value_name=value_name)
if cell_types is not None:
    keep_rows = plot_df["Cell type"].isin(cell_types)
    plot_df = plot_df[keep_rows]

In [None]:
# long-format proportion table used by all boxplots below
plot_df

In [None]:
# current content of the list of cell types flagged as significant
effect_cell_types

In [None]:
# plots for Poster/Thesis Figures
# overview of cell type distribution across timepoints:
# one small figure per cell type, written as PDF to figures/TFM/FigS2
plt.rcParams.update({'font.size': 13})
sns.set_style(rc = {'axes.facecolor': 'lightsteelblue'})

for cell_type in cell_types:
    # rows of this cell type only; shared by the three layers below
    cell_df = plot_df[plot_df["Cell type"] == cell_type]

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # layer 1: one box per condition
    sns.boxplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="Condition",
        palette=["coral","darkviolet"],
        order=["T0/-ICI","T1/+ICI"],
        ax=ax,
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops
    )

    # layer 2: the individual points, colored by response
    sns.swarmplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="response",
        palette=["red", "blue"],
        dodge=False,
        ax=ax,
        size=5,
        edgecolor="black",
        linewidth=.5,
        alpha=1
    )

    # layer 3: one line per patient joining its values across conditions
    sns.pointplot(
        data=cell_df,
        x="Condition",
        y="Proportion",
        hue="patient",
        legend= False,
        palette="Spectral",
        linewidth=1.5,
        errwidth=1,
        markers='|',
        join=True,
        ax=ax  # explicit target instead of relying on the current axes
    )
    plt.title(cell_type, pad=18)
    plt.xlabel('', labelpad=10)
    plt.ylabel('', labelpad=10)
    plt.yticks(fontsize = 10)
    plt.xticks(fontsize = 10)

    # legend placed outside the axes, to the right
    legend = plt.legend(loc='right', bbox_to_anchor=(2, .5), ncol=1, title="Response", frameon=True, facecolor="white")
    plt.setp(legend.get_title(),fontsize='12')
    plt.tight_layout()

    # start the y axis at 0 and leave 66% headroom above the current maximum
    xmin, xmax, ymin, ymax = plt.axis()
    plt.ylim((0, ymax + (0.66)*ymax))

    plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/TFM/FigS2", cell_type+"_boxplot_by_condition_colby_response.pdf"), dpi=600, format="pdf", bbox_inches="tight")

In [None]:
# overview of the distribution across timepoints for a single cell type
import seaborn as sns
plt.rcParams.update({'font.size': 13})

cell_type = "Plasma"

# The cell needs its own title suffix and axes: the statements below used to be
# indented under nothing (a syntax error) and read `significance` and `ax`
# without defining them.
significance = "*" if cell_type in effect_cell_types else "ns"
fig, ax = plt.subplots(figsize=(2,2), dpi=120)

# rows of this cell type only; shared by both layers
cell_df = plot_df[plot_df["Cell type"] == cell_type]

# layer 1: one box per condition
sns.boxplot(
    x="Condition",
    y="Proportion",
    data=cell_df,
    hue="Condition",
    palette=["coral","darkviolet"],
    order=["T0/-ICI","T1/+ICI"],
    ax=ax,
    vert=True,
    patch_artist=True,
    meanline=True,
    showmeans=True,
    showfliers=False,
    boxprops=boxprops,
    medianprops=medianprops,
    meanprops=meanlineprops
)

# layer 2: the individual points, colored by patient
sns.swarmplot(
    x="Condition",
    y="Proportion",
    data=cell_df,
    hue="patient",
    palette="Spectral",
    dodge=False,
    ax=ax,
    size=5,
    edgecolor="black",
    linewidth=.5,
    alpha=1
)

ax.set_title(cell_type+" - "+significance)
ax.set_xlabel('Timepoint')
plt.yticks(fontsize = 10)
plt.xticks(fontsize = 10)

# legend placed outside the axes, to the right
legend = plt.legend(loc='right', bbox_to_anchor=(1.8, .5), ncol=1, title="Patient", frameon=True)
plt.setp(legend.get_title(),fontsize='12')
plt.tight_layout()

plt.savefig(os.path.join(fig_dir, "Comp_"+cell_type+"_boxplot_by_condition.png"), dpi=600, format="png", bbox_inches="tight")


In [None]:
# plots for Poster/Thesis Figures
# overview of cell type distribution across timepoints:
# one small figure per cell type, written as PDF to figures/TFM/Fig2
plt.rcParams.update({'font.size': 13})
sns.set_style(rc = {'axes.facecolor': 'lightsteelblue'})

# (to plot only selected cell types, filter on effect_cell_types inside the loop)
for cell_type in cell_types:
    # rows of this cell type only; shared by the three layers below
    cell_df = plot_df[plot_df["Cell type"] == cell_type]

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # layer 1: one box per condition
    sns.boxplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="Condition",
        palette=["coral","darkviolet"],
        order=["T0/-ICI","T1/+ICI"],
        ax=ax,
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops
    )

    # layer 2: the individual points, colored by response
    sns.swarmplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="response",
        palette=["red", "blue"],
        dodge=False,
        ax=ax,
        size=5,
        edgecolor="black",
        linewidth=.5,
        alpha=1
    )

    # layer 3: one line per patient joining its values across conditions
    sns.pointplot(
        data=cell_df,
        x="Condition",
        y="Proportion",
        hue="patient",
        legend= False,
        palette="Spectral",
        linewidth=1.5,
        errwidth=1,
        markers='|',
        join=True,
        ax=ax  # explicit target instead of relying on the current axes
    )

    # start the y axis at 0 and leave 0.2 of headroom above the current maximum
    xmin, xmax, ymin, ymax = plt.axis()
    plt.ylim((0, ymax+0.2))

    plt.title(cell_type, fontweight="bold", pad=20)
    plt.xlabel('Timepoint', labelpad=10)
    plt.ylabel('Proportion', labelpad=10)
    plt.yticks(fontsize = 10)
    plt.xticks(fontsize = 10)

    # The patient lines are drawn with legend=False, so the legend entries come
    # from the response-colored points; the title says so (it used to read "Patient").
    legend = plt.legend(loc='right', bbox_to_anchor=(2, .5), ncol=1, title="Response", frameon=True, facecolor="white")
    plt.setp(legend.get_title(),fontsize='12')
    plt.tight_layout()

    # The file name is built from the cell_type variable. The literal string
    # "cell_type" was used before, so every iteration overwrote the same PDF.
    plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/TFM/Fig2", cell_type+"_boxplot_by_condition_colby_response.pdf"), dpi=600, format="pdf", bbox_inches="tight")

In [None]:
# overview of cell type distribution across patients: one figure per cell type
for cell_type in cell_types:

    # title suffix: "*" for flagged cell types, "ns" otherwise
    significance = "*" if cell_type in effect_cell_types else "ns"

    cell_df = plot_df[plot_df["Cell type"] == cell_type]
    box_style = dict(
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops,
    )

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # one box per patient
    sns.boxplot(x="patient", y="Proportion", data=cell_df, hue="patient", palette="Spectral", ax=ax, **box_style)

    # individual points on top, colored by condition
    sns.swarmplot(
        x="patient",
        y="Proportion",
        data=cell_df,
        hue="Condition",
        palette=["coral", "darkviolet"],
        dodge=False,
        ax=ax,
        size=4,
        edgecolor="black",
        linewidth=.5,
        alpha=1,
    )

    ax.set_title(cell_type+" - "+significance)
    ax.set_xlabel('Patient')
    plt.yticks(fontsize = 10)
    plt.xticks(fontsize = 10)

    # legend outside the axes, to the right
    legend = plt.legend(loc='right', bbox_to_anchor=(1.9, .5), ncol=1, title="Condition", frameon=True)
    legend.get_title().set_fontsize('12')
    plt.tight_layout()

    plt.savefig(os.path.join(fig_dir, f"Comp_{cell_type}_boxplot_by_patient.png"), dpi=600, format="png", bbox_inches="tight")

In [None]:
# pertpy boxplots of all cell types grouped by patient, in a single panel,
# reusing the shared box styling
plot = pt.pl.coda.boxplots(
    sccoda_data,
    modality_key="coda",
    feature_name="patient",
    figsize=(4,4),
    add_dots=True,
    plot_facets=False,
    args_boxplot=dict(
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        boxprops=boxprops,
        flierprops=flierprops,
        medianprops=medianprops,
        meanprops=meanlineprops,
    ),
    cmap="Spectral",
)
plt.savefig(os.path.join(fig_dir, "Comp_all_boxplot_by_patient.png"), dpi=600, format="png", bbox_inches="tight")

In [None]:
# overview of cell type distribution across timepoints: one figure per cell
# type, points colored by patient
import seaborn as sns
plt.rcParams.update({'font.size': 13})

for cell_type in cell_types:

    # title suffix: "*" for flagged cell types, "ns" otherwise
    significance = "*" if cell_type in effect_cell_types else "ns"

    cell_df = plot_df[plot_df["Cell type"] == cell_type]
    box_style = dict(
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops,
    )

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # one box per condition
    sns.boxplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="Condition",
        palette=["coral","darkviolet"],
        order=["T0/-ICI","T1/+ICI"],
        ax=ax,
        **box_style,
    )

    # individual points on top, colored by patient
    sns.swarmplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="patient",
        palette="Spectral",
        dodge=False,
        ax=ax,
        size=5,
        edgecolor="black",
        linewidth=.5,
        alpha=1,
    )

    ax.set_title(cell_type+" - "+significance)
    ax.set_xlabel('Timepoint')
    plt.yticks(fontsize = 10)
    plt.xticks(fontsize = 10)

    # legend outside the axes, to the right
    legend = plt.legend(loc='right', bbox_to_anchor=(1.8, .5), ncol=1, title="Patient", frameon=True)
    legend.get_title().set_fontsize('12')
    plt.tight_layout()

    plt.savefig(os.path.join(fig_dir, f"Comp_{cell_type}_boxplot_by_condition.png"), dpi=600, format="png", bbox_inches="tight")


In [None]:
# drop the rows of the listed non-immune cell types (tumor, epithelial, CAF,
# endothelial) from plot_df
types_to_filter = ['Tumor', 'Liver Epithelial', 'CAF', 'Endothelial']
is_filtered_type = plot_df['Cell type'].isin(types_to_filter)
filtered_df = plot_df[~is_filtered_type]

In [None]:
# some boxplot arguments to customize the plot
boxprops = dict(linestyle='-', linewidth=1, edgecolor='black')
flierprops = dict(marker='o', markerfacecolor='black', #markersize=12,
                  markeredgecolor='none')
medianprops = dict(linestyle='-', linewidth=.8, color='black')
meanlineprops = dict(linestyle='--', linewidth=.8, color='black')


import seaborn as sns
plt.rcParams.update({'font.size': 13})
# use the sns alias imported just above; the bare name `seaborn` is not
# imported in this notebook section
sns.set_style(style='white')

# left-to-right order of the cell types on the x axis
custom_order = ['CD4 T', 'CD8 T', 'NK', 'Myeloid', 'pDC', 'Plasma', 'B Cell']


fig, ax = plt.subplots(figsize=(7,4), dpi=120)

# one pair of boxes (one per condition) for every cell type
ax = sns.boxplot(
    x="Cell type",
    y="Proportion",
    data=filtered_df,
    hue="Condition",
    palette=["coral","darkviolet"],
    order=custom_order,
    ax=ax,
    vert=True,
    patch_artist=True,
    meanline=True,
    showmeans=True,
    showfliers=False,
    boxprops=boxprops,
    medianprops=medianprops,
    meanprops=meanlineprops
)


# individual points, dodged so they sit on top of their condition's box
sns.stripplot(
    x="Cell type",
    y="Proportion",
    data=filtered_df,
    hue="Condition",
    palette=["coral","darkviolet"],
    dodge=True,
    order=custom_order,
    ax=ax,
    size=5,
    edgecolor="black",
    linewidth=.5,
    alpha=1,
)

plt.ylim(0, 0.8)

# the x axis carries cell types (the timepoint is encoded by the hue/legend),
# so it is labelled accordingly; it used to read 'Timepoint'
ax.set_xlabel('Cell type')
plt.yticks(fontsize = 10)
plt.xticks(fontsize = 10)

# keep only the first two legend entries so the two conditions are listed once
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[:2], labels[:2], title='Timepoint', bbox_to_anchor=(1, 1))

plt.tight_layout()

plt.savefig(os.path.join(work_dir, "figures", "TFM", "Fig1", "Boxplots_cell_type_tiempoints_comparison.pdf"), dpi=600, format="pdf", bbox_inches="tight")


In [None]:
# overview of cell type distribution across timepoints: one figure per cell
# type, points colored by response
plt.rcParams.update({'font.size': 13})

for cell_type in cell_types:

    # title suffix: "*" for flagged cell types, "ns" otherwise
    significance = "*" if cell_type in effect_cell_types else "ns"

    cell_df = plot_df[plot_df["Cell type"] == cell_type]
    box_style = dict(
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops,
    )

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # one box per condition
    sns.boxplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="Condition",
        palette=["coral","darkviolet"],
        order=["T0/-ICI","T1/+ICI"],
        ax=ax,
        **box_style,
    )

    # individual points on top, colored by response
    sns.swarmplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="response",
        palette=["red", "blue"],
        dodge=False,
        ax=ax,
        size=5,
        edgecolor="black",
        linewidth=.5,
        alpha=1,
    )

    ax.set_title(cell_type+" - "+significance)
    ax.set_xlabel('Timepoint')
    plt.yticks(fontsize = 10)
    plt.xticks(fontsize = 10)

    # legend outside the axes, to the right
    legend = plt.legend(loc='right', bbox_to_anchor=(1.8, .5), ncol=1, title="Response", frameon=True)
    legend.get_title().set_fontsize('12')
    plt.tight_layout()

    plt.savefig(os.path.join(fig_dir, f"Comp_{cell_type}_boxplot_by_condition_colby_resp.png"), dpi=600, format="png", bbox_inches="tight")


In [None]:
# stacked plot of cell type proportions per patient (default colors), saved as PNG
plot = pt.pl.coda.stacked_barplot(sccoda_data, modality_key="coda", feature_name="patient", dpi=300)
plot.set_title("Patient")
# relabel the five bars with the original patient IDs
plot.set_xticks([0,1,2,3,4], ["P01", "P02", "P03", "P08", "P10"], ha="center", rotation=0)
plt.savefig(
    os.path.join(fig_dir, "Comp_immune_barplot_by_patient.png"),
    dpi=600,
    format="png",
    bbox_inches="tight",
)
plt.show()

In [None]:
# stacked plot of cell type proportions per patient, with the patients
# renumbered consecutively (P01..P05); shown only, not saved
plot = pt.pl.coda.stacked_barplot(
    sccoda_data, 
    modality_key="coda", 
    feature_name="patient",
    dpi=300
)
plot.set(title = "Patient")
plot.set_xticks([0,1,2,3,4], ["P01", "P02", "P03", "P04", "P05"], ha="center", rotation=0)
plt.show()
# label mapping relative to the original IDs used in the previous cell:
# P08 = P04
# P10 = P05

In [None]:
# stacked plot of cell type proportions per condition (T0/-ICI first, then T1/+ICI)
plot = pt.pl.coda.stacked_barplot(
    sccoda_data,
    modality_key="coda",
    feature_name="Condition",
    level_order=["T0/-ICI", "T1/+ICI"],
    dpi=300,
)
plot.set_title("Timepoint")
plot.set_xticks([0,1], ["T0/-ICI", "T1/+ICI"], rotation = 0, rotation_mode="anchor", ha="center")
plt.savefig(
    os.path.join(fig_dir, "Comp_immune_barplot_by_condition.png"),
    dpi=600,
    format="png",
    bbox_inches="tight",
)
plt.show()

In [None]:
# stacked plot of cell type proportions per timepoint (SCR first, then C02),
# with the bars relabelled T0 / T1
plot = pt.pl.coda.stacked_barplot(
    sccoda_data,
    modality_key="coda",
    feature_name="timepoint",
    level_order=["SCR", "C02"],
    dpi=300,
)
plot.set_title("Timepoint")
plot.set_xticks([0,1], ["T0", "T1"], rotation = 0, rotation_mode="anchor", ha="center")
plt.savefig(
    os.path.join(fig_dir, "Comp_immune_barplot_by_timepoint.png"),
    dpi=600,
    format="png",
    bbox_inches="tight",
)
plt.show()

In [None]:
# stacked plot of cell type proportions per ICI status
plot = pt.pl.coda.stacked_barplot(
    sccoda_data, 
    modality_key="coda", 
    feature_name="ICI_status",
    #level_order=["`-ICI`", "`+ICI/PD`", "`+ICI/PD`"], 
    dpi=300
)
plot.set(title = "ICI Status")
# NOTE(review): the tick labels are hard-coded and no level_order is passed, so
# they assume the bars come out as +ICI/PD, +ICI/SD, -ICI — confirm against the plot
plot.set_xticks([0,1,2], ["+ICI/PD", "+ICI/SD", "-ICI"], rotation = 0, rotation_mode="anchor", ha="center")
plt.savefig(os.path.join(fig_dir, "Comp_immune_barplot_by_ICI_status.png"), dpi=600, format="png", bbox_inches="tight")
plt.show()

In [None]:
# stacked plot of cell type proportions per response
plot = pt.pl.coda.stacked_barplot(
    sccoda_data, 
    modality_key="coda", 
    feature_name="response",
    dpi=300,
)
plot.set_title("ICI Response")
# NOTE(review): hard-coded labels assume the bar order PD, SD — confirm against the plot
plot.set_xticks([0,1], ["PD", "SD"], rotation = 0, rotation_mode="anchor", ha="center")
plt.savefig(os.path.join(fig_dir, "Comp_immune_barplot_by_ICI_response.png"), dpi=600, format="png", bbox_inches="tight")
plt.show()

In [None]:
# stacked plot of cell type proportions per patient (poster version, custom
# palette, no legend)
import matplotlib.colors as mcolors
palette=["limegreen", "tomato", "red", "royalblue", "orange", "darkgreen", "skyblue"]
cmap = mcolors.ListedColormap(palette)
# The size is passed to stacked_barplot itself. A separate plt.figure(...) call
# before it only left an empty extra figure behind, because the plotting
# function opens its own figure (see the pertpy documentation).
plot = pt.pl.coda.stacked_barplot(
    sccoda_data,
    modality_key="coda",
    feature_name="patient",
    figsize=(3,3),
    cmap=cmap,
    dpi=120,
    show_legend=False
)
plot.set(title = "Patient")
plot.set_xticks([0,1,2,3,4], ["PD_01", "PD_02", "PD_03", "SD_01", "PD_04"], ha="center", rotation=30)
plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/ISCO24_poster/Fig1", "CompImmune_barplot_by_patient.pdf"), dpi=600, format="pdf", bbox_inches="tight")
plt.show()

In [None]:
# stacked plot of cell type proportions per condition (thesis version, custom
# palette, no legend, no y axis)
import matplotlib.colors as mcolors
palette=["limegreen", "tomato", "red", "royalblue", "orange", "darkgreen", "skyblue"]
cmap = mcolors.ListedColormap(palette)
# The size is passed to stacked_barplot itself. A separate plt.figure(...) call
# before it only left an empty extra figure behind, because the plotting
# function opens its own figure (see the pertpy documentation).
plot = pt.pl.coda.stacked_barplot(
    sccoda_data,
    modality_key="coda",
    feature_name="Condition",
    figsize=(6,3),
    cmap=cmap,
    dpi=120,
    show_legend=False
)
# title spelling corrected (was "Tiempoint")
plot.set(title = "Timepoint")
plot.set_xticks([0,1], ["T0/-ICI", "T1/+ICI"], ha="center", rotation=0)
# hide the y axis ticks and label
plt.yticks([])
plt.ylabel('')
plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/TFM/Fig1", "CompImmune_barplot_by_timepoint.pdf"), dpi=600, format="pdf", bbox_inches="tight")
plt.show()

In [None]:
# stacked plot of cell type proportions per response (poster version, custom
# palette, no legend)
import matplotlib.colors as mcolors
palette=["limegreen", "tomato", "red", "royalblue", "orange", "darkgreen", "skyblue"]
cmap = mcolors.ListedColormap(palette)
# The size is passed to stacked_barplot itself. A separate plt.figure(...) call
# before it only left an empty extra figure behind, because the plotting
# function opens its own figure (see the pertpy documentation).
plot = pt.pl.coda.stacked_barplot(
    sccoda_data,
    modality_key="coda",
    feature_name="response",
    figsize=(3,3),
    cmap=cmap,
    dpi=120,
    show_legend=False
)
plot.set(title = "Response")
plot.set_xticks([0,1], ["PD", "SD"], ha="center", rotation=0)
plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/ISCO24_poster/Fig1", "CompImmune_barplot_by_response.pdf"), dpi=600, format="pdf", bbox_inches="tight")
plt.show()

In [None]:
# stacked plot of cell type proportions per ICI status (poster version, custom
# palette, no legend, no y axis)
import matplotlib.colors as mcolors
palette=["limegreen", "tomato", "red", "royalblue", "orange", "darkgreen", "skyblue"]
cmap = mcolors.ListedColormap(palette)
# The size is passed to stacked_barplot itself. A separate plt.figure(...) call
# before it only left an empty extra figure behind, because the plotting
# function opens its own figure (see the pertpy documentation).
plot = pt.pl.coda.stacked_barplot(
    sccoda_data,
    modality_key="coda",
    feature_name="ICI_status",
    figsize=(3,3),
    cmap=cmap,
    dpi=120,
    show_legend=False
)
plot.set(title = "ICI Status")
# tick position 2 is labelled "-ICI", positions 0 and 1 the two +ICI groups
plot.set_xticks([2,0,1], ["-ICI", "+ICI/PD", "+ICI/SD"], ha="center", rotation=0)
# hide the y axis ticks and label
plt.yticks([])
plt.ylabel('')
plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/ISCO24_poster/Fig1", "CompImmune_barplot_by_ICI_status.pdf"), dpi=600, format="pdf", bbox_inches="tight")
plt.show()

In [None]:
#https://pertpy.readthedocs.io/en/latest/tutorials/notebooks/sccoda.html
#https://github.com/theislab/scCODA/issues/47

# Compositional Analysis - PD Timepoints Comparison (Immune Cell Types)

### Environment Set Up

In [None]:
# load libraries
import warnings
import matplotlib.pyplot as plt
import mudata as mu
import pertpy as pt
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import os
from statannot import add_stat_annotation

In [None]:
# set up figure parameters
plt.rcParams['figure.figsize'] = (4, 4)
sc.settings.verbosity = 0
sc.settings.set_figure_params(
    dpi=300,
    facecolor="white",
    frameon=False,
    figsize=(4,4)
)

In [None]:
# remove warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# set up dirs
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"
fig_dir = os.path.join(work_dir, "figures", "combined", "compositional_analysis", "timepoints_PD_comparison")
sc.settings.figdir = os.path.join(work_dir, "figures", "combined", "compositional_analysis", "timepoints_PD_comparison")

In [None]:
# read anndata object
adata = sc.read_h5ad(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_TCR_full-integrated_annot_22-03-24.h5ad"))

In [None]:
# display a summary of the loaded AnnData object (nothing is created here)
adata

In [None]:
# subset PD patients: keep only the cells whose "response" value is "PD"
# (.copy() turns the selection into an independent object)
adata = adata[adata.obs["response"] == "PD"].copy()
# display a summary of the subset
adata

## Model Setup & Inference

In [None]:
# set up the scCODA model on the immune cell types only
immune_cell_types = ["CD8 T", "CD4 T", "NK", "pDC", "Myeloid", "Plasma", "B Cell"]

sccoda_model = pt.tl.Sccoda()
sccoda_data = sccoda_model.load(
    # restrict the input to cells annotated as one of the immune cell types
    adata[adata.obs["Annotation_1.0"].isin(immune_cell_types)].copy(),
    type="cell_level",
    generate_sample_level=True,
    cell_type_identifier="Annotation_1.0",
    sample_identifier="sample",
    covariate_obs=["subproject", "patient", "timepoint", "response", "ICI_status", "Condition"],
)

# print the container, then the X matrix and the obs table of the "coda" modality
print(sccoda_data)
coda = sccoda_data["coda"]
print(coda.X)
print(coda.obs)

In [None]:
pt.pl.coda.boxplots(sccoda_data, 
                    modality_key="coda", 
                    feature_name="timepoint", 
                    add_dots=True,
                    figsize=[6,6],
                    level_order=["SCR", "C02"],
                    cmap=["coral","darkviolet"]
                   )
plt.show()

In [None]:
# prepare the model
sccoda_data = sccoda_model.prepare(
    sccoda_data,
    modality_key="coda",
    formula="Condition", #condition = timepoint
    reference_cell_type="automatic",
)
sccoda_data["coda"]

In [None]:
# run MCMC
sccoda_model.run_nuts(sccoda_data, modality_key="coda")
sccoda_data["coda"]

## Result Interpretation

In [None]:
# see most relevant information
sccoda_model.summary(sccoda_data, modality_key="coda")

In [None]:
sccoda_model.credible_effects(sccoda_data, modality_key="coda")

### Adjust FDR

In [None]:
sccoda_model.set_fdr(sccoda_data, modality_key="coda", est_fdr=0.1)
sccoda_model.summary(sccoda_data, modality_key="coda")

## Save Results

In [None]:
#sccoda_data.write_h5mu(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_immune_scCODA_10-04-24.h5mu"))

In [None]:
test_model = pt.tl.Sccoda()
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
test_model.get_effect_df(sccoda_data, modality_key="coda")

In [None]:
sccoda_data["coda"].varm["intercept_df"]

## Visualization

In [None]:
# visualize effects (significant changes in cell type composition)
pt.pl.coda.effects_barplot(sccoda_data, 
                           modality_key="coda", 
                           parameter="Final Parameter",
                           figsize=[3,3],
                           dpi=300)
#plt.savefig(os.path.join(fig_dir, "TNK_Comp_Significant_Celltypes_Effects.png"), dpi=600, format="png", bbox_inches="tight")

In [None]:
pt.pl.coda.effects_barplot(sccoda_data, "coda", "Condition")
plt.savefig(os.path.join(work_dir, "figures", "TFM", "Fig1", "Significant_Celltypes_Log2FC.pdf"), dpi=300, format="pdf", bbox_inches="tight")

In [None]:
pt.pl.coda.effects_barplot(sccoda_data, "coda", parameter = "Final Parameter")
plt.savefig(os.path.join(work_dir, "figures", "TFM", "Fig1", "Significant_Celltypes_Effects.pdf"), dpi=300, format="pdf", bbox_inches="tight")

In [None]:
# counts
sccoda_data['coda'].var

In [None]:
# share of each cell type in the total n_cells, rounded to three decimals
n_cells = sccoda_data['coda'].var.n_cells
props = (n_cells / n_cells.sum()).round(3)
props

In [None]:
sccoda_data['coda'].obs

In [None]:
# keyword dictionaries used to style the boxplots drawn in the cells below

# box outline: solid black edge
boxprops = {"linestyle": "-", "linewidth": 1, "edgecolor": "black"}

# outlier markers: filled black circles without an edge
flierprops = {
    "marker": "o",
    "markerfacecolor": "black",
    "markeredgecolor": "none",
}

# median line: thin solid black
medianprops = {"linestyle": "-", "linewidth": .8, "color": "black"}

# mean line: thin dashed black
meanlineprops = {"linestyle": "--", "linewidth": .8, "color": "black"}


In [None]:
# define cell types for plotting
cell_types = sccoda_data['coda'].var_names.tolist()
print(cell_types)

In [None]:
effect_cell_types = ["NK"]

In [None]:
# build a long-format table of per-sample cell type proportions for plotting
feature_name = ["Condition", "response", "patient"]
value_name = "Proportion"
data = sccoda_data['coda']

# divide every row of X by its row total
sample_sums = np.sum(data.X, axis=1, keepdims=True)
X = data.X / sample_sums

# wide table (one column per cell type) joined on the index with the selected obs columns
count_df = pd.DataFrame(X, columns=data.var.index, index=data.obs.index).merge(
    data.obs[feature_name],
    left_index=True,
    right_index=True,
)

# one row per (sample, cell type) pair
plot_df = pd.melt(
    count_df,
    id_vars=feature_name,
    var_name="Cell type",
    value_name=value_name,
)
if cell_types is not None:
    keep_rows = plot_df["Cell type"].isin(cell_types)
    plot_df = plot_df[keep_rows]

In [None]:
plot_df

In [None]:
# five-colour "Spectral" palette; the colour at index 3 is dropped below
color_palette = sns.color_palette("Spectral", 5)
print(color_palette)
color_palette.pop(3)
# display a fresh, complete five-colour palette (not the shortened one)
sns.color_palette("Spectral", 5)

In [None]:
# plots for Poster/Thesis Figures
# proportion per condition, drawn only for the cell types listed in
# effect_cell_types; one figure is saved per cell type
plt.rcParams.update({'font.size': 13})
# NOTE: sns.set_style changes the global style, so this axes background also
# applies to figures drawn by cells executed afterwards
sns.set_style(rc = {'axes.facecolor': 'lightsteelblue'})

# x-axis order shared by all three layers so that boxes, dots and lines line up
condition_order = ["T0/-ICI", "T1/+ICI"]

for cell_type in cell_types:

    # only cell types listed in effect_cell_types get a figure
    if cell_type not in effect_cell_types:
        continue

    # rows of the long-format table that belong to the current cell type
    cell_df = plot_df[plot_df["Cell type"] == cell_type]

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # layer 1: one box per condition
    sns.boxplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="Condition",
        palette=["coral","darkviolet"],
        order=condition_order,
        ax=ax,
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops
    )

    # layer 2: individual values, coloured by response and not dodged
    sns.swarmplot(
        x="Condition",
        y="Proportion",
        data=cell_df,
        hue="response",
        palette=["red", "blue"],
        dodge=False,
        order=condition_order,
        ax=ax,
        size=5,
        edgecolor="black",
        linewidth=.5,
        alpha=1
    )

    # layer 3: one line per patient across the conditions (kept out of the legend)
    sns.pointplot(
        data=cell_df,
        x="Condition",
        y="Proportion",
        hue="patient",
        order=condition_order,
        legend=False,
        palette="Spectral",
        linewidth=1.5,
        # err_kws replaces the deprecated errwidth=1; the deprecated join=True
        # is dropped because connecting the points is the default
        err_kws={"linewidth": 1},
        markers='|',
        ax=ax  # draw on this figure's axes instead of relying on the current axes
    )

    # leave head-room above the highest drawn element
    xmin, xmax, ymin, ymax = plt.axis()
    plt.ylim((0, ymax+0.2))

    plt.title(cell_type, fontweight="bold", pad=20)
    plt.xlabel('Timepoint', labelpad=10)
    plt.ylabel('Proportion', labelpad=10)
    plt.yticks(fontsize = 10)
    plt.xticks(fontsize = 10)

    # the only legend entries come from the swarmplot hue ("response"),
    # so the legend is titled accordingly
    legend = plt.legend(loc='right', bbox_to_anchor=(2, .5), ncol=1, title="Response", frameon=True, facecolor="white")
    plt.setp(legend.get_title(),fontsize='12')
    plt.tight_layout()

    plt.savefig(os.path.join("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/figures/TFM/Fig2", "TNK_Comp_"+cell_type+"_boxplot_by_condition_colby_response.pdf"), dpi=600, format="pdf", bbox_inches="tight")

In [None]:
# proportion per patient: one figure per cell type, saved to fig_dir
for cell_type in cell_types:

    # title suffix: "*" for cell types listed in effect_cell_types, "ns" otherwise
    significance = "*" if cell_type in effect_cell_types else "ns"

    # rows of the long-format table that belong to the current cell type
    cell_df = plot_df[plot_df["Cell type"] == cell_type]

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # boxes: one per patient
    sns.boxplot(
        data=cell_df,
        x="patient",
        y="Proportion",
        hue="patient",
        palette=color_palette,
        ax=ax,
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops,
    )

    # dots: individual values, coloured by condition and not dodged
    sns.swarmplot(
        data=cell_df,
        x="patient",
        y="Proportion",
        hue="Condition",
        palette=["coral", "darkviolet"],
        dodge=False,
        ax=ax,
        size=4,
        edgecolor="black",
        linewidth=.5,
        alpha=1,
    )

    ax.set_title(f"{cell_type} - {significance}")
    ax.set_xlabel('Patient')
    plt.yticks(fontsize = 10)
    plt.xticks(fontsize = 10)

    # legend placed to the right of the axes
    legend = plt.legend(
        loc='right',
        bbox_to_anchor=(1.9, .5),
        ncol=1,
        title="Condition",
        frameon=True,
    )
    plt.setp(legend.get_title(), fontsize='12')
    plt.tight_layout()

    out_path = os.path.join(fig_dir, "Comp_"+cell_type+"_boxplot_by_patient.png")
    plt.savefig(out_path, dpi=600, format="png", bbox_inches="tight")

In [None]:
# all cell types in one pertpy boxplot, grouped by patient
# options handed to pertpy through args_boxplot
patient_boxplot_args = {
    "vert": True,
    "patch_artist": True,
    "meanline": True,
    "showmeans": True,
    "boxprops": boxprops,
    "flierprops": flierprops,
    "medianprops": medianprops,
    "meanprops": meanlineprops,
}

plot = pt.pl.coda.boxplots(
    sccoda_data,
    modality_key="coda",
    feature_name="patient",
    figsize=(4,4),
    add_dots=True,
    plot_facets=False,
    args_boxplot=patient_boxplot_args,
    cmap="Spectral",
)

out_path = os.path.join(fig_dir, "Comp_all_boxplot_by_patient.png")
plt.savefig(out_path, dpi=600, format="png", bbox_inches="tight")

In [None]:
# proportion per condition: one figure per cell type, saved to fig_dir

plt.rcParams.update({'font.size': 13})

for cell_type in cell_types:

    # title suffix: "*" for cell types listed in effect_cell_types, "ns" otherwise
    significance = "*" if cell_type in effect_cell_types else "ns"

    # rows of the long-format table that belong to the current cell type
    cell_df = plot_df[plot_df["Cell type"] == cell_type]

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # boxes: one per condition, in a fixed order
    sns.boxplot(
        data=cell_df,
        x="Condition",
        y="Proportion",
        hue="Condition",
        palette=["coral","darkviolet"],
        order=["T0/-ICI","T1/+ICI"],
        ax=ax,
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops,
    )

    # dots: individual values, coloured by patient and not dodged
    sns.swarmplot(
        data=cell_df,
        x="Condition",
        y="Proportion",
        hue="patient",
        palette="Spectral",
        dodge=False,
        ax=ax,
        size=5,
        edgecolor="black",
        linewidth=.5,
        alpha=1,
    )

    ax.set_title(f"{cell_type} - {significance}")
    ax.set_xlabel('Timepoint')
    plt.yticks(fontsize = 10)
    plt.xticks(fontsize = 10)

    # legend placed to the right of the axes
    legend = plt.legend(
        loc='right',
        bbox_to_anchor=(1.8, .5),
        ncol=1,
        title="Patient",
        frameon=True,
    )
    plt.setp(legend.get_title(), fontsize='12')
    plt.tight_layout()

    out_path = os.path.join(fig_dir, "Comp_"+cell_type+"_boxplot_by_condition.png")
    plt.savefig(out_path, dpi=600, format="png", bbox_inches="tight")


# Compositional Analysis - Response Comparison (Immune Cell Types)

### Environment Set Up

In [None]:
# load libraries
import warnings
import matplotlib.pyplot as plt
import mudata as mu
import pertpy as pt
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import os
from statannot import add_stat_annotation

In [None]:
# set up figure parameters
plt.rcParams['figure.figsize'] = (4, 4)
sc.settings.verbosity = 0
sc.settings.set_figure_params(
    dpi=300,
    facecolor="white",
    frameon=False,
    figsize=(4,4)
)

In [None]:
# remove warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# set up dirs
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"
fig_dir = os.path.join(work_dir, "figures", "combined", "compositional_analysis", "response_comparison")
sc.settings.figdir = os.path.join(work_dir, "figures", "combined", "compositional_analysis", "response_comparison")

In [None]:
# read anndata object
adata = sc.read_h5ad(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_TCR_full-integrated_annot_22-03-24.h5ad"))

In [None]:
# display a summary of the loaded AnnData object (nothing is created here)
adata

## Model Setup & Inference

In [None]:
# set up the scCODA model on the immune cell types only
immune_cell_types = ["CD8 T", "CD4 T", "NK", "pDC", "Myeloid", "Plasma", "B Cell"]

sccoda_model = pt.tl.Sccoda()
sccoda_data = sccoda_model.load(
    # restrict the input to cells annotated as one of the immune cell types
    adata[adata.obs["Annotation_1.0"].isin(immune_cell_types)].copy(),
    type="cell_level",
    generate_sample_level=True,
    cell_type_identifier="Annotation_1.0",
    sample_identifier="sample",
    covariate_obs=["subproject", "patient", "timepoint", "response", "ICI_status", "Condition"],
)

# print the container, then the X matrix and the obs table of the "coda" modality
print(sccoda_data)
coda = sccoda_data["coda"]
print(coda.X)
print(coda.obs)

In [None]:
pt.pl.coda.boxplots(sccoda_data, 
                    modality_key="coda", 
                    feature_name="timepoint", 
                    add_dots=True,
                    figsize=[6,6],
                    level_order=["SCR", "C02"],
                    cmap=["coral","darkviolet"]
                   )
plt.show()

In [None]:
# prepare the model
sccoda_data = sccoda_model.prepare(
    sccoda_data,
    modality_key="coda",
    formula="response", # PD vs. SD
    reference_cell_type="automatic",
)
sccoda_data["coda"]

In [None]:
# run MCMC
sccoda_model.run_nuts(sccoda_data, modality_key="coda")
sccoda_data["coda"]

## Result Interpretation

In [None]:
# see most relevant information
sccoda_model.summary(sccoda_data, modality_key="coda")

In [None]:
sccoda_model.credible_effects(sccoda_data, modality_key="coda")

### Adjust FDR

In [None]:
sccoda_model.set_fdr(sccoda_data, modality_key="coda", est_fdr=0.1)
sccoda_model.summary(sccoda_data, modality_key="coda")

## Save Results

In [None]:
#sccoda_data.write_h5mu(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_immune_scCODA_10-04-24.h5mu"))

In [None]:
test_model = pt.tl.Sccoda()
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
test_model.get_effect_df(sccoda_data, modality_key="coda")

In [None]:
sccoda_data["coda"].varm["intercept_df"]

## Visualization

In [None]:
# counts
sccoda_data['coda'].var

In [None]:
# share of each cell type in the total n_cells, rounded to three decimals
n_cells = sccoda_data['coda'].var.n_cells
props = (n_cells / n_cells.sum()).round(3)
props

In [None]:
sccoda_data['coda'].obs

In [None]:
# keyword dictionaries used to style the boxplots drawn in the cells below

# box outline: solid black edge
boxprops = {"linestyle": "-", "linewidth": 1, "edgecolor": "black"}

# outlier markers: filled black circles without an edge
flierprops = {
    "marker": "o",
    "markerfacecolor": "black",
    "markeredgecolor": "none",
}

# median line: thin solid black
medianprops = {"linestyle": "-", "linewidth": .8, "color": "black"}

# mean line: thin dashed black
meanlineprops = {"linestyle": "--", "linewidth": .8, "color": "black"}


In [None]:
# define cell types for plotting
cell_types = sccoda_data['coda'].var_names.tolist()
print(cell_types)

In [None]:
effect_cell_types = []

In [None]:
# build a long-format table of per-sample cell type proportions for plotting
feature_name = ["Condition", "response", "patient"]
value_name = "Proportion"
data = sccoda_data['coda']

# divide every row of X by its row total
sample_sums = np.sum(data.X, axis=1, keepdims=True)
X = data.X / sample_sums

# wide table (one column per cell type) joined on the index with the selected obs columns
count_df = pd.DataFrame(X, columns=data.var.index, index=data.obs.index).merge(
    data.obs[feature_name],
    left_index=True,
    right_index=True,
)

# one row per (sample, cell type) pair
plot_df = pd.melt(
    count_df,
    id_vars=feature_name,
    var_name="Cell type",
    value_name=value_name,
)
if cell_types is not None:
    keep_rows = plot_df["Cell type"].isin(cell_types)
    plot_df = plot_df[keep_rows]

In [None]:
plot_df

In [None]:
# proportion per response group, dots coloured by patient:
# one figure per cell type, saved to fig_dir

plt.rcParams.update({'font.size': 13})

for cell_type in cell_types:

    # title suffix: "*" for cell types listed in effect_cell_types, "ns" otherwise
    significance = "*" if cell_type in effect_cell_types else "ns"

    # rows of the long-format table that belong to the current cell type
    cell_df = plot_df[plot_df["Cell type"] == cell_type]

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # boxes: one per response group
    sns.boxplot(
        x="response",
        y="Proportion",
        data=cell_df,
        hue="response",
        palette=["mistyrose", "lavender"],
        ax=ax,
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops
    )

    # dots: individual values, coloured by patient and not dodged
    sns.swarmplot(
        x="response",
        y="Proportion",
        data=cell_df,
        hue="patient",
        palette="Spectral",
        dodge=False,
        ax=ax,
        size=5,
        edgecolor="black",
        linewidth=.5,
        alpha=1
    )

    ax.set_title(cell_type+" - "+significance)
    ax.set_xlabel('ICI Response')
    plt.yticks(fontsize = 10)
    # the original called plt.yticks twice, so the x tick labels never got
    # the smaller font used in the other per-cell-type figures
    plt.xticks(fontsize = 10)

    legend = plt.legend(loc='upper right', bbox_to_anchor=(1.8, 1.1), ncol=1, title="Patient", frameon=False)
    plt.setp(legend.get_title(),fontsize='12')
    plt.tight_layout()
    plt.savefig(os.path.join(fig_dir, "Comp_"+cell_type+"_boxplot_by_response.png"), dpi=600, format="png", bbox_inches="tight")


In [None]:
# proportion per response group, dots coloured by condition:
# one figure per cell type, saved to fig_dir

plt.rcParams.update({'font.size': 13})

for cell_type in cell_types:

    # title suffix: "*" for cell types listed in effect_cell_types, "ns" otherwise
    significance = "*" if cell_type in effect_cell_types else "ns"

    # rows of the long-format table that belong to the current cell type
    cell_df = plot_df[plot_df["Cell type"] == cell_type]

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # boxes: one per response group
    sns.boxplot(
        x="response",
        y="Proportion",
        data=cell_df,
        hue="response",
        palette=["mistyrose", "lavender"],
        ax=ax,
        vert=True,
        patch_artist=True,
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops
    )

    # dots: individual values, coloured by condition and not dodged
    sns.swarmplot(
        x="response",
        y="Proportion",
        data=cell_df,
        hue="Condition",
        palette=["coral","darkviolet"],
        dodge=False,
        ax=ax,
        size=5,
        edgecolor="black",
        linewidth=.5,
        alpha=1
    )

    ax.set_title(cell_type+" - "+significance)
    ax.set_xlabel('ICI Response')
    plt.yticks(fontsize = 10)
    # the original called plt.yticks twice, so the x tick labels never got
    # the smaller font used in the other per-cell-type figures
    plt.xticks(fontsize = 10)

    # the dots are coloured by "Condition", so the legend is titled accordingly
    legend = plt.legend(loc='upper right', bbox_to_anchor=(1.8, 1), ncol=1, title="Condition", frameon=False)
    plt.setp(legend.get_title(),fontsize='12')
    plt.tight_layout()
    plt.savefig(os.path.join(fig_dir, "Comp_"+cell_type+"_boxplot_by_response_col_by_timepoint.png"), dpi=600, format="png", bbox_inches="tight")


# Compositional Analysis - ICI Status Comparison (Immune Cell Types)

### Environment Set Up

In [None]:
# load libraries
import warnings
import matplotlib.pyplot as plt
import mudata as mu
import pertpy as pt
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import os
from statannot import add_stat_annotation

In [None]:
# set up figure parameters
plt.rcParams['figure.figsize'] = (4, 4)
sc.settings.verbosity = 0
sc.settings.set_figure_params(
    dpi=300,
    facecolor="white",
    frameon=False,
    figsize=(4,4)
)

In [None]:
# remove warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# set up dirs
work_dir = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE/"
fig_dir = os.path.join(work_dir, "figures", "combined", "compositional_analysis", "ICI_status_comparison")
sc.settings.figdir = os.path.join(work_dir, "figures", "combined", "compositional_analysis", "ICI_status_comparison")

In [None]:
# read anndata object
adata = sc.read_h5ad(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_TCR_full-integrated_annot_22-03-24.h5ad"))

In [None]:
# display a summary of the loaded AnnData object (nothing is created here)
adata

In [None]:
# order cat values
# The order is set on adata.obs, i.e. on the object that is passed to
# sccoda_model.load() in the next cell. Setting it on sccoda_data here had no
# lasting effect because sccoda_data is overwritten by that load() call
# (and is undefined when this section is run on a fresh kernel).
# NOTE(review): confirm that load() keeps the categorical dtype of the
# covariate_obs columns, so that '-ICI' ends up as the first level.
order = ['-ICI', '+ICI/SD', '+ICI/PD']
adata.obs['ICI_status'] = pd.Categorical(adata.obs['ICI_status'], categories=order)

## Model Setup & Inference

In [None]:
# set up the scCODA model on the immune cell types only
immune_cell_types = ["CD8 T", "CD4 T", "NK", "pDC", "Myeloid", "Plasma", "B Cell"]

sccoda_model = pt.tl.Sccoda()
sccoda_data = sccoda_model.load(
    # restrict the input to cells annotated as one of the immune cell types
    adata[adata.obs["Annotation_1.0"].isin(immune_cell_types)].copy(),
    type="cell_level",
    generate_sample_level=True,
    cell_type_identifier="Annotation_1.0",
    sample_identifier="sample",
    covariate_obs=["subproject", "patient", "timepoint", "response", "ICI_status", "Condition"],
)

# print the container, then the X matrix and the obs table of the "coda" modality
print(sccoda_data)
coda = sccoda_data["coda"]
print(coda.X)
print(coda.obs)

In [None]:
# prepare the model
sccoda_data = sccoda_model.prepare(
    sccoda_data,
    modality_key="coda",
    formula="ICI_status", # composition is modelled against the ICI_status covariate
    reference_cell_type="automatic",
)
# display the prepared "coda" modality
sccoda_data["coda"]

In [None]:
# run MCMC
sccoda_model.run_nuts(sccoda_data, modality_key="coda")
sccoda_data["coda"]

## Result Interpretation

In [None]:
# see most relevant information
sccoda_model.summary(sccoda_data, modality_key="coda")

In [None]:
sccoda_model.credible_effects(sccoda_data, modality_key="coda")

### Adjust FDR

In [None]:
sccoda_model.set_fdr(sccoda_data, modality_key="coda", est_fdr=0.2)
sccoda_model.summary(sccoda_data, modality_key="coda")

## Save Results

In [None]:
#sccoda_data.write_h5mu(os.path.join(work_dir, "data", "outputdata", "combined", "Combined_SCR_CO2_immune_scCODA_10-04-24.h5mu"))

In [None]:
test_model = pt.tl.Sccoda()
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
test_model.get_intercept_df(sccoda_data, modality_key="coda")

In [None]:
test_model.get_effect_df(sccoda_data, modality_key="coda")

In [None]:
sccoda_data["coda"].varm["intercept_df"]

## Visualization

In [None]:
# counts
sccoda_data['coda'].var

In [None]:
# share of each cell type in the total n_cells, rounded to three decimals
n_cells = sccoda_data['coda'].var.n_cells
props = (n_cells / n_cells.sum()).round(3)
props

In [None]:
sccoda_data['coda'].obs

In [None]:
# keyword dictionaries used to style the boxplots drawn in the cells below

# box outline: solid black edge
boxprops = {"linestyle": "-", "linewidth": 1, "edgecolor": "black"}

# outlier markers: filled black circles without an edge
flierprops = {
    "marker": "o",
    "markerfacecolor": "black",
    "markeredgecolor": "none",
}

# median line: thin solid black
medianprops = {"linestyle": "-", "linewidth": .8, "color": "black"}

# mean line: thin dashed black
meanlineprops = {"linestyle": "--", "linewidth": .8, "color": "black"}


In [None]:
# define cell types for plotting
cell_types = sccoda_data['coda'].var_names.tolist()
print(cell_types)

In [None]:
effect_cell_types = []

In [None]:
# build a long-format table of per-sample cell type proportions for plotting
feature_name = ["Condition", "response", "patient", "ICI_status"]
value_name = "Proportion"
data = sccoda_data['coda']

# divide every row of X by its row total
sample_sums = np.sum(data.X, axis=1, keepdims=True)
X = data.X / sample_sums

# wide table (one column per cell type) joined on the index with the selected obs columns
count_df = pd.DataFrame(X, columns=data.var.index, index=data.obs.index).merge(
    data.obs[feature_name],
    left_index=True,
    right_index=True,
)

# one row per (sample, cell type) pair
plot_df = pd.melt(
    count_df,
    id_vars=feature_name,
    var_name="Cell type",
    value_name=value_name,
)
if cell_types is not None:
    keep_rows = plot_df["Cell type"].isin(cell_types)
    plot_df = plot_df[keep_rows]

In [None]:
plot_df

In [None]:
# define cell types that have an effect

# significance label per cell type, one dictionary per comparison;
# every entry starts as 'ns'

# -ICI vs. +ICI/SD
effect_dict_1 = dict.fromkeys(
    ('B Cell', 'CD4 T', 'CD8 T', 'Myeloid', 'NK', 'Plasma', 'pDC'),
    'ns',
)

# -ICI vs. +ICI/PD
effect_dict_2 = dict.fromkeys(
    ('B Cell', 'CD4 T', 'CD8 T', 'Myeloid', 'NK', 'Plasma', 'pDC'),
    'ns',
)

In [None]:
# proportion per ICI status with significance brackets:
# one figure per cell type, saved to fig_dir

# The bracket positions (x = 0, 1, 2) and the white/blue/red box palette below
# assume exactly this left-to-right order, so it is passed to seaborn
# explicitly instead of relying on the dtype/order of the ICI_status column.
ici_order = ['-ICI', '+ICI/SD', '+ICI/PD']

for cell_type in cell_types:

    # rows of the long-format table that belong to the current cell type
    cell_df = plot_df[plot_df["Cell type"] == cell_type]

    fig, ax = plt.subplots(figsize=(2,2), dpi=120)

    # boxes: one per ICI status, in the fixed order defined above
    sns.boxplot(
        x="ICI_status",
        y="Proportion",
        data=cell_df,
        hue="ICI_status",
        order=ici_order,
        hue_order=ici_order,
        ax=ax,
        vert=True,
        patch_artist=True,
        palette=["white", "blue", "red"],
        meanline=True,
        showmeans=True,
        showfliers=False,
        boxprops=boxprops,
        medianprops=medianprops,
        meanprops=meanlineprops
    )

    # dots: individual values, coloured by patient and not dodged
    sns.swarmplot(
        x="ICI_status",
        y="Proportion",
        data=cell_df,
        hue="patient",
        palette="Spectral",
        dodge=False,
        order=ici_order,
        ax=ax,
        size=4,
        edgecolor="black",
        linewidth=.5,
        alpha=1
    )

    ax.set_title(cell_type)
    ax.set_xlabel('ICI Status')

    # brackets are anchored just above the largest proportion of this cell type
    ymax = cell_df["Proportion"].max()
    # upper bracket: x positions 0 and 2, labelled from effect_dict_2
    ax.plot([0, 0, 2, 2], [ymax + .08, ymax + .09, ymax + .09, ymax + .08], lw=1, color='black')
    ax.text(1, ymax+.085, f" {effect_dict_2[cell_type]}", ha='center', va='bottom', size=8, color='black')
    # lower bracket: x positions 0 and 1, labelled from effect_dict_1
    ax.plot([0, 0, 1, 1], [ymax + .05, ymax + .06, ymax + .06, ymax + .05], lw=1, color='black')
    ax.text(0.5, ymax+.055, f" {effect_dict_1[cell_type]}", ha='center', va='bottom', size=8, color='black')
    plt.yticks(fontsize = 9)
    plt.xticks(fontsize = 9)

    legend = plt.legend(loc='right', bbox_to_anchor=(1.8, .5), ncol=1, title="Patient", frameon=True)
    plt.setp(legend.get_title(),fontsize='12')
    plt.tight_layout()

    plt.savefig(os.path.join(fig_dir, "Comp_"+cell_type+"_boxplot_by_ICI_status.png"), dpi=600, format="png", bbox_inches="tight")