In [None]:
import numpy as np
import seaborn as sns

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.legend import Legend
import matplotlib.colors as colors
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d


import pandas as pd
import scipy
import scanpy as sc

from sklearn import datasets
from sklearn.decomposition import PCA

from numba import jit

import celltypist
from celltypist import models

from matplotlib.cm import ScalarMappable

In [None]:
#Custom colormap

from matplotlib.cm import register_cmap
from matplotlib.colors import ListedColormap

# Build a 24-colour qualitative colormap from two built-in 20-colour maps:
# 8 samples from the upper part of tab20b followed by 16 samples from the
# lower part of tab20c.
tab20b = matplotlib.colormaps['tab20b']
tab20c = matplotlib.colormaps['tab20c']
colors1 = tab20b(np.linspace(3.001/5., 1, 8))
colors2 = tab20c(np.linspace(0, 3.999/5., 16))

# NOTE(review): this rebinds the name `colors`, which the import cell bound to
# the `matplotlib.colors` module; code that needs the module must spell out
# `matplotlib.colors`.
colors = np.concatenate([colors1, colors2])

map_name = 'op_tab24'
op_cmap = ListedColormap(colors, name=map_name )
# force=True keeps this cell re-runnable in a live kernel: registering an
# already-registered name without it raises ValueError.
matplotlib.colormaps.register(name=map_name, cmap=op_cmap, force=True)

In [None]:
from mpl_toolkits.axes_grid1 import AxesGrid

def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
    '''
    Build a copy of `cmap` whose "center" is moved to `midpoint`.

    Useful for data with a negative min and positive max when the middle of
    the colormap's dynamic range should sit at zero.

    Input
    -----
      cmap : The matplotlib colormap to be altered
      start : Offset from lowest point in the colormap's range.
          Defaults to 0.0 (no lower offset). Should be between
          0.0 and `midpoint`.
      midpoint : The new center of the colormap. Defaults to
          0.5 (no shift). Should be between 0.0 and 1.0. In
          general, this should be  1 - vmax / (vmax + abs(vmin))
          For example if your data range from -15.0 to +5.0 and
          you want the center of the colormap at 0.0, `midpoint`
          should be set to  1 - 5/(5 + 15)) or 0.75
      stop : Offset from highest point in the colormap's range.
          Defaults to 1.0 (no upper offset). Should be between
          `midpoint` and 1.0.
      name : Name given to the new colormap and used to register it.

    Returns
    -------
      The new `LinearSegmentedColormap`. As a side effect it is registered
      in `matplotlib.colormaps` under `name`, replacing any colormap
      previously registered under that name by the user.
    '''
    cdict = {
        'red': [],
        'green': [],
        'blue': [],
        'alpha': []
    }

    # regular index to compute the colors
    reg_index = np.linspace(start, stop, 257)

    # shifted index to match the data: 128 points below the midpoint and
    # 129 points from the midpoint up to 1.0 (257 in total, like reg_index)
    shift_index = np.hstack([
        np.linspace(0.0, midpoint, 128, endpoint=False),
        np.linspace(midpoint, 1.0, 129, endpoint=True)
    ])

    # Sample the source colormap at the regular positions and pin each
    # sampled colour to the corresponding shifted position.
    for ri, si in zip(reg_index, shift_index):
        r, g, b, a = cmap(ri)

        cdict['red'].append((si, r, r))
        cdict['green'].append((si, g, g))
        cdict['blue'].append((si, b, b))
        cdict['alpha'].append((si, a, a))

    newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)
    # `plt.register_cmap` is deprecated and no longer exists in recent
    # matplotlib releases; use the registry this notebook already uses for
    # 'op_tab24'. force=True allows repeated calls with the same name.
    matplotlib.colormaps.register(newcmap, force=True)

    return newcmap


In [None]:
# Read the two preprocessed tables; the first CSV column becomes the index.
gspDF = pd.read_csv(
    '/Users/oipulk/Documents/prime_data/preprocessed/gsp_clin_noiRECISTNans.csv',
    low_memory=False,
    index_col=0,
)
pspDF = pd.read_csv(
    '/Users/oipulk/Documents/prime_data/preprocessed/psp_clin_noiRECISTNans.csv',
    low_memory=False,
    index_col=0,
)

# Index entries from position 13 onward are used as gene names
# (presumably the first 13 rows hold clinical fields -- TODO confirm).
gsp_genes = gspDF.index.values[13:]
psp_genes = pspDF.index.values[13:]

# Gene set used later for the CTL score.
ctl_genes = ['CD8A', 'CD8B', 'GZMA', 'GZMB', 'PRF1']

In [None]:
## Genes tested in laboratory and their p-value in t-test for response

orderedGenes = np.array(['SLAMF7', 'ATP2A3', 'GBP1', 'HSH2D', 'LCK', 'MAP4K1', 'CSF2RB', 'CD247', 'NCF1', 'LCP2', 'PRKCH', 'ITGAL', 'LY9', 'HLA-DPA1', 'TIGIT', 'CCDC69', 'RASAL3', 'IL12RB1', 'SASH3', 'IL2RG', 'ARHGAP9', 'ARHGAP25', 'PTK2B', 'TYMP', 'PSD4', 'WAS', 'CD27', 'SLAMF6', 'TMC8', 'GBP5', 'SEPT1', 'TRAF3IP3', 'PSMB10', 'GZMB', 'CD3E', 'SYK', 'PSTPIP1', 'GNLY', 'MYO1G', 'CD40', 'POU2F2', 'CD38', 'SEMA4D', 'FCRL5', 'IGFLR1', 'SIRPG', 'BTK', 'ITGB2', 'HCLS1', 'CD48', 'AOAH', 'RCSD1', 'GPR174', 'SEMA4A', 'PARVG', 'PRF1', 'DOCK2', 'FGR', 'IL10RA', 'FAM107B', 'EVL', 'BIN2', 'HLA-DOB', 'CD52', 'LST1', 'CD84', 'CD53', 'PIK3AP1', 'CLEC2D', 'LYN', 'PTPRC', 'CD3D', 'CD96', 'DOK2', 'TRIM22', 'FGD2', 'EPSTI1', 'CD19', 'VNN2', 'PIK3CG', 'CASP10', 'CD79A', 'GZMA', 'CD2', 'HLA-F', 'MZB1', 'ERAP1', 'CD37', 'FERMT3', 'PLCG2', 'CCR5', 'BTN3A3', 'ZAP70', 'SELL', 'HAVCR2', 'FLT3LG', 'CTSS', 'TRAT1', 'TNFAIP2', 'BLNK', 'ARHGAP30', 'TTC7A', 'SPN', 'CTLA4', 'GIMAP7', 'GBP3', 'LCP1', 'TAPBP', 'CLIC2', 'CYTH4', 'SLC7A7', 'TNFRSF1B', 'BTN3A1', 'LILRB2', 'SEPT6', 'LAT2', 'CCR2', 'CD4', 'NFAM1', 'TAPBPL', 'IGSF6', 'SECTM1', 'UBA7', 'IDO1', 'RSAD2', 'CYBB', 'PTAFR', 'PLCB2', 'ADAM28', 'SELPLG', 'TMEM176B', 'PTPN22', 'NAAA', 'HCK', 'HLA-DQB1', 'RASGEF1B', 'TBXAS1', 'LSP1', 'ADAM8', 'MPEG1', 'TNFSF10', 'PLD4', 'LILRB4', 'STK17B', 'ARRB2', 'SLCO2B1', 'TNFSF13', 'SLAMF1', 'CD28', 'SLC15A3', 'ITGAM', 'RASGRP2', 'VCAM1', 'CLEC7A', 'HLA-DMB', 'CTSW', 'HCST', 'SLC8A1', 'SKAP1', 'IL2RB', 'BASP1', 'RNASE6', 'LGMN'
,'PIK3IP1', 'CYTH1', 'ALDH2', 'GLIPR1', 'ELMO1', 'EVI2B', 'MFNG', 'VAMP5', 'LILRA4', 'MAN1A1', 'KLHL6', 'CD1C', 'CD163', 'MS4A6A', 'ICAM2', 'ACE', 'FGL2', 'NCKAP1L', 'KCNMA1', 'SRGN', 'CST7', 'DAPP1', 'FOLR2', 'ENTPD1', 'PILRA', 'HLA-DMA', 'SAMSN1', 'LAMP3', 'CD14', 'PDCD1LG2', 'CLEC10A', 'GNA15', 'ADAP2', 'SPINT2', 'CD33', 'CR1', 'CXCL12', 'IL7R', 'AIF1', 'PTGER4', 'EMB', 'FAM57A', 'SLC39A10'])


orderedGenes_p = np.array([0.000002, 0.000171, 0.00026, 0.000262, 0.000288, 0.000316, 0.000355, 0.000377, 0.000445, 0.00051, 0.000515, 0.000672, 0.000698, 0.000757, 0.000763, 0.000841, 0.000858, 0.000889, 0.000987, 0.00131, 0.00138, 0.001427, 0.001449, 0.001493, 0.001516, 0.001584, 0.001614, 0.001712, 0.001857, 0.00186, 0.001904, 0.002429, 0.002451, 0.002531, 0.002572, 0.002623, 0.002647, 0.002652, 0.002658, 0.002798, 0.003069, 0.00316, 0.003734, 0.003758, 0.004254, 0.00434, 0.004603, 0.004735, 0.004918, 0.005094, 0.005575, 0.005847, 0.006044, 0.006048, 0.006177, 0.006216, 0.006241, 0.006269, 0.00664, 0.006877, 0.007022, 0.007058, 0.007183, 0.007487, 0.008256, 0.008742, 0.008766, 0.008771, 0.009672, 0.009696, 0.009935, 0.01011, 0.010196, 0.010751, 0.011399, 0.011402, 0.011679, 0.011761, 0.012181, 0.012613, 0.012746, 0.012924, 0.012959, 0.013217, 0.013969, 0.014275, 0.015414, 0.015982, 0.017934, 0.018331, 0.018413, 0.018533, 0.019213, 0.020528, 0.020969, 0.021716, 0.022919, 0.024311, 0.02618, 0.026245, 0.026588, 0.027259, 0.027294, 0.027622, 0.027867, 0.027916, 0.029668, 0.030289, 0.030526, 0.031287, 0.031835, 0.032011
, 0.032486, 0.033289, 0.033758, 0.034644, 0.034706, 0.03764, 0.038836, 0.0441, 0.044157, 0.049343, 0.053912, 0.058147, 0.058859, 0.05891, 0.063574, 0.064643, 0.064875, 0.070647, 0.07081, 0.071064, 0.071227, 0.077519, 0.080847, 0.083266, 0.085183, 0.091863, 0.100711, 0.102939, 0.105291, 0.110201, 0.119018, 0.125466, 0.127341, 0.140282, 0.143816, 0.146222, 0.149913, 0.15029, 0.151847, 0.162302, 0.162963, 0.162976, 0.166283, 0.1701, 0.17621, 0.179118, 0.199627, 0.202331, 0.220066, 0.243455, 0.244815, 0.24494, 0.250183, 0.268815, 0.270333, 0.270414, 0.283139, 0.285096, 0.300669, 0.304284, 0.319236, 0.329626, 0.347148, 0.356216, 0.362608, 0.365105, 0.368516, 0.387825, 0.388237, 0.412144, 0.414301, 0.41613, 0.468657, 0.491555, 0.51177, 0.54486, 0.564074, 0.5938, 0.60468, 0.615798, 0.636671, 0.65277, 0.657607, 0.661587, 0.701709, 0.721311, 0.756649, 0.778583, 0.819239, 0.837958, 0.86186, 0.955137, 0.001639, 0.068734])

# Sort genes and their p-values together by ascending p-value, using one
# shared permutation so the two arrays stay aligned.
_ascending_p = np.argsort(orderedGenes_p)
orderedGenes = orderedGenes[_ascending_p]
orderedGenes_p = orderedGenes_p[_ascending_p]

In [None]:
# Load the single-cell dataset from a loom file into an AnnData object.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
adata= sc.read_loom(
    "/Users/oipulk/Documents/scRNASeq/data/Li2018/Li.loom")

In [None]:
# Cell type predictions by Celltypist
# Annotate every cell with the 'Immune_All_Low.pkl' model and majority voting,
# then replace `adata` with the AnnData returned by the prediction object
# (later cells read the 'majority_voting' column from it).
predictions = celltypist.annotate(adata, model = 'Immune_All_Low.pkl', majority_voting = True)
print(predictions.predicted_labels)
adata = predictions.to_adata()

In [None]:
# List the distinct values of the 'target.y' annotation used in the next cell.
adata.obs['target.y'].unique()

In [None]:
# Per-patient frequency of 'anti-melanoma' target annotations, broadcast to
# every cell of that patient:
#   anti_maa_f     -- relative to cells with a designated target
#   anti_maa_f_all -- relative to all cells of the patient
patients = np.unique(adata.obs['patient'].values)
anti_maa_f = np.zeros(adata.shape[0])
anti_maa_f_all = np.zeros(adata.shape[0])

patient_of_cell = adata.obs['patient']
target_of_cell = adata.obs['target.y']

for pat in patients:

    pat_mask = (patient_of_cell == pat).values
    pat_rows = np.where(pat_mask)[0]
    pat_targets = target_of_cell[pat_mask]

    n_cells = pat_mask.sum()
    n_anti_melanoma = (pat_targets == 'anti-melanoma').sum()
    # Targets are anti-melanoma, anti-viral, Multi, NA, or None; 'NA' and
    # 'None' count as "no designated target".
    n_undesignated = (pat_targets == 'NA').sum() + (pat_targets == 'None').sum()

    if n_cells != n_undesignated:
        prop = n_anti_melanoma / (n_cells - n_undesignated)
    else:
        # No designated targets at all for this patient.
        prop = np.nan

    prop_all = n_anti_melanoma / n_cells
    anti_maa_f[pat_rows] = prop
    anti_maa_f_all[pat_rows] = prop_all

adata.obs['anti_maa_f'] = anti_maa_f
adata.obs['anti_maa_f_all'] = anti_maa_f_all

In [None]:
# Score every cell for the CTL gene set, then store each patient's mean
# score (NaNs ignored) on all of that patient's cells.
ctl_genes = ['CD8A', 'CD8B', 'GZMA', 'GZMB', 'PRF1']
sc.tl.score_genes(adata, ctl_genes, score_name='ctl_score')

pt_ctl_scores = np.zeros(adata.shape[0])
for pat in patients:

    pat_rows = np.where(adata.obs['patient']==pat)[0]
    score = np.nanmean(adata.obs['ctl_score'].values[pat_rows])
    pt_ctl_scores[pat_rows] = score

adata.obs['ctl_score_pt'] = pt_ctl_scores

In [None]:
# Add an indicator for prior immunotherapy: 1 when 'treatment' is one of
# '1IT', '2IT' or 'mIT', otherwise 0.
# The membership test is converted to int as a whole; previously `.astype(int)`
# was applied to the last comparison only and the 0/1 result relied on
# bool | int type promotion.
IT = adata.obs['treatment'].isin(['1IT', '2IT', 'mIT']).astype(int).values
adata.obs['IT'] = IT
adata.obs['IT'] = adata.obs['IT'].astype('category')


In [None]:
# Therapy-naive patients
# Keep only cells whose prior-immunotherapy indicator is 0, as an independent copy.
adata_th_naive = adata[adata.obs['IT']==0].copy()

# Normalize counts log-transform again
# NOTE(review): "again" suggests adata.X was already normalised/log-transformed
# upstream; if so, log1p is applied a second time here -- confirm this is intended.
sc.pp.normalize_total(adata_th_naive, target_sum=1e4)
sc.pp.log1p(adata_th_naive)
# Sanity check: undoing log1p should give per-cell sums equal to target_sum (1e4).
print(np.expm1(adata_th_naive.X).sum(axis=1))

In [None]:
# Distinct per-patient anti-MAA frequencies present in the therapy-naive subset.
adata_th_naive.obs['anti_maa_f'].unique()

In [None]:
# Compute umap and t-sne
# PCA and the neighbourhood graph are computed first; both embeddings are then
# stored on adata_th_naive.
sc.pp.pca(adata_th_naive)
sc.pp.neighbors(adata_th_naive)
sc.tl.umap(adata_th_naive)
sc.tl.tsne(adata_th_naive)
# Keep a handle on the UMAP coordinates written to .obsm by sc.tl.umap.
umap_coordinates = adata_th_naive.obsm['X_umap']

In [None]:
# Global scanpy/matplotlib figure settings for the plots below: PDF output,
# 9x8 figures, 8 pt font, 300 dpi on screen and 1200 dpi when saving.
sc.set_figure_params(scanpy=True, dpi=300, dpi_save=1200, frameon=True, vector_friendly=True, fontsize=8,
                         figsize=(9,8),  format='pdf', facecolor=None, transparent=False, ipython_format='png2x')

In [None]:
# First check the current categories
print("Current categories:", adata_th_naive.obs['majority_voting'].cat.categories)

# The 'Alveolar macrophages' label is wrong -> relabel as 'Suppressor macrophages'
# (annotation by ACT based on DEGs). Only the category name changes; cell
# assignments are untouched.
adata_th_naive.obs['majority_voting'] = adata_th_naive.obs['majority_voting'].cat.rename_categories({
    'Alveolar macrophages': 'Suppressor macrophages'
})

# Verify the changes
print("Updated categories:", adata_th_naive.obs['majority_voting'].cat.categories)


In [None]:
# t-SNE coloured by cell type, labels drawn on the embedding; the figure is
# also saved (scanpy appends the `save` string to its default file name).
sc.pl.tsne(adata_th_naive, color = 'majority_voting', palette='op_tab24', legend_loc = 'on data',save='Annotations_all_types.pdf' )

In [None]:
# Same t-SNE as above, but with the legend placed in the right margin.
sc.pl.tsne(adata_th_naive, color = 'majority_voting', palette='op_tab24', legend_loc = 'right margin',save='Annotations_all_types_legend_right.pdf' )

## Analysis of monocytes, macrophages, DCs 

In [None]:
# Find communities of cells (cell types)
# Cluster labels are written to obs['leiden_initial']; they are refined for
# selected clusters in a later cell.
sc.tl.leiden(adata_th_naive, key_added='leiden_initial')
# sc.tl.louvain(adata_th_naive)

In [None]:
# Plot the t-SNE embedding coloured by the initial 'leiden' communities
# (the call below is sc.pl.tsne, not UMAP).
sc.set_figure_params(scanpy=True, dpi=300, dpi_save=1200, frameon=True, vector_friendly=True, fontsize=8,
                         figsize=(9,8),  format='pdf', facecolor=None, transparent=False, ipython_format='png2x')

sc.pl.tsne(adata_th_naive, color='leiden_initial', palette='op_tab24', legend_loc='on data')

In [None]:
# 1. Initial clustering is read from adata_th_naive.obs['leiden_initial'].

# 2. Identify the cluster(s) containing DC2, monocytes, and macrophages
# NOTE(review): these IDs depend on the outcome of the initial Leiden run.
target_clusters = ['6','8','13']  # Replace with your actual cluster IDs

# 3. Subset the data
adata_subset = adata_th_naive[adata_th_naive.obs['leiden_initial'].isin(target_clusters)].copy()

# 4. Recompute the neighborhood graph on the subset
sc.pp.neighbors(adata_subset)

# 5. Perform Leiden clustering at higher resolution on the subset
sc.tl.leiden(adata_subset, resolution=0.75, key_added='leiden_refined')

# 6. Prepare categories for the combined clustering
initial_categories = list(adata_th_naive.obs['leiden_initial'].cat.categories)
refined_categories = list(adata_subset.obs['leiden_refined'].cat.categories)

# Remove target clusters from initial categories
initial_categories_filtered = [cat for cat in initial_categories if cat not in target_clusters]

# Create new category names for refined clusters ('r' prefix avoids clashes
# with the initial cluster IDs)
refined_categories_renamed = [f'r{cat}' for cat in refined_categories]

# Combine filtered initial categories with renamed refined categories
combined_categories = initial_categories_filtered + refined_categories_renamed

# 7./8. Start from the initial labels and overwrite the cells of the refined
# subset with their 'r<cluster>' label in a single vectorised assignment
# (replaces a per-cell `.at` loop). Assumes obs names are unique.
combined_labels = adata_th_naive.obs['leiden_initial'].astype(str)
refined_labels = 'r' + adata_subset.obs['leiden_refined'].astype(str)
combined_labels.loc[adata_subset.obs.index] = refined_labels.to_numpy()

# 9. Store as a categorical whose categories are sorted for readability
adata_th_naive.obs['leiden'] = pd.Categorical(
    combined_labels,
    categories=sorted(combined_categories)
)

In [None]:
# t-SNE coloured by the combined (initial + refined 'r*') clustering.
sc.pl.tsne(adata_th_naive, color='leiden', palette='op_tab24', legend_loc='on data')

In [None]:
# Check that all clusters express either CD45 or CD3 (i.e. they are immune cells)
# NOTE(review): 'CD45' and 'CD3' are protein names; the gene symbols used
# elsewhere in this notebook are e.g. 'PTPRC', 'CD3D', 'CD3E' -- confirm these
# keys exist in the data, otherwise this lookup will fail.
sc.pl.tsne(adata_th_naive, color=['CD45','CD3'],palette='tab20',cmap='coolwarm',vmax=2.0)


In [None]:
# Re-select the subset by combined cluster label: all refined clusters r0-r8
# plus initial clusters 16 and 17.
# NOTE(review): the hard-coded labels depend on the clustering results above.
adata_subset = adata_th_naive[adata_th_naive.obs['leiden'].isin(['r0','r1','r2','r3','r4','r5','r6','r7','r8','16','17'])].copy()

In [None]:
# Cell type predictions by Celltypist
# The subset is annotated with the 'Immune_All_High.pkl' model, and
# `adata_subset` is replaced by the AnnData returned with the predictions.
predictions_subset = celltypist.annotate(adata_subset, model = 'Immune_All_High.pkl', majority_voting = True)
print(predictions_subset.predicted_labels)
adata_subset = predictions_subset.to_adata()

In [None]:
# t-SNE of the subset coloured by its majority-voting cell type labels.
sc.pl.tsne(adata_subset, color = 'majority_voting', palette='tab20',legend_loc = 'right margin' )

In [None]:
# Marker gene sets (ACT human pan-tissue, frequency>5) used to score the subset.
pDC_markers = ['IL3RA','LILRA4','PLD4']

# cDC1_markers = ['CLEC9A','XCR1','BATF3','CADM1','CD1C'
# ,'IDO1','ANXA6','CD40','CLIC2','CST7','ETV3','ID2','IL6ST','NET1','PTDSS1'
# ,'SCARB1','TNFRSF10B']
cDC1_markers = ['CLEC9A','XCR1','BATF3','IRF8']

# cDC2_markers = ['CD1C','CLEC10A','FCER1A','SIRPA','ADAM8','ADAM28','ARAF','AXL','FCGR2B','HMGA1','IRF4','LY86','PFDN1','TIMM13']
cDC2_markers = ['CD1C','CLEC10A','IRF4', 'NOTCH2']

#Langerhans cells
LC_markers = ['CD207', 'CD1A', 'CDH1', 'EPCAM']

migratory_state_markers = ['CCR7', 'LAMP3','HLA-DRA']

# For each signature: compute the score, then show it on the t-SNE with a
# signature-specific upper colour limit. Order matches the original cell.
for _score_name, _marker_genes, _color_max in [
    ('pDC_score', pDC_markers, 2),
    ('cDC1_score', cDC1_markers, 1.0),
    ('cDC2_score', cDC2_markers, 1.5),
    ('LC_score', LC_markers, 1.5),
    ('migratory_state_score', migratory_state_markers, 3.5),
]:
    sc.tl.score_genes(adata_subset, _marker_genes, score_name=_score_name)
    sc.pl.tsne(adata_subset, color=[_score_name],palette='tab20',cmap='coolwarm',vmax=_color_max)


In [None]:
# Individual markers on the subset's t-SNE: CD14, SIRPA and XCR1 first, then
# CD40, CD80 and CD86.
sc.pl.tsne(adata_subset, color=['CD14','SIRPA','XCR1'],palette='tab20',cmap='coolwarm')
sc.pl.tsne(adata_subset, color=['CD40', 'CD80', 'CD86'],palette='tab20',cmap='coolwarm')


In [None]:
# Differential expression between the combined 'leiden' clusters (Wilcoxon),
# with a plot of the top 20 genes per cluster.
sc.tl.rank_genes_groups(adata_th_naive,'leiden', method='wilcoxon')
sc.pl.rank_genes_groups(adata_th_naive, n_genes=20, sharey=False)

# Collect results for all groups and keep markers with adjusted p < 0.03 and
# log fold change > 1.0.
de_markers = sc.get.rank_genes_groups_df(adata_th_naive, None)
de_markers = de_markers[(de_markers.pvals_adj < 0.03) & (de_markers.logfoldchanges > 1.0)]
de_markers

In [None]:
# For exporting to ACT: per cluster, print up to 50 marker names ordered by
# descending score as "<cluster> :<gene>, <gene>, ...".
for cluster in ['r0','r1','r2','r3','r4','r5','r6','r7','r8','16','17']:
    cluster_markers = de_markers[de_markers['group']==cluster]
    by_score_desc = np.argsort(cluster_markers['scores'])[::-1]
    top_names = cluster_markers.iloc[by_score_desc,:][0:50].names.values
    print(cluster,':'+', '.join(top_names))


### Compute PRIME scores of cells and patients

In [None]:
# Load the PRIME table; the first CSV column becomes the index. Later cells
# read its index as gene names and its `weights` column.
# NOTE(review): absolute, machine-specific path.
PRIME_df= pd.read_csv('/Users/oipulk/Documents/prime_v3/MS/pub_tables/bDEA_results_and_PRIME_weights.csv', index_col=0)

In [None]:
# Preview the first 20 rows; the next cell selects rows by position.
PRIME_df.head(20)

In [None]:
# Take the first 15 rows of the PRIME table but skip the row at position 9:
# LAP3 (or its aliases) is not in the data.
# NOTE(review): the row is dropped by position, not by name -- verify that
# position 9 is LAP3 in the loaded table.
_prime_weight_values = PRIME_df.weights.values
_prime_gene_names = np.asarray(PRIME_df.index)

PRIME_weights = np.concatenate([_prime_weight_values[0:9], _prime_weight_values[10:15]])
PRIME_genes = np.concatenate([_prime_gene_names[0:9], _prime_gene_names[10:15]])

In [None]:
# Check the correct format: PRIME uses log2-transformed TPM data
# Prints per-cell sums after undoing log1p, to inspect the normalisation scale.
print(np.expm1(adata_th_naive.X).sum(axis=1))

In [None]:
# Per-cell PRIME score = weighted sum of the (already log-transformed)
# expression of the PRIME genes.
xpr_data = adata_th_naive.copy()

## Here we do not need to take the log again
# NOTE(review): the data were transformed with sc.pp.log1p above (natural log
# unless a base was set), whereas the notes here mention log2 -- confirm the
# scale is the one the weights expect.
# PRIME_score = np.asarray(np.matmul(PRIME_weights,np.log2(1+xpr_data[:,PRIME_genes].X.todense()).T))[0]
# .todense() assumes X is a sparse matrix.
PRIME_score = np.asarray(np.matmul(PRIME_weights,xpr_data[:,PRIME_genes].X.todense().T))[0]

adata_th_naive.obs['PRIME_score'] = PRIME_score

In [None]:
# Patient-level PRIME score: the mean per-cell score of each patient (NaNs
# ignored), stored on every cell of that patient.
# Iterate over the patients actually present in adata_th_naive rather than
# the `patients` array built from the full dataset: patients without
# therapy-naive cells would otherwise produce empty-slice means and warnings.
pt_prime_scores = np.zeros(adata_th_naive.shape[0])
_naive_patient = adata_th_naive.obs['patient']
_naive_prime = adata_th_naive.obs['PRIME_score'].values

for pat in _naive_patient.dropna().unique():

    pat_rows = np.where(_naive_patient == pat)[0]
    score = np.nanmean(_naive_prime[pat_rows])
    pt_prime_scores[pat_rows] = score

adata_th_naive.obs['PRIME_score_pt'] = pt_prime_scores

## Anti-MAA split

In [None]:
# Patients with a defined (non-NaN) anti-MAA frequency, and the cells that
# belong to them.
anti_maa_pts = np.unique(adata_th_naive.obs[~np.isnan(adata_th_naive.obs['anti_maa_f'])]['patient'].values)
# Take an explicit copy: later cells add columns to adata_amaa.obs, and
# writing to a view of adata_th_naive triggers an implicit copy with a warning.
adata_amaa = adata_th_naive[adata_th_naive.obs['patient'].isin(anti_maa_pts)].copy()

In [None]:
# Quartiles of the distinct anti-MAA frequencies (one value per distinct
# per-patient frequency), computed once and reused for the three flags.
_amaa_q25, _amaa_q50, _amaa_q75 = np.quantile(
    np.unique(adata_amaa.obs['anti_maa_f']), [0.25,0.5,0.75]
)

# Above the median.
adata_amaa.obs['anti_maa_hi'] = adata_amaa.obs['anti_maa_f'] > _amaa_q50
# Above the 75th percentile.
adata_amaa.obs['anti_maa_q1'] = adata_amaa.obs['anti_maa_f'] > _amaa_q75
# At or below the 25th percentile.
adata_amaa.obs['anti_maa_q4'] = adata_amaa.obs['anti_maa_f'] <= _amaa_q25

### PRIME score, SLAMF7, TYMP, and CD74 in high/low anti-MAA cohorts

In [None]:
from scipy import stats
from statsmodels.stats.multitest import multipletests
from matplotlib.colors import to_rgb
from matplotlib.collections import PolyCollection
from matplotlib.legend_handler import HandlerTuple
import textwrap

def compare_gene_expression(df, ordered_cell_list, features, condition_key, condition1, condition2, cell_type_key):
    """
    Compare feature values between two conditions within each cell type.

    For every (cell type, feature) pair a two-sided Mann-Whitney U test is run
    between the rows with `condition1` and the rows with `condition2`. All
    p-values are then corrected together with Benjamini-Hochberg FDR.

    Parameters:
    -----------
    df : pandas DataFrame
        One row per cell, with the feature columns, `condition_key` and
        `cell_type_key`.
    ordered_cell_list : list
        Cell types to analyse, in the desired output order. The special entry
        'Combined Cells' stands for all cells pooled together.
    features : list
        Column names (genes or signature scores) to analyse.
    condition_key : str
        Column name holding the condition.
    condition1, condition2 :
        The two condition values to compare (e.g. 'wt' / 'ko' or False / True).
    cell_type_key : str
        Column name for cell type (e.g. 'leiden' or 'cell_type').

    Returns:
    --------
    tuple: (results_df, pivot_df)
        results_df has one row per (cell type, feature) with columns
        'cell_type', 'feature', 'direction', 'p_value', 'p_value_corrected'
        and 'significance'; pivot_df is the same data pivoted to
        cell types x features.

    Notes:
    ------
    * 'direction' is '>' / '<' when the mean of condition1 is larger / smaller
      than the mean of condition2, and '_' when the means are equal or one
      group is empty. It is evaluated independently for every feature.
    * Comparisons with fewer than two values in either group get p = 1.0 and
      still take part in the FDR correction.
    * If `df` already contains rows labelled 'Combined Cells' in
      `cell_type_key` (a relabelled copy of all cells appended for plotting),
      only those rows are used for the pooled comparison, so cells are not
      counted twice; otherwise the whole `df` is used.
    """

    all_results = []
    all_p_values = []  # Store all p-values for global correction

    # First, calculate all p-values
    for cell_type in list(ordered_cell_list):

        if cell_type == 'Combined Cells':
            pooled_rows = df[cell_type_key] == 'Combined Cells'
            # Prefer the caller's relabelled copy to avoid double counting.
            cell_type_data = df[pooled_rows] if pooled_rows.any() else df
        else:
            cell_type_data = df[df[cell_type_key] == cell_type]

        for feature in features:

            c1_expr = cell_type_data[cell_type_data[condition_key] == condition1][feature]
            c2_expr = cell_type_data[cell_type_data[condition_key] == condition2][feature]

            # Calculate p-value if enough samples
            if len(c1_expr) > 1 and len(c2_expr) > 1:
                _, p_value = stats.mannwhitneyu(c1_expr, c2_expr, alternative='two-sided')
            else:
                p_value = 1.0

            # Reset for every feature so a previous feature's direction
            # cannot leak into this one.
            direction = '_'
            if len(c1_expr) > 0 and len(c2_expr) > 0:
                mean1, mean2 = np.mean(c1_expr), np.mean(c2_expr)
                if mean1 > mean2:
                    direction = '>'
                elif mean1 < mean2:
                    direction = '<'

            all_results.append({
                'cell_type': cell_type,
                'feature': feature,
                'direction': direction,
                'p_value': p_value
            })
            all_p_values.append(p_value)

    # Convert to DataFrame
    results_df = pd.DataFrame(all_results)

    # Correct all p-values together
    _, p_values_corrected, _, _ = multipletests(all_p_values, method='fdr_bh')
    results_df['p_value_corrected'] = p_values_corrected

    # Add significance asterisks based on corrected p-values
    def _stars(p):
        if p < 0.0001:
            return '****'
        if p < 0.001:
            return '***'
        if p < 0.01:
            return '**'
        if p < 0.05:
            return '*'
        return ''

    results_df['significance'] = results_df['p_value_corrected'].apply(_stars)

    # Reshape the results to a more readable format
    pivot_df = results_df.pivot(
        index='cell_type',
        columns='feature',
        values=['direction','p_value', 'p_value_corrected', 'significance']
    )

    return results_df, pivot_df


In [None]:
# Violin plot of the per-cell PRIME score for every cell type (plus all cells
# pooled), split by low/high anti-MAA status, with significance markers from
# compare_gene_expression.
sns.set(font_scale=1.5)
# NOTE(review): each sns.set call applies the full theme, so this second call
# appears to reset font_scale to its default -- confirm which scale is intended.
sns.set(style='ticks')

features = ['PRIME_score']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'


df = sc.get.obs_df(adata_amaa, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
# Relabelled copy of every cell, drawn as the extra 'Combined Cells' violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

# x uses cell_key (not a hard-coded column name) so the plot, its ordering and
# the statistics below always refer to the same column.
ax=sns.violinplot(data=df, x=cell_key, y=features[0], hue=condition_key, split=True, inner='box', gap=.2,
                          density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
                        legend=True, linewidth=1,cut=0,
                          alpha=0.5)

ax.set(xlabel = None)
# `Legend.legendHandles` was renamed to `legend_handles` and the old name no
# longer exists in recent matplotlib; support both.
legend_handles = getattr(ax.legend_, 'legend_handles', None)
if legend_handles is None:
    legend_handles = ax.legend_.legendHandles
ax.legend(handles=legend_handles, title='Anti-MAA',labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot (condition False = low, True = high)
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# With a single feature, row idx of results_long is also the x position of the
# cell type, because the rows follow cell_list order.
for idx, (cell_type, direction, asterisk) in enumerate(zip(results_long['cell_type'], results_long['direction'], results_long['significance'])):

    if asterisk:  # Only add text if there is a significance marker
        # Add some padding above the maximum value
        y_position = y_max[cell_type] + 0.25 * (y_max.max() - y_max.min())
        ax.text(idx, y_position,  direction+asterisk, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([0,2.5])
# NOTE(review): the right limit is hard-coded rather than derived from len(cell_list).
plt.xlim([-1.0,28])

# Collected below but not used within this cell.
handles = []

## FIX THE COLORS TO CORRESPOND UMAPS
n_col =len(cell_list)
cmap = matplotlib.colormaps['op_tab24']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))
# colors='tab10'

for ind, violin in enumerate(ax.findobj(PolyCollection)):

    # Index is 4 - (ind // 2): it becomes negative after the fifth pair of
    # violin halves and then counts back from the end of `colors`.
    rgb = to_rgb(colors[4-ind // 2])

    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('PRIME_score_aMAA_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# PRIME score on the t-SNE, drawn separately for low (False) and high (True)
# anti-MAA cells with a shared colour range of 0 to 1.
sc.pl.tsne(adata_amaa[adata_amaa.obs['anti_maa_hi']==False], color = ['PRIME_score'], cmap = 'coolwarm' , vmin=0,vmax=1., save='_PRIME_score_all_cells_aMAA_low.pdf')
sc.pl.tsne(adata_amaa[adata_amaa.obs['anti_maa_hi']==True], color = ['PRIME_score'], cmap = 'coolwarm' , vmin=0,vmax=1., save='_PRIME_score_all_cells_aMAA_hi.pdf')


In [None]:
#SLAMF7 alone
# Violin plot of SLAMF7 expression for every cell type (plus all cells
# pooled), split by low/high anti-MAA status, with significance markers from
# compare_gene_expression.
sns.set(font_scale=1.5)
# NOTE(review): each sns.set call applies the full theme, so this second call
# appears to reset font_scale to its default -- confirm which scale is intended.
sns.set(style='ticks')

features = ['SLAMF7']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

df = sc.get.obs_df(adata_amaa, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
# Relabelled copy of every cell, drawn as the extra 'Combined Cells' violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

# x uses cell_key (not a hard-coded column name) so the plot, its ordering and
# the statistics below always refer to the same column.
ax=sns.violinplot(data=df, x=cell_key, y=features[0], hue=condition_key, split=True, inner='box', gap=.2,
                          density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
                        legend=True, linewidth=1,cut=0,
                          alpha=0.5)

ax.set(xlabel = None)
# `Legend.legendHandles` was renamed to `legend_handles` and the old name no
# longer exists in recent matplotlib; support both.
legend_handles = getattr(ax.legend_, 'legend_handles', None)
if legend_handles is None:
    legend_handles = ax.legend_.legendHandles
ax.legend(handles=legend_handles, title='Anti-MAA',labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot (condition False = low, True = high)
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# With a single feature, row idx of results_long is also the x position of the
# cell type, because the rows follow cell_list order.
for idx, (cell_type, direction, asterisk) in enumerate(zip(results_long['cell_type'], results_long['direction'], results_long['significance'])):

    if asterisk:  # Only add text if there is a significance marker
        # Add some padding above the maximum value; the direction symbol is
        # drawn slightly below the asterisks.
        y_position = y_max[cell_type] + 0.25 * (y_max.max() - y_max.min())
        ax.text(idx, y_position,  asterisk, ha='center', va='bottom', fontsize=12)
        ax.text(idx, y_position-0.15, direction, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0,4.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([-0,5.5])
# NOTE(review): the right limit is hard-coded rather than derived from len(cell_list).
plt.xlim([-1.0,28])

# Collected below but not used within this cell.
handles = []

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))
# colors='tab10'

for ind, violin in enumerate(ax.findobj(PolyCollection)):
    # Index is 4 - (ind // 2): it becomes negative after the fifth pair of
    # violin halves and then counts back from the end of `colors`.
    rgb = to_rgb(colors[4-ind // 2])

    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('SLAMF7_aMAA_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# SLAMF7 expression on the t-SNE, drawn separately for low (False) and high
# (True) anti-MAA cells with a shared colour range of 0 to 3.5.
sc.pl.tsne(adata_amaa[adata_amaa.obs['anti_maa_hi']==False], color = ['SLAMF7'], cmap = 'coolwarm' , vmin=0,vmax=3.5, save='_SLAMF7_all_cells_aMAA_low.pdf')
sc.pl.tsne(adata_amaa[adata_amaa.obs['anti_maa_hi']==True], color = ['SLAMF7'], cmap = 'coolwarm' , vmin=0,vmax=3.5, save='_SLAMF7_all_cells_aMAA_hi.pdf')


In [None]:
#TYMP
# Violin plot of TYMP expression for every cell type (plus all cells pooled),
# split by low/high anti-MAA status, with significance markers from
# compare_gene_expression.
sns.set(font_scale=1.5)
# NOTE(review): each sns.set call applies the full theme, so this second call
# appears to reset font_scale to its default -- confirm which scale is intended.
sns.set(style='ticks')

features = ['TYMP']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'


df = sc.get.obs_df(adata_amaa, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
# Relabelled copy of every cell, drawn as the extra 'Combined Cells' violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

# x uses cell_key (not a hard-coded column name) so the plot, its ordering and
# the statistics below always refer to the same column.
ax=sns.violinplot(data=df, x=cell_key, y=features[0], hue=condition_key, split=True, inner='box', gap=.2,
                          density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
                        legend=True, linewidth=1,cut=0,
                          alpha=0.5)

ax.set(xlabel = None)
# `Legend.legendHandles` was renamed to `legend_handles` and the old name no
# longer exists in recent matplotlib; support both.
legend_handles = getattr(ax.legend_, 'legend_handles', None)
if legend_handles is None:
    legend_handles = ax.legend_.legendHandles
ax.legend(handles=legend_handles, title='Anti-MAA',labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot (condition False = low, True = high)
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# With a single feature, row idx of results_long is also the x position of the
# cell type, because the rows follow cell_list order.
for idx, (cell_type, direction, asterisk) in enumerate(zip(results_long['cell_type'], results_long['direction'], results_long['significance'])):

    if asterisk:  # Only add text if there is a significance marker
        # Add some padding above the maximum value
        y_position = y_max[cell_type] + 0.25 * (y_max.max() - y_max.min())
        ax.text(idx, y_position,  direction+asterisk, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0,4.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([0,5.5])
# NOTE(review): the right limit is hard-coded rather than derived from len(cell_list).
plt.xlim([-1.0,28])

# Collected below but not used within this cell.
handles = []

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))
# colors='tab10'

for ind, violin in enumerate(ax.findobj(PolyCollection)):
    # Index is 4 - (ind // 2): it becomes negative after the fifth pair of
    # violin halves and then counts back from the end of `colors`.
    rgb = to_rgb(colors[4-ind // 2])

    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('TYMP_aMAA_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# TYMP expression on the t-SNE, drawn separately for low (False) and high
# (True) anti-MAA cells with a shared colour range of 0 to 3.5.
# NOTE(review): unlike the PRIME_score and SLAMF7 plots, these `save` suffixes
# have no leading underscore, so the output file names follow a different pattern.
sc.pl.tsne(adata_amaa[adata_amaa.obs['anti_maa_hi']==False], color = ['TYMP'], cmap = 'coolwarm' , vmin=0,vmax=3.5, save='TYMP_all_cells_aMAA_low.pdf')
sc.pl.tsne(adata_amaa[adata_amaa.obs['anti_maa_hi']==True], color = ['TYMP'], cmap = 'coolwarm' , vmin=0,vmax=3.5, save='TYMP_all_cells_aMAA_hi.pdf')


In [None]:
#CD74
# Split violins of CD74 per cell-type label (anti-MAA low vs high), with an extra
# pooled 'Combined Cells' category, annotated with the output of compare_gene_expression.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['CD74']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

df = sc.get.obs_df(adata_amaa, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='box', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
# Relabel the hue legend. get_legend_handles_labels() is used instead of
# Legend.legendHandles, which was removed in matplotlib 3.9.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, title='Anti-MAA', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 0.25 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        ax.text(x_pos[cell_type], y_max[cell_type] + y_pad, direction+asterisk,
                ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0,4.0,5.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([0,6.])
plt.xlim([-1.0,28])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('CD74_aMAA_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# t-SNE of CD74 expression, drawn separately for anti-MAA low and anti-MAA high cells
# on a shared colour scale (vmin/vmax fixed).
for amaa_flag, out_name in ((False, 'CD74_all_cells_aMAA_low.pdf'),
                            (True, 'CD74_all_cells_aMAA_hi.pdf')):
    cells = adata_amaa[adata_amaa.obs['anti_maa_hi'] == amaa_flag]
    sc.pl.tsne(cells, color=['CD74'], cmap='coolwarm', vmin=0, vmax=5.5, save=out_name)


#### Immune regulators

In [None]:
# Immune-regulator genes; each one gets its own violin figure in the loop over reg_list.
reg_list = [
    'PDCD1',
    'LAG3',
    'TMEM173',
    'SIGLEC10',
    'LILRB4',
    'LILRB2',
]

In [None]:
# One split-violin figure per gene in reg_list (anti-MAA low vs high per cell-type
# label, plus a pooled 'Combined Cells' category).
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

# These keys do not change between genes, so they are set once outside the loop.
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

for feature in reg_list:

    features = [feature]

    df = sc.get.obs_df(adata_amaa, features+[cell_key,condition_key])

    df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
    df_all_cells = df.copy()
    df_all_cells[cell_key] = 'Combined Cells'

    # Concatenate original and combined data
    df = pd.concat([df, df_all_cells])

    cell_list=list(df[cell_key].unique())

    plt.figure(figsize=(12,3))

    ax = sns.violinplot(
        data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
        split=True, inner='box', gap=.2, density_norm='width', width=0.8, cut=0,
        palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
    )

    ax.set(xlabel = None)
    # Relabel the hue legend; get_legend_handles_labels() does not depend on the
    # matplotlib version the way Legend.legendHandles / legend_handles do.
    legend_handles = ax.get_legend_handles_labels()[0]
    ax.legend(handles=legend_handles, title='Anti-MAA', labels=['Low', 'High'])

    # Get the maximum y value for positioning asterisks
    y_max = df.groupby(cell_key)[features[0]].max()
    y_pad = 0.2 * (y_max.max() - y_max.min())

    # Add asterisks above each violin plot
    results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

    # Violins are drawn in `cell_list` order, so look the x position up by name
    # instead of relying on the row order of results_long.
    x_pos = {name: pos for pos, name in enumerate(cell_list)}

    # Every category is annotated here (the direction is shown even when the
    # significance marker is empty); guard with `if asterisk:` to label only
    # significant comparisons.
    for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
        ax.text(x_pos[cell_type], y_max[cell_type] + y_pad, direction+asterisk,
                ha='center', va='bottom', fontsize=12)

    plt.yticks([0.0,1.0,2.0,3.0,4.0,5.0])
    plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
    plt.ylim([0,6])
    plt.xlim([-1.0,28])

    n_col =len(cell_list)
    cmap = matplotlib.colormaps['tab10']

    # Take colors at regular intervals spanning the colormap.
    colors = cmap(np.linspace(0, 1, n_col))

    # Recolour the violin bodies: each consecutive pair of PolyCollections shares one
    # colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and
    # the odd-numbered one of each pair gets a lighter tint.
    for ind, violin in enumerate(ax.findobj(PolyCollection)):
        rgb = to_rgb(colors[4 - ind // 2])
        if ind % 2 != 0:
            rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
        violin.set_facecolor(rgb)

    # Adjust the top margin to make room for asterisks
    plt.tight_layout()
    plt.subplots_adjust(top=1.5)  # Adjust this value if needed

    # plt.savefig(str(feature)+'_aMAA_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

    plt.show()

In [None]:
#CTL score
# Split violins of ctl_score per cell-type label (anti-MAA low vs high), with an extra
# pooled 'Combined Cells' category, annotated with the output of compare_gene_expression.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['ctl_score']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

df = sc.get.obs_df(adata_amaa, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='box', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
# Relabel the hue legend. get_legend_handles_labels() is used instead of
# Legend.legendHandles, which was removed in matplotlib 3.9.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, title='Anti-MAA', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 0.25 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        ax.text(x_pos[cell_type], y_max[cell_type] + y_pad, direction+asterisk,
                ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0,4.0,5.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([-2,6.])
plt.xlim([-1.0,28])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('CTL_aMAA_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

## CTL split

In [None]:
# Split cells by 'ctl_score_pt' using quartile cut points computed over the
# distinct score values (np.unique), not over all cells.
ctl_pt = adata_th_naive.obs['ctl_score_pt']
q25, q50, q75 = np.quantile(np.unique(ctl_pt), [0.25, 0.5, 0.75])

adata_th_naive.obs['ctl_hi'] = ctl_pt > q50    # above the median
adata_th_naive.obs['ctl_q1'] = ctl_pt > q75    # above the 75th percentile
adata_th_naive.obs['ctl_q4'] = ctl_pt <= q25   # at or below the 25th percentile

In [None]:
#PRIME
# Split violins of PRIME_score per cell-type label (CTL low vs high), with an extra
# pooled 'Combined Cells' category, annotated with the output of compare_gene_expression.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['PRIME_score']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

df = sc.get.obs_df(adata_th_naive, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='box', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
# Relabel the hue legend. get_legend_handles_labels() is used instead of
# Legend.legendHandles, which was removed in matplotlib 3.9.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, title='CTL', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 0.25 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        ax.text(x_pos[cell_type], y_max[cell_type] + y_pad, direction+asterisk,
                ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([-0.25,2.5])
plt.xlim([-1.0,28])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('PRIME_CTL_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
#SLAMF7
# Split violins of SLAMF7 per cell-type label (CTL low vs high), with an extra
# pooled 'Combined Cells' category, annotated with the output of compare_gene_expression.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['SLAMF7']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

df = sc.get.obs_df(adata_th_naive, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='box', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
# Relabel the hue legend. get_legend_handles_labels() is used instead of
# Legend.legendHandles, which was removed in matplotlib 3.9.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, title='CTL', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 0.2 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        ax.text(x_pos[cell_type], y_max[cell_type] + y_pad, direction+asterisk,
                ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([-0.25,5.5])
plt.xlim([-1.0,28])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('SLAMF7_CTL_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
#TYMP
# Split violins of TYMP per cell-type label (CTL low vs high), with an extra
# pooled 'Combined Cells' category, annotated with the output of compare_gene_expression.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['TYMP']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

df = sc.get.obs_df(adata_th_naive, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='box', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
# Relabel the hue legend. get_legend_handles_labels() is used instead of
# Legend.legendHandles, which was removed in matplotlib 3.9.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, title='CTL', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 0.2 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        ax.text(x_pos[cell_type], y_max[cell_type] + y_pad, direction+asterisk,
                ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0,4.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([-0.25,5.5])
plt.xlim([-1.0,28])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('TYMP_CTL_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
#CD74
# Split violins of CD74 per cell-type label (CTL low vs high), with an extra
# pooled 'Combined Cells' category, annotated with the output of compare_gene_expression.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['CD74']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

df = sc.get.obs_df(adata_th_naive, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='box', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
# Relabel the hue legend using the labelled artists registered on the axes.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, labels=['Low', 'High'], title='CTL')

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 0.25 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        ax.text(x_pos[cell_type], y_max[cell_type] + y_pad, direction+asterisk,
                ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([-0.25,5.5])
plt.xlim([-1.0,28])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('CD74_CTL_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# Double check: Cell CTL score split by median patient CTL score
# Split violins of ctl_score per cell-type label (ctl_hi False vs True), with an extra
# pooled 'Combined Cells' category, annotated with the output of compare_gene_expression.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['ctl_score']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

df = sc.get.obs_df(adata_th_naive, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(12,3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='box', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
# Relabel the hue legend using the labelled artists registered on the axes.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, labels=['Low', 'High'], title='CTL')

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 0.2 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        ax.text(x_pos[cell_type], y_max[cell_type] + y_pad, direction+asterisk,
                ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=30,  fontsize=10, ha='right' )
plt.ylim([-2.25,5.5])
plt.xlim([-1.0,28])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('cellCTL_by_ptCTL_all_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

## DCs, macrophages and monocytes

In [None]:
# Leiden clusters kept for the DC / macrophage / monocyte analysis.
subset_clusters = ['r0','r1','r2','r3','r4','r5','r6','r7','r8','16','17']
in_subset = adata_amaa.obs['leiden'].isin(subset_clusters)

# .copy() materialises the selection as an independent AnnData rather than a view.
adata_subset = adata_amaa[in_subset].copy()

In [None]:
# Gene signature scored per cell; the score is stored in adata_subset.obs['dc_sig_score'].
dc_sig_genes = [
    "SLC7A7", "LILRB2", "TNFAIP2", "SECTM1", "CSF2RB", "LST1", "BLNK",
    "LAT2", "FGR", "CTSS", "TYMP", "AOAH", "CD4", "PIK3AP1", "POU2F2",
    "HLA-DPA1", "ITGB2", "MZB1", "CD84", "HCLS1", "TNFRSF1B", "HAVCR2",
    "DOK2", "CD53", "SELL", "SLAMF7", "LCP1", "TAPBP", "RCSD1",
    "ARHGAP30", "CD37", "PSMB10", "EPSTI1", "CCDC69",
]
sc.tl.score_genes(adata_subset, gene_list=dc_sig_genes, score_name='dc_sig_score')

In [None]:
# Cell type predictions by Celltypist
predictions_subset = celltypist.annotate(
    adata_subset,
    model='Immune_All_High.pkl',
    majority_voting=True,
)
print(predictions_subset.predicted_labels)

# Replace the subset with the AnnData returned by the prediction object; the cells
# below read the 'majority_voting' column from it.
adata_subset = predictions_subset.to_adata()

In [None]:

features = ['PRIME_score']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

df = sc.get.obs_df(adata_subset, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

# Four fixed colours, one per cell-type label in alphabetical label order
# (NOTE(review): confirm against the categories of adata_subset.obs['majority_voting']).
colors = [(0.5803921568627451, 0.403921568627451, 0.7411764705882353),
          (0.7372549019607844, 0.7411764705882353, 0.1333333333),
          (0.8901960784313725, 0.4666666666666667, 0.7607843137254902),
          (0.17254901960784313, 0.6274509803921569, 0.17254901960784313)]

subset_cmap = ListedColormap(colors, name='cmap1' )
# force=True keeps this cell re-runnable: without it register() raises ValueError
# once 'cmap1' has already been registered in the running kernel.
matplotlib.colormaps.register(name='cmap1', cmap=subset_cmap, force=True)

In [None]:
# Display the colormap object as the cell output to check its colours by eye.
subset_cmap

In [None]:
## All DCs, macrophages and monocytes
# t-SNE of the whole subset, coloured by the majority-voting label using the 'cmap1' palette.
tsne_style = dict(
    color='majority_voting',
    palette='cmap1',
    legend_loc='on data',
    alpha=0.5,
    add_outline=True,
    outline_width=(0.1, 0.2),
)
sc.pl.tsne(adata_subset, save='Li_DC_subset_CT_Immune_All_High.pdf', **tsne_style)

In [None]:
#Anti-MAA high
# NOTE(review): the patient list is read from adata_amaa (all cells), while the plot
# uses adata_subset — confirm this is intended.
amaa_hi_cells = adata_amaa[adata_amaa.obs['anti_maa_hi']==True]
print(amaa_hi_cells.obs['patient'].unique())

tsne_style = dict(
    color='majority_voting',
    palette='cmap1',
    legend_loc='on data',
    alpha=0.5,
    add_outline=True,
    outline_width=(0.1, 0.2),
)
subset_hi = adata_subset[adata_subset.obs['anti_maa_hi']==True]
sc.pl.tsne(subset_hi, save='Li_DC_subset_CT_Immune_All_High_aMAAhi.pdf', **tsne_style)

In [None]:
# Anti-MAA low
# NOTE(review): the patient list is read from adata_amaa (all cells), while the plot
# uses adata_subset — confirm this is intended.
amaa_low_cells = adata_amaa[adata_amaa.obs['anti_maa_hi']==False]
print(amaa_low_cells.obs['patient'].unique())

tsne_style = dict(
    color='majority_voting',
    palette='cmap1',
    legend_loc='on data',
    alpha=0.5,
    add_outline=True,
    outline_width=(0.1, 0.2),
)
subset_low = adata_subset[adata_subset.obs['anti_maa_hi']==False]
sc.pl.tsne(subset_low, save='Li_DC_subset_CT_Immune_All_High_aMAAlow.pdf', **tsne_style)


In [None]:
# Split violins of PRIME_score in adata_subset per cell-type label (anti-MAA low vs
# high), with an extra pooled 'Combined Cells' category.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['PRIME_score']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

df = sc.get.obs_df(adata_subset, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(6,2))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='point', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
ax.set(ylabel = 'PRIME score')
# Relabel the hue legend. get_legend_handles_labels() is used instead of
# Legend.legendHandles, which was removed in matplotlib 3.9.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, title='Anti-MAA', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 0.4 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        y_position = y_max[cell_type] + y_pad
        # Significance marker on top, direction just below it.
        ax.text(x_pos[cell_type], y_position, asterisk, ha='center', va='bottom', fontsize=12)
        ax.text(x_pos[cell_type], y_position-0.1, direction, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=15,  fontsize=12, ha='center' )
plt.ylim([0,2.5])
plt.xlim([-0.5,5.3])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('PRIME_score_aMAA_Select_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# Split violins of SLAMF7 in adata_subset per cell-type label (anti-MAA low vs
# high), with an extra pooled 'Combined Cells' category.
# NOTE: a preceding sns.set(font_scale=1.5) would be reset by this call (font_scale
# defaults to 1), so only the effective theme call is kept.
sns.set(style='ticks')

features = ['SLAMF7']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

df = sc.get.obs_df(adata_subset, features+[cell_key,condition_key])

df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

cell_list=list(df[cell_key].unique())

plt.figure(figsize=(6,2))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key, order=cell_list,
    split=True, inner='point', gap=.2, density_norm='width', width=0.8, cut=0,
    palette=['.4', '.7'], legend=True, linewidth=1, alpha=0.5,
)

ax.set(xlabel = None)
ax.set(ylabel = 'SLAMF7')
# Relabel the hue legend. get_legend_handles_labels() is used instead of
# Legend.legendHandles, which was removed in matplotlib 3.9.
legend_handles = ax.get_legend_handles_labels()[0]
ax.legend(handles=legend_handles, title='Anti-MAA', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()
y_pad = 1.2 * (y_max.max() - y_max.min())

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key )

# Violins are drawn in `cell_list` order, so look the x position up by name
# instead of relying on the row order of results_long.
x_pos = {name: pos for pos, name in enumerate(cell_list)}

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if asterisk:  # Only add text if there is a significance marker
        y_position = y_max[cell_type] + y_pad
        # Significance marker on top, direction just below it.
        ax.text(x_pos[cell_type], y_position, asterisk, ha='center', va='bottom', fontsize=12)
        ax.text(x_pos[cell_type], y_position-0.15, direction, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0,1.0,2.0,3.0,4.0])
plt.xticks(np.arange(len(cell_list)),cell_list,rotation=15,  fontsize=12, ha='center' )
plt.ylim([0,5])
plt.xlim([-0.5,5.3])

n_col =len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col+1))

# Recolour the violin bodies: each consecutive pair of PolyCollections shares one
# colour (index 4 - ind // 2; negative indices wrap to the end of `colors`), and the
# odd-numbered one of each pair gets a lighter tint.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    rgb = to_rgb(colors[4 - ind // 2])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)

# Adjust the top margin to make room for asterisks
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('SLAMF7_aMAA_Select_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# Split violin plot of TYMP per `majority_voting` label (plus a pooled
# 'Combined Cells' group), comparing the two levels of `anti_maa_hi`.
# NOTE(review): a preceding `sns.set(font_scale=1.5)` would be dead code here,
# because `sns.set(style='ticks')` resets every theme parameter it is not given
# (font_scale falls back to 1). Pass font_scale in this same call if a larger
# font is actually wanted.
sns.set(style='ticks')

features = ['TYMP']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

# One row per cell: the feature value, the cell-type label and the condition flag.
df = sc.get.obs_df(adata_subset, features + [cell_key, condition_key])
df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()

# Duplicate every cell under a single pooled label so the pooled distribution
# is drawn as one extra violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

# Order of first appearance; this is also the left-to-right violin order.
cell_list = list(df[cell_key].unique())

plt.figure(figsize=(6, 2))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key,
    split=True, inner='point', gap=.2,
    density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
    legend=True, linewidth=1, cut=0,
    alpha=0.5,
)

ax.set(xlabel=None, ylabel='TYMP')

# `Legend.legendHandles` was deprecated in Matplotlib 3.7 and removed in 3.9 in
# favour of `Legend.legend_handles`; accept whichever one this install provides.
auto_legend = ax.get_legend()
legend_handles = getattr(auto_legend, 'legend_handles', None)
if legend_handles is None:
    legend_handles = auto_legend.legendHandles
# NOTE(review): the 'Low'/'High' labels assume the first hue level is the
# low group — confirm against the values of `anti_maa_hi`.
ax.legend(handles=legend_handles, title='Anti-MAA', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key)

# Place each label by cell-type name rather than by row number of
# `results_long`, so a missing or reordered row cannot shift the markers onto
# the wrong violin.
x_of_cell_type = {name: pos for pos, name in enumerate(cell_list)}
y_spread = y_max.max() - y_max.min()

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if not asterisk:  # Only add text if there is a significance marker
        continue
    idx = x_of_cell_type[cell_type]
    # Add some padding above the maximum value
    y_position = y_max[cell_type] + 0.5 * y_spread
    ax.text(idx, y_position, asterisk, ha='center', va='bottom', fontsize=12)
    ax.text(idx, y_position - 0.15, direction, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0, 1.0, 2.0, 3.0, 4.0])
plt.xticks(np.arange(len(cell_list)), cell_list, rotation=15, fontsize=12, ha='center')
plt.ylim([0, 5])
plt.xlim([-0.5, 5.3])

# NOTE(review): `handles` is filled below but never passed to a legend in this cell.
handles = []

n_col = len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col + 1))

# The loop treats consecutive pairs of PolyCollections as the two halves of one
# violin: both share a base colour and the second half is lightened.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    # Colour index counts down from 4; negative indices wrap to the end of `colors`.
    rgb = to_rgb(colors[4 - (ind // 2)])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks.
# top=1.5 puts the axes' top edge above the figure canvas; bbox_inches='tight'
# below lets the saved file grow to include it.
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('TYMP_aMAA_Select_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# Split violin plot of CD74 per `majority_voting` label (plus a pooled
# 'Combined Cells' group), comparing the two levels of `anti_maa_hi`.
# The former `sns.set(font_scale=1.5)` call was dropped: it had no effect,
# because `sns.set(style='ticks')` resets every theme parameter it is not given
# (font_scale falls back to 1). Pass font_scale in this same call if a larger
# font is actually wanted.
sns.set(style='ticks')

features = ['CD74']
cell_key = 'majority_voting'
condition_key = 'anti_maa_hi'

# One row per cell: the feature value, the cell-type label and the condition flag.
df = sc.get.obs_df(adata_subset, features + [cell_key, condition_key])
df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()

# Duplicate every cell under a single pooled label so the pooled distribution
# is drawn as one extra violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

# Order of first appearance; this is also the left-to-right violin order.
cell_list = list(df[cell_key].unique())

plt.figure(figsize=(6, 2))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key,
    split=True, inner='point', gap=.2,
    density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
    legend=True, linewidth=1, cut=0,
    alpha=0.5,
)

ax.set(xlabel=None, ylabel='CD74')

# `Legend.legendHandles` was deprecated in Matplotlib 3.7 and removed in 3.9 in
# favour of `Legend.legend_handles`; accept whichever one this install provides.
auto_legend = ax.get_legend()
legend_handles = getattr(auto_legend, 'legend_handles', None)
if legend_handles is None:
    legend_handles = auto_legend.legendHandles
# NOTE(review): the 'Low'/'High' labels assume the first hue level is the
# low group — confirm against the values of `anti_maa_hi`.
ax.legend(handles=legend_handles, title='Anti-MAA', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key)

# Place each label by cell-type name rather than by row number of
# `results_long`, so a missing or reordered row cannot shift the markers onto
# the wrong violin.
x_of_cell_type = {name: pos for pos, name in enumerate(cell_list)}
y_spread = y_max.max() - y_max.min()

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if not asterisk:  # Only add text if there is a significance marker
        continue
    idx = x_of_cell_type[cell_type]
    # Add some padding above the maximum value
    y_position = y_max[cell_type] + 1.5 * y_spread
    ax.text(idx, y_position, asterisk, ha='center', va='bottom', fontsize=12)
    ax.text(idx, y_position - 0.15, direction, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0, 1.0, 2.0, 3.0, 4.0])
plt.xticks(np.arange(len(cell_list)), cell_list, rotation=15, fontsize=13, ha='center')
plt.ylim([0, 5.5])
plt.xlim([-0.5, 5.3])

# NOTE(review): `handles` is filled below but never passed to a legend in this cell.
handles = []

n_col = len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col + 1))

# The loop treats consecutive pairs of PolyCollections as the two halves of one
# violin: both share a base colour and the second half is lightened.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    # Colour index counts down from 4; negative indices wrap to the end of `colors`.
    rgb = to_rgb(colors[4 - (ind // 2)])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks.
# top=1.5 puts the axes' top edge above the figure canvas; bbox_inches='tight'
# below lets the saved file grow to include it.
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('CD74_aMAA_Select_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

### CTL split, select cells

In [None]:
# Restrict `adata_th_naive` to the Leiden clusters of interest and work on an
# independent copy.
selected_leiden = ['r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', '16', '17']
in_selected_leiden = adata_th_naive.obs['leiden'].isin(selected_leiden)
adata_subset = adata_th_naive[in_selected_leiden].copy()

# Cell type predictions by Celltypist, with majority voting enabled.
predictions_subset = celltypist.annotate(
    adata_subset,
    model='Immune_All_High.pkl',
    majority_voting=True,
)
print(predictions_subset.predicted_labels)

# Rebind `adata_subset` to the AnnData returned by the prediction object.
adata_subset = predictions_subset.to_adata()

In [None]:
# Split violin plot of PRIME_score per `majority_voting` label (plus a pooled
# 'Combined Cells' group), comparing the two levels of `ctl_hi`.
# The former `sns.set(font_scale=1.5)` call was dropped: it had no effect,
# because `sns.set(style='ticks')` resets every theme parameter it is not given
# (font_scale falls back to 1). Pass font_scale in this same call if a larger
# font is actually wanted.
sns.set(style='ticks')

features = ['PRIME_score']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

# One row per cell: the feature value, the cell-type label and the condition flag.
df = sc.get.obs_df(adata_subset, features + [cell_key, condition_key])
df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()

# Duplicate every cell under a single pooled label so the pooled distribution
# is drawn as one extra violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

# Order of first appearance; this is also the left-to-right violin order.
cell_list = list(df[cell_key].unique())

plt.figure(figsize=(9, 3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key,
    split=True, inner='point', gap=.2,
    density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
    legend=True, linewidth=1, cut=0,
    alpha=0.5,
)

ax.set(xlabel=None)

# `Legend.legendHandles` was deprecated in Matplotlib 3.7 and removed in 3.9 in
# favour of `Legend.legend_handles`; accept whichever one this install provides.
auto_legend = ax.get_legend()
legend_handles = getattr(auto_legend, 'legend_handles', None)
if legend_handles is None:
    legend_handles = auto_legend.legendHandles
# NOTE(review): the 'Low'/'High' labels assume the first hue level is the
# low group — confirm against the values of `ctl_hi`.
ax.legend(handles=legend_handles, title='CTL', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key)

# Place each label by cell-type name rather than by row number of
# `results_long`, so a missing or reordered row cannot shift the markers onto
# the wrong violin.
x_of_cell_type = {name: pos for pos, name in enumerate(cell_list)}
y_spread = y_max.max() - y_max.min()

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if not asterisk:  # Only add text if there is a significance marker
        continue
    idx = x_of_cell_type[cell_type]
    # Add some padding above the maximum value
    y_position = y_max[cell_type] + 0.25 * y_spread
    ax.text(idx, y_position, direction + asterisk, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0, 1.0, 2.0, 3.0])
plt.xticks(np.arange(len(cell_list)), cell_list, rotation=30, fontsize=10, ha='center')
plt.ylim([0, 2.5])
plt.xlim([-0.5, 5])

# NOTE(review): `handles` is filled below but never passed to a legend in this cell.
handles = []

n_col = len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col + 1))

# The loop treats consecutive pairs of PolyCollections as the two halves of one
# violin: both share a base colour and the second half is lightened.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    # Colour index counts down from 4; negative indices wrap to the end of `colors`.
    rgb = to_rgb(colors[4 - (ind // 2)])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks.
# top=1.5 puts the axes' top edge above the figure canvas; bbox_inches='tight'
# below lets the saved file grow to include it.
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('PRIME_score_CTL_Select_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# Split violin plot of SLAMF7 per `majority_voting` label (plus a pooled
# 'Combined Cells' group), comparing the two levels of `ctl_hi`.
# The former `sns.set(font_scale=1.5)` call was dropped: it had no effect,
# because `sns.set(style='ticks')` resets every theme parameter it is not given
# (font_scale falls back to 1). Pass font_scale in this same call if a larger
# font is actually wanted.
sns.set(style='ticks')

features = ['SLAMF7']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

# One row per cell: the feature value, the cell-type label and the condition flag.
df = sc.get.obs_df(adata_subset, features + [cell_key, condition_key])
df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()

# Duplicate every cell under a single pooled label so the pooled distribution
# is drawn as one extra violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

# Order of first appearance; this is also the left-to-right violin order.
cell_list = list(df[cell_key].unique())

plt.figure(figsize=(9, 3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key,
    split=True, inner='point', gap=.2,
    density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
    legend=True, linewidth=1, cut=0,
    alpha=0.5,
)

ax.set(xlabel=None)

# `Legend.legendHandles` was deprecated in Matplotlib 3.7 and removed in 3.9 in
# favour of `Legend.legend_handles`; accept whichever one this install provides.
auto_legend = ax.get_legend()
legend_handles = getattr(auto_legend, 'legend_handles', None)
if legend_handles is None:
    legend_handles = auto_legend.legendHandles
# NOTE(review): the 'Low'/'High' labels assume the first hue level is the
# low group — confirm against the values of `ctl_hi`.
ax.legend(handles=legend_handles, title='CTL', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key)

# Place each label by cell-type name rather than by row number of
# `results_long`, so a missing or reordered row cannot shift the markers onto
# the wrong violin.
x_of_cell_type = {name: pos for pos, name in enumerate(cell_list)}
y_spread = y_max.max() - y_max.min()

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if not asterisk:  # Only add text if there is a significance marker
        continue
    idx = x_of_cell_type[cell_type]
    # Add some padding above the maximum value
    y_position = y_max[cell_type] + 0.25 * y_spread
    ax.text(idx, y_position, direction + asterisk, ha='center', va='bottom', fontsize=12)

plt.yticks([0.0, 1.0, 2.0, 3.0, 4.0])
plt.xticks(np.arange(len(cell_list)), cell_list, rotation=30, fontsize=10, ha='center')
plt.ylim([0, 5])
plt.xlim([-0.5, 5])

# NOTE(review): `handles` is filled below but never passed to a legend in this cell.
handles = []

n_col = len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col + 1))

# The loop treats consecutive pairs of PolyCollections as the two halves of one
# violin: both share a base colour and the second half is lightened.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    # Colour index counts down from 4; negative indices wrap to the end of `colors`.
    rgb = to_rgb(colors[4 - (ind // 2)])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks.
# top=1.5 puts the axes' top edge above the figure canvas; bbox_inches='tight'
# below lets the saved file grow to include it.
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('SLAMF7_CTL_Select_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# Split violin plot of TYMP per `majority_voting` label (plus a pooled
# 'Combined Cells' group), comparing the two levels of `ctl_hi`.
# The former `sns.set(font_scale=1.5)` call was dropped: it had no effect,
# because `sns.set(style='ticks')` resets every theme parameter it is not given
# (font_scale falls back to 1). Pass font_scale in this same call if a larger
# font is actually wanted.
sns.set(style='ticks')

features = ['TYMP']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

# One row per cell: the feature value, the cell-type label and the condition flag.
df = sc.get.obs_df(adata_subset, features + [cell_key, condition_key])
df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()

# Duplicate every cell under a single pooled label so the pooled distribution
# is drawn as one extra violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

# Order of first appearance; this is also the left-to-right violin order.
cell_list = list(df[cell_key].unique())

plt.figure(figsize=(9, 3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key,
    split=True, inner='point', gap=.2,
    density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
    legend=True, linewidth=1, cut=0,
    alpha=0.5,
)

ax.set(xlabel=None)

# `Legend.legendHandles` was deprecated in Matplotlib 3.7 and removed in 3.9 in
# favour of `Legend.legend_handles`; accept whichever one this install provides.
auto_legend = ax.get_legend()
legend_handles = getattr(auto_legend, 'legend_handles', None)
if legend_handles is None:
    legend_handles = auto_legend.legendHandles
# NOTE(review): the 'Low'/'High' labels assume the first hue level is the
# low group — confirm against the values of `ctl_hi`.
ax.legend(handles=legend_handles, title='CTL', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key)

# Place each label by cell-type name rather than by row number of
# `results_long`, so a missing or reordered row cannot shift the markers onto
# the wrong violin.
x_of_cell_type = {name: pos for pos, name in enumerate(cell_list)}
y_spread = y_max.max() - y_max.min()

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if not asterisk:  # Only add text if there is a significance marker
        continue
    idx = x_of_cell_type[cell_type]
    # Add some padding above the maximum value
    y_position = y_max[cell_type] + 0.25 * y_spread
    ax.text(idx, y_position, direction + asterisk, ha='center', va='bottom', fontsize=12)

# NOTE(review): ticks stop at 3 while the y-axis runs to 5 — confirm this is intended.
plt.yticks([0.0, 1.0, 2.0, 3.0])
plt.xticks(np.arange(len(cell_list)), cell_list, rotation=30, fontsize=10, ha='center')
plt.ylim([0, 5])
plt.xlim([-0.5, 5])

# NOTE(review): `handles` is filled below but never passed to a legend in this cell.
handles = []

n_col = len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col + 1))

# The loop treats consecutive pairs of PolyCollections as the two halves of one
# violin: both share a base colour and the second half is lightened.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    # Colour index counts down from 4; negative indices wrap to the end of `colors`.
    rgb = to_rgb(colors[4 - (ind // 2)])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks.
# top=1.5 puts the axes' top edge above the figure canvas; bbox_inches='tight'
# below lets the saved file grow to include it.
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('TYMP_CTL_Select_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long

In [None]:
# Split violin plot of CD74 per `majority_voting` label (plus a pooled
# 'Combined Cells' group), comparing the two levels of `ctl_hi`.
# The former `sns.set(font_scale=1.5)` call was dropped: it had no effect,
# because `sns.set(style='ticks')` resets every theme parameter it is not given
# (font_scale falls back to 1). Pass font_scale in this same call if a larger
# font is actually wanted.
sns.set(style='ticks')

features = ['CD74']
cell_key = 'majority_voting'
condition_key = 'ctl_hi'

# One row per cell: the feature value, the cell-type label and the condition flag.
df = sc.get.obs_df(adata_subset, features + [cell_key, condition_key])
df[cell_key] = pd.Categorical(df[cell_key]).remove_unused_categories()

# Duplicate every cell under a single pooled label so the pooled distribution
# is drawn as one extra violin.
df_all_cells = df.copy()
df_all_cells[cell_key] = 'Combined Cells'

# Concatenate original and combined data
df = pd.concat([df, df_all_cells])

# Order of first appearance; this is also the left-to-right violin order.
cell_list = list(df[cell_key].unique())

plt.figure(figsize=(9, 3))

ax = sns.violinplot(
    data=df, x=cell_key, y=features[0], hue=condition_key,
    split=True, inner='point', gap=.2,
    density_norm='width', width=0.8, palette=['.4', '.7'], order=cell_list,
    legend=True, linewidth=1, cut=0,
    alpha=0.5,
)

ax.set(xlabel=None)

# `Legend.legendHandles` was deprecated in Matplotlib 3.7 and removed in 3.9 in
# favour of `Legend.legend_handles`; accept whichever one this install provides.
auto_legend = ax.get_legend()
legend_handles = getattr(auto_legend, 'legend_handles', None)
if legend_handles is None:
    legend_handles = auto_legend.legendHandles
# NOTE(review): the 'Low'/'High' labels assume the first hue level is the
# low group — confirm against the values of `ctl_hi`.
ax.legend(handles=legend_handles, title='CTL', labels=['Low', 'High'])

# Get the maximum y value for positioning asterisks
y_max = df.groupby(cell_key)[features[0]].max()

# Add asterisks above each violin plot
results_long, results_pivot = compare_gene_expression(df, cell_list, features, condition_key, False, True, cell_key)

# Place each label by cell-type name rather than by row number of
# `results_long`, so a missing or reordered row cannot shift the markers onto
# the wrong violin.
x_of_cell_type = {name: pos for pos, name in enumerate(cell_list)}
y_spread = y_max.max() - y_max.min()

for cell_type, direction, asterisk in zip(results_long['cell_type'], results_long['direction'], results_long['significance']):
    if not asterisk:  # Only add text if there is a significance marker
        continue
    idx = x_of_cell_type[cell_type]
    # Add some padding above the maximum value
    y_position = y_max[cell_type] + 0.25 * y_spread
    ax.text(idx, y_position, direction + asterisk, ha='center', va='bottom', fontsize=12)

# NOTE(review): ticks stop at 3 while the y-axis runs to 5 — confirm this is intended.
plt.yticks([0.0, 1.0, 2.0, 3.0])
plt.xticks(np.arange(len(cell_list)), cell_list, rotation=30, fontsize=10, ha='center')
plt.ylim([0, 5])
plt.xlim([-0.5, 5])

# NOTE(review): `handles` is filled below but never passed to a legend in this cell.
handles = []

n_col = len(cell_list)
cmap = matplotlib.colormaps['tab10']

# Take colors at regular intervals spanning the colormap.
colors = cmap(np.linspace(0, 1, n_col + 1))

# The loop treats consecutive pairs of PolyCollections as the two halves of one
# violin: both share a base colour and the second half is lightened.
for ind, violin in enumerate(ax.findobj(PolyCollection)):
    # Colour index counts down from 4; negative indices wrap to the end of `colors`.
    rgb = to_rgb(colors[4 - (ind // 2)])
    if ind % 2 != 0:
        rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
    violin.set_facecolor(rgb)
    handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))

# Adjust the top margin to make room for asterisks.
# top=1.5 puts the axes' top edge above the figure canvas; bbox_inches='tight'
# below lets the saved file grow to include it.
plt.tight_layout()
plt.subplots_adjust(top=1.5)  # Adjust this value if needed

plt.savefig('CD74_CTL_Select_cells_Li.pdf', dpi=600, bbox_inches='tight')

plt.show()

results_long