In [1]:
from sklearn.neighbors import NearestNeighbors
import numpy as np
import pandas as pd
from pathlib import Path
import os
from plotnine import *
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from descartes import PolygonPatch
import alphashape
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import Point
import math
from scipy.stats import kde
import hdbscan
from sklearn.cluster import DBSCAN
from scipy.spatial import Delaunay
from collections import defaultdict
from itertools import permutations
import plotly.figure_factory as ff
import matplotlib.cm as cm
import itertools
from scipy.stats import wilcoxon #Wilcoxon is nonparametric, paired data = paired student T test ***
from scipy.stats import pearsonr

In [26]:
# Control samples; rows belonging to them are left out of every "*_Pt" table below.
control_samples = ["Norm LN1x4", "Norm LN2x4", "Norm LN3x4", "Norm LN4x4"]


def _drop_controls(table):
    """Return the rows of ``table`` whose "Pt" value is not one of the control samples."""
    keep = ~table["Pt"].isin(control_samples)
    return table[keep]


# Cluster-participant cell composition
df_Cluster_pv2 = pd.read_csv("C:/Data/DCs_results/c_Type.txt")
df_Cluster_pv2_Pt = _drop_controls(df_Cluster_pv2).drop(columns=["Unnamed: 0"])

# Cluster-overall size (all cells included)
SIZE_cType = pd.read_csv("C:/Data/DCs_results/SIZE_c_Type.txt")
SIZE_cType_Pt = _drop_controls(SIZE_cType).drop(columns=["Unnamed: 0"])

# Cluster-overall size (all cells included, with mean in each specimen)
SIZE_cType_Pts = pd.read_csv("C:/Data/DCs_results/SIZE_cType_Pts.txt")

# Neighbor-cluster data (each cell, numbers of neighbors, participated cluster ID).
# Control samples are removed, rows without an X or Y value are discarded,
# and every remaining missing value is replaced by 0.
all_files_cType = pd.read_csv("C:/Data/DCs_results/nn_DCs_clustera_cType.txt")
all_files_cType_Pt = (
    _drop_controls(all_files_cType)
    .drop(columns=["Unnamed: 0", "Unnamed: 0.1"])
    .dropna(subset=["X", "Y"])
    .fillna(0)
)

# PD-L1+ percent (take the percent in each cluster, then take an average for one specimen)
# .copy() makes dfRatio an independent frame: adding columns to a bare column
# slice of df_Cluster_pv2_Pt raises SettingWithCopyWarning in classic pandas.
dfRatio = df_Cluster_pv2_Pt[["Pt", "Cluster", "c_Type"]].copy()

# cDC1: total = PD-L1+ and PD-L1- CD141+ cells; percent = PD-L1+ share of that total.
dfRatio["Total cDC1"] = df_Cluster_pv2_Pt["CD141+PD-L1+"] + df_Cluster_pv2_Pt["CD141+PD-L1-"]
dfRatio["%PD-L1 cDC1"] = 100*df_Cluster_pv2_Pt["CD141+PD-L1+"] / dfRatio["Total cDC1"]

# CD1a: the total additionally includes the plain "CD1a+" column.
dfRatio["Total CD1A"] = df_Cluster_pv2_Pt["CD1a+PD-L1+"] + df_Cluster_pv2_Pt["CD1a+PD-L1-"] + df_Cluster_pv2_Pt["CD1a+"]
dfRatio["%PD-L1 CD1A"] = 100*df_Cluster_pv2_Pt["CD1a+PD-L1+"] / dfRatio["Total CD1A"]

# NOTE(review): rows whose total is 0 presumably end up as NaN (0/0) here and are
# then ignored by the per-specimen mean taken later - confirm this is intended.
dfRatio = dfRatio.round(1)

# %PD-L1+ on cDC1
# Cluster types in the order they are drawn on the x-axis. The order is passed to
# seaborn explicitly so that the bars, the swarm points and the hard-coded tick
# labels set below always line up, instead of depending on the order in which
# the c_Type values happen to appear in the data.
cDC1_cluster_order = ["Het", "cDC1-h", "iso"]

# Average for every specimen: mean of the per-cluster percentages for each (Pt, c_Type) pair
dfRatio_cDC1 = (
    dfRatio[["Pt", "Cluster", "c_Type", "%PD-L1 cDC1"]]
    .pivot_table(index=["Pt", "c_Type"], values=["%PD-L1 cDC1"], aggfunc="mean")
    .round(1)
    .reset_index()
)

# Plot (vs. types of clusters): individual specimens as a swarm, bars drawn on the same axes
sns.set(style="ticks", font_scale=1.5, color_codes=True)
g = sns.catplot(y='%PD-L1 cDC1', x='c_Type', data=dfRatio_cDC1, kind="swarm", order=cDC1_cluster_order,
                marker='o', alpha=0.8, color="black", height=6, aspect=.75, linewidth=1)
sns.barplot(y='%PD-L1 cDC1', x='c_Type', data=dfRatio_cDC1, order=cDC1_cluster_order,
            alpha=1, edgecolor="0.1", linewidth=1.3, ax=g.ax,
            palette={"Het": "brown", "cDC1-h": "khaki", "iso": "white"})
# Display labels, listed in the same order as cDC1_cluster_order
g.set(xticklabels=["Het","cDC1-h","Iso"])
plt.xlabel('Type of DC clusters', fontsize=17)
plt.ylabel('%PD-L1+ on cDC1 cells', fontsize=17)
g.savefig('C:/Data/DCs_results/%PD-L1on_cDC1.tiff', format='tiff', dpi=600)
plt.show()

# Statistics
# To perform paired tests, first rearrange the table to one row per specimen and one
# column per cluster type; dropna() keeps only specimens that have a value for every type.
df_pair = dfRatio_cDC1.pivot_table(index=["Pt"], columns=["c_Type"]).dropna().reset_index()
# Paired Wilcoxon signed-rank test for each pair of cluster types (p-value only)
p_Het_vs_cDC1h = wilcoxon(df_pair[('%PD-L1 cDC1', 'Het')], df_pair[('%PD-L1 cDC1', 'cDC1-h')]).pvalue
p_Het_vs_Iso = wilcoxon(df_pair[('%PD-L1 cDC1', 'Het')], df_pair[('%PD-L1 cDC1', 'iso')]).pvalue
p_cDC1h_vs_Iso = wilcoxon(df_pair[('%PD-L1 cDC1', 'cDC1-h')], df_pair[('%PD-L1 cDC1', 'iso')]).pvalue
# One-row table of the three p-values
p_PDL1_cDC1 = pd.DataFrame({"Het vs cDC1-h":p_Het_vs_cDC1h, "Het vs Iso":p_Het_vs_Iso,
                       "cDC1-h vs Iso":[p_cDC1h_vs_Iso]})
p_PDL1_cDC1.to_csv("C:/Data/DCs_results/pvalue_PD-L1 on cDC1.txt")

# %PD-L1+ on CD1A
# Cluster types in the order they are drawn on the x-axis. The order is passed to
# seaborn explicitly so that the bars, the swarm points and the hard-coded tick
# labels set below always line up, instead of depending on the order in which
# the c_Type values happen to appear in the data ("CD1a-h" sorts before "Het",
# so the implicit order would likely not match the labels).
CD1A_cluster_order = ["Het", "CD1a-h", "iso"]

# Average for every specimen: mean of the per-cluster percentages for each (Pt, c_Type) pair
dfRatio_CD1A = (
    dfRatio[["Pt", "Cluster", "c_Type", "%PD-L1 CD1A"]]
    .pivot_table(index=["Pt", "c_Type"], values=["%PD-L1 CD1A"], aggfunc="mean")
    .round(1)
    .reset_index()
)

# Plot (vs. types of clusters): individual specimens as a swarm, bars drawn on the same axes
sns.set(style="ticks", font_scale=1.5, color_codes=True)
g = sns.catplot(y='%PD-L1 CD1A', x='c_Type', data=dfRatio_CD1A, kind="swarm", order=CD1A_cluster_order,
                marker='o', alpha=0.8, color="black", height=6, aspect=.75, linewidth=1)
sns.barplot(y='%PD-L1 CD1A', x='c_Type', data=dfRatio_CD1A, order=CD1A_cluster_order,
            alpha=1, edgecolor="0.1", linewidth=1.3, ax=g.ax,
            palette={"Het": "brown", "CD1a-h": "khaki", "iso": "white"})
# Display labels, listed in the same order as CD1A_cluster_order
g.set(xticklabels=["Het","CD1a-h","Iso"])
plt.xlabel('Type of DC clusters', fontsize=17)
plt.ylabel('%PD-L1+ on CD1a+ cells', fontsize=17)
g.savefig('C:/Data/DCs_results/%PD-L1on_CD1A.tiff', format='tiff', dpi=600)
plt.show()

# Statistics
# To perform paired tests, first rearrange the table to one row per specimen and one
# column per cluster type; dropna() keeps only specimens that have a value for every type.
df_pair2 = dfRatio_CD1A.pivot_table(index=["Pt"], columns=["c_Type"]).dropna().reset_index()
# Paired Wilcoxon signed-rank test for each pair of cluster types (p-value only).
# The "cDC1h" in the variable names below is historical; the comparisons are on CD1a-h.
p2_Het_vs_cDC1h = wilcoxon(df_pair2[('%PD-L1 CD1A', 'Het')], df_pair2[('%PD-L1 CD1A', 'CD1a-h')]).pvalue
p2_Het_vs_Iso = wilcoxon(df_pair2[('%PD-L1 CD1A', 'Het')], df_pair2[('%PD-L1 CD1A', 'iso')]).pvalue
p2_cDC1h_vs_Iso = wilcoxon(df_pair2[('%PD-L1 CD1A', 'CD1a-h')], df_pair2[('%PD-L1 CD1A', 'iso')]).pvalue
# One-row table of the three p-values
p2_PDL1_cDC1 = pd.DataFrame({"Het vs CD1a-h":p2_Het_vs_cDC1h, "Het vs Iso":p2_Het_vs_Iso,
                       "CD1a-h vs Iso":[p2_cDC1h_vs_Iso]})
p2_PDL1_cDC1.to_csv("C:/Data/DCs_results/pvalue_PD-L1 on CD1A.txt")