### Some small additions to the dataframe
For ease of plotting in 2b

In [1]:
import copy
import os
import re

import colorcet as cc
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import sklearn
from anndata import AnnData
from matplotlib.ticker import LogLocator, ScalarFormatter, NullFormatter
from scipy import stats
from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.stats import chi2_contingency
from scipy.stats import ttest_ind

from functions import map_scatter, glasbey

In [2]:
# read in dimred_clstr data
data_path = os.path.join('..', 'outputs', 'dimred_clstr.csv')

# Fail fast: every later cell needs `df`, so a missing input must stop the run
# here with a clear message instead of surfacing later as a NameError on `df`.
if not os.path.exists(data_path):
    raise FileNotFoundError(f"File not found: {data_path}.\nCheck data path.")

df = pd.read_csv(data_path)

In [3]:
# Columns that describe a cell rather than hold a marker measurement.
metadata = [
    'Image', 'Object ID', 'Classification', 'Parent',
    'Centroid X µm', 'Centroid Y µm',
    'UMAP1', 'UMAP2', 'PC1', 'PC2', 'Cluster_UMAP',
]

# Every remaining column is treated as a marker, kept in file order.
non_marker_columns = set(metadata)
markers = []
for column_name in df.columns:
    if column_name not in non_marker_columns:
        markers.append(column_name)
print(markers)

['DAPI', 'CD44', 'HLA-DR', 'CD4', 'IFNG', 'Ki67', 'CD107a', 'CD45', 'CD20', 'CD40', 'CD8', 'Pan-Cytokeratin', 'CD68', 'HLA-A', 'CD79a', 'CD45RO', 'CD21', 'CD11c', 'HLA-E', 'IDO1', 'CD14', 'CD56', 'VISTA', 'FOXP3', 'Granzyme B', 'PCNA', 'T-bet/TBX21', 'PD-L1', 'TOX', 'PD-1', 'CD38', 'ICOS', 'CD39', 'LAG3', 'TCF-1', 'CD3e']


### Hierarchical Clustering
The classification hierarchy below is based on Figure S1C of [this paper](https://pubmed.ncbi.nlm.nih.gov/37059105/).

In [4]:
# level 1 classification
def classify_cells(row):
    """Level 1 classification: 'Epithelial', 'Immune' or 'Other'.

    Looks for 'Pan-Cytokeratin' and 'CD45' in row['Classification'].
    If only one is present it decides the label; if both are present the
    larger of row['Pan-Cytokeratin'] and row['CD45'] wins (ties go to
    'Immune'); if neither is present the cell is 'Other'.
    """
    label = row['Classification']
    has_cytokeratin = 'Pan-Cytokeratin' in label
    # NOTE(review): this is a substring test, so a label containing 'CD45RO'
    # also counts as 'CD45' -- confirm that this is intended.
    has_cd45 = 'CD45' in label

    if has_cytokeratin and has_cd45:
        # Not exclusively one or the other: let the marker values decide.
        return 'Epithelial' if row['Pan-Cytokeratin'] > row['CD45'] else 'Immune'
    if has_cytokeratin:
        return 'Epithelial'
    if has_cd45:
        return 'Immune'
    return 'Other'

# level 2 classification
def classify_level2(row):
    """Level 2 classification: split 'Immune' cells into 'Lymphoid' / 'Myeloid'.

    A cell is flagged lymphoid if row['Classification'] contains CD3e, CD20
    or CD21, and myeloid if it contains CD68, CD11c, CD107a, CD56 or
    Granzyme B. When both flags are set, the group whose strongest marker
    value is larger wins (ties go to 'Myeloid'). Cells whose 'Level 1' is not
    'Immune', or that carry neither flag, are 'Other'.
    """
    lymphoid_markers = ('CD3e', 'CD20', 'CD21')
    myeloid_markers = ('CD68', 'CD11c', 'CD107a', 'CD56', 'Granzyme B')

    label = row['Classification']
    is_lymphoid = any(marker in label for marker in lymphoid_markers)
    is_myeloid = any(marker in label for marker in myeloid_markers)

    if row['Level 1'] != 'Immune':
        return 'Other'

    if is_lymphoid and is_myeloid:
        # Not exclusively one or the other: compare the strongest marker per group.
        lymphoid_peak = max(row[marker] for marker in lymphoid_markers)
        myeloid_peak = max(row[marker] for marker in myeloid_markers)
        return 'Lymphoid' if lymphoid_peak > myeloid_peak else 'Myeloid'
    if is_lymphoid:
        return 'Lymphoid'
    if is_myeloid:
        return 'Myeloid'
    return 'Other'

# level 3 classification
def _has_marker(classification, marker):
    """Return True if `marker` occurs in `classification` as a whole marker name.

    A plain substring test is not enough for this panel: 'CD4' is a prefix of
    'CD44', 'CD45', 'CD40' and 'CD45RO'. The marker must therefore not be
    directly preceded or followed by a letter or digit. This does not depend
    on which separator joins the marker names. Non-string values (e.g. NaN)
    contain no markers.
    """
    if not isinstance(classification, str):
        return False
    pattern = r'(?<![A-Za-z0-9])' + re.escape(marker) + r'(?![A-Za-z0-9])'
    return re.search(pattern, classification) is not None


def classify_level3(row):
    """Level 3 classification: cell type within the lymphoid / myeloid groups.

    Lymphoid cells (row['Level 2'] == 'Lymphoid'):
        'Cyt T' if the classification has CD3e and CD8, 'Helper T' if it has
        CD3e and CD4. If both apply, the larger of row['CD8'] and row['CD4']
        wins (ties go to 'Helper T').

    Myeloid cells (row['Level 2'] == 'Myeloid'):
        'Macrophage' (CD68), 'Dendritic' (CD11c) or 'NK Cell' (CD107a, CD56
        and Granzyme B all present). If several apply, the one with the
        strongest signal wins; the NK signal is the maximum of its three
        markers. On ties the later entry in the order Macrophage, Dendritic,
        NK Cell wins.

    Everything else is 'Other'.
    """
    s = row['Classification']
    level2 = row['Level 2']

    if level2 == 'Lymphoid':
        has_cd3e = _has_marker(s, 'CD3e')
        cytotoxic = has_cd3e and _has_marker(s, 'CD8')
        helper = has_cd3e and _has_marker(s, 'CD4')
        if cytotoxic and helper:
            # Not exclusively one or the other: let the marker values decide.
            return 'Cyt T' if row['CD8'] > row['CD4'] else 'Helper T'
        if cytotoxic:
            return 'Cyt T'
        if helper:
            return 'Helper T'
        return 'Other'

    if level2 == 'Myeloid':
        # Collect every cell type whose markers are present, with its signal.
        candidates = []
        if _has_marker(s, 'CD68'):
            candidates.append(('Macrophage', row['CD68']))
        if _has_marker(s, 'CD11c'):
            candidates.append(('Dendritic', row['CD11c']))
        if all(_has_marker(s, marker) for marker in ('CD107a', 'CD56', 'Granzyme B')):
            nk_signal = max(row['CD107a'], row['CD56'], row['Granzyme B'])
            candidates.append(('NK Cell', nk_signal))
        if not candidates:
            return 'Other'

        # An earlier candidate is kept only if it is strictly stronger than the
        # next one, which also covers the case where all three types apply.
        label, signal = candidates[0]
        for next_label, next_signal in candidates[1:]:
            if not (signal > next_signal):
                label, signal = next_label, next_signal
        return label

    return 'Other'
    
# level 4 classification
def classify_level4(row):
    """Level 4 classification: T-cell states within 'Cyt T' and 'Helper T'.

    Cyt T cells (row['Level 3'] == 'Cyt T'):
        'Cyt T Precursor' (TCF-1), 'Tc' (PD-1) or 'Exhausted Tc' (TOX and
        CD39 both present in the classification). The exhausted signal is the
        larger of row['TOX'] and row['CD39'].

    Helper T cells (row['Level 3'] == 'Helper T'):
        'Helper T Precursor' (TCF-1) or 'T reg' (FOXP3).

    When several states apply, the one with the strongest signal wins,
    including the case where all three Cyt T states apply. On ties the later
    state in the order listed above wins. Everything else is 'Other'.
    """
    s = row['Classification']
    level3 = row['Level 3']

    # Collect every state whose markers are present, with its signal.
    candidates = []
    if level3 == 'Cyt T':
        if 'TCF-1' in s:
            candidates.append(('Cyt T Precursor', row['TCF-1']))
        if 'PD-1' in s:
            candidates.append(('Tc', row['PD-1']))
        if ('TOX' in s) and ('CD39' in s):
            candidates.append(('Exhausted Tc', max(row['TOX'], row['CD39'])))
    elif level3 == 'Helper T':
        if 'TCF-1' in s:
            candidates.append(('Helper T Precursor', row['TCF-1']))
        if 'FOXP3' in s:
            candidates.append(('T reg', row['FOXP3']))
    else:
        return 'Other'

    if not candidates:
        return 'Other'

    # An earlier candidate is kept only if it is strictly stronger than the
    # next one, so ties resolve to the later state.
    label, signal = candidates[0]
    for next_label, next_signal in candidates[1:]:
        if not (signal > next_signal):
            label, signal = next_label, next_signal
    return label

In [5]:
# Each classifier reads the column written by the one before it
# (level 2 needs 'Level 1', and so on), so the order below matters.
level_classifiers = (
    ('Level 1', classify_cells),
    ('Level 2', classify_level2),
    ('Level 3', classify_level3),
    ('Level 4', classify_level4),
)
for level_name, classifier in level_classifiers:
    df[level_name] = df.apply(classifier, axis=1)

In [6]:
# Register the new annotation columns as metadata. Only names that are not
# already listed are added, so re-running this cell does not create duplicates.
level_columns = ['Level 1', 'Level 2', 'Level 3', 'Level 4']
metadata.extend([column for column in level_columns if column not in metadata])
print(metadata)

['Image', 'Object ID', 'Classification', 'Parent', 'Centroid X µm', 'Centroid Y µm', 'UMAP1', 'UMAP2', 'PC1', 'PC2', 'Cluster_UMAP', 'Level 1', 'Level 2', 'Level 3', 'Level 4']


In [7]:
def determine_phenotype(row):
    """Assign the most granular phenotype possible.

    Walks the hierarchy from 'Level 4' down to 'Level 1' and returns the
    first label that is not 'Other'; returns 'Other' if every level is
    'Other'.
    """
    for level in ('Level 4', 'Level 3', 'Level 2', 'Level 1'):
        label = row[level]
        if label != 'Other':
            return label
    return 'Other'

In [8]:
# Collapse the four hierarchy levels into a single label per cell,
# then list the distinct labels that occur in the data.
phenotype_labels = df.apply(determine_phenotype, axis=1)
df['Phenotype'] = phenotype_labels
print(df['Phenotype'].unique())

['Other' 'Helper T' 'Macrophage' 'Immune' 'T reg' 'Myeloid'
 'Cyt T Precursor' 'Cyt T' 'Lymphoid' 'Helper T Precursor' 'Dendritic'
 'Tc' 'NK Cell' 'Epithelial' 'Exhausted Tc']


In [9]:
# make the 'Timepoint' column for ease of comparison
# Any 'Parent' value listed here is labelled 'DLN pre'; every other value is 'DLN post'.
pre_timepoints = [
    'Slide 11 B1', 'Slide 11 A1', 'Slide 11 A1-1',
    'Slide 9 A1', 'Slide 9 B2', 'Slide 9 A1-1',
]
is_pre = df['Parent'].isin(pre_timepoints)
df['Timepoint'] = is_pre.map({True: 'DLN pre', False: 'DLN post'})
df.head()

Unnamed: 0,Image,Object ID,Classification,Parent,Centroid X µm,Centroid Y µm,DAPI,CD44,HLA-DR,CD4,...,UMAP2,PC1,PC2,Cluster_UMAP,Level 1,Level 2,Level 3,Level 4,Phenotype,Timepoint
0,Slide10_Scan1.ome.tif,f5505ea9-ddff-4fea-90dd-a425402a544d,Other/NA,Slide 10 C1-1,4004.2,1674.9,-0.443124,-1.278166,-0.611227,-0.836912,...,6.757998,-5.286668,-0.298409,32,Other,Other,Other,Other,Other,DLN post
1,Slide10_Scan1.ome.tif,4b313e44-f97c-43e8-99a7-31b9ab2a45c0,Other/NA,Slide 10 C1-1,4055.9,1675.2,-1.365461,-1.426814,-0.96625,-1.498882,...,6.541201,-5.876908,-0.926938,80,Other,Other,Other,Other,Other,DLN post
2,Slide10_Scan1.ome.tif,e0673808-3a80-4490-b565-2fa9b3425738,CD56,Slide 10 C1-1,4070.8,1675.8,-0.981877,-1.412553,-0.762933,-1.413784,...,7.164611,-4.714929,-0.376258,80,Other,Other,Other,Other,Other,DLN post
3,Slide10_Scan1.ome.tif,e2036b6c-b8ba-42fc-b05c-1e0c8c853192,Other/NA,Slide 10 C1-1,3904.7,1677.1,-0.554083,-1.372611,0.236735,-0.79335,...,6.723613,-4.541659,0.661836,32,Other,Other,Other,Other,Other,DLN post
4,Slide10_Scan1.ome.tif,5afebba8-c97a-4d69-b9e2-395d49cebd90,Other/NA,Slide 10 C1-1,3921.0,1676.8,-0.943903,-1.384704,-0.198105,-1.196011,...,6.659215,-5.077836,-0.181445,32,Other,Other,Other,Other,Other,DLN post


In [10]:
# Write the annotated table to the outputs folder; the row index is not saved.
sp = os.path.join('..', 'outputs', 'data.csv')
df.to_csv(path_or_buf=sp, index=False)