### Some small additions to the dataframe
For ease of plotting in 2b

In [1]:
from functions import map_scatter, glasbey
from matplotlib.ticker import LogLocator, ScalarFormatter, NullFormatter
from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.stats import chi2_contingency
from scipy.stats import ttest_ind
from scipy import stats
import sklearn
import matplotlib.pyplot as plt
import matplotlib as mpl
from anndata import AnnData
import pandas as pd
import numpy as np
import seaborn as sns
import colorcet as cc
import scanpy as sc
import copy
import os

In [2]:
# read in dimred_clstr data
data_path = os.path.join('..', 'outputs', 'dimred_clstr.csv')

if os.path.exists(data_path):
    df = pd.read_csv(data_path)
else:
    print(f"File not found: {data_path}.\nCheck data path.")

In [3]:
metadata = ['Image', 'Object ID', 'Classification', 'Parent', 'Centroid X µm', 'Centroid Y µm', 'UMAP1', 'UMAP2', 'PC1', 'PC2', 'Cluster_UMAP']
markers = [col for col in df.columns if col not in metadata]
print(markers)

['DAPI', 'CD44', 'HLA-DR', 'CD4', 'IFNG', 'Ki67', 'CD107a', 'CD45', 'CD20', 'CD40', 'CD8', 'Pan-Cytokeratin', 'CD68', 'HLA-A', 'CD79a', 'CD45RO', 'CD21', 'CD11c', 'HLA-E', 'IDO1', 'CD14', 'CD56', 'VISTA', 'FOXP3', 'Granzyme B', 'PCNA', 'T-bet/TBX21', 'PD-L1', 'TOX', 'PD-1', 'CD38', 'ICOS', 'CD39', 'LAG3', 'TCF-1', 'CD3e']


### Hierarchical Clustering
Based on Figure S1C of [this](https://pubmed.ncbi.nlm.nih.gov/37059105/) paper.

In [4]:
# level 1 classification
def classify_cells(row):
    """ Level 1 classification. """
    s = row['Classification']
    if ('Pan-Cytokeratin' in s) and ('CD45' not in s):
        return 'Epithelial'
    elif ('CD45' in s) and ('Pan-Cytokeratin' not in s):
        return 'Immune'
    else:
        if ('CD45' in s) and ('Pan-Cytokeratin' in s): # not exclusively one or the other
            if row['Pan-Cytokeratin'] > row['CD45']:
                return 'Epithelial'
            else:
                return 'Immune'
        else: # neither
            return 'Other'

# level 2 classification
def classify_level2(row):
    """ Level 2 classification. """
    s = row['Classification']
    lymphoid = ('CD3e' in s) or ('CD20' in s) or ('CD21' in s)
    myeloid = ('CD68' in s) or ('CD14') or ('CD11c' in s) or (('CD107a' in s) and ('CD56' in s)) or ('HLA-DR' in s) or ('HLA-A' in s) or ('VISTA' in s)
    if row['Level 1'] == 'Immune':
        if lymphoid and (not myeloid):
            return 'Lymphoid'
        elif (myeloid) and (not lymphoid):
            return 'Myeloid'
        elif lymphoid and myeloid: # not exclusively one or the other
            l = max(row['CD3e'], row['CD20'], row['CD21'])
            m = max(row['CD68'], row['CD14'], row['CD11c'], row['CD107a'], row['CD56'], row['HLA-DR'], row['HLA-A'], row['VISTA'])
            if l > m:
                return 'Lymphoid'
            else:
                return 'Myeloid'
        else:
            return 'Other'
    else:
        return 'Other'

# level 3 classification
def classify_level3(row):
    """ Level 3 classification. """
    s = row['Classification']
    cytotoxic = ('CD3e' in s) and ('CD8' in s)
    helper = ('CD3e' in s) and ('CD4' in s)
    b = ('CD20' in s) or ('CD21' in s)
    macrophage = 'CD68' in s
    monocyte = 'CD14' in s
    dendritic = 'CD11c' in s
    nk = ('CD107a' in s) and ('CD56' in s)
    apc = ('HLA-DR' in s) or ('HLA-A') in s
    chkpt = 'VISTA' in s
    if row['Level 2'] == 'Lymphoid':
        if cytotoxic and (not helper or b):
            return 'Cyt T'
        elif helper and (not cytotoxic or b):
            return 'Helper T'
        elif b and (not helper or cytotoxic):
            return 'B Cell'
        elif helper or cytotoxic or b: # not one, but not none
            max_val = row[['CD3e', 'CD8', 'CD4', 'CD20', 'CD21']].idxmax()
            if (max_val == 'CD3e'):
                # find the max between CD8 and CD4
                sub_max = row[['CD8', 'CD4']].idxmax()
                if sub_max == 'CD8':
                    return 'Cyt T'
                else:
                    return 'Helper T'
            elif (max_val == 'CD20') or (max_val == 'CD21'):
                return 'B Cell'        
        else: # none
            return 'Other'
    elif row['Level 2'] == 'Myeloid':
        if macrophage and not (monocyte or dendritic or nk or apc or chkpt):
            return 'Macrophage'
        elif dendritic and not (monocyte or macrophage or nk or apc or chkpt):
            return 'Dendritic'
        elif nk and not (macrophage or dendritic or monocyte or apc or chkpt):
            return 'NK Cell'
        elif monocyte and not (macrophage or dendritic or nk or apc or chkpt):
            return 'Monocyte'
        elif apc and not (macrophage or dendritic or monocyte or nk or chkpt):
            return 'Other Ant Pres Cell'
        elif chkpt and not (macrophage or dendritic or monocyte or apc or nk):
            return 'Imm Check+ Myeloid'
        elif macrophage or dendritic or monocyte or nk or apc or chkpt: # not one, but not none
            max_val = row[['CD68', 'CD14', 'CD11c', 'CD107a', 'CD56', 'HLA-DR', 'HLA-A', 'VISTA']].idxmax()
            if max_val == 'CD68':
                return 'Macrophage'
            elif max_val == 'CD14':
                return 'Monocyte'
            elif max_val == 'CD11c':
                return 'Dendritic'
            elif (max_val == 'CD107a') or (max_val == 'CD56'):
                return 'NK Cell' 
            elif (max_val == 'HLA-DR') or (max_val == 'HLA-A'):
                return 'Other Ant Pres Cell' 
            elif max_val == 'VISTA':
                return 'Imm Check+ Myeloid'
        else: # none 
            return 'Other'
    else:
        return 'Other'
    
# level 4 classification
def classify_level4(row):
    """ Level 4 classification. """
    s = row['Classification']
    pre = 'TCF-1' in s
    prolif = ('Ki67' in s) or ('PCNA' in s)
    act_tc = ('Granzyme B' in s) or ('CD44' in s) or ('IFNG' in s)
    tc = 'PD-1' in s
    early_ex_tc = ('TOX' in s)
    ex_tc = ('CD38' in s) or ('LAG3' in s)

    treg = 'FOXP3' in s
    act_th = ('ICOS' in s) or ('IFNG' in s)
    mem_th = 'CD45RO' in s
    th1 = 'T-bet/TBX21' in s

    plasma = 'CD38' in s
    act_b = 'HLA-E' in s
    b_ant = 'CD79a' in s

    act_mac = 'IDO1' in s
    apc = ('HLA-DR' in s) or ('HLA-A' in s)
    inhib = 'VISTA' in s
    act_den = 'CD40' in s
    act_nk = 'Granzyme B' in s
    ifg_nk = 'IFNG' in s
    
    if row['Level 3'] == 'Cyt T':
        if pre and not (prolif or act_tc or tc or early_ex_tc or ex_tc):
            return 'Cyt T Pre'
        elif prolif and not (pre or act_tc or tc or early_ex_tc or ex_tc):
            return 'Prolif Cyt T'
        elif act_tc and not (prolif or pre or tc or early_ex_tc or ex_tc):
            return 'Act Cyt T'
        elif tc and not (prolif or act_tc or pre or early_ex_tc or ex_tc):
            return 'PD-1+ Tc'
        elif early_ex_tc and not (prolif or act_tc or tc or ex_tc or pre):
            return 'Early Ex Tc'
        elif ex_tc and not (prolif or act_tc or tc or early_ex_tc or pre):
            return 'Ex Tc'
        elif pre or prolif or act_tc or tc or early_ex_tc or ex_tc: # not one, but not none
            max_val = row[['TCF-1', 'Ki67', 'PCNA', 'Granzyme B', 'CD44', 'IFNG', 'PD-1', 'TOX', 'CD38', 'LAG3']].idxmax()
            if max_val == 'TCF-1':
                return 'Cyt T Pre'
            elif (max_val == 'Ki67') or (max_val == 'PCNA'):
                return 'Prolif Cyt T'
            elif (max_val == 'Granzyme B') or (max_val == 'CD44') or (max_val == 'IFNG'):
                return 'Act Cyt T'
            elif (max_val == 'PD-1'):
                return 'PD-1+ Tc'
            elif (max_val == 'TOX'):
                return 'Early Ex Tc'
            else:
                return 'Ex Tc'
        else: # none
            return 'Other'
    elif row['Level 3'] == 'Helper T':
        if pre and not (treg or prolif or act_th or mem_th or th1):
            return 'Helper T Pre'
        elif treg and not (pre or prolif or act_th or mem_th or th1):
            return 'T reg'
        elif prolif and not (treg or pre or act_th or mem_th or th1):
            return 'Prolif Cyt T'
        elif act_th and not (treg or prolif or pre or mem_th or th1):
            return 'Act Helper T'
        elif mem_th and not (treg or prolif or act_th or pre or th1):
            return 'Mem Helper T'
        elif th1 and not (treg or prolif or act_th or mem_th or pre):
            return 'Th1 Helper T'
        elif pre or treg or prolif or act_th or mem_th or th1: # not one but not none
            max_val = row[['TCF-1', 'FOXP3', 'Ki67', 'PCNA', 'ICOS', 'IFNG', 'CD45RO', 'T-bet/TBX21']].idxmax()
            if max_val == 'TCF-1':
                return 'Helper T Pre'
            elif max_val == 'FOXP3':
                return 'T reg'
            elif (max_val == 'Ki67') or (max_val == 'PCNA'):
                return 'Prolif Cyt T'
            elif (max_val == 'ICOS') or (max_val == 'IFNG'):
                return 'Act Helper T'
            elif max_val == 'CD45RO':
                return 'Mem Helper T'
            elif max_val == 'T-bet/TBX21':
                return 'Th1 Helper T'
        else: # none
            return 'Other'
        
    elif row['Level 3'] == 'B Cell':
        if prolif and not (plasma or act_b or b_ant):
            return 'Prolif B'
        elif plasma and not (prolif or act_b or b_ant):
            return 'Plasma B'
        elif act_b and not (prolif or plasma or b_ant):
            return 'Act B'
        elif b_ant and not (prolif or plasma or act_b):
            return 'B Ant Rec'
        elif prolif or plasma or act_b or b_ant: # not one, but not none
            max_val = row[['Ki67', 'PCNA', 'CD38', 'HLA-E', 'CD79a']].idxmax()
            if (max_val == 'Ki67') or (max_val == 'PCNA'):
                return 'Prolif B'
            elif max_val == 'CD38':
                return 'Plasma B'
            elif max_val == 'HLA-E':
                return 'Act B'
            else:
                return 'B Ant Rec'
        else: # none
            return 'Other'
        
    elif row['Level 3'] == 'Macrophage':
        if act_mac and not apc:
            return 'Act Macrophage'
        elif apc and not act_mac:
            return 'Ant Pres Macrophage'
        elif apc and act_mac: # not one or the other exclusively
            max_val = row[['IDO1', 'HLA-DR', 'HLA-A']].idxmax()
            if max_val == 'IDO1':
                return 'Act Macrophage'
            else:
                return 'Ant Pres Macrophage'
        else: # neither
            return 'Other'
        
    elif row['Level 3'] == 'Monocyte':
        if inhib and not apc:
            return 'Inhib Monocyte'
        elif apc and not inhib:
            return 'Ant Pres Macrophage'
        elif apc and inhib: # not one or the other exclusively
            max_val = row[['VISTA', 'HLA-DR', 'HLA-A']].idxmax()
            if max_val == 'VISTA':
                return 'Inhib Monocyte'
            else:
                return 'Ant Pres Monocyte'
        else: # neither
            return 'Other'
        
    elif row['Level 3'] == 'Dendritic':
        if act_den and not apc:
            return 'Act Dendritic'
        elif apc and not act_den:
            return 'Ant Pres Dendritic'
        elif apc and act_den: # not one or the other exclusively
            max_val = row[['CD40', 'HLA-DR', 'HLA-A']].idxmax()
            if max_val == 'CD40':
                return 'Act Dendritic'
            else:
                return 'Ant Pres Dendritic'
        else: # neither
            return 'Other'
        
    elif row['Level 3'] == 'NK Cell':
        if act_nk and not ifg_nk:
            return 'Act NK'
        elif ifg_nk and not act_nk:
            return 'IFNG Sec NK'
        elif ifg_nk and act_nk: # not one or the other exclusively
            if row['CD40'] > row['IFNG']:
                return 'Act NK'
            else:
                return 'IFNG Sec NK'
        else: # neither
            return 'Other'
    else:
        return 'Other'

In [5]:
df['Level 1'] = df.apply(classify_cells, axis=1)
df['Level 2'] = df.apply(classify_level2, axis=1)
df['Level 3'] = df.apply(classify_level3, axis=1)
df['Level 4'] = df.apply(classify_level4, axis=1)

In [6]:
metadata.extend(['Level 1', 'Level 2', 'Level 3', 'Level 4'])
print(metadata)

['Image', 'Object ID', 'Classification', 'Parent', 'Centroid X µm', 'Centroid Y µm', 'UMAP1', 'UMAP2', 'PC1', 'PC2', 'Cluster_UMAP', 'Level 1', 'Level 2', 'Level 3', 'Level 4']


In [7]:
def determine_phenotype(row):
  """ Assign the most granular phenotype possible. """
  if row['Level 4'] != 'Other':
    return row['Level 4']
  elif row['Level 3'] != 'Other':
    return row['Level 3']
  elif row['Level 2'] != 'Other':
    return row['Level 2']
  elif row['Level 1'] != 'Other':
    return row['Level 1']
  return 'Other'  # default value if all levels are 'Other'

In [8]:
df['Phenotype'] = df.apply(determine_phenotype, axis=1)
print(df['Phenotype'].unique())

['Other' 'Helper T' 'Macrophage' 'Immune' 'T reg' 'Myeloid'
 'Cyt T Precursor' 'Cyt T' 'Lymphoid' 'Helper T Precursor' 'Dendritic'
 'Tc' 'NK Cell' 'Epithelial' 'Exhausted Tc']


In [9]:
# make the 'Timepoint' column for ease of comparison
pre_timepoints = ['Slide 11 B1', 'Slide 11 A1', 'Slide 11 A1-1', 'Slide 9 A1', 'Slide 9 B2', 'Slide 9 A1-1']
df['Timepoint'] = df['Parent'].apply(lambda x: 'DLN pre' if x in pre_timepoints else 'DLN post')
df.head()

Unnamed: 0,Image,Object ID,Classification,Parent,Centroid X µm,Centroid Y µm,DAPI,CD44,HLA-DR,CD4,...,UMAP2,PC1,PC2,Cluster_UMAP,Level 1,Level 2,Level 3,Level 4,Phenotype,Timepoint
0,Slide10_Scan1.ome.tif,f5505ea9-ddff-4fea-90dd-a425402a544d,Other/NA,Slide 10 C1-1,4004.2,1674.9,-0.443124,-1.278166,-0.611227,-0.836912,...,6.757998,-5.286668,-0.298409,32,Other,Other,Other,Other,Other,DLN post
1,Slide10_Scan1.ome.tif,4b313e44-f97c-43e8-99a7-31b9ab2a45c0,Other/NA,Slide 10 C1-1,4055.9,1675.2,-1.365461,-1.426814,-0.96625,-1.498882,...,6.541201,-5.876908,-0.926938,80,Other,Other,Other,Other,Other,DLN post
2,Slide10_Scan1.ome.tif,e0673808-3a80-4490-b565-2fa9b3425738,CD56,Slide 10 C1-1,4070.8,1675.8,-0.981877,-1.412553,-0.762933,-1.413784,...,7.164611,-4.714929,-0.376258,80,Other,Other,Other,Other,Other,DLN post
3,Slide10_Scan1.ome.tif,e2036b6c-b8ba-42fc-b05c-1e0c8c853192,Other/NA,Slide 10 C1-1,3904.7,1677.1,-0.554083,-1.372611,0.236735,-0.79335,...,6.723613,-4.541659,0.661836,32,Other,Other,Other,Other,Other,DLN post
4,Slide10_Scan1.ome.tif,5afebba8-c97a-4d69-b9e2-395d49cebd90,Other/NA,Slide 10 C1-1,3921.0,1676.8,-0.943903,-1.384704,-0.198105,-1.196011,...,6.659215,-5.077836,-0.181445,32,Other,Other,Other,Other,Other,DLN post


In [10]:
sp = os.path.join('..', 'outputs', 'data.csv')
df.to_csv(sp, index=False)