# Define Shared and Unique Status for Clonotypes and Assign Degree of Expansion
- Here we provide the code necessary to assign "shared" and "unique" status to each clonotype
- We also provide the source code for assigning degrees of expansion to clonotypes. We define a function "get_tertile_bins" which separates clonotypes into low, medium, and high degrees of expansion, with bin edges taken at the one-third and two-thirds positions of the sorted unique per-clonotype cell counts.
- We have provided the code necessary for working with cells within the lung, but the same code can be adapted for cells within the liver. The location of a cell is given by "final_filtered_object.obs['organ']", which is either "Lung" or "Liver".
- This code provides the data to recreate Figure 6A, 6B, and S5B.
- This code is also used to prepare the subset lists for the recreation of Figure 5L and Figure S5A. The methods for calculating the Pearson coefficients for these data are found in the notebook "06_Pearson_Correlation_Calculations".

In [None]:
import numpy as np #v1.26.4
import pandas as pd #v2.2.0
import anndata #v0.10.5.post1
import scanpy as sc #v1.9.8
import pydeseq2 #v0.4.10
import decoupler as dc #v1.6.0
import random  

# One fixed seed feeds both global random number generators used in this notebook.
c_iSeed = 6161904
random.seed(c_iSeed)     # Python's built-in `random` module
np.random.seed(c_iSeed)  # NumPy's global (legacy) generator

In [None]:
# Import the AnnData object (HDF5 / h5ad) that every cell below reads from.
# NOTE: `___` is a fill-in placeholder, not a defined variable; the line raises
# NameError until it is replaced with the path to the file named in the comment.
final_filtered_object = anndata.read_h5ad(filename=___) #Replace ___ with path to file "02_final_filtered_object.hdf5"

In [None]:
# Find Shared Clonotypes
# A clonotype is "shared" when it has at least one cell in the lung AND at
# least one cell in the liver.
#
# Cells are counted per (clonotype, organ) and spread into one column per organ.
# `observed=True` counts only combinations that actually occur (and avoids the
# deprecated implicit default for categorical keys); `fill_value=0` then writes
# an explicit 0 for an organ in which a clonotype has no cells.
shared_clonotypes = (
    final_filtered_object.obs
    .groupby(["clonotype_id", "organ"], observed=True)
    .size()
    .unstack("organ", fill_value=0)
    .query('(Lung>=1 and Liver>=1)')
    .index.tolist()
)

In [None]:
# Find Unique Lung Clonotypes
# A clonotype is "unique to the lung" when it has at least one lung cell and no
# liver cells. The result is kept as a DataFrame (index: clonotype_id, one
# cell-count column per organ).
#
# `fill_value=0` matters here: the `Liver==0` test can only match if a missing
# (clonotype, Liver) combination is stored as 0 rather than NaN. Filling
# explicitly makes that independent of the column dtype and of the pandas
# default for `observed`.
unique_lung_clonotypes = (
    final_filtered_object.obs
    .groupby(["clonotype_id", "organ"], observed=True)
    .size()
    .unstack("organ", fill_value=0)
    .query('(Lung>=1 and Liver==0)')
)

In [None]:
# Generate Clonotype Tertiles - Lung (Figure 5A)
# This cell covers every expanded lung clonotype; no shared/unique filter is
# applied here.
# NOTE(review): the notebook header cites Figure 6A/6B for this code - confirm
# the figure number.

# "Unexpanded" clonotypes: exactly one lung cell and no liver cells.
# fill_value=0 guarantees that a clonotype absent from the liver shows up as 0
# (not NaN), so the `Liver==0` test matches regardless of column dtype.
unexpanded_lung = (
    final_filtered_object.obs
    .groupby(["clonotype_id", "organ"], observed=True)
    .size()
    .unstack("organ", fill_value=0)
    .query('(Lung==1 & Liver==0)')
    .index.tolist()
)

clonotype_table = final_filtered_object.obs.copy()
# Not read again in the cells visible here; kept in case other cells use it.
origin_list = pd.unique(clonotype_table['sample']).tolist()
tertile_list = pd.DataFrame()

# Lung cells only, minus the unexpanded clonotypes. The trailing .copy() makes
# Primary_Table an independent frame so the new column below is not written
# into a slice of clonotype_table (SettingWithCopyWarning).
Primary_Table = clonotype_table[clonotype_table['organ'] == 'Lung']  # Subset by organ
Primary_Table = Primary_Table[~Primary_Table['clonotype_id'].isin(unexpanded_lung)].copy()  # Remove unexpanded lung clonotypes

# Number of lung cells carrying each clonotype, repeated on every cell row.
Primary_Table['Number_of_Clones'] = (
    Primary_Table.groupby('clonotype_id', observed=True)['clonotype_id'].transform('size')
)

# Collapse to one row per clonotype (the first cell encountered is kept).
dfTertile_Table = (
    Primary_Table[['clonotype_id', 'Number_of_Clones', 'animal_id']]
    .drop_duplicates(subset='clonotype_id', keep='first')
    .copy()
)
    
def get_tertile_bins(values):
    """Return four bin edges that split *values* into three bins.

    The distinct non-missing values are sorted, and the entries found at the
    one-third and two-thirds positions of that sorted list become the two
    interior edges. The split is therefore made over distinct values, not
    over individual observations.

    Parameters
    ----------
    values : pandas.Series
        Values to derive the edges from (here: cells per clonotype).

    Returns
    -------
    list
        ``[-inf, lower_edge, upper_edge, inf]``.

    Raises
    ------
    ValueError
        If *values* contains no non-missing entries.

    Notes
    -----
    With fewer than three distinct values the lower edge is the minimum
    itself; with a single distinct value both interior edges coincide, and
    ``pd.cut`` rejects duplicate edges by default.
    """
    # Missing values cannot be ordered reliably, so they are dropped first.
    distinct_sorted = sorted(values.dropna().unique())
    if not distinct_sorted:
        raise ValueError("get_tertile_bins() needs at least one non-missing value")

    n_distinct = len(distinct_sorted)
    lower_edge = distinct_sorted[n_distinct // 3]
    upper_edge = distinct_sorted[(2 * n_distinct) // 3]

    return [float('-inf'), lower_edge, upper_edge, float('inf')]
      
# Assign every expanded lung clonotype to a tertile by its lung cell count.
# right=False gives the half-open bins [-inf, e1), [e1, e2), [e2, inf).
bin_edges = get_tertile_bins(dfTertile_Table['Number_of_Clones'])
dfTertile_Table = dfTertile_Table.assign(
    Tertile=pd.cut(
        dfTertile_Table['Number_of_Clones'],
        bins=bin_edges,
        labels=["Tertile_1", "Tertile_2", "Tertile_3"],
        right=False,
        include_lowest=True,
    ).astype(str)
)
tertile_list = dfTertile_Table.reset_index(drop=True)

# Clonotype ids belonging to each tertile.
tertile_1_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_1", 'clonotype_id'].tolist()
tertile_2_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_2", 'clonotype_id'].tolist()
tertile_3_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_3", 'clonotype_id'].tolist()

# Generate Clustering Object for Data Visualization

# Subsetting an AnnData object yields a view; .copy() turns it into a real
# object so that .obs can be written below without an implicit copy.
clustering_object = final_filtered_object[final_filtered_object.obs['organ'] == 'Lung'].copy()

conditions = [
    clustering_object.obs['clonotype_id'].isin(unexpanded_lung),
    clustering_object.obs['clonotype_id'].isin(tertile_1_lung),
    clustering_object.obs['clonotype_id'].isin(tertile_2_lung),
    clustering_object.obs['clonotype_id'].isin(tertile_3_lung),
]

# NOTE(review): ' Unexpanded_Lung' carries a leading space - presumably so it
# sorts ahead of the tertile labels; confirm before normalising it.
choices = [' Unexpanded_Lung', 'Lung_Tertile_1', 'Lung_Tertile_2', 'Lung_Tertile_3']

# Cells matching none of the conditions get the sentinel '0' and are dropped.
# The default is spelled out as a string: relying on the implicit integer 0
# being promoted to '0' is not portable to newer NumPy releases.
clustering_object.obs['General_Tertile'] = np.select(conditions, choices, default='0')
clustering_object = clustering_object[clustering_object.obs['General_Tertile'] != '0'].copy()

# Display names for the scVI cluster ids.
# NOTE(review): label 7 reads 'bCycling' - possibly a typo; left unchanged
# because it is a runtime label.
cluster_labels = {
    '0':'0: CD8 Tem - GZMA',
    '1':'1: CD8 Teff.1 - CX3CR1',
    '2':'2: CD4 Tcm/Tscm - LEF1',
    '3':'3: CD8 Cycling T - CDCA3/5',
    '4':'4: CD8 Cycling T - PCLAF',
    '5':'5: CD8 Tem - EOMES',
    '6':'6: ab/gd T - CDCA3/5',
    '7':'7: DN bCycling T - CIT',
    '8':'8: CD8 Tc17 - CCR6',
    '9':'9: CD4 Tfh.1 - IL21',
    '10':'10: CD8 Tem - TMIGD2',
    '11':'11: CD8 Tem - TNFRSF9',
    '12':'12: CD8 Tcm/Tscm - LEF1',
    '13':'13: CD8 Teff.2 - CX3CR1',
    '14':'14: CD8 Tem - SLC7A11',
    '15':'15: CD4 Tfh.2 - IL21'
}

clustering_object.obs['scVI_cluster_labels'] = clustering_object.obs['scVI_clusters'].map(cluster_labels)

In [None]:
# Generate Shared Clonotype Lung Tertile - Figure S5C
# Builds one row per shared clonotype with its number of lung cells.

clonotype_table = final_filtered_object.obs.copy()
# Not read again in the cells visible here; kept in case other cells use it.
origin_list = pd.unique(clonotype_table['sample']).tolist()
tertile_list = pd.DataFrame()

# Lung cells of shared clonotypes. The trailing .copy() makes Primary_Table an
# independent frame so the new column below is not written into a slice
# (SettingWithCopyWarning).
Primary_Table = clonotype_table[clonotype_table['clonotype_id'].isin(shared_clonotypes)]  # Subset by shared status
Primary_Table = Primary_Table[Primary_Table['organ'] == 'Lung'].copy()  # Subset by organ

# Number of lung cells carrying each clonotype, repeated on every cell row.
Primary_Table['Number_of_Clones'] = (
    Primary_Table.groupby('clonotype_id', observed=True)['clonotype_id'].transform('size')
)

# Collapse to one row per clonotype (the first cell encountered is kept).
dfTertile_Table = (
    Primary_Table[['clonotype_id', 'Number_of_Clones', 'animal_id']]
    .drop_duplicates(subset='clonotype_id', keep='first')
    .copy()
)
    
def get_tertile_bins(values):
    """Return four bin edges that split *values* into three bins.

    The distinct non-missing values are sorted, and the entries found at the
    one-third and two-thirds positions of that sorted list become the two
    interior edges. The split is therefore made over distinct values, not
    over individual observations.

    Parameters
    ----------
    values : pandas.Series
        Values to derive the edges from (here: cells per clonotype).

    Returns
    -------
    list
        ``[-inf, lower_edge, upper_edge, inf]``.

    Raises
    ------
    ValueError
        If *values* contains no non-missing entries.

    Notes
    -----
    With fewer than three distinct values the lower edge is the minimum
    itself; with a single distinct value both interior edges coincide, and
    ``pd.cut`` rejects duplicate edges by default.
    """
    # Missing values cannot be ordered reliably, so they are dropped first.
    distinct_sorted = sorted(values.dropna().unique())
    if not distinct_sorted:
        raise ValueError("get_tertile_bins() needs at least one non-missing value")

    n_distinct = len(distinct_sorted)
    lower_edge = distinct_sorted[n_distinct // 3]
    upper_edge = distinct_sorted[(2 * n_distinct) // 3]

    return [float('-inf'), lower_edge, upper_edge, float('inf')]

      
# Assign every shared clonotype to a tertile by its lung cell count.
# right=False gives the half-open bins [-inf, e1), [e1, e2), [e2, inf).
bin_edges = get_tertile_bins(dfTertile_Table['Number_of_Clones'])
dfTertile_Table = dfTertile_Table.assign(
    Tertile=pd.cut(
        dfTertile_Table['Number_of_Clones'],
        bins=bin_edges,
        labels=["Tertile_1", "Tertile_2", "Tertile_3"],
        right=False,
        include_lowest=True,
    ).astype(str)
)
tertile_list = dfTertile_Table.reset_index(drop=True)

# Clonotype ids belonging to each tertile.
tertile_1_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_1", 'clonotype_id'].tolist()
tertile_2_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_2", 'clonotype_id'].tolist()
tertile_3_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_3", 'clonotype_id'].tolist()

# Generate Clustering Object for Data Visualization

# Lung cells of shared clonotypes. Subsetting an AnnData object yields a view;
# .copy() turns it into a real object so .obs can be written below without an
# implicit copy.
clustering_object = final_filtered_object[
    final_filtered_object.obs['clonotype_id'].isin(shared_clonotypes)  # Subset by shared clonotypes
    & (final_filtered_object.obs['organ'] == 'Lung')  # Subset by organ
].copy()

conditions = [
    clustering_object.obs['clonotype_id'].isin(tertile_1_lung),
    clustering_object.obs['clonotype_id'].isin(tertile_2_lung),
    clustering_object.obs['clonotype_id'].isin(tertile_3_lung),
]

choices = ['Lung_Tertile_1', 'Lung_Tertile_2', 'Lung_Tertile_3']

# Cells matching none of the conditions get the sentinel '0' and are dropped.
# The default is spelled out as a string: relying on the implicit integer 0
# being promoted to '0' is not portable to newer NumPy releases.
clustering_object.obs['General_Tertile'] = np.select(conditions, choices, default='0')
clustering_object = clustering_object[clustering_object.obs['General_Tertile'] != '0'].copy()

clustering_object.obs

In [None]:
# Generate Unique Clonotype Lung Tertile - Figure S5C
# Builds one row per expanded, non-shared lung clonotype with its number of
# lung cells.

clonotype_table = final_filtered_object.obs.copy()
# Not read again in the cells visible here; kept in case other cells use it.
origin_list = pd.unique(clonotype_table['sample']).tolist()
tertile_list = pd.DataFrame()

# The trailing .copy() makes Primary_Table an independent frame so the new
# column below is not written into a slice (SettingWithCopyWarning).
Primary_Table = clonotype_table[
    ~clonotype_table['clonotype_id'].isin(shared_clonotypes)  # Subset by unique status
    & (clonotype_table['organ'] == 'Lung')  # Subset by organ
    & ~clonotype_table['clonotype_id'].isin(unexpanded_lung)  # Exclude unexpanded clonotypes
].copy()

# Number of lung cells carrying each clonotype, repeated on every cell row.
Primary_Table['Number_of_Clones'] = (
    Primary_Table.groupby('clonotype_id', observed=True)['clonotype_id'].transform('size')
)

# Collapse to one row per clonotype (the first cell encountered is kept).
# NOTE(review): the sibling cells keep 'animal_id' as the third column where
# this one keeps 'sample'; confirm which is intended.
dfTertile_Table = (
    Primary_Table[['clonotype_id', 'Number_of_Clones', 'sample']]
    .drop_duplicates(subset='clonotype_id', keep='first')
    .copy()
)
    
def get_tertile_bins(values):
    """Return four bin edges that split *values* into three bins.

    The distinct non-missing values are sorted, and the entries found at the
    one-third and two-thirds positions of that sorted list become the two
    interior edges. The split is therefore made over distinct values, not
    over individual observations.

    Parameters
    ----------
    values : pandas.Series
        Values to derive the edges from (here: cells per clonotype).

    Returns
    -------
    list
        ``[-inf, lower_edge, upper_edge, inf]``.

    Raises
    ------
    ValueError
        If *values* contains no non-missing entries.

    Notes
    -----
    With fewer than three distinct values the lower edge is the minimum
    itself; with a single distinct value both interior edges coincide, and
    ``pd.cut`` rejects duplicate edges by default.
    """
    # Missing values cannot be ordered reliably, so they are dropped first.
    distinct_sorted = sorted(values.dropna().unique())
    if not distinct_sorted:
        raise ValueError("get_tertile_bins() needs at least one non-missing value")

    n_distinct = len(distinct_sorted)
    lower_edge = distinct_sorted[n_distinct // 3]
    upper_edge = distinct_sorted[(2 * n_distinct) // 3]

    return [float('-inf'), lower_edge, upper_edge, float('inf')]

      
# Assign every expanded, non-shared lung clonotype to a tertile by its lung
# cell count.
# right=False gives the half-open bins [-inf, e1), [e1, e2), [e2, inf).
bin_edges = get_tertile_bins(dfTertile_Table['Number_of_Clones'])
dfTertile_Table = dfTertile_Table.assign(
    Tertile=pd.cut(
        dfTertile_Table['Number_of_Clones'],
        bins=bin_edges,
        labels=["Tertile_1", "Tertile_2", "Tertile_3"],
        right=False,
        include_lowest=True,
    ).astype(str)
)
tertile_list = dfTertile_Table.reset_index(drop=True)

# Clonotype ids belonging to each tertile.
tertile_1_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_1", 'clonotype_id'].tolist()
tertile_2_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_2", 'clonotype_id'].tolist()
tertile_3_lung = tertile_list.loc[tertile_list['Tertile'] == "Tertile_3", 'clonotype_id'].tolist()

# Generate Clustering Object for Visualization

# Lung cells of non-shared clonotypes. Subsetting an AnnData object yields a
# view; .copy() turns it into a real object so .obs can be written below
# without an implicit copy.
clustering_object = final_filtered_object[
    ~final_filtered_object.obs['clonotype_id'].isin(shared_clonotypes)  # Subset by unique status
    & (final_filtered_object.obs['organ'] == 'Lung')  # Subset by organ
].copy()

conditions = [
    clustering_object.obs['clonotype_id'].isin(unexpanded_lung),
    clustering_object.obs['clonotype_id'].isin(tertile_1_lung),
    clustering_object.obs['clonotype_id'].isin(tertile_2_lung),
    clustering_object.obs['clonotype_id'].isin(tertile_3_lung),
]

# NOTE(review): ' Unexpanded_Lung' carries a leading space - presumably so it
# sorts ahead of the tertile labels; confirm before normalising it.
choices = [' Unexpanded_Lung', 'Lung_Tertile_1', 'Lung_Tertile_2', 'Lung_Tertile_3']

# Cells matching none of the conditions get the sentinel '0' and are dropped.
# The default is spelled out as a string: relying on the implicit integer 0
# being promoted to '0' is not portable to newer NumPy releases.
clustering_object.obs['General_Tertile'] = np.select(conditions, choices, default='0')
clustering_object = clustering_object[clustering_object.obs['General_Tertile'] != '0'].copy()

clustering_object.obs