# Pearson Correlation Coefficient Calculation
- This notebook provides the source code for calculating the Pearson correlation coefficients for Figures 5L and S5B
- To re-create the data used for Figure 5L, one would need to generate a list containing the clonotype lists for Lung Tertile 1 (Low Clonal Expansion), Lung Tertile 2 (Mid Clonal Expansion), and Lung Tertile 3 (High Clonal Expansion) for shared clonotypes. The same would need to be done for liver cells.
- For instructions on how to subset clonotypes (for use in "subset_list" below), please refer to the notebook "03_Clonotype_Expansion_Analysis"  

In [None]:
import numpy as np #v1.26.4
import pandas as pd #v2.2.0
import anndata #v0.10.5.post1
import scanpy as sc #v1.9.8
import random  

# Single fixed seed shared by every random number generator used in this notebook.
c_iSeed = 6161904

# Seed the standard-library generator and NumPy's legacy global generator;
# the two generators are independent, so the order of these calls does not matter.
random.seed(c_iSeed)
np.random.seed(c_iSeed)

In [None]:
#Import annData hdf5
# Load the filtered AnnData object from disk; its `.obs` table is used by the cell below.
# `___` is a placeholder that must be replaced with a real file path before running.
# NOTE(review): in IPython, `___` is also the built-in name for the third-most-recent
# cell output, so an unreplaced placeholder may surface as a confusing file error
# rather than a NameError.
final_filtered_object = anndata.read_h5ad(filename=___) #Replace ___ with path to file "02_final_filtered_object.hdf5"

In [None]:
# Generate Proportion Table
#
# For every clonotype subset, compute the fraction of that subset's cells (restricted to
# the subset's organ) that falls in each scVI cluster, then compute the pairwise Pearson
# correlation between the per-cluster proportion profiles of all subsets.
#
# Inputs that must be defined before running this cell:
# "subset_list" should be a list of lists containing the desired clonotype subsets
# "subset_list_str" should be a list of strings containing the titles of those lists (to eventually be exported to an external spreadsheet)
#   Each title must contain the substring 'lung' or 'liver' (case-sensitive, 'lung' is
#   checked first); it selects which organ's cells are counted for that subset.
#
# Outputs:
# "final_prop_table": rows = scVI clusters, columns = subset titles, values = proportions
# "corr_matrix": pairwise Pearson correlation between the subset columns

# zip() would silently drop the unmatched tail, so reject mismatched inputs up front.
if len(subset_list) != len(subset_list_str):
    raise ValueError(
        "subset_list and subset_list_str must have the same length "
        f"(got {len(subset_list)} and {len(subset_list_str)})"
    )
if len(subset_list) == 0:
    raise ValueError("subset_list is empty; provide at least one clonotype subset")

# Boolean indexing below always returns a new frame, so the per-cell metadata is only
# read here and does not need to be copied on every iteration.
cell_metadata = final_filtered_object.obs

subset_prop_tables = []
for subset, subset_str in zip(subset_list, subset_list_str):
    if 'lung' in subset_str:
        organ = "Lung"
    elif 'liver' in subset_str:
        organ = "Liver"
    else:
        # Without this branch `organ` would be undefined on the first iteration, or
        # silently reuse the previous subset's organ on later ones.
        raise ValueError(
            f"Cannot determine organ for subset {subset_str!r}: "
            "its title must contain 'lung' or 'liver' (case-sensitive)"
        )

    in_subset_and_organ = (
        cell_metadata['clonotype_id'].isin(subset) & (cell_metadata['organ'] == organ)
    )
    # observed=False pins the current behavior for a categorical cluster column:
    # clusters with no cells in this subset are kept with a count of 0.
    proportion_table = (
        cell_metadata[in_subset_and_organ]
        .groupby(['scVI_clusters'], observed=False)
        .size()
        .reset_index(name='Cell_Count')
    )
    proportion_table['Proportions'] = proportion_table['Cell_Count'] / (proportion_table['Cell_Count'].sum())
    proportion_table['Subset'] = subset_str
    subset_prop_tables.append(proportion_table)

# Concatenate once instead of growing a DataFrame inside the loop.
final_prop_table = pd.concat(subset_prop_tables, axis=0)
final_prop_table = final_prop_table.pivot(index = 'scVI_clusters', columns = 'Subset', values = 'Proportions')

# NOTE(review): DataFrame.corr ignores NaN pairwise. If 'scVI_clusters' is not
# categorical, a cluster absent from a subset appears as NaN here (not 0) and is left
# out of that pair's correlation. Confirm this matches the intended analysis.
corr_matrix = final_prop_table.corr(method='pearson')