# Jaccard Similarity Coefficient
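- This notebook contains the code for calculating the Jaccard Similarity Coefficient (JSC) used in Figure 5E.
- For each clonotype found in both Lung and Liver, the JSC is the number of scVI clusters shared by its Lung and Liver cells divided by the number of distinct scVI clusters its cells occupy across both organs.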

In [None]:
import numpy as np #v1.26.4
import pandas as pd #v2.2.0
import anndata #v0.10.5.post1
import scanpy as sc #v1.9.8
import random  

# Single fixed seed used for every random number generator in this notebook
c_iSeed = 6_161_904

# Seed Python's built-in generator, then NumPy's legacy global generator
random.seed(c_iSeed)
np.random.seed(c_iSeed)

In [None]:
# Import the AnnData object stored in HDF5 format.
# `___` is a template placeholder: substitute the path to the file
# "02_final_filtered_object.hdf5" before running this cell.
# NOTE(review): if left unedited, IPython resolves `___` to a previous cell output
# rather than a file path, so the intended file will not be loaded.
final_filtered_object = anndata.read_h5ad(filename=___) #Replace ___ with path to file "02_final_filtered_object.hdf5"

In [None]:
# Find Shared Clonotypes For Every Animal
#
# Count the cells of every clonotype in every organ, spread the organs into
# columns, and keep the clonotypes with at least one cell in both Lung and Liver.
#
# `observed=False` is passed explicitly: it is the groupby behaviour this cell has
# relied on so far (unobserved categorical combinations are counted as 0), and
# stating it keeps the result stable if the pandas default changes and avoids the
# FutureWarning pandas 2.x emits when grouping on categorical columns.
# NOTE(review): the grouping has no animal/sample key, so clonotypes are pooled
# across all cells in the object - confirm `clonotype_id` is already animal-specific.
clonotype_organ_counts = (
    final_filtered_object.obs
    .groupby(["clonotype_id", "organ"], observed=False)
    .size()
    .reset_index(name="cells_in_clones")
    .pivot(index=["clonotype_id"], columns=["organ"], values="cells_in_clones")
)

# IDs of the clonotypes present in both organs, in the pivot table's index order
all_overlap = (
    clonotype_organ_counts
    .query('(Lung>=1 and Liver>=1)')
    .index.tolist()
)

In [None]:
# Generate JSC for Shared Clonotypes
#
# For every clonotype in `all_overlap`:
#     JSC = (number of scVI clusters seen in both Lung and Liver)
#           / (number of distinct scVI clusters seen in Lung or Liver)
# The result is `JSC_table`, one row per clonotype with columns 'Clonotype' and 'JSC'.

# Select the required metadata columns once, instead of copying the whole
# `.obs` table on every iteration
cell_metadata = final_filtered_object.obs[['clonotype_id', 'organ', 'scVI_clusters']]

# One {'Clonotype', 'JSC'} record per clonotype; the DataFrame is built once at
# the end rather than grown with pd.concat inside the loop
jsc_records = []

for x in all_overlap:

    clonotype_cells = cell_metadata[cell_metadata['clonotype_id'] == x]

    # Distinct clusters occupied by this clonotype in each organ
    lung_clusters = set(
        clonotype_cells.loc[clonotype_cells['organ'] == 'Lung', 'scVI_clusters'].unique().tolist()
    )
    liver_clusters = set(
        clonotype_cells.loc[clonotype_cells['organ'] == 'Liver', 'scVI_clusters'].unique().tolist()
    )

    shared_clusters = lung_clusters & liver_clusters
    all_clusters = lung_clusters | liver_clusters

    jsc_records.append({'Clonotype': x, 'JSC': len(shared_clusters) / len(all_clusters)})

# Explicit columns keep the table's schema stable even when `all_overlap` is empty
JSC_table = pd.DataFrame(jsc_records, columns=['Clonotype', 'JSC'])