### Perform genetic divergence analysis using the distance matrices output from DENDRO, and calculate the minimum distance between tumor and stem pseudoaggregates

In [None]:
#import libraries
import pandas as pd
import numpy as np
import glob

In [None]:
#placeholder: replace with the path of the distance matrix CSV that is read in below
target_mat = "filename"

In [None]:
#load the target distance matrix; the first CSV column supplies the row labels
data = pd.read_csv(target_mat, index_col=0)
#cast the row and column labels to str so both axes hold string names
data.index, data.columns = data.index.astype(str), data.columns.astype(str)

In [None]:
#annotate ASC and SSC populations for AD and SER samples, respectively, in addition to STM populations.
#this can be done by applying Cell Type labels from the labeled cohort AnnData object.
#each list below is a placeholder for the row names (pseudoaggregates) belonging to that population.
type_members = {
    'ASC': ['target_ASC_column_names'],
    'SSC': ['target_SSC_column_names'],
    'STM': ['target_STM_column_names'],
}
#create the 'Type' column if it is missing so that labelling cannot fail with a KeyError
if 'Type' not in data.columns:
    data['Type'] = None
#labels are applied in ASC, SSC, STM order, so a later label overrides an earlier one for the same row
for type_label, member_names in type_members.items():
    is_member = np.isin(data.index, np.array(member_names).astype(str))
    #assign through a single .loc call; chained data['Type'][mask] = ... may write to a temporary copy
    data.loc[is_member, 'Type'] = type_label

In [None]:
#write the annotated pseudoaggregate matrix to "<matname>DENDRO_dist_annotated.csv"
#NOTE(review): matname is not assigned in the cells shown here - confirm it is defined before this cell runs
data.to_csv(path_or_buf=matname + "DENDRO_dist_annotated.csv")

In [None]:
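#read in list of annotated distance matrices as dist_list (not shown); the loop below expects each element to hold the annotated distance matrix at position 0 and a label for the output table at position 1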

In [None]:
#iterate through list of annotated distance matrices
#each row appended to output_list is [min STM-to-tumor distance / max distance, tumor type, element[1]]
output_list = []
for element in dist_list:
    annotated = element[0] #only read from, so no defensive copy is needed
    types = annotated['Type']
    #drop the string 'Type' column so the reductions below only ever see numeric distances;
    #leaving it in mixes labels with floats and can raise a TypeError in the second .max()
    distances = annotated.drop(columns='Type')
    max_dist = distances.max().max() #largest distance in the matrix, used for normalization
    stm_rows = distances[types == 'STM'] #rows of the STM pseudoaggregate(s)
    is_tumor = (types == 'ASC') | (types == 'SSC')
    tumor_names = types[is_tumor].index.values.astype(str) #names of the ASC or SSC pseudoaggregates
    #select the tumor columns of the STM rows and take the smallest STM-to-tumor distance
    min_dist = stm_rows[tumor_names].min().min()
    #first non-STM label in value_counts order (most frequent first) is reported as the tumor type
    type_counts = types.value_counts()
    tumor_type = type_counts.index[type_counts.index != 'STM'].values[0]
    output_list.append([min_dist / max_dist, tumor_type, element[1]]) #normalized value is min_dist/max_dist

In [None]:
#collect the per-matrix result rows into a table (one row per entry of output_list)
out = pd.DataFrame(data=output_list)

In [None]:
#save the result table to CSV in the current working directory
out.to_csv(path_or_buf="Tumor_Min_Genetic_Divergence_To_Stem.csv")