# Hierarchical Clustering

In [1]:
# importing required packages
import pandas as pd
from sklearn.metrics.cluster import normalized_mutual_info_score
import csv

## Preprocessing of Data for Clustering

In [2]:
# reading the csv file and creating a dataframe
# no index_col is passed, so the file's unnamed first column is loaded as an
# ordinary column called 'Unnamed: 0' (it holds the gene labels and is renamed below)
df = pd.read_csv("df_na_full.csv")

In [3]:
# previewing the first five rows of the raw table (one row per gene at this point)
df.head()

Unnamed: 0.1,Unnamed: 0,"Ni_et_al_2016_13, 8, selectivity=0.125","Weiser_et_al_2017_28, 47, selectivity=0.02127659574468085","Ni_et_al_2016_11, 16, selectivity=0.0625","Ni_et_al_2016_8, 24, selectivity=0.041666666666666664","Weiser_et_al_2017_25, 122, selectivity=0.00819672131147541","Ni_et_al_2016_3, 67, selectivity=0.014925373134328358","Ni_et_al_2016_1, 72, selectivity=0.013888888888888888","Ni_et_al_2016_9, 15, selectivity=0.06666666666666667","Ni_et_al_2016_10, 24, selectivity=0.041666666666666664",...,"Manage_et_al_2020_6, 1667, selectivity=0.0005998800239952009","Cecere_et_al_2014_1, 526, selectivity=0.0019011406844106464","Cornes_et_al_2021_4, 266, selectivity=0.0037593984962406013","Cecere_et_al_2014_7, 1724, selectivity=0.000580046403712297","Kim_et_al_2021_6, 926, selectivity=0.0010799136069114472","Weiser_et_al_2017_16, 409, selectivity=0.0024449877750611247","Esse_et_al_2023_4, 415, selectivity=0.0024096385542168677","Gushchanskaia_et_al_2019_3, 1488, selectivity=0.0006720430107526882","Quarato_et_al_2021_3, 1432, selectivity=0.0006983240223463687","Singh_et_al_2021_11, 1417, selectivity=0.0007057163020465773"
0,"WBGene00017069, r-score = 0.0048751254346891615",1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,...,1.0,,,,,,,,,
1,"WBGene00016953, r-score = 0.004841172173364056",,1.0,,,1.0,,,,,...,,,,1.0,,,,,,
2,"WBGene00000443, r-score = 0.004667290796857615",,1.0,,,1.0,,,,,...,,,,,,,,,,
3,"WBGene00008010, r-score = 0.004302373134416553",,1.0,,1.0,1.0,1.0,1.0,,,...,,1.0,,,,,,,,
4,"WBGene00007489, r-score = 0.00419239699598032",,,,,1.0,,,,,...,1.0,,,1.0,,,,,,


In [4]:
# renaming the column containing gene names to "Gene"
# (reassigning the renamed frame rather than mutating it in place)
df = df.rename(columns={'Unnamed: 0': 'Gene'})

In [5]:
# transposing the dataframe since genes are to be clustered
# the gene labels become the index first, so that after the transpose they are
# the column labels and every row is one dataset
df = df.set_index('Gene').transpose()

In [6]:
# displaying the transposed dataframe: datasets as rows, genes as columns
df

Gene,"WBGene00017069, r-score = 0.0048751254346891615","WBGene00016953, r-score = 0.004841172173364056","WBGene00000443, r-score = 0.004667290796857615","WBGene00008010, r-score = 0.004302373134416553","WBGene00007489, r-score = 0.00419239699598032","WBGene00016512, r-score = 0.004062068482644259","WBGene00008447, r-score = 0.0040389477349124766","WBGene00007675, r-score = 0.003970447634835349","WBGene00007454, r-score = 0.0037586611806887955","WBGene00010507, r-score = 0.0036729852052008486",...,"WBGene00010785, r-score = 0.0008547683596934944","WBGene00004955, r-score = 0.0008446914462785428","WBGene00010263, r-score = 0.0007620244809853524","WBGene00010621, r-score = 0.000716646075839293","WBGene00004239, r-score = 0.000711770047839181","WBGene00009035, r-score = 0.0006779000717146764","WBGene00013038, r-score = 0.0006511530533419316","WBGene00004075, r-score = 0.0006104894791420395","WBGene00002225, r-score = 0.0004982169780372186","WBGene00001977, r-score = 0.00021875549158561092"
"Ni_et_al_2016_13, 8, selectivity=0.125",1.0,,,,,,,,,,...,,,,,,,,,,
"Weiser_et_al_2017_28, 47, selectivity=0.02127659574468085",1.0,1.0,1.0,1.0,,,,,1.0,,...,,,,,,,,,,
"Ni_et_al_2016_11, 16, selectivity=0.0625",1.0,,,,,,1.0,,,,...,,,,,,,,,,
"Ni_et_al_2016_8, 24, selectivity=0.041666666666666664",1.0,,,1.0,,,,,1.0,,...,,,,,,,,,,
"Weiser_et_al_2017_25, 122, selectivity=0.00819672131147541",1.0,1.0,1.0,1.0,1.0,,,1.0,1.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Weiser_et_al_2017_16, 409, selectivity=0.0024449877750611247",,,,,,,,,,,...,,,,,,,,,,
"Esse_et_al_2023_4, 415, selectivity=0.0024096385542168677",,,,,,,,,,,...,,,,,,,,,,
"Gushchanskaia_et_al_2019_3, 1488, selectivity=0.0006720430107526882",,,,,,,,,,,...,,,,,,,,,,
"Quarato_et_al_2021_3, 1432, selectivity=0.0006983240223463687",,,,,,,,,,,...,,,,,,,,,,


In [7]:
# renaming columns for readability
# Each column label has the form "<WBGene ID>, r-score = <value>"; only the part
# before the first comma is kept. str.partition returns the whole label when it
# contains no comma, whereas slicing up to str.find(",") would use -1 in that
# case and silently cut off the label's last character.
ds_old = df.columns
ds_new = [ds.partition(",")[0] for ds in ds_old]

print(ds_new)

['WBGene00017069', 'WBGene00016953', 'WBGene00000443', 'WBGene00008010', 'WBGene00007489', 'WBGene00016512', 'WBGene00008447', 'WBGene00007675', 'WBGene00007454', 'WBGene00010507', 'WBGene00004976', 'WBGene00020164', 'WBGene00008862', 'WBGene00019858', 'WBGene00001898', 'WBGene00018199', 'WBGene00007303', 'WBGene00009572', 'WBGene00015456', 'WBGene00010212', 'WBGene00019598', 'WBGene00021018', 'WBGene00001638', 'WBGene00009247', 'WBGene00018204', 'WBGene00011059', 'WBGene00009126', 'WBGene00021671', 'WBGene00016885', 'WBGene00019070', 'WBGene00000546', 'WBGene00018310', 'WBGene00010012', 'WBGene00012452', 'WBGene00018878', 'WBGene00003915', 'WBGene00011964', 'WBGene00008681', 'WBGene00011805', 'WBGene00021019', 'WBGene00012961', 'WBGene00019174', 'WBGene00022730', 'WBGene00015735', 'WBGene00020630', 'WBGene00021239', 'WBGene00023497', 'WBGene00002008', 'WBGene00012290', 'WBGene00003920', 'WBGene00001433', 'WBGene00007624', 'WBGene00003994', 'WBGene00006529', 'WBGene00006537', 'WBGene00

In [8]:
# renaming the columns
# the long "<gene ID>, r-score = ..." labels are replaced by the shortened
# labels collected in ds_new (same order, one per column)
df.columns = ds_new

In [9]:
# displaying the dataframe with the shortened gene-ID column labels
df

Unnamed: 0,WBGene00017069,WBGene00016953,WBGene00000443,WBGene00008010,WBGene00007489,WBGene00016512,WBGene00008447,WBGene00007675,WBGene00007454,WBGene00010507,...,WBGene00010785,WBGene00004955,WBGene00010263,WBGene00010621,WBGene00004239,WBGene00009035,WBGene00013038,WBGene00004075,WBGene00002225,WBGene00001977
"Ni_et_al_2016_13, 8, selectivity=0.125",1.0,,,,,,,,,,...,,,,,,,,,,
"Weiser_et_al_2017_28, 47, selectivity=0.02127659574468085",1.0,1.0,1.0,1.0,,,,,1.0,,...,,,,,,,,,,
"Ni_et_al_2016_11, 16, selectivity=0.0625",1.0,,,,,,1.0,,,,...,,,,,,,,,,
"Ni_et_al_2016_8, 24, selectivity=0.041666666666666664",1.0,,,1.0,,,,,1.0,,...,,,,,,,,,,
"Weiser_et_al_2017_25, 122, selectivity=0.00819672131147541",1.0,1.0,1.0,1.0,1.0,,,1.0,1.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Weiser_et_al_2017_16, 409, selectivity=0.0024449877750611247",,,,,,,,,,,...,,,,,,,,,,
"Esse_et_al_2023_4, 415, selectivity=0.0024096385542168677",,,,,,,,,,,...,,,,,,,,,,
"Gushchanskaia_et_al_2019_3, 1488, selectivity=0.0006720430107526882",,,,,,,,,,,...,,,,,,,,,,
"Quarato_et_al_2021_3, 1432, selectivity=0.0006983240223463687",,,,,,,,,,,...,,,,,,,,,,


In [10]:
# replacing all NAs with 0
# fillna returns a new frame, which is bound back to df
df = df.fillna(0)

In [11]:
# previewing the first five rows after the NA replacement
df.head()

Unnamed: 0,WBGene00017069,WBGene00016953,WBGene00000443,WBGene00008010,WBGene00007489,WBGene00016512,WBGene00008447,WBGene00007675,WBGene00007454,WBGene00010507,...,WBGene00010785,WBGene00004955,WBGene00010263,WBGene00010621,WBGene00004239,WBGene00009035,WBGene00013038,WBGene00004075,WBGene00002225,WBGene00001977
"Ni_et_al_2016_13, 8, selectivity=0.125",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Weiser_et_al_2017_28, 47, selectivity=0.02127659574468085",1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Ni_et_al_2016_11, 16, selectivity=0.0625",1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Ni_et_al_2016_8, 24, selectivity=0.041666666666666664",1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Weiser_et_al_2017_25, 122, selectivity=0.00819672131147541",1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Gene Renaming

In [12]:
# creating a list of WBGene names
# note: this is the dataframe's column Index object (iterable like a list)
genes = df.columns

In [13]:
# reading in the Wormbase database to a dataframe
# the file is tab-separated; anything after a "#" on a line is treated as a
# comment and not parsed
db = pd.read_csv(
    'Wormbase_Gene_Sanitizer_Database_DownloadedOn_10-30-2023.txt',
    sep="\t",
    comment="#",
)

In [14]:
# displaying the Wormbase lookup table
db

Unnamed: 0,Input,Status,Suggested Match,Public Name For,Sequence Name For,Molecular Name For,Other Name For,Split Into,Merged Into,History
0,WBGene00000001,Live - Unique,WBGene00000001,,,,,,,History: N.A.
1,WBGene00000002,Live - Unique,WBGene00000002,,,,,,,History: N.A.
2,WBGene00000003,Live - Unique,WBGene00000003,,,,,,,History: N.A.
3,WBGene00000004,Live - Unique,WBGene00000004,,,,,,,History: N.A.
4,WBGene00000005,Live - Unique,WBGene00000005,,,,,,,History: N.A.
...,...,...,...,...,...,...,...,...,...,...
257830,zyg-12,Live - Unique,WBGene00006997,WBGene00006997,,,,,,Public name for WBGene00006997. History: N.A.
257831,zyg-13,Live - Unique,WBGene00006998,WBGene00006998,,,,,,Public name for WBGene00006998. History: N.A.
257832,zyg-14,Live - Unique,WBGene00003916,,,,WBGene00003916,,,Other name for WBGene00003916(par-1). History:...
257833,zyx-1,Live - Unique,WBGene00006999,WBGene00006999,,,,,,Public name for WBGene00006999. History: N.A.


In [15]:
# converting WBGene names to common names
# For every WBGene ID, find the database row whose "Public Name For" entry
# equals that ID and take the value in the first column of that row (shown as
# "Input" in the database preview).
public_name_for = db["Public Name For"]
input_names = db.iloc[:, 0]
new_names = list()
for gene in genes:
    # Select with a boolean mask so the lookup does not rely on db having a
    # default 0..n-1 index (previously an index *label* was passed to the
    # positional .iloc accessor).
    matches = input_names[public_name_for == gene]
    if len(matches) != 1:
        # Same exception type that .item() raised for zero or several matches,
        # but the message now identifies the gene that could not be resolved.
        raise ValueError(
            f"expected exactly one public name for {gene!r}, found {len(matches)}"
        )
    new_names.append(matches.iloc[0])
print(new_names)

['timm-17B.2', 'C55C3.3', 'ceh-20', 'C38D9.2', 'C09G5.7', 'C38C3.3', 'E01G4.5', 'C18D4.6', 'C08F11.7', 'K02E2.6', 'spe-41', 'T02G5.4', 'F15D4.5', 'R03H10.6', 'his-24', 'F39E9.7', 'rnh-1.3', 'F40D4.13', 'C04G6.6', 'fbxa-192', 'K09H9.7', 'W04B5.1', 'gly-13', 'bath-45', 'F39F10.4', 'R06C1.4', 'pyk-1', 'Y48G1BM.6', 'fbxb-97', 'F58H7.5', 'clp-6', 'F41G4.7', 'saeg-1', 'Y17D7B.4', 'glit-1', 'pan-1', 'saeg-2', 'scrm-4', 'T16G12.4', 'W04B5.2', 'Y47H10A.5', 'H09G03.1', 'ZK402.3', 'pdfr-1', 'T20F7.1', 'Y20F4.4', 'lin-15B', 'hsp-4', 'W05H12.2', 'par-5', 'fkb-8', 'hrde-1', 'pgl-3', 'tba-2', 'tbb-2', 'W06A11.4', 'qdpr-1', 'F34D10.4', 'Y37E11B.2', 'W09B7.2', 'vig-1', 'hil-4', 'csr-1', 'citk-1', 'R03D7.2', 'deps-1', 'cdk-1', 'klp-7', 'hsp-90', 'mrp-4', 'cdc-48.1', 'W09B7.1', 'elf-1', 'idh-1', 'hsp-1', 'ani-2', 'eel-1', 'sea-2', 'mcm-7', 'ima-3', 'daf-18', 'mex-5', 'rpn-9', 'cey-2', 'cpg-1', 'wago-1', 'rme-2', 'simr-1', 'set-2', 'mut-16', 'top-2', 'spd-5', 'wago-4', 'egg-6', 'puf-3', 'gfat-2', 'ani-1',

In [16]:
# Save top 100 common gene names to a csv
# one gene name per row, single column, no header row
with open('top_100.csv', 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerows([name] for name in new_names)

In [17]:
# changing the WBGene column names to common names
# new_names was built in the same order as df.columns, so the labels line up
df.columns = new_names

In [18]:
# displaying the dataframe with common gene names as column labels
df

Unnamed: 0,timm-17B.2,C55C3.3,ceh-20,C38D9.2,C09G5.7,C38C3.3,E01G4.5,C18D4.6,C08F11.7,K02E2.6,...,top-2,spd-5,wago-4,egg-6,puf-3,gfat-2,ani-1,pod-1,klp-15,hmg-12
"Ni_et_al_2016_13, 8, selectivity=0.125",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Weiser_et_al_2017_28, 47, selectivity=0.02127659574468085",1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Ni_et_al_2016_11, 16, selectivity=0.0625",1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Ni_et_al_2016_8, 24, selectivity=0.041666666666666664",1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Weiser_et_al_2017_25, 122, selectivity=0.00819672131147541",1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Weiser_et_al_2017_16, 409, selectivity=0.0024449877750611247",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Esse_et_al_2023_4, 415, selectivity=0.0024096385542168677",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Gushchanskaia_et_al_2019_3, 1488, selectivity=0.0006720430107526882",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Quarato_et_al_2021_3, 1432, selectivity=0.0006983240223463687",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Clustering

In [19]:
# creating an empty matrix to hold NMI values
# square frame labelled by gene on both axes; every cell starts out as NaN
gene_labels = df.columns
NMI_matrix = pd.DataFrame(index=gene_labels, columns=gene_labels)

In [20]:
# displaying the still-empty gene-by-gene matrix
NMI_matrix

Unnamed: 0,timm-17B.2,C55C3.3,ceh-20,C38D9.2,C09G5.7,C38C3.3,E01G4.5,C18D4.6,C08F11.7,K02E2.6,...,top-2,spd-5,wago-4,egg-6,puf-3,gfat-2,ani-1,pod-1,klp-15,hmg-12
timm-17B.2,,,,,,,,,,,...,,,,,,,,,,
C55C3.3,,,,,,,,,,,...,,,,,,,,,,
ceh-20,,,,,,,,,,,...,,,,,,,,,,
C38D9.2,,,,,,,,,,,...,,,,,,,,,,
C09G5.7,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
gfat-2,,,,,,,,,,,...,,,,,,,,,,
ani-1,,,,,,,,,,,...,,,,,,,,,,
pod-1,,,,,,,,,,,...,,,,,,,,,,
klp-15,,,,,,,,,,,...,,,,,,,,,,


In [21]:
# Calculating NMI for each gene pair
# every (row gene, column gene) cell is filled, including the diagonal and
# both orderings of each pair
for col1 in df.columns:
    profile1 = df[col1]  # looked up once per outer iteration
    for col2 in df.columns:
        score = normalized_mutual_info_score(profile1, df[col2])
        NMI_matrix.loc[col1, col2] = score

In [22]:
# displaying the filled gene-by-gene NMI matrix
NMI_matrix

Unnamed: 0,timm-17B.2,C55C3.3,ceh-20,C38D9.2,C09G5.7,C38C3.3,E01G4.5,C18D4.6,C08F11.7,K02E2.6,...,top-2,spd-5,wago-4,egg-6,puf-3,gfat-2,ani-1,pod-1,klp-15,hmg-12
timm-17B.2,1.0,0.002784,0.12638,0.151174,0.035471,0.049255,0.010682,0.046206,0.199482,0.008588,...,0.000619,0.061538,0.009382,0.001501,0.012948,0.021544,0.018468,0.02918,0.007145,0.010447
C55C3.3,0.002784,1.0,0.040406,0.003726,0.132402,0.031643,0.00026,0.04951,0.010797,0.00016,...,0.000007,0.003965,0.004587,0.002152,0.016844,0.007049,0.005019,0.002814,0.008997,0.000019
ceh-20,0.12638,0.040406,1.0,0.123475,0.028434,0.14396,0.019718,0.171257,0.171109,0.000022,...,0.004065,0.052002,0.008011,0.005664,0.014848,0.053572,0.024138,0.009116,0.007404,0.025582
C38D9.2,0.151174,0.003726,0.123475,1.0,0.005088,0.026473,0.00036,0.096057,0.202043,0.00467,...,0.000739,0.007489,0.000069,0.000158,0.000737,0.017517,0.01482,0.004629,0.001117,0.002416
C09G5.7,0.035471,0.132402,0.028434,0.005088,1.0,0.122445,0.012573,0.036354,0.039777,0.004437,...,0.000085,0.000005,0.008622,0.000019,0.000116,0.000019,0.00197,0.009033,0.007925,0.001154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
gfat-2,0.021544,0.007049,0.053572,0.017517,0.000019,0.021698,0.000924,0.013667,0.017517,0.009026,...,0.084186,0.058326,0.05525,0.148401,0.350947,1.0,0.04373,0.018958,0.223642,0.078949
ani-1,0.018468,0.005019,0.024138,0.01482,0.00197,0.007577,0.048901,0.024138,0.006494,0.055754,...,0.095024,0.0894,0.033257,0.084186,0.102698,0.04373,1.0,0.136836,0.054995,0.047033
pod-1,0.02918,0.002814,0.009116,0.004629,0.009033,0.000162,0.00002,0.021247,0.012191,0.001016,...,0.058717,0.076705,0.074944,0.122806,0.176395,0.018958,0.136836,1.0,0.168341,0.129561
klp-15,0.007145,0.008997,0.007404,0.001117,0.007925,0.00407,0.006392,0.029906,0.004533,0.00494,...,0.122218,0.115175,0.167686,0.295944,0.453926,0.223642,0.054995,0.168341,1.0,0.235145


In [23]:
# calculating the distance matrix from the NMI matrix
# the scores are cast to float first, then distance = 1 - NMI
nmi_values = NMI_matrix.astype(float)
dist_matrix = 1 - nmi_values

In [24]:
# displaying the gene-by-gene distance matrix (1 - NMI)
dist_matrix

Unnamed: 0,timm-17B.2,C55C3.3,ceh-20,C38D9.2,C09G5.7,C38C3.3,E01G4.5,C18D4.6,C08F11.7,K02E2.6,...,top-2,spd-5,wago-4,egg-6,puf-3,gfat-2,ani-1,pod-1,klp-15,hmg-12
timm-17B.2,0.000000,0.997216,0.873620,0.848826,0.964529,0.950745,0.989318,0.953794,0.800518,0.991412,...,0.999381,0.938462,0.990618,0.998499,0.987052,0.978456,0.981532,0.970820,0.992855,0.989553
C55C3.3,0.997216,0.000000,0.959594,0.996274,0.867598,0.968357,0.999740,0.950490,0.989203,0.999840,...,0.999993,0.996035,0.995413,0.997848,0.983156,0.992951,0.994981,0.997186,0.991003,0.999981
ceh-20,0.873620,0.959594,0.000000,0.876525,0.971566,0.856040,0.980282,0.828743,0.828891,0.999978,...,0.995935,0.947998,0.991989,0.994336,0.985152,0.946428,0.975862,0.990884,0.992596,0.974418
C38D9.2,0.848826,0.996274,0.876525,0.000000,0.994912,0.973527,0.999640,0.903943,0.797957,0.995330,...,0.999261,0.992511,0.999931,0.999842,0.999263,0.982483,0.985180,0.995371,0.998883,0.997584
C09G5.7,0.964529,0.867598,0.971566,0.994912,0.000000,0.877555,0.987427,0.963646,0.960223,0.995563,...,0.999915,0.999995,0.991378,0.999981,0.999884,0.999981,0.998030,0.990967,0.992075,0.998846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
gfat-2,0.978456,0.992951,0.946428,0.982483,0.999981,0.978302,0.999076,0.986333,0.982483,0.990974,...,0.915814,0.941674,0.944750,0.851599,0.649053,0.000000,0.956270,0.981042,0.776358,0.921051
ani-1,0.981532,0.994981,0.975862,0.985180,0.998030,0.992423,0.951099,0.975862,0.993506,0.944246,...,0.904976,0.910600,0.966743,0.915814,0.897302,0.956270,0.000000,0.863164,0.945005,0.952967
pod-1,0.970820,0.997186,0.990884,0.995371,0.990967,0.999838,0.999980,0.978753,0.987809,0.998984,...,0.941283,0.923295,0.925056,0.877194,0.823605,0.981042,0.863164,0.000000,0.831659,0.870439
klp-15,0.992855,0.991003,0.992596,0.998883,0.992075,0.995930,0.993608,0.970094,0.995467,0.995060,...,0.877782,0.884825,0.832314,0.704056,0.546074,0.776358,0.945005,0.831659,0.000000,0.764855


In [25]:
# ensuring the distance matrix is symmetric
# each cell is averaged with its mirror cell across the diagonal
mirrored = dist_matrix.T
dist_matrix = (dist_matrix + mirrored) / 2

In [26]:
# saving the symmetrized distance matrix (gene labels are written as the index column)
dist_matrix.to_csv("dist_matrix_100_full.csv")