## Exporting results as (entity1, relation, entity2, confidence)

### Import required libraries

In [6]:
import pandas as pd
from collections import Counter
from pathlib import Path

### Load csv

In [7]:
edges_df = pd.read_csv('data/relations.csv')

### Compute confidence score

In [8]:
edges_df = edges_df.rename(columns={'source': 'entity_1', 'target': 'entity_2'})

edge_counter = Counter(
    [(row['entity_1'], row['relation'], row['entity_2']) for _, row in edges_df.iterrows()]
)
max_count = max(edge_counter.values())

edges_df['confidence'] = edges_df.apply(
    lambda row: 0.5*row['distance_score'] + 0.5*(edge_counter[(row['entity_1'], row['relation'], row['entity_2'])]/max_count),
    axis=1
)

In [9]:
triples_df = edges_df[['entity_1', 'relation', 'entity_2', 'confidence']]
triples_df.head

<bound method NDFrame.head of                         entity_1 relation                 entity_2  confidence
0                      JN.1/KP.2  LABEL_1                S-induced    0.523810
1               {Delta}3a7b-Nluc  LABEL_1                      RBD    0.525000
2                            ARM  LABEL_1                      RBD    0.533333
3           SARS-CoV-2 infection  LABEL_1             inflammation    0.571429
4           SARS-CoV-2 infection  LABEL_1                infection    0.750000
..                           ...      ...                      ...         ...
65  nontuberculous mycobacterial  LABEL_1                      NTM    0.625000
66                           NTM  LABEL_1                infection    0.625000
67                           NTM  LABEL_1  NTM-associated diseases    0.583333
68           Mycobacterium avium  LABEL_1                    avium    0.750000
69                           NTM  LABEL_1                   terrae    0.520000

[70 rows x 4 columns]

In [10]:
data_path = Path("output")
data_path.mkdir(exist_ok=True)
triples_df.to_csv('output/triples_output.csv', index=False)