In [1]:
from src import Icsr
from src.utils import get_matches

import datasets
import random
from copy import deepcopy

In [2]:
# Load the raw BioDEX dataset through the Hugging Face `datasets` library and
# turn its train split into `matches`.
# NOTE(review): later cells read `m.reports` on every match, so each match
# presumably groups one article with its reports — confirm in src.utils.get_matches.
dataset = datasets.load_dataset("BioDEX/raw_dataset")
matches = get_matches(dataset['train'])
print(len(matches))

Using custom data configuration BioDEX--raw_dataset-e1a8735a3d189f31


Downloading and preparing dataset json/BioDEX--raw_dataset to /Users/kldooste/.cache/huggingface/datasets/BioDEX___json/BioDEX--raw_dataset-e1a8735a3d189f31/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/46.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/47.2M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/45.6M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/45.9M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/25.9M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

0 tables [00:00, ? tables/s]

Dataset json downloaded and prepared to /Users/kldooste/.cache/huggingface/datasets/BioDEX___json/BioDEX--raw_dataset-e1a8735a3d189f31/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

65648


### Simplest IAA
privileged ICSR vs. random ICSR

In [3]:
# For every article, parse each of its reports into an Icsr.
# Every kept entry is a (report position within the article, Icsr) tuple;
# reports for which Icsr.from_report returns something falsy are dropped.
icsrs = []

for m in matches:
    new_icsrs = []
    for index, r in enumerate(m.reports):
        parsed = Icsr.from_report(r)
        if parsed:
            new_icsrs.append((index, parsed))
    icsrs.append(new_icsrs)

# Flat pool of all parsed entries across articles (used for the random baseline).
all_icsrs = []
for ls in icsrs:
    all_icsrs.extend(ls)

In [4]:
# For every article, sample one privileged report and put all the others in a list.
# Both result lists stay aligned with `icsrs`: an article without any parsed
# ICSR contributes `None` to `sampled_icsrs` and `[]` to `other_icsrs`.
random.seed(42)

sampled_icsrs = []
other_icsrs = []

for ls in icsrs:
    # random.choice is only called for non-empty lists, so the RNG stream is
    # consumed exactly once per article that has at least one ICSR.
    sampled = random.choice(ls) if ls else None
    # Keep every entry except the sampled one. Filtering by identity avoids
    # deep-copying every ICSR and does not depend on Icsr implementing value
    # equality (which `deepcopy(ls).remove(sampled)` silently required).
    other = [entry for entry in ls if entry is not sampled]
    sampled_icsrs.append(sampled)
    other_icsrs.append(other)


In [5]:
# Summarise how many articles survive each filter.
# A truthy entry in sampled_icsrs means the article had at least one ICSR;
# a non-empty list in other_icsrs means it had more than one.
articles_with_icsr = sum(1 for picked in sampled_icsrs if picked)
articles_with_several = sum(1 for rest in other_icsrs if rest)

print(f'number of total matches: \t\t{len(matches):,}')
print(f'number of articles with >=1 icsr: \t{articles_with_icsr:,}')
print(f'number of articles with >1 icsr: \t{articles_with_several:,}')

number of total matches: 		65,648
number of articles with >=1 icsr: 	51,212
number of articles with >1 icsr: 	27,377


In [7]:
# Score the privileged ICSR of every article against
#   (a) another randomly chosen ICSR of the same article  -> similar_scores
#   (b) an ICSR drawn at random from the whole pool        -> random_scores
# Note: the pool draw in (b) is taken from all_icsrs, which also contains the
# ICSRs of the current article (including the privileged one itself).
random.seed(42)

similar_scores, random_scores = [], []
for sampled, others in zip(sampled_icsrs, other_icsrs):
    if not others:
        # Articles with fewer than two ICSRs have nothing to compare against.
        continue

    other = random.choice(others)
    sampled_icsr, other_icsr = sampled[1], other[1]
    similar_scores.append(sampled_icsr.score(other_icsr))

    random_other = random.choice(all_icsrs)[1]
    random_scores.append(sampled_icsr.score(random_other))
        

In [8]:
# aggregate scores across precision, recall and f1

def agg_scores(list, index):
    """Average one position of every score tuple.

    Args:
        list: sequence of indexable score entries (note: the parameter name
            shadows the builtin `list` inside this function).
        index: position to read from each entry.

    Returns:
        The arithmetic mean of `entry[index]` over all entries.

    Raises:
        ZeroDivisionError: if `list` is empty.
    """
    picked = tuple(entry[index] for entry in list)
    return sum(picked) / len(picked)

# Print the mean of score positions 0, 1 and 2 — first for the random
# baseline, then for the same-article comparison — each followed by a blank line.
for score_list in (random_scores, similar_scores):
    for position in range(3):
        print(agg_scores(score_list, position))
    print('')
print(f'Calculated over {len(similar_scores):,} applicable examples.')

0.2432435244969848
0.24388269245800143
0.2428201908688846

0.728383086816412
0.7286750223845676
0.7204383709761228

Calculated over 27,377 applicable examples.


In [9]:
# Same comparison as the plain `score` cell, but using Icsr.score_detangled:
#   (a) privileged ICSR vs. another random ICSR of the same article -> similar_scores
#   (b) privileged ICSR vs. a random ICSR from the whole pool       -> random_scores
# Note: the pool draw in (b) is taken from all_icsrs, which also contains the
# ICSRs of the current article (including the privileged one itself).
random.seed(42)

similar_scores, random_scores = [], []
for sampled, others in zip(sampled_icsrs, other_icsrs):
    if not others:
        # Articles with fewer than two ICSRs have nothing to compare against.
        continue

    other = random.choice(others)
    sampled_icsr, other_icsr = sampled[1], other[1]
    similar_scores.append(sampled_icsr.score_detangled(other_icsr))

    random_other = random.choice(all_icsrs)[1]
    random_scores.append(sampled_icsr.score_detangled(random_other))
        

In [13]:
# Inspect a single detangled score entry. As read by agg_metric below, the
# layout is: [0] seriousness, [1] patientsex, [2] drug (precision, recall),
# [3] reaction (precision, recall).
similar_scores[20]

(1, 1, (1.0, 1.0), (0.6666666666666666, 1.0))

In [25]:
def f1(precision, recall):
    """Harmonic mean of precision and recall.

    Returns 0.0 when either input is falsy (zero or None), which also
    avoids dividing by zero when both are 0.
    """
    if not (precision and recall):
        return 0.0
    numerator = 2 * (precision * recall)
    denominator = precision + recall
    return numerator / denominator

def agg_metric(target, metric, scores):
    """Aggregate one metric for one target over a list of detangled scores.

    Each entry of `scores` is indexed as: [0] seriousness, [1] patientsex,
    [2] drug (precision, recall), [3] reaction (precision, recall).

    Args:
        target: 'drug', 'reaction', 'seriousness' or 'patientsex'.
        metric: 'precision', 'recall', 'macro_f1' or 'micro_f1'. It is
            validated but otherwise ignored for 'seriousness' and
            'patientsex', which always return the plain mean of their value.
        scores: sequence of score entries as described above.

    Returns:
        precision / recall: mean of that component over all entries.
        macro_f1: f1 of the mean precision and the mean recall.
        micro_f1: mean of the per-entry f1 values.

    Raises:
        AssertionError: on an unknown target or metric.
        ZeroDivisionError: if `scores` is empty.
    """
    assert target in ['drug', 'reaction', 'seriousness', 'patientsex']
    assert metric in ['precision', 'recall', 'macro_f1', 'micro_f1']

    # Single-valued targets: one number per entry, the metric does not apply.
    scalar_columns = {'seriousness': 0, 'patientsex': 1}
    if target in scalar_columns:
        column = scalar_columns[target]
        return sum([entry[column] for entry in scores]) / len(scores)

    # Set-valued targets: each entry holds a (precision, recall) pair.
    column = {'drug': 2, 'reaction': 3}[target]

    def component_mean(position):
        values = [entry[column][position] for entry in scores]
        return sum(values) / len(values)

    if metric == 'precision':
        return component_mean(0)
    if metric == 'recall':
        return component_mean(1)
    if metric == 'macro_f1':
        mean_precision = component_mean(0)
        mean_recall = component_mean(1)
        return f1(mean_precision, mean_recall)

    # micro_f1: average the f1 computed separately for every entry.
    per_entry_f1 = [f1(entry[column][0], entry[column][1]) for entry in scores]
    return sum(per_entry_f1) / len(per_entry_f1)


In [31]:
# Report precision, recall and both f1 variants for the set-valued targets,
# first for the random baseline and then for the same-article comparison.
print('macro_f1 = f1 of aggregated precision and recall')
print('micro_f1 = aggregate of f1 per datapoint')

# (metric name passed to agg_metric, label exactly as printed)
metric_labels = (
    ('precision', 'precision:'),
    ('recall', 'recall:'),
    ('macro_f1', 'macro_f1 :'),
    ('micro_f1', 'micro_f1 :'),
)

for heading, score_list in (('Random:', random_scores), ('IAA:', similar_scores)):
    print()
    print(heading)
    for target in ('drug', 'reaction'):
        for metric, label in metric_labels:
            print(f'{target} - {label}', agg_metric(target, metric, score_list))

macro_f1 = f1 of aggregated precision and recall
micro_f1 = aggregate of f1 per datapoint

Random:
drug - precision: 0.016090494816025473
drug - recall: 0.016627807319550682
drug - macro_f1 : 0.016354739091805216
drug - micro_f1 : 0.01315983369399975
reaction - precision: 0.010403785436066432
reaction - recall: 0.011783976815589797
reaction - macro_f1 : 0.011050953672791058
reaction - micro_f1 : 0.009031875990767883

IAA:
drug - precision: 0.7127620877631963
drug - recall: 0.7114308446249148
drug - macro_f1 : 0.7120958440142044
drug - micro_f1 : 0.6827454806898535
reaction - precision: 0.5612208652016648
reaction - recall: 0.5634279150444074
reaction - macro_f1 : 0.5623222245273879
reaction - micro_f1 : 0.5196896810138785


### Different IAA
privileged ICSR vs. random ICSR that is not from the same company

In [25]:
# for every article, parse all the reports
# also keep track of the company that submitted the icsr
def get_company(report):
    """Extract a company name from `report.companynumb`.

    The name is taken to be the text between the first and second hyphen,
    stripped of surrounding whitespace and title-cased.

    Returns:
        The extracted name, or None when `companynumb` is falsy or contains
        no hyphen. The result can be an empty string if nothing sits
        between the first two hyphens.
    """
    number = report.companynumb
    if not number or '-' not in number:
        return None
    return number.split('-')[1].strip().title()

# Re-parse every article's reports, this time keeping the submitting company.
# Every kept entry is a (report position, company, Icsr) tuple; entries with a
# falsy company or a falsy Icsr are dropped.
# Note: this overwrites `icsrs` / `all_icsrs` from the first section, whose
# entries were 2-tuples.
icsrs = []

for m in matches:
    new_icsrs = []
    for index, r in enumerate(m.reports):
        company = get_company(r)
        parsed = Icsr.from_report(r)
        if company and parsed:
            new_icsrs.append((index, company, parsed))
    icsrs.append(new_icsrs)

# Flat pool of all kept entries across articles (used for the random baseline).
all_icsrs = []
for ls in icsrs:
    all_icsrs.extend(ls)

In [27]:
# For every article, sample one privileged report and put all the others in a list.
# Both result lists stay aligned with `icsrs`: an article without any kept
# ICSR contributes `None` to `sampled_icsrs` and `[]` to `other_icsrs`.
random.seed(42)

sampled_icsrs = []
other_icsrs = []

for ls in icsrs:
    # random.choice is only called for non-empty lists, so the RNG stream is
    # consumed exactly once per article that has at least one ICSR.
    sampled = random.choice(ls) if ls else None
    # Keep every entry except the sampled one. Filtering by identity avoids
    # deep-copying every ICSR and does not depend on Icsr implementing value
    # equality (which `deepcopy(ls).remove(sampled)` silently required).
    other = [entry for entry in ls if entry is not sampled]
    sampled_icsrs.append(sampled)
    other_icsrs.append(other)


In [31]:
# Summarise how many articles survive each filter.
# A truthy entry in sampled_icsrs means the article had at least one ICSR;
# a non-empty list in other_icsrs means it had more than one.
articles_with_icsr = sum(1 for picked in sampled_icsrs if picked)
articles_with_several = sum(1 for rest in other_icsrs if rest)

print(f'number of total matches: \t\t{len(matches):,}')
print(f'number of articles with >=1 icsr: \t{articles_with_icsr:,}')
print(f'number of articles with >1 icsr: \t{articles_with_several:,}')

number of total matches: 		65,648
number of articles with >=1 icsr: 	48,708
number of articles with >1 icsr: 	25,675


In [38]:
# Score the privileged ICSR of every article against
#   (a) another randomly chosen ICSR of the same article, kept only when it
#       was submitted by a different company               -> similar_scores
#   (b) an ICSR drawn at random from the whole pool        -> random_scores
# Entries here are (report position, company, Icsr) tuples.
# NOTE(review): only one candidate is drawn per article; if it shares the
# company, the article is skipped even when other reports from a different
# company exist. Filtering `others` by company before drawing would keep
# those articles — confirm whether that is intended.
random.seed(42)

similar_scores, random_scores = [], []
for sampled, others in zip(sampled_icsrs, other_icsrs):
    if not others:
        # Articles with fewer than two ICSRs have nothing to compare against.
        continue

    other = random.choice(others)
    if sampled[1] == other[1]:
        # Same company: not an independent annotation, so skip the article.
        continue

    sampled_icsr, other_icsr = sampled[2], other[2]
    similar_scores.append(sampled_icsr.score(other_icsr))

    random_other = random.choice(all_icsrs)[2]
    random_scores.append(sampled_icsr.score(random_other))

print(f'Made {len(similar_scores)} comparison same article different company')
        

Made 19254 comparison same article different company


In [39]:
# Print the mean of score positions 0, 1 and 2 — first for the random
# baseline, then for the different-company comparison — each followed by a
# blank line.
for score_list in (random_scores, similar_scores):
    for position in range(3):
        print(agg_scores(score_list, position))
    print('')
print(f'Calculated over {len(similar_scores):,} applicable examples.')

0.24530324070215034
0.24606646000919297
0.24492883153940762

0.7316943442414694
0.7321459690671169
0.7233746876594158

Calculated over 19,254 applicable examples.
