In [1]:
import pandas as pd
import numpy as np

from parsers.obo import parse_obo
from evaluation import evaluate, Hprecision_micro, Hrecall_micro, HF1_micro

In [2]:
# Notebook-wide configuration: ontology names and organism identifiers.
ONTOLOGIES = [
    'biological_process',
    'cellular_component',
    'molecular_function',
]
ORGANISMS = [
    'celegans',
    'dmel',
    'hg',
    'mm',
]

# Parse the OBO file once up front. Of the four values returned, only
# `ontology_graphs` (indexed by ontology name) is used by the cells below.
ontology_path = '../datasets/raw/obo/go-basic.obo'
(
    gos,
    ontology_gos,
    go_alt_ids,
    ontology_graphs,
) = parse_obo(ontology_path)

In [3]:
# Select which organism / ontology this run evaluates.
organism = 'celegans'
ontology = 'cellular_component'
print(organism, ontology)

# Model scores: tab-separated table indexed by (pos, seqname); the columns
# are the term ids later used to cut the ontology sub-graph.
file_name = 'results/results_model_{}_{}.csv'.format(organism, ontology)
results = pd.read_csv(file_name, sep='\t').set_index(['pos', 'seqname'])

# Random baseline: uniform scores in [0, 1), zeroed wherever the model score
# is exactly 0 so both tables share the same zero entries.
# A dedicated, seeded generator makes the baseline (and every metric and file
# derived from it) reproducible under Restart & Run All, without touching the
# global NumPy random state.
RANDOM_SEED = 0
rng = np.random.RandomState(RANDOM_SEED)
random_scores = rng.uniform(size=results.shape) * (results != 0).values
random_results = pd.DataFrame(random_scores, columns=results.columns, index=results.index)

# Restrict the ontology graph to the terms that actually have a score column.
go_ids = results.columns.tolist()
ontology_subgraph = ontology_graphs[ontology].subgraph(go_ids)

celegans cellular_component


In [4]:
# Use one threshold for both runs so the model and the random baseline are
# evaluated under identical settings.
eval_threshold = 0.5

# Model scores first, then the random baseline (same order as the progress bars).
post_results, preds = evaluate(
    results,
    ontology_subgraph,
    threshold=eval_threshold,
)
random_post_results, random_preds = evaluate(
    random_results,
    ontology_subgraph,
    threshold=eval_threshold,
)

100%|██████████| 4042/4042 [01:02<00:00, 64.48it/s]
100%|██████████| 4042/4042 [01:14<00:00, 53.99it/s]


In [5]:
# Both preprocessed tables are tab-separated and keyed by (pos, seqname).
index_cols = ['pos', 'seqname']

annots_path = '../datasets/preprocessed/{}/expanded_annots_{}.csv'.format(organism, ontology)
expanded_annots = pd.read_csv(annots_path, sep='\t').set_index(index_cols)

genome_path = '../datasets/preprocessed/{}/genome.csv'.format(organism)
genome = pd.read_csv(genome_path, sep='\t').set_index(index_cols)

# Keep an annotation only if its term has a score column AND its
# (pos, seqname) key is present in the results table.
term_is_scored = expanded_annots.go_id.isin(go_ids)
key_is_scored = expanded_annots.index.isin(results.index.tolist())
annots_test = expanded_annots[term_is_scored & key_is_scored]

# Ground truth: (pos, seqname) -> list of distinct go_id values for that key.
true_annots_test = {
    key: list(set(group['go_id'].values))
    for key, group in annots_test.groupby(index_cols)
}

In [6]:
# Each metric is computed twice, model predictions first and random baseline
# second, against the same sub-graph and ground truth.
prediction_sets = (preds, random_preds)

results_precision, random_precision = [
    Hprecision_micro(ontology_subgraph, predicted, true_annots_test)
    for predicted in prediction_sets
]
print('Precision', results_precision, random_precision)

results_recall, random_recall = [
    Hrecall_micro(ontology_subgraph, predicted, true_annots_test)
    for predicted in prediction_sets
]
print('Recall', results_recall, random_recall)

results_F1, random_F1 = [
    HF1_micro(ontology_subgraph, predicted, true_annots_test)
    for predicted in prediction_sets
]
print('F1', results_F1, random_F1)

# Persist the post-processed random-baseline table next to the model results.
random_out_path = 'results/random_post_results_model_{}_{}.csv'.format(organism, ontology)
random_post_results.to_csv(random_out_path, sep='\t', index=True)


Precision 0.4118377850740971 0.3319759965709387
Recall 0.4237195288961928 0.14142244134027207
F1 0.41769417694176936 0.19834816569562713
