In [None]:
import pickle as pkl
import networkx as nx
import random as rd

In [None]:
def get_character(graph,u,v):
    """Return the 'character' attribute of the edge between two species.

    Parameters
    ----------
    graph : graph whose nodes carry a 'species' attribute and whose edges
        carry a 'character' attribute.
    u, v : species identifiers of the edge's two endpoints, in edge order.

    Returns
    -------
    The value stored under 'character' on the edge (node of u, node of v).

    Raises
    ------
    KeyError
        If `u` or `v` is not the species of any node in `graph`.
    """
    # Map each species identifier back to its node key. If several nodes
    # share a species, the last one iterated wins.
    species_to_node = {graph.nodes[n]['species']: n for n in graph.nodes()}
    source = species_to_node[u]
    target = species_to_node[v]
    return graph.edges[(source,target)]['character']

In [None]:
def contingency_table(true_graph,graph):
    """Count edge-level agreement between a reference graph and an inferred one.

    Edges are compared as ordered pairs of identifiers: the 'kegg_id' node
    attribute in `true_graph` and the 'species' node attribute in `graph`.

    Parameters
    ----------
    true_graph : reference graph; every node must have a 'kegg_id' attribute.
    graph : inferred graph; every node must have a 'species' attribute and
        every edge reported as correct must have a 'character' attribute.

    Returns
    -------
    (tp, tn, fp, fn) : tuple of int
        tp - inferred edges that are also in the reference graph;
        fp - inferred edges that are not in the reference graph;
        fn - reference edges that were not inferred;
        tn - n*(n-1) - (tp + fp + fn), with n the number of nodes, i.e. the
             remaining ordered pairs of distinct vertices.

    Raises
    ------
    ValueError
        If the two graphs do not have the same set of node identifiers.

    Side effects
    ------------
    Prints the list of correctly inferred edges as (u, v, character) triples.
    """
    t_nodes = [true_graph.nodes[n]['kegg_id'] for n in true_graph.nodes()]
    t_edges = [(true_graph.nodes[u]['kegg_id'],true_graph.nodes[v]['kegg_id']) for u,v in true_graph.edges()]
    i_nodes = [graph.nodes[n]['species'] for n in graph.nodes()]
    i_edges = [(graph.nodes[u]['species'],graph.nodes[v]['species']) for u,v in graph.edges()]

    if (len(t_nodes) != len(i_nodes) or set(t_nodes) != set(i_nodes)):
        raise ValueError("compared graphs must have the same vertex sets")

    # Set copies are used only for membership tests, so each lookup is O(1)
    # instead of a scan over the whole edge list. Iteration still runs over
    # the lists, which keeps counts and the order of `correct` unchanged.
    t_edge_set = set(t_edges)
    i_edge_set = set(i_edges)

    tp,fp = 0,0
    correct = []

    for u,v in i_edges:
        if (u,v) in t_edge_set:
            tp += 1
            correct.append((u,v,get_character(graph,u,v)))
        else:
            fp += 1

    fn = sum(1 for edge in t_edges if edge not in i_edge_set)

    # Everything that is neither tp, fp nor fn among the n*(n-1) ordered
    # pairs of distinct vertices is counted as a true negative.
    tn = (len(i_nodes) * (len(i_nodes)-1) - (tp + fp + fn))

    print("\t HGTs inferred correctly:",correct)

    return tp,tn,fp,fn

In [None]:
def performance(true_graph,graph):
    """Compute classification metrics for an inferred graph against a reference.

    Parameters
    ----------
    true_graph : reference graph, passed through to `contingency_table`.
    graph : inferred graph; its `order()` and `size()` are reported as well.

    Returns
    -------
    tuple
        (order, size, tp, tn, fp, fn, accuracy, precision, recall, f1).
        A metric whose denominator is zero is reported as NaN.
    """
    tp,tn,fp,fn = contingency_table(true_graph,graph)
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else float('nan')
    precision = tp / (tp + fp) if tp + fp > 0 else float('nan')
    recall = tp / (tp + fn) if tp + fn > 0 else float('nan')
    # F1 is the harmonic mean of precision and recall: 2*P*R / (P + R).
    # A NaN precision or recall makes the comparison below False, so F1 is
    # NaN in that case too.
    f1 = (2*precision*recall)/(precision + recall) if precision + recall > 0 else float('nan')

    return (graph.order(), graph.size(),
            tp, tn, fp, fn,
            accuracy, precision, recall,f1)


In [None]:
from google.colab import files

In [None]:
# Open Colab's upload dialog so the pickled graph files can be sent to the runtime.
files.upload()

Saving inferred_HGT_algo2_heur.pkl to inferred_HGT_algo2_heur.pkl


{'inferred_HGT_algo2_heur.pkl': b'\x80\x04\x95j:\x00\x00\x00\x00\x00\x00\x8c\x18networkx.classes.digraph\x94\x8c\x07DiGraph\x94\x93\x94)\x81\x94}\x94(\x8c\x05graph\x94}\x94\x8c\x05_node\x94}\x94(\x8a\x06\x90\xe1iQ\x18\x7f}\x94(\x8c\x07species\x94\x8c\x06T01842\x94\x8c\x06number\x94K3\x8c\ttimestamp\x94K}\x8c\x04type\x94\x8c\x00\x94\x8c\x05label\x94]\x94(K\x01K\x00K\x00K\x01K\x00K\x00K\x00K\x00K\x00K\x01K\x00K\x00K\x01K\x00K\x00K\x00K\x00K\x00K\x00K\x00K\x00K\x00K\x00eu\x8a\x06\x10\xdfiQ\x18\x7f}\x94(h\n\x8c\x06T02505\x94h\x0cK1h\rK}h\x0eh\x0fh\x10]\x94(K\x01K\x00K\x00K\x01K\x00K\x00K\x00K\x00K\x00K\x01K\x00K\x00K\x01K\x00K\x00K\x00K\x00K\x00K\x00K\x00K\x00K\x00K\x00eu\x8a\x06P\xe0iQ\x18\x7f}\x94(h\n\x8c\x06T04222\x94h\x0cK2h\rK}h\x0eh\x0fh\x10]\x94(K\x01K\x00K\x00K\x01K\x00K\x00K\x00K\x00K\x00K\x01K\x00K\x00K\x01K\x00K\x01K\x00K\x00K\x00K\x00K\x00K\x00K\x00K\x00eu\x8a\x06\xd0\xddiQ\x18\x7f}\x94(h\n\x8c\x06T03496\x94h\x0cK0h\rK}h\x0eh\x0fh\x10]\x94(K\x01K\x00K\x00K\x01K\x00K\x00K\x00K\x

**Real-life context vs Greedy Algorithm**

In [None]:
# Load the reference HGT graph and the graph inferred by the greedy algorithm.
# `with` guarantees the file handles are closed even if unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('/content/interphylum_50_HGT.pkl',"rb") as file:
    true_HGT = pkl.load(file)

with open('/content/inferred_HGT_algo2.pkl',"rb") as file:
    inferred_HGT = pkl.load(file)

# Score the inferred graph against the reference.
order,size,tp,tn,fp,fn,accuracy,precision,recall,f1 = performance(true_HGT,inferred_HGT)

print("\n\t\t\t Greedy Algorithm RESULTS:")
print("\t Order :", order)
print("\t Size :", size)
print("\t N. tp :", tp)
print("\t N. tn :",tn)
print("\t N. fp :",fp)
print("\t n. fn :",fn)
print("\t accuracy: ", accuracy)
print("\t precision: ", precision)
print("\t recall: ", recall)
print("\t f1: ", f1)


	 HGTs inferred correctly: [('T06085', 'T02545', 9), ('T03770', 'T02545', 9), ('T00964', 'T01485', 3), ('T05738', 'T05843', 9), ('T04721', 'T02545', 9)]

			 Greedy Algorithm RESULTS:
	 Order : 45
	 Size : 397
	 N. tp : 5
	 N. tn : 1544
	 N. fp : 392
	 n. fn : 39
	 accuracy:  0.7823232323232323
	 precision:  0.012594458438287154
	 recall:  0.11363636363636363
	 f1:  0.011337868480725625


**Real-life context vs Greedy + Heuristic**

In [None]:
# Load the reference HGT graph and the graph inferred by greedy + heuristic.
# `with` guarantees the file handles are closed even if unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('/content/interphylum_50_HGT.pkl',"rb") as file:
    true_HGT = pkl.load(file)

with open('/content/inferred_HGT_algo2_heur.pkl',"rb") as file:
    inferred_HGT = pkl.load(file)

# Score the inferred graph against the reference.
order,size,tp,tn,fp,fn,accuracy,precision,recall,f1 = performance(true_HGT,inferred_HGT)

print("\n\t\t\t Greedy + Heuristic RESULTS:")
print("\t Order :", order)
print("\t Size :", size)
print("\t N. tp :", tp)
print("\t N. tn :",tn)
print("\t N. fp :",fp)
print("\t n. fn :",fn)
print("\t accuracy: ", accuracy)
print("\t precision: ", precision)
print("\t recall: ", recall)
print("\t f1: ", f1)

	 HGTs inferred correctly: [('T03770', 'T02545', 10), ('T00964', 'T01485', 4), ('T05738', 'T05843', 10), ('T04721', 'T02545', 10), ('T06085', 'T02545', 10)]

			 Greedy + Heuristic RESULTS:
	 Order : 45
	 Size : 296
	 N. tp : 5
	 N. tn : 1645
	 N. fp : 291
	 n. fn : 39
	 accuracy:  0.8333333333333334
	 precision:  0.016891891891891893
	 recall:  0.11363636363636363
	 f1:  0.014705882352941176
