In [None]:
import sys, csv, os
from nplinker.nplinker import NPLinker
from nplinker.logconfig import LogConfig
%reload_ext autoreload
%autoreload 2

In [None]:
# Create the NPLinker instance from 'local_fbmn.toml' (presumably the dataset
# configuration file -- confirm against the NPLinker docs) and load its data.
npl = NPLinker('local_fbmn.toml')
npl.load_data()
# There is no need to call process_dataset() afterwards any more: that step is
# now handled internally.

In [None]:
# Show the name of every scoring method that is available
# (the methods are defined in nplinker/scoring/methods.py).
print('Available scoring methods:')
bullet = ' - {}'
for method_name in npl.scoring_methods:
    print(bullet.format(method_name))
    
# An instance of a particular method is obtained by passing its name to
# scoring_method(); fetch the two used below ('metcalf' first, then 'testscore').
mc, test = (npl.scoring_method(name) for name in ('metcalf', 'testscore'))

# Once you hold an instance of a method, its various parameters can be
# changed freely.
mc.cutoff = 3.5
test.foo = 456

# Examples of the different ways you can tell nplinker to generate scoring results.

# 1. Simplest case: one set of objects and one scoring method. If the 3rd
# parameter to get_links is not given, it defaults to ANDing the results from
# the different methods, which makes no difference when there is only one.
results = npl.get_links(npl.gcfs[:10], mc)
# "results" is a dict indexed by the scoring method object(s) passed to get_links.
# Each value is another dict indexed by the input objects (e.g. the set of 10 GCFs
# in the above example), with the values being lists of whatever the output of the
# scoring method was. With metcalf, it returns SimpleNamespace objects with fields
# src (source object), dst (linked object) and score (metcalf score). Input objects
# with no links found are not included in the dict.
#
# e.g. results = { <mc obj>: {gcf_1: [link1, ...], gcf_2: [link1, ...], ... } }
for method, links_by_obj in results.items():
    # The closing quote was doubled here before, printing an unbalanced '"'.
    print('Results for method "{}"'.format(method.name))
    for obj, links in links_by_obj.items():
        print('   {} has {} links'.format(obj, len(links)))
        for link in links:
            # metcalf output: the score, then the linked object
            print('      -> {} | {}'.format(link.score, link.dst))
            
# NOTE: each call below rebinds "results", so only the output of the last call
# is left once this cell has run.

# 2. Use the same set of objects with two different methods, and AND the
# results together.
# NOTE: the returned "results" dict still has two entries here, one for each
# method. But if an object appears in the results from one method, it should
# be guaranteed to appear in the results for the other(s) too.
# (The object lists and the methods are both given as lists; presumably they
# are paired up by position -- confirm against get_links.)
results = npl.get_links([npl.gcfs[:10], npl.gcfs[:10]], [test, mc], npl.MODE_AND)

# 3. Same thing, but ORing the results.
results = npl.get_links([npl.gcfs[:10], npl.gcfs[:10]], [test, mc], npl.MODE_OR)

# 4. Run multiple completely independent methods on different sets of objects
# (GCFs for one, spectra for the other).
results = npl.get_links([npl.gcfs[:10], npl.spectra[:10]], [test, mc], npl.MODE_SEPARATE)