In [1]:
import sys
import json
from pprint import pprint as print_

sys.path.append("../src/")

import numpy as np
from tqdm import tqdm_notebook as tqdm

from isanlp_srl_framebank.pipeline_default import PipelineDefault

In [2]:
ppl = PipelineDefault(address_morph=('vmh1.isa.ru', 3300),
                      address_syntax=('vmh1.isa.ru', 3311),
                      address_srl=('vmh1.isa.ru', 3322))

In [3]:
def get_roles_pred(lemma, role_annot, part_id):
    ann_sent = role_annot[part_id]
    predicates = {}
    arguments = {}
    for event in ann_sent:
        predicate = {
            'lemma': lemma[part_id][event.pred[0]],
        }
        predicates[event.pred[0]] = predicate
        arguments[event.pred[0]] = []
        for arg in event.args:
            #print(vars(arg))
            argument = {
                'tag': arg.tag,
                'lemma': lemma[part_id][arg.begin]
            }
            arguments[event.pred[0]].append(argument)
            
    return predicates, arguments

In [4]:
def get_example(corpus, ex_number, part_id):
    words = []
    for obj in corpus[ex_number][1][part_id]:
        words.append(obj['form'])
        
    return " ".join(words)

def get_roles_true(corpus, ex_number, part_id):
    predicates = {}
    arguments = {}
    for i, obj in enumerate(corpus[ex_number][1][part_id]):
        if 'rank' in obj:
            if obj['rank'] == 'Предикат':
                predicate = {
                    'lemma': obj['lemma']
                }
                predicates[i] = predicate
            else:
                argument = {
                    'lemma': obj['lemma'],
                    'tag': obj['rolepred1']
                }
                pred_id = obj['fillpred']
                if pred_id not in arguments.keys():
                    arguments[pred_id] = []
                arguments[pred_id].append(argument)
    return predicates, arguments

In [5]:
with open("../data/cleared_corpus.json", 'r', encoding='utf-8') as f:
    corpus = json.load(f)

In [6]:
def random_predictions(corpus, ppl, n_samples=100):
    samples_idxs = np.random.choice(len(corpus), size=n_samples)
    true_roles = [get_roles_true(corpus, ex_num, 0) for ex_num in samples_idxs]
    texts = [get_example(corpus, ex_num, 0) for ex_num in samples_idxs]
    pred_roles = [ppl(text) for text in tqdm(texts, desc='Analyzing texts')]
    pred_roles = [get_roles_pred(res['lemma'], res['srl'], 0) for res in pred_roles]
    return true_roles, pred_roles

In [None]:
true_roles, pred_roles = random_predictions(corpus, ppl, n_samples=100)

HBox(children=(IntProgress(value=0, description='Analyzing texts'), HTML(value='')))

In [13]:
def compute_metrics(y_pred, y_true, weak_matching=False):
    true_positive = 0
    condition_positive = 0
    predicted_condition_positive = 0
    
    for i, (true_predicates, true_arguments) in enumerate(y_true):
        num_true_predicates = len(true_predicates)
        condition_positive += num_true_predicates
        
        pred_predicates, pred_arguments = y_pred[i]
        
        predicted_condition_positive += len(pred_predicates)
        
        for true_pred_idx, true_predicate in true_predicates.items():
            if true_pred_idx in pred_predicates:
                true_arguments_i = true_arguments[true_pred_idx]
                pred_arguments_i = pred_arguments[true_pred_idx]
                
                if len(true_arguments_i) == len(pred_arguments_i):
                    for obj in true_arguments_i:
                        true_tag = obj['tag']
                        true_lemma = obj['lemma']
                        for obj_pred in pred_arguments_i:
                            if weak_matching:
                                if obj_pred['tag'] == true_tag:
                                    true_positive += 1
                            else:
                                if obj_pred['tag'] == true_tag and obj_pred['lemma'] == true_lemma:
                                    true_positive += 1
                                
    recall = true_positive/condition_positive  
    percision = true_positive/predicted_condition_positive
    
    return {
        'recall': recall,
        'percision': percision,
        'f1': 2 * ((percision*recall)/(percision+recall))
    }

In [14]:
compute_metrics(y_pred=pred_roles, y_true=true_roles)

{'recall': 0.2, 'percision': 0.035897435897435895, 'f1': 0.06086956521739131}

In [13]:
compute_metrics(true_roles, pred_roles)

{'recall': 0.0972972972972973, 'percision': 0.45, 'f1': 0.16}

In [19]:
for i in range(10, 20):
    text = get_example(corpus, i, 0)
    res = ppl(text)
    pred = get_roles_pred(res['lemma'], res['srl'], 0)
    true = get_roles_true(corpus, i, 0)
    print("="*40)
    print("Example: ", i)
    print("="*10)
    print("True")
    print_(true)
    print("Predict")
    print_(pred)

Example:  10
True
({9: {'lemma': 'зависеть'}},
 {9: [{'lemma': 'стоимость', 'tag': 'пациенс'},
      {'lemma': 'количество', 'tag': 'причина'}]})
Predict
({9: {'lemma': 'зависеть'}},
 {9: [{'lemma': 'стоимость', 'tag': 'пациенс'},
      {'lemma': 'количество', 'tag': 'причина'}]})
Example:  11
True
({26: {'lemma': 'зависеть'}},
 {26: [{'lemma': 'который', 'tag': 'пациенс'},
       {'lemma': 'преобладание', 'tag': 'причина'}]})
Predict
({}, {})
Example:  12
True
({28: {'lemma': 'зависеть'}}, {28: [{'lemma': 'работа', 'tag': 'причина'}]})
Predict
({}, {})
Example:  13
True
({26: {'lemma': 'зависеть'}},
 {26: [{'lemma': 'майор', 'tag': 'пациенс'},
       {'lemma': 'желание', 'tag': 'причина'}]})
Predict
({5: {'lemma': 'попадать'}},
 {5: [{'lemma': 'экзерсис', 'tag': 'способ'},
      {'lemma': 'переплет', 'tag': 'конечная точка'}]})
Example:  14
True
({22: {'lemma': 'зависеть'}},
 {22: [{'lemma': 'параметр', 'tag': 'пациенс'},
       {'lemma': 'действие', 'tag': 'причина'}]})
Predict
({3: 