In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Host names of the remote services; they are combined with port numbers in the pipeline cell below.
# Both are empty placeholders here and must be filled in before the pipeline can connect.
SERVER0 = ''
SERVER2 = ''

In [None]:
from isanlp import PipelineCommon
from isanlp.processor_remote import ProcessorRemote
from isanlp.ru.processor_mystem import ProcessorMystem
from isanlp.ru.converter_mystem_to_ud import ConverterMystemToUd

# (host, port) pairs of the remote services.
# `address_morph` is defined but not referenced by the pipeline below.
address_morph = (SERVER0, 4333)
address_syntax = (SERVER2, 3334)
# NOTE(review): `address_rst` is assigned twice; the second assignment wins, so the endpoint
# actually used is (SERVER2, 3345). Confirm which one is intended.
address_rst = (SERVER0, 3346)
address_rst = (SERVER2, 3345)

# Each pipeline entry is a tuple: (processor, list of input annotation names, output mapping).
# Presumably the mapping is {processor output name: name stored in the pipeline result} —
# TODO confirm against the isanlp documentation.
ppl = PipelineCommon([
    # Remote processor at `address_syntax`; consumes the raw text.
    (ProcessorRemote(address_syntax[0], address_syntax[1], '0'),
     ['text'],
     {'sentences': 'sentences',
      'tokens': 'tokens',
      'lemma': 'lemma',
      'syntax_dep_tree': 'syntax_dep_tree',
      'postag': 'ud_postag'}),
    # Local Mystem processor; consumes tokens and sentences, yields 'postag'.
    (ProcessorMystem(delay_init=False),
     ['tokens', 'sentences'],
     {'postag': 'postag'}),
    # Converter; consumes 'postag' and yields 'morph' plus a rewritten 'postag'.
    (ConverterMystemToUd(),
     ['postag'],
     {'morph': 'morph',
      'postag': 'postag'}),
    # Remote processor at `address_rst`; consumes all annotations above and yields 'rst'.
    (ProcessorRemote(address_rst[0], address_rst[1], 'default'),
     ['text', 'tokens', 'sentences', 'postag', 'morph', 'lemma', 'syntax_dep_tree'],
     {'rst': 'rst'})
])

In [None]:
from utils.file_reading import read_annotation, read_edus, read_gold
from utils.evaluation import *

example = 'data/news2_4'

# Read the raw document text. The context manager closes the file handle as soon as the
# text has been read instead of leaving it open until garbage collection.
with open('corpus/RuRsTreebank_full_2/news2/news2_txt/news2_4.txt', 'r') as text_file:
    text = text_file.read().strip()

# Gold-standard segmentation and relation pairs for the same document.
gold_edus = read_edus(example)
gold_pairs = prepare_gold_pairs(read_gold(example, features=True))

In [None]:
%%time

result = ppl(text)

In [None]:
for tree in result['rst']:
    print(tree.proba)

In [None]:

# ForestExporter is otherwise only imported in a later cell; importing it here keeps this
# cell runnable after "Restart Kernel -> Run All".
from utils.export_to_rs3 import ForestExporter

# Write the predicted forest of the example document to an .rs3 file.
ex = ForestExporter()
ex(result['rst'], 'news2_4_pred.rs3')

In [None]:
from utils.evaluation import *

# Gather the EDU texts of every predicted tree into one flat list, in tree order.
pred_edus = list()
for tree in result['rst']:
    pred_edus.extend(extr_edus(tree))

In [None]:
pred_edus

In [None]:
gold_edus

In [None]:
len(pred_edus), len(gold_edus)

In [None]:
%%writefile utils/evaluation.py

import pandas as pd
from utils.file_reading import text_html_map

# Relation labels written as '<relation>_<suffix>' where the suffix is NS, SN or NN
# (presumably the nuclearity order — TODO confirm).
labels = ['condition_NS',
     'concession_NS',
     'elaboration_NS',
     'preparation_SN',
     #'background_SN',
     'condition_SN',
     'purpose_NS',
     'cause-effect_NS',
     'background_NS',
     'interpretation-evaluation_NS',
     'evidence_NS',
     'same-unit_NN',
     'joint_NN',
     'attribution_SN',
     'contrast_NN',
     'restatement_NN',
     'comparison_NN',
     'cause-effect_SN',
     'solutionhood_SN',
     'purpose_SN',
     'sequence_NN',
     'attribution_NS',
     'interpretation-evaluation_SN']

# Labels that are kept as-is; every entry of `labels` missing from this list is
# collapsed into an 'other_*' class by `class_mapper` below.
top_classes = [
    'attribution_NS',
    'attribution_SN',
    'purpose_NS',
    'purpose_SN',
    'condition_SN',
    'contrast_NN',
    'condition_NS',
    'joint_NN',
    'concession_NS',
    'same-unit_NN',
    'elaboration_NS',
    'cause-effect_NS',
    #'solutionhood_SN',
    #'cause-effect_SN'
]

# Maps each non-top label to 'other' + its last three characters, e.g. 'evidence_NS' -> 'other_NS'.
class_mapper = {weird_class: 'other' + weird_class[-3:] for weird_class in labels if not weird_class in top_classes}

# Replacement applied to predicted labels that fall into an 'other_*' class (see metric_parseval):
# all three are rewritten to 'joint_NN'.
pred_mapper = {
    'other_NN': 'joint_NN',
    'other_NS': 'joint_NN',
    'other_SN': 'joint_NN'
}

# Renaming of gold category names, applied by prepare_gold_pairs before the
# '<relation>_<order>' label is assembled.
target_map = {
    'relation': 'joint',
    'antithesis': 'contrast',
    'cause': 'cause-effect',
    'effect': 'cause-effect',
    'conclusion': 'restatement',
    'interpretation': 'interpretation-evaluation',
    'evaluation': 'interpretation-evaluation',
    'motivation': 'condition',
}

# Renaming of assembled gold '<relation>_<order>' labels, applied by prepare_gold_pairs.
relation_map = {
    'restatement_SN': 'restatement_NN',
    'restatement_NS': 'restatement_NN',
    'contrast_SN': 'contrast_NN',
    'contrast_NS': 'contrast_NN',
    'solutionhood_NS': 'elaboration_NS',
    'preparation_NS': 'elaboration_NS',
    'concession_SN': 'preparation_SN',
    'evaluation_SN': 'preparation_SN',
    'elaboration_SN': 'preparation_SN',
    'evidence_SN': 'preparation_SN',
    'background_SN': 'preparation_SN'
}

def prepare_gold_pairs(gold_pairs):
    """Normalise the relation labels of a gold-pairs frame in place and return it.

    The frame must provide the columns 'category_id' and 'order'. A 'relation'
    column holding '<category>_<order>' is added; 'category_id' and 'order' are
    then overwritten with the two halves of the normalised relation label.

    :param gold_pairs: pandas DataFrame of gold relation pairs (modified in place)
    :return: the same DataFrame object
    """
    TARGET = 'category_id'

    # Keep only the part of the category name before the first underscore, then rename.
    categories = gold_pairs[TARGET].map(lambda name: name.split('_')[0])
    categories = categories.replace([0.0], 'same-unit')
    gold_pairs[TARGET] = categories
    gold_pairs['order'] = gold_pairs['order'].replace([0.0], 'NN')
    gold_pairs[TARGET] = gold_pairs[TARGET].replace(target_map, regex=False)

    # Assemble the full label and apply the label-level renamings.
    relation = gold_pairs[TARGET].map(lambda name: name) + '_' + gold_pairs['order']
    gold_pairs['relation'] = relation
    gold_pairs['relation'] = gold_pairs['relation'].replace(relation_map, regex=False)

    # Collapse every non-top class into its 'other_*' bucket, one label at a time.
    for rare_label in class_mapper:
        gold_pairs['relation'] = gold_pairs['relation'].replace(rare_label, class_mapper[rare_label])

    # Split the final label back into its two components.
    gold_pairs['order'] = gold_pairs['relation'].map(lambda label: label.split('_')[1])
    gold_pairs[TARGET] = gold_pairs['relation'].map(lambda label: label.split('_')[0])

    return gold_pairs

def prepare_string(string):
    """Normalise a text snippet so that gold and predicted snippets can be compared.

    Applies every replacement of ``text_html_map``, turns hyphens into spaces and
    collapses runs of spaces into a single space.

    :param string: snippet text
    :return: normalised snippet without leading or trailing whitespace
    """
    for source, target in text_html_map.items():
        string = string.replace(source, target).strip()

    # Replacing a character that does not occur is a no-op, so no membership test is needed.
    string = string.replace('-', ' ').strip()

    double_space = ' ' * 2
    while double_space in string:
        string = string.replace(double_space, ' ')

    return string.strip()

def metric_parseval(parsed_pairs, gold, span=True, labeled=False, nuc=False):
    """Count matching relation pairs between a prediction and the gold standard.

    Every row of both frames is rendered as 'x&y&label&nuclearity', where each
    field is left empty when the matching flag is off. Gold strings containing
    'other' are dropped, and every predicted string that contains the part of
    such a gold string preceding 'other' is dropped as well.

    :param parsed_pairs: DataFrame with 'snippet_x', 'snippet_y', 'category_id', 'order'
    :param gold: DataFrame with the same columns
    :param span: include the normalised snippets in the comparison
    :param labeled: include the relation label in the comparison
    :param nuc: include the nuclearity in the comparison
    :return: (true positives, number of predicted strings, number of gold strings)
    """

    def _snippets(frame, row):
        # Normalised (left, right) snippets of one row, or empty strings when spans are ignored.
        if not span:
            return '', ''
        return (prepare_string(frame.loc[row, 'snippet_x']),
                prepare_string(frame.loc[row, 'snippet_y']))

    remap_classes = labeled or nuc

    predicted = []
    for row in parsed_pairs.index:
        left, right = _snippets(parsed_pairs, row)

        label = parsed_pairs.loc[row, 'category_id'].split('_')[0]
        nuclearity = parsed_pairs.loc[row, 'order']
        merged_label = '_'.join([label, nuclearity])

        if remap_classes:
            mapped = class_mapper.get(merged_label)
            if mapped:
                # Predictions of an 'other_*' class are rewritten through pred_mapper.
                target = pred_mapper.get(mapped) if 'other' in mapped else mapped
                label, nuclearity = target.split('_')

        if not labeled:
            label = ''
        if not nuc:
            nuclearity = ''

        predicted.append('&'.join([left, right, label, nuclearity]))

    reference = []
    for row in gold.index:
        left, right = _snippets(gold, row)

        label = gold.loc[row, 'category_id'].split('_')[0] if labeled else ''
        nuclearity = gold.loc[row, 'order'] if nuc else ''
        # Built from the already-masked fields, exactly as the lookup key has always been.
        merged_label = '_'.join([label, nuclearity])

        if remap_classes:
            mapped = class_mapper.get(merged_label)
            if mapped:
                mapped_parts = mapped.split('_')
                label = mapped_parts[0] if labeled else ''
                nuclearity = mapped_parts[1] if nuc else ''

        reference.append('&'.join([left, right, label, nuclearity]))

    predicted = set(predicted)
    reference = set(reference)

    # Gold pairs of an 'other' class are excluded from the evaluation, together with
    # every prediction that contains the text preceding 'other' in such a gold string.
    excluded_prefixes = [entry.split('other')[0] for entry in reference if 'other' in entry]
    reference = {entry for entry in reference if 'other' not in entry}
    predicted = {entry for entry in predicted
                 if not any(prefix in entry for prefix in excluded_prefixes)}

    return len(reference & predicted), len(predicted), len(reference)


def metric_parseval_df(parsed_pairs, gold, span=True, labeled=False, nuc=False):
    """Count matching relation pairs using space-separated row strings.

    Every row of both frames is rendered as 'x y[ label][ nuclearity]'. Gold
    strings containing 'other' are dropped, and every predicted string that
    contains the part of such a gold string preceding 'other' is dropped too.

    :param parsed_pairs: DataFrame with 'snippet_x', 'snippet_y', 'category_id', 'order'
    :param gold: DataFrame with the same columns
    :param span: include the normalised snippets in the comparison
    :param labeled: append the relation label
    :param nuc: append the nuclearity
    :return: (true positives, number of predicted strings, number of gold strings)
    """

    def _render(frame):
        # One comparison string per row of ``frame``, de-duplicated.
        rendered = []
        for row in frame.index:
            if span:
                left = prepare_string(frame.loc[row, 'snippet_x'])
                right = prepare_string(frame.loc[row, 'snippet_y'])
            else:
                left, right = '', ''

            label = ' ' + frame.loc[row, 'category_id'].split('_')[0] if labeled else ''
            nuclearity = ' ' + frame.loc[row, 'order'] if nuc else ''
            rendered.append(left + ' ' + right + label + nuclearity)
        return set(rendered)

    predicted = _render(parsed_pairs)
    reference = _render(gold)

    excluded_prefixes = [entry.split('other')[0] for entry in reference if 'other' in entry]
    reference = {entry for entry in reference if 'other' not in entry}
    predicted = {entry for entry in predicted
                 if not any(prefix in entry for prefix in excluded_prefixes)}

    return len(reference & predicted), len(predicted), len(reference)


def extr_pairs(tree, text=None, locations=False):
    """Collect one row per internal node of a binary discourse tree, parent first.

    This single definition replaces two same-named functions, of which the first
    (``extr_pairs(tree)``) was silently shadowed by the second. Calls of the form
    ``extr_pairs(tree, text, locations)`` behave as before; ``extr_pairs(tree)``
    now works as well and takes the snippets from the nodes' ``text`` attribute.

    :param tree: node with ``left``, ``right``, ``relation``, ``nuclearity``; children
        additionally provide ``start``/``end`` (and ``text`` when ``text`` is None)
    :param text: document text the ``start``/``end`` offsets refer to; when None the
        children's own ``text`` attribute is used instead
    :param locations: append the start offsets of the left and right child to each row
    :return: list of ``[left snippet, right snippet, relation, nuclearity]`` rows,
        each extended by ``[left start, right start]`` when ``locations`` is true
    """
    pairs = []
    if tree.left:
        left, right = tree.left, tree.right

        if text is None:
            left_snippet, right_snippet = left.text, right.text
        else:
            left_snippet = text[left.start:left.end]
            right_snippet = text[right.start:right.end]

        row = [left_snippet, right_snippet, tree.relation, tree.nuclearity]
        if locations:
            row += [left.start, right.start]
        pairs.append(row)

        # Depth-first: the whole left subtree is listed before the right one.
        pairs += extr_pairs(left, text, locations)
        pairs += extr_pairs(right, text, locations)
    return pairs


def extr_pairs_forest(forest, text, locations=False):
    """Concatenate the ``extr_pairs`` rows of every tree in ``forest``, in forest order.

    :param forest: iterable of discourse trees
    :param text: document text the tree offsets refer to
    :param locations: forwarded to ``extr_pairs``
    :return: flat list of rows
    """
    return [row
            for tree in forest
            for row in extr_pairs(tree, text, locations=locations)]


def _check_snippet_pair_in_dataset(left_snippet, right_snippet):
    left_snippet = left_snippet.strip()
    right_snippet = right_snippet.strip()
    return ((((gold.snippet_x == left_snippet) & (gold.snippet_y == right_snippet)).sum(axis=0) != 0)
            or ((gold.snippet_y == left_snippet) & (gold.snippet_x == right_snippet)).sum(axis=0) != 0)


def _not_parsed_as_in_gold(parsed_pairs: pd.DataFrame, gold: pd.DataFrame, labeled=False):
    """Return the gold pairs that the parser missed or labelled differently.

    The snippet columns of both frames are first normalised with the regex
    replacements of ``text_html_map``; as before, this modifies the frames that
    were passed in. Gold and predicted pairs are then left-joined on
    ('snippet_x', 'snippet_y').

    :param parsed_pairs: predicted pairs with 'snippet_x', 'snippet_y', 'category_id'
    :param gold: gold pairs with the same columns
    :param labeled: if False, return gold rows without any matching prediction;
        if True, return matched rows whose predicted category differs from gold
    :return: DataFrame slice of the merged frame
    """
    # Assign the replaced column back. ``frame[col].replace(..., inplace=True)`` operates
    # on a column selection (chained assignment) and does not reliably update ``frame``.
    for frame in (parsed_pairs, gold):
        for column in ('snippet_x', 'snippet_y'):
            for pattern, replacement in text_html_map.items():
                frame[column] = frame[column].replace(pattern, replacement, regex=True)

    tmp = pd.merge(gold, parsed_pairs, on=['snippet_x', 'snippet_y'], how='left', suffixes=('_gold', '_parsed'))
    if labeled:
        # Rows without a prediction get 0 from fillna and are filtered out right away.
        tmp = tmp.fillna(0)
        tmp = tmp[tmp.category_id_parsed != 0]
        return tmp[tmp.category_id_gold != tmp.category_id_parsed]

    return tmp[pd.isnull(tmp.category_id_parsed)]

def extr_edus(tree):
    """Return the texts of the leaves of ``tree`` from left to right.

    A node without a ``left`` child counts as a leaf and contributes its ``text``.

    :param tree: node with ``left``, ``right`` and ``text`` attributes
    :return: list of leaf texts
    """
    if not tree.left:
        return [tree.text]

    leaves = []
    for child in (tree.left, tree.right):
        leaves.extend(extr_edus(child))
    return leaves


def eval_segmentation(trees, _gold_edus, verbose=False):
    """Count predicted EDUs that exactly match a gold EDU after normalisation.

    Gold and predicted EDUs are each passed through ``prepare_string`` exactly once
    (the predicted side used to be normalised twice), and matching uses a set
    instead of scanning the gold list for every prediction.

    :param trees: iterable of predicted discourse trees
    :param _gold_edus: iterable of gold EDU texts
    :param verbose: print every normalised predicted EDU that has no gold match
    :return: (matching predicted EDUs, all predicted EDUs, all gold EDUs)
    """
    gold_edus = [prepare_string(gold_edu) for gold_edu in _gold_edus]
    gold_lookup = set(gold_edus)

    true_predictions = 0
    all_predicted = 0

    for tree in trees:
        pred_edus = extr_edus(tree)
        all_predicted += len(pred_edus)

        for pred_edu in pred_edus:
            pred_edu = prepare_string(pred_edu)

            if pred_edu in gold_lookup:
                true_predictions += 1
            elif verbose:
                print(pred_edu)

    # The gold total counts every gold EDU, including repeated texts.
    return true_predictions, all_predicted, len(gold_edus)


def eval_pipeline(trees=None, gold_edus=None, gold_pairs=None, text="", parsed_pairs=None):
    """Evaluate segmentation and relation parsing of one document.

    :param trees: predicted forest (iterable of discourse trees)
    :param gold_edus: gold EDU texts; defaults to an empty list
    :param gold_pairs: gold relation pairs as a DataFrame; defaults to an empty frame
    :param text: document text the tree offsets refer to
    :param parsed_pairs: predicted pairs as a DataFrame with the columns 'snippet_x',
        'snippet_y', 'category_id', 'order'; extracted from ``trees`` when omitted or empty
    :return: dict with the keys '<step>_true_pred', '<step>_all_pred', '<step>_all_true'
        for the steps 'seg', 'unlab', 'lab', 'nuc' and 'full'
    """
    # Defaults are created per call instead of once at definition time.
    if gold_edus is None:
        gold_edus = []
    if gold_pairs is None:
        gold_pairs = pd.DataFrame([])

    if parsed_pairs is None or parsed_pairs.empty:
        # metric_parseval iterates over ``.index`` and reads named columns, so the extracted
        # rows must be wrapped in a DataFrame; passing the plain list on made it fail.
        parsed_pairs = pd.DataFrame(extr_pairs_forest(trees, text),
                                    columns=['snippet_x', 'snippet_y', 'category_id', 'order'])

    result = {}
    result['seg_true_pred'], result['seg_all_pred'], result['seg_all_true'] = eval_segmentation(
        trees, gold_edus, verbose=False)

    # Result-key prefix -> which parts of a pair have to match besides the span.
    parseval_modes = (
        ('unlab', dict(labeled=False, nuc=False)),
        ('lab', dict(labeled=True, nuc=False)),
        ('nuc', dict(labeled=False, nuc=True)),
        ('full', dict(labeled=True, nuc=True)),
    )
    for prefix, flags in parseval_modes:
        true_pred, all_pred, all_true = metric_parseval(parsed_pairs, gold_pairs, **flags)
        result[prefix + '_true_pred'] = true_pred
        result[prefix + '_all_pred'] = all_pred
        result[prefix + '_all_true'] = all_true

    return result

In [None]:
# -p makes the cell safe to re-run: no error when the directory already exists.
! mkdir -p parsing_results_0707

In [None]:
from tqdm.autonotebook import tqdm
from utils.train_test_split import split_train_dev_test
from utils.file_reading import read_edus, read_gold
from utils.evaluation import *
from utils.export_to_rs3 import ForestExporter
import pickle

pipeline_evaluation = {}
train, dev, test = split_train_dev_test('data/')

# news only
#test = [filename for filename in test if 'news' in filename]
cache = []          # one evaluation dict per successfully processed document
thrown_error = []   # names of the documents whose processing raised an exception

test.sort()
ex = ForestExporter()


def read_corpus_text(file):
    """Return the stripped raw text of the corpus document that belongs to ``file``.

    The corpus sub-folder is chosen from the substrings 'news1', 'news2', 'blogs',
    'sci.ling' and 'sci.comp' of ``file``; when several match, the last matching
    rule wins, exactly as in the original cascade of assignments.

    :param file: document name such as 'data/<name>' (without the '.edus' suffix)
    :raises ValueError: if ``file`` matches none of the known corpus parts; previously
        the text of the preceding document was silently reused in that case
    """
    path = None
    for name in ['news1', 'news2', 'blogs']:
        if name in file:
            path = f'corpus/RuRsTreebank_full/{name}/{file.replace("data/", name+"_txt/")}.txt'
    if 'sci.ling' in file:
        path = f'corpus/RuRsTreebank_full/sci_ling/sci_ling_txt/{file.replace("data/", "")}.txt'
    elif 'sci.comp' in file:
        path = f'corpus/RuRsTreebank_full/sci_comp/sci_comp_txt/{file.replace("data/", "")}.txt'

    if path is None:
        raise ValueError(f'no corpus text location known for {file}')

    with open(path, 'r') as text_file:
        return text_file.read().strip()


for file in tqdm(test):
    file = file.replace('.edus', '')

    try:
        text = read_corpus_text(file)
        result = ppl(text)
        out_file = file.split('/')[-1]

        # Store the raw parser output so the evaluation can be repeated without re-parsing.
        with open(f'parsing_results_0707/{out_file}.pkl', 'wb') as pkl_file:
            pickle.dump(result, pkl_file)

        # The .rs3 export is best effort: a failure is reported but does not skip the evaluation.
        try:
            ex(result['rst'], out_file+'_pred.rs3')
        except Exception:
            print(out_file, "was not saved in .rs3")

        gold_edus = read_edus(file)
        gold_pairs = prepare_gold_pairs(read_gold(file, features=True))

        parsed_pairs = pd.DataFrame(extr_pairs_forest(result['rst'], result['text']),
                                    columns=['snippet_x', 'snippet_y', 'category_id', 'order'])

        # The gold pairs prepared above are reused instead of reading and preparing them again.
        evaluation = eval_pipeline(parsed_pairs=parsed_pairs,
                                   gold_edus=gold_edus,
                                   gold_pairs=gold_pairs,
                                   text=result['text'],
                                   trees=result['rst'])

        evaluation['filename'] = file
        cache.append(evaluation)

    except Exception as error:
        # Catch Exception only, so that interrupting the kernel still stops the loop,
        # and show the reason next to the name that is recorded.
        thrown_error.append(file)
        print(f'{file}: {type(error).__name__}: {error}')

In [None]:
file

In [None]:
gold_pairs.head(1)

In [None]:
if type(gold_pairs) == int and gold_pairs == -1:
    print([0., 1.])

In [None]:
# Repeats the save / export / evaluate steps for the document currently held in
# `result`, `file` and `out_file`; these names must already exist in the kernel.
with open(f'parsing_results_0707/{out_file}.pkl', 'wb') as pkl_file:
    pickle.dump(result, pkl_file)

# The .rs3 export is best effort; Exception (not a bare except) keeps Ctrl-C working.
try:
    ex(result['rst'], out_file+'_pred.rs3')
except Exception:
    print(out_file, "was not saved in .rs3")

gold_edus = read_edus(file)
gold_pairs = prepare_gold_pairs(read_gold(file, features=True))

parsed_pairs = pd.DataFrame(extr_pairs_forest(result['rst'], result['text']),
                            columns=['snippet_x', 'snippet_y', 'category_id', 'order'])

# The gold pairs prepared above are reused instead of reading and preparing them again.
evaluation = eval_pipeline(parsed_pairs=parsed_pairs,
                           gold_edus=gold_edus,
                           gold_pairs=gold_pairs,
                           text=result['text'],
                           trees=result['rst'])

evaluation['filename'] = file
cache.append(evaluation)

In [None]:
from tqdm.autonotebook import tqdm
from utils.train_test_split import split_train_dev_test
from utils.file_reading import read_edus, read_gold, read_annotation
from utils.evaluation import *
import pickle
import glob

pipeline_evaluation = {}
train, dev, test = split_train_dev_test('data/')

cache = []          # one evaluation dict per pickled parsing result
thrown_error = []

for file in tqdm(glob.glob('parsing_results_0707/*.pkl')):

    # NOTE: pickle.load can execute arbitrary code; only load result files written by this notebook.
    with open(file, 'rb') as pkl_file:
        result = pickle.load(pkl_file)

    # Map the result file name back to the gold document name under data/.
    file = file.replace('parsing_results_0707/', 'data/').replace('.pkl', '')

    gold_edus = read_edus(file)
    gold_pairs = prepare_gold_pairs(read_gold(file, features=True))

    parsed_pairs = pd.DataFrame(extr_pairs_forest(result['rst'], result['text']),
                                columns=['snippet_x', 'snippet_y', 'category_id', 'order'])

    evaluation = eval_pipeline(parsed_pairs=parsed_pairs,
                               gold_edus=gold_edus,
                               gold_pairs=gold_pairs,
                               text=result['text'],
                               trees=result['rst'])

    evaluation['filename'] = file
    cache.append(evaluation)

In [None]:
thrown_error

In [None]:
import pickle
from tqdm.autonotebook import tqdm
from utils.train_test_split import split_train_dev_test
from utils.file_reading import read_edus, read_gold
from utils.evaluation import *
import pandas as pd

tmp = pd.DataFrame(cache)

# Per-document precision, recall and F1 for every evaluation step. The columns are
# added in the order pr_<step>, re_<step>, f1_<step>.
for step in ('seg', 'unlab', 'lab', 'nuc', 'full'):
    tmp['pr_' + step] = tmp[step + '_true_pred'] / tmp[step + '_all_pred']
    tmp['re_' + step] = tmp[step + '_true_pred'] / tmp[step + '_all_true']
    tmp['f1_' + step] = 2 * tmp['pr_' + step] * tmp['re_' + step] / (tmp['pr_' + step] + tmp['re_' + step])

tmp.sort_values('f1_full', ascending=False)

In [None]:
tmp.sort_values('f1_full', ascending=False)[[key for key in tmp.keys() if 'f1' in key]]

In [None]:
tmp.seg_all_true.sum(), tmp.seg_all_pred.sum()

In [None]:
tmp.lab_all_true.sum(), tmp.lab_all_pred.sum()

In [None]:
tmp2 = tmp.copy()

In [None]:
tmp = tmp2.copy()

In [None]:
tmp = tmp2[tmp2.filename.str.contains('blogs')]

In [None]:
# Corpus-level precision and recall: the counts are summed over all documents in `tmp`
# before the ratio is taken.
overall_score = {}
for step in ('seg', 'unlab', 'lab', 'nuc', 'full'):
    true_pred_total = tmp[step + '_true_pred'].sum()
    overall_score['pr_' + step] = true_pred_total / tmp[step + '_all_pred'].sum()
    overall_score['re_' + step] = true_pred_total / tmp[step + '_all_true'].sum()


def get_overall_score(step: str):
    """Return the F1 of the corpus-level precision and recall stored for ``step``."""
    precision = overall_score['pr_' + step]
    recall = overall_score['re_' + step]
    return 2. * precision * recall / (precision + recall)


for step in ('seg', 'unlab', 'nuc', 'lab', 'full'):
    overall_score['f1_' + step] = get_overall_score(step)

In [None]:
# (row caption, key suffix in `overall_score`) in the order the rows appear in the table.
table_rows = (
    ('segmentation', 'seg'),
    ('span', 'unlab'),
    ('nuclearity', 'nuc'),
    ('relation', 'lab'),
    ('full', 'full'),
)

evaluation_table = pd.DataFrame(
    columns=['component', 'P', 'R', 'F1'],
    data=[[caption,
           overall_score['pr_' + suffix],
           overall_score['re_' + suffix],
           overall_score['f1_' + suffix]]
          for caption, suffix in table_rows])

# Report percentages instead of fractions.
for score_column in ('P', 'R', 'F1'):
    evaluation_table[score_column] *= 100

In [None]:
print(evaluation_table.to_latex(index=False, float_format='%.2f', column_format='|l|l|l|l|'))

In [None]:
tmp = pd.DataFrame(cache)

# Per-document precision, recall and F1 for the steps seg / unlab / lab / nuc.
# The columns are added in the order pr_<step>, re_<step>, f1_<step>.
for step in ('seg', 'unlab', 'lab', 'nuc'):
    tmp['pr_' + step] = tmp[step + '_true_pred'] / tmp[step + '_all_pred']
    tmp['re_' + step] = tmp[step + '_true_pred'] / tmp[step + '_all_true']
    tmp['f1_' + step] = 2 * tmp['pr_' + step] * tmp['re_' + step] / (tmp['pr_' + step] + tmp['re_' + step])

tmp.sort_values('f1_unlab', ascending=False)

In [None]:
tmp.lab_all_true.sum(), tmp.lab_all_pred.sum()

In [None]:
# Corpus-level precision and recall: the counts are summed over all documents in `tmp`
# before the ratio is taken.
overall_score = {}
for step in ('seg', 'unlab', 'lab', 'nuc'):
    true_pred_total = tmp[step + '_true_pred'].sum()
    overall_score['pr_' + step] = true_pred_total / tmp[step + '_all_pred'].sum()
    overall_score['re_' + step] = true_pred_total / tmp[step + '_all_true'].sum()

# F1 from the corpus-level precision and recall of each step.
for step in ('seg', 'unlab', 'lab', 'nuc'):
    precision = overall_score['pr_' + step]
    recall = overall_score['re_' + step]
    overall_score['f1_' + step] = 2. * precision * recall / (precision + recall)

overall_score

In [None]:
# (row caption, key suffix in `overall_score`) in the order the rows appear in the table.
table_rows = (
    ('segmentation', 'seg'),
    ('span', 'unlab'),
    ('nuclearity', 'nuc'),
    ('relation', 'lab'),
)

evaluation_table = pd.DataFrame(
    columns=['component', 'P', 'R', 'F1'],
    data=[[caption,
           overall_score['pr_' + suffix],
           overall_score['re_' + suffix],
           overall_score['f1_' + suffix]]
          for caption, suffix in table_rows])

# Report percentages instead of fractions.
for score_column in ('P', 'R', 'F1'):
    evaluation_table[score_column] *= 100

In [None]:
print(evaluation_table.to_latex(index=False, float_format='%.2f', column_format='|l|l|l|l|'))

### Analyse segmentation 

In [None]:
from tqdm.autonotebook import tqdm
from utils.train_test_split import split_train_dev_test
from utils.file_reading import read_edus, read_gold
from utils.evaluation import *
import pickle
import glob

# List the stored results once instead of re-globbing on every iteration.
crf_result_files = glob.glob('parsing_results_crf/*.pkl')

for i in range(len(test[:1])):
    file = crf_result_files[i]

    # NOTE: pickle.load can execute arbitrary code; only load result files you produced yourself.
    with open(file, 'rb') as pkl_file:
        result = pickle.load(pkl_file)

    # Map the result file name back to the gold document name under data/.
    file = file.replace('parsing_results_crf/', 'data/')
    file = file.replace('.pkl', '')

    gold_edus = read_edus(file)
    gold_pairs = prepare_gold_pairs(read_gold(file, features=True))
    pred_edus = []
    for tree in result['rst']:
        pred_edus += extr_edus(tree)

    # Hand the predicted pairs over as a DataFrame, as the other evaluation cells do;
    # the relation metrics read `.index` and named columns from it.
    parsed_pairs = pd.DataFrame(extr_pairs_forest(result['rst'], result['text']),
                                columns=['snippet_x', 'snippet_y', 'category_id', 'order'])

    evaluation = eval_pipeline(parsed_pairs=parsed_pairs,
                               gold_edus=gold_edus,
                               gold_pairs=gold_pairs,
                               text=result['text'],
                               trees=result['rst'])
    evaluation['filename'] = file
In [None]:
file

In [None]:
gold_edus

In [None]:
pred_edus

In [None]:
evaluation

In [None]:
tmp = pd.DataFrame([evaluation])

# Precision, recall and F1 of the single analysed document for the steps
# seg / unlab / lab / nuc, added in the order pr_<step>, re_<step>, f1_<step>.
for step in ('seg', 'unlab', 'lab', 'nuc'):
    tmp['pr_' + step] = tmp[step + '_true_pred'] / tmp[step + '_all_pred']
    tmp['re_' + step] = tmp[step + '_true_pred'] / tmp[step + '_all_true']
    tmp['f1_' + step] = 2 * tmp['pr_' + step] * tmp['re_' + step] / (tmp['pr_' + step] + tmp['re_' + step])

tmp.sort_values('f1_seg', ascending=False)