In [None]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from pyrouge import Rouge155
import logging

In [None]:
def read_comments(file_name):
    """Read a whitespace-tokenised text file, one comment per line.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list with one entry per line of the file, in file order. Each entry
        is the list of whitespace-separated tokens found on that line; a blank
        line yields an empty list, so list positions keep matching line
        positions in the file.
    """
    with open(file_name,'r') as f:
        # str.split() with no argument already discards the trailing newline.
        # Slicing the last character off first would eat a real character
        # whenever the final line is not newline-terminated.
        return [line.split() for line in f]

In [None]:
def build_vocab(file_name,min_num):
    """Build word<->id lookup tables from a tab-separated vocabulary file.

    Every non-blank line is read as three tab-separated columns: an integer
    id, the word, and an integer value that is compared against ``min_num``
    (presumably the word's frequency -- TODO confirm). Lines are consumed in
    file order and reading stops at the first line whose third column is below
    ``min_num``, so later lines are ignored even if they would pass the
    threshold.

    Args:
        file_name: Path of the vocabulary file.
        min_num: Minimum value of the third column for a word to be kept.

    Returns:
        A ``(word_to_int, int_to_word)`` pair of dicts. Both also contain an
        ``'<UNK>'`` entry whose id is the number of kept words plus one.

    Raises:
        ValueError: If the first or third column of a line is not an integer.
        IndexError: If a non-blank line has fewer than three columns.
    """
    word_to_int = {}
    int_to_word = {}
    words_num = 0
    with open(file_name,'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line at EOF).
                continue
            # Strip only the line terminator. Dropping the last character
            # unconditionally would truncate the count on a final line that
            # has no trailing newline (e.g. "15" would be read as "1").
            fields = line.rstrip('\n').split('\t')
            if int(fields[2]) < min_num:
                break
            word_id = int(fields[0])
            word_to_int[fields[1]] = word_id
            int_to_word[word_id] = fields[1]
            words_num += 1
    # NOTE(review): the '<UNK>' id is derived from the number of kept words,
    # not from the ids in the file; it is only guaranteed to be unused if the
    # file's ids never reach words_num+1 -- confirm against the data.
    unk_id = words_num+1
    word_to_int['<UNK>'] = unk_id
    int_to_word[unk_id] = '<UNK>'
    return word_to_int,int_to_word

In [None]:
def convert_comments(comments,word_to_int,int_to_word):
    """Translate tokenised comments into lists of vocabulary ids.

    Args:
        comments: Iterable of comments, each an iterable of word tokens.
        word_to_int: Mapping from word to id. Tokens missing from it are
            replaced by the id stored under ``'<UNK>'``.
        int_to_word: Not used by this function; kept in the signature for
            existing callers.

    Returns:
        A list with one list of ids per input comment, in the same order.
    """
    def to_id(token):
        # '<UNK>' is only looked up when a token is actually missing.
        token_id = word_to_int.get(token)
        if token_id is None:
            return word_to_int['<UNK>']
        return token_id

    return [[to_id(token) for token in tokens] for tokens in comments]

In [None]:
def id_seq_to_word_seq(id_seq,id_vocab):
    """Render a sequence of ids as one space-separated string of words.

    Args:
        id_seq: Iterable of ids.
        id_vocab: Lookup from id to word, indexed with ``id_vocab[id]``.

    Returns:
        The looked-up words joined by single spaces ('' for an empty sequence).
    """
    words = (id_vocab[token_id] for token_id in id_seq)
    return ' '.join(words)

In [None]:
def prepare_evaluation(data_path,comments,int_to_word,word_to_int):
    """Collect the reference ("model") comments for each selected test line.

    Every non-blank line of ``data_path`` is read as tab-separated columns.
    Only lines whose third column equals 1 are kept. For those, the fourth
    column is a '|'-separated list of integer positions into ``comments``;
    each referenced comment is turned back into text with
    ``id_seq_to_word_seq``.

    Args:
        data_path: Path of the tab-separated test-data file.
        comments: Sequence of id sequences, indexed by the positions listed in
            the fourth column.
        int_to_word: Lookup from id to word used to render the comments.
        word_to_int: Not used by this function; kept in the signature for
            existing callers.

    Returns:
        A dict keyed by the ``(first column, second column)`` string pair of
        each kept line, mapping to the list of rendered comment strings.
    """
    model_comments = {}
    with open(data_path,'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line at EOF).
                continue
            # Strip only the line terminator. Dropping the last character
            # unconditionally would corrupt the last index on a final line
            # without a trailing newline ("12" would become "1").
            fields = line.rstrip('\n').split('\t')
            if int(fields[2]) == 1:
                model_comments[(fields[0],fields[1])] = [
                    id_seq_to_word_seq(comments[int(comment_index)],int_to_word)
                    for comment_index in fields[3].split('|')
                ]
    return model_comments

In [None]:
def read_system_comments(data_path):
    """Load system-generated comments from a tab-separated file.

    Every non-blank line is read as tab-separated columns; the first two
    columns form the key and the third column is the comment text.

    Args:
        data_path: Path of the tab-separated file.

    Returns:
        A dict mapping ``(first column, second column)`` string pairs to the
        third column of the same line. If a key occurs more than once, the
        last occurrence wins.

    Raises:
        IndexError: If a non-blank line has fewer than three columns.
    """
    system_comments = {}
    with open(data_path,'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line at EOF).
                continue
            # Remove the line terminator before splitting. Chopping the last
            # character of the third field instead loses real text when the
            # final line has no newline or when a line has extra columns.
            fields = line.rstrip('\n').split('\t')
            system_comments[(fields[0],fields[1])] = fields[2]
    return system_comments

In [None]:
# Input locations and vocabulary threshold used by the cells below.
# Text file handed to read_comments().
comments_path = 'dataset/text.dat'
# Vocabulary file handed to build_vocab().
vocab_path = 'dataset/vocab.dat'
# Threshold applied by build_vocab() to the third column of the vocabulary
# file; presumably a minimum word frequency -- TODO confirm.
min_num = 5

In [None]:
# Tokenised comments, one list of tokens per line of the input file.
comments = read_comments(comments_path)
# Show a short preview only; echoing the whole list floods the notebook
# output and bloats the saved file.
comments[:5]

In [None]:
# Build the word<->id lookup tables; build_vocab() also adds an '<UNK>' entry.
word_to_int,int_to_word = build_vocab(vocab_path,min_num)
# Number of entries in the word-to-id table (the '<UNK>' entry is counted).
vocab_size = len(word_to_int)

In [None]:
# Convert from a fresh read of the token file rather than from the current
# value of `comments`: feeding `comments` back into convert_comments() makes
# the cell non-idempotent (a second run would map every id to '<UNK>').
comments = convert_comments(read_comments(comments_path),word_to_int,int_to_word)
# Show a short preview only; echoing the whole list floods the notebook output.
comments[:5]

In [None]:
# Reference comments for the "tops" test data: a dict keyed by the first two
# columns of each selected line, holding the referenced comments as text.
data_path = 'dataset/testdata_tops.dat'
model_tops_comments = prepare_evaluation(data_path,comments,int_to_word,word_to_int)

In [None]:
# Reference comments for the "downs" test data, built the same way as for the
# "tops" data. Note that `data_path` is reassigned here.
data_path = 'dataset/testdata_downs.dat'
model_downs_comments = prepare_evaluation(data_path,comments,int_to_word,word_to_int)

In [None]:
# Tab-separated files with the system-generated comments to be evaluated
# ("nfr" is presumably the name of the system -- TODO confirm).
nfr_system_tops_comments_path = 'system_comments/system_tops_comments.dat'
nfr_system_downs_comments_path = 'system_comments/system_downs_comments.dat'

In [None]:
# System comments as dicts keyed by the first two columns of each line, i.e.
# the same key shape prepare_evaluation() uses for the reference comments.
nfr_system_tops_comments = read_system_comments(nfr_system_tops_comments_path)
nfr_system_downs_comments = read_system_comments(nfr_system_downs_comments_path)

In [None]:
def write_rouge_evals(conf_file,model_comments,system_comments,start_index):
    """Append one <EVAL> entry per evaluation case to an open ROUGE config.

    For every key in ``model_comments`` this writes:
      * ``rouge/system/nfr.<index>.txt`` with the system comment for that key
        (an empty file when ``system_comments`` has no entry for it),
      * ``rouge/model/common.<i>.<index>.txt`` for each reference comment,
        with ``i`` counting from 1,
      * the matching ``<EVAL>`` element into ``conf_file``.

    The ``rouge/system`` and ``rouge/model`` directories are not created here
    and must already exist.

    Args:
        conf_file: Writable text file object for the ROUGE configuration.
        model_comments: Dict mapping a key to its list of reference comments.
        system_comments: Dict mapping the same kind of key to one system
            comment string.
        start_index: EVAL id to use for the first entry written.

    Returns:
        The next unused EVAL id (``start_index`` plus the number of entries
        written), so consecutive calls can continue the numbering.
    """
    index = start_index
    for combination,references in model_comments.items():
        peer_name = 'nfr.'+str(index)+'.txt'
        conf_file.write('\t<EVAL ID=\"'+str(index)+'\">\n')
        conf_file.write('\t\t<MODEL-ROOT>model</MODEL-ROOT>\n')
        conf_file.write('\t\t<PEER-ROOT>system</PEER-ROOT>\n')
        conf_file.write('\t\t<INPUT-FORMAT TYPE=\"SEE\">\n')
        conf_file.write('\t\t</INPUT-FORMAT>\n')
        conf_file.write('\t\t<PEERS>\n')
        system_comment = system_comments.get(combination)
        with open('rouge/system/'+peer_name,'w') as f:
            # The peer file is always created, even when the system produced
            # nothing for this key, because the config references it.
            f.write(system_comment if system_comment is not None else '')
        conf_file.write('\t\t\t<P ID=\"nfr\">'+peer_name+'</P>\n')
        conf_file.write('\t\t</PEERS>\n')
        conf_file.write('\t\t<MODELS>\n')
        for i,reference in enumerate(references,1):
            model_name = 'common.'+str(i)+'.'+str(index)+'.txt'
            with open('rouge/model/'+model_name,'w') as f:
                f.write(reference)
            conf_file.write('\t\t\t<M ID=\"'+str(i)+'\">'+model_name+'</M>\n')
        conf_file.write('\t\t</MODELS>\n')
        conf_file.write('\t</EVAL>\n')
        index += 1
    return index


# The whole configuration is written in one cell so the file is always closed
# and re-running the cell regenerates it from scratch instead of appending
# further entries to a half-written file. The "tops" cases are numbered from
# 1 and the "downs" cases continue the same numbering.
with open('rouge/rouge_conf.xml','w') as F:
    F.write('<ROUGE-EVAL version=\"1.55\">\n')
    index = write_rouge_evals(F,model_tops_comments,nfr_system_tops_comments,1)
    index = write_rouge_evals(F,model_downs_comments,nfr_system_downs_comments,index)
    F.write('</ROUGE-EVAL>\n')

In [None]:
# Directories holding the files written for the ROUGE configuration.
system_dir = 'rouge/system'
model_dir = 'rouge/model'
r = Rouge155(log_level=logging.ERROR)
# The same path is passed for both arguments, system files first and model
# files second, exactly as the two separate calls did.
for summary_dir in (system_dir,model_dir):
    r.convert_summaries_to_rouge_format(summary_dir,summary_dir)

In [None]:
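# Scoring is run manually in a shell, not from this notebook (presumably from
# inside the rouge/ directory, where rouge_conf.xml was written -- confirm):
#   ROUGE-1.5.5.pl -n 4 -2 4 -u -a rouge_conf.xml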