In [None]:
# Fetch the SimpleTICO19 repository; later cells read the CSV files under SimpleTICO19/dataset/.
! git clone https://github.com/MMU-TDMLab/SimpleTICO19.git

In [None]:
! pip install simpletransformers
! git clone https://github.com/mjpost/sacrebleu.git
! cd sacrebleu && pip install .
! pip install bert-score

! pip install datasets

! pip install xlrd==1.2.0

In [None]:
import pandas
from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs
import torch
from sacrebleu.metrics import BLEU
import unicodedata
import math
from bert_score import BERTScorer
from datasets import load_dataset
import os
import pickle

In [None]:
path = "SimpleTICO19/dataset/"

# Every split (test, dev) ships as one CSV per language (en, es).
# Files are read in the order test.en, test.es, dev.en, dev.es.
df_tst_en, df_tst_es, df_dev_en, df_dev_es = (
    pandas.read_csv(path + "simpletico19." + split + "." + lang + ".csv")
    for split in ("test", "dev")
    for lang in ("en", "es")
)

In [None]:
cuda_available = torch.cuda.is_available()

# Single place to choose the translation direction (en-es or es-en).
# The same checkpoint name feeds both the model arguments and the model
# constructor, so the two can no longer drift apart when the direction changes.
# The BERTScorer language set further down must still match the target language.
MARIAN_MODEL_NAME = 'Helsinki-NLP/opus-mt-en-es'

model_args = Seq2SeqArgs()
model_args.use_multiprocessing = False
model_args.manual_seed = 1

# Generation settings used by model.predict().
# NOTE(review): do_sample=True together with num_beams=12 presumably makes
# decoding stochastic; confirm that manual_seed is enough for reproducible scores.
model_args.max_length = 200
model_args.do_sample = True
model_args.early_stopping = False
model_args.num_beams = 12
model_args.use_multiprocessed_decoding = True
model_args.encoding = 'utf-8'

# Training-time options; no training call is visible in this notebook section.
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = True
model_args.evaluate_during_training_steps = 50
model_args.overwrite_output_dir = True

model_args.base_marian_model_name = MARIAN_MODEL_NAME

model = Seq2SeqModel(encoder_decoder_type="marian",
                     encoder_decoder_name=MARIAN_MODEL_NAME,
                     use_cuda=cuda_available,
                     args=model_args)

In [None]:
# Pair each English row with its Spanish counterpart via the shared stringID.
# Columns present in both files receive pandas' default suffixes:
# _x for the left (English) frame and _y for the right (Spanish) frame.
df_tst_merge = pandas.merge(df_tst_en, df_tst_es, on='stringID')
df_dev_merge = pandas.merge(df_dev_en, df_dev_es, on='stringID')

# Aliases (same objects, not copies) used by the rest of the notebook.
en_es_df_tst, en_es_df_dev = df_tst_merge, df_dev_merge


In [None]:
# Derive a 'source' label for every row from the part of stringID that precedes
# the first underscore, then fold the 'wiki' label into 'Wikipedia'.
#
# The relabelling must name the target column explicitly: a bare
# `df.loc[mask] = 'Wikipedia'` assigns the string to EVERY column of the
# matching rows, destroying the sentences that are later translated and scored.
for merged_frame in (en_es_df_tst, en_es_df_dev):
    merged_frame['source'] = merged_frame['stringID'].str.split("_", n=1).str[0]
    merged_frame.loc[merged_frame['source'] == 'wiki', 'source'] = 'Wikipedia'

In [None]:
# Evaluation frame built from the dev split: English original as model input,
# Spanish simplification as target. drop() returns a new frame, so
# en_es_df_dev itself is left untouched.
df_ev = (
    en_es_df_dev
    .drop(columns=['stringID', 'simplification_x', 'comments_x', 'original_y', 'comments_y'])
    .rename(columns={'original_x': 'input_text', 'simplification_y': 'target_text'})
)


In [None]:
# Corpus-level BLEU with n-grams up to order 4.
bleu = BLEU(max_ngram_order=4)

#make sure bert scorer is set for correct target language
bertscore = BERTScorer(lang="es", rescale_with_baseline=True)

def do_corpus_exp(df, id):
  """Translate one source subset of `df` and print its BLEU and BERTScore.

  Args:
    df: merged en/es frame with `source`, `original_x` and
      `simplification_y` columns.
    id: pattern matched against `df.source` with `str.contains` (pandas
      defaults: case-sensitive, interpreted as a regular expression).
      The empty string selects every row.

  Prints, in order: the id, the number of outputs, the number of references,
  the corpus BLEU score, the three mean BERTScore values in the order returned
  by `bertscore.score`, and two blank lines. Returns None.

  If no row matches `id`, a notice is printed and the model and scorers are
  not called, instead of feeding them empty lists.
  """
  print(id)
  # set desired source + target (simp/orig, x/y)
  # The row mask is computed once so inputs and references always line up.
  subset  = df[df.source.str.contains(id)]
  inputs  = subset.original_x.to_list()
  refs    = subset.simplification_y.to_list()
  if not inputs:
    print("no rows match source id %r - skipping" % id)
    print()
    print()
    return
  outputs = model.predict(inputs)
  print(len(outputs))
  print(len(refs))
  print(bleu.corpus_score(outputs, [refs]))
  # score() yields one tensor per metric with one entry per sentence;
  # average each over the sentence dimension.
  bert_scores = bertscore.score(outputs, refs)
  for metric_values in bert_scores:
    print(metric_values.mean(dim=0).cpu().item())
  print()
  print()

# Per-source evaluation on the dev split, followed by the whole split.
# NOTE(review): 'wiki' rows are relabelled 'Wikipedia' in an earlier cell, so
# the "wiki" id presumably matches no rows any more - confirm whether it is
# still wanted.
for dev_source_id in ("PubMed", "wiki", "Wikivoyage", "Wikipedia", "CMU", "Wikisource", "Wikinews"):
  do_corpus_exp(en_es_df_dev, dev_source_id)

print("ALL")
do_corpus_exp(en_es_df_dev, "")
print()

# Same procedure on the test split.
for tst_source_id in ("PubMed", "Wikipedia", "CMU", "Wikisource", "Wikinews"):
  do_corpus_exp(en_es_df_tst, tst_source_id)

print("ALL")
do_corpus_exp(en_es_df_tst, "")
print()
