In [None]:
# Mount Google Drive at /content/drive so the /content/drive/MyDrive/... paths used below resolve.
# force_remount=True asks for a fresh mount even if one already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


## Data Pre-Processing

In [None]:
# Open read-only handles for the reference file and the two system outputs (gpt, hybrid)
# of each dataset; `_r` names are the reddit set, `_d` names the daily set.
# NOTE(review): none of these handles is closed in the visible notebook.
# NOTE(review): `gpt_r` and `hybrid_r` are rebound to plain lists by later cells,
# so the handles opened here become unreachable under those names.
# NOTE(review): the reference paths look crossed relative to the output paths
# (`original_r` reads data/daily/reddit/..., `original_d` reads data/reddit/...) —
# confirm against the actual Drive layout.
original_r = open("/content/drive/MyDrive/550/data/daily/reddit/aggregated_test.txt","r")
gpt_r = open("/content/drive/MyDrive/550/output/reddit/gpt_out.txt","r")
hybrid_r = open("/content/drive/MyDrive/550/output/reddit/hybrid_out.txt","r")

original_d = open("/content/drive/MyDrive/550/data/reddit/aggregated_test.txt","r")
gpt_d = open("/content/drive/MyDrive/550/output/daily/gpt_out.txt","r")
hybrid_d = open("/content/drive/MyDrive/550/output/daily/hybrid_out.txt","r")

In [None]:
!pip install rouge_score
!pip install nlp

In [None]:
import spacy
import nlp
from itertools import chain
import numpy as np

In [None]:
# Load spaCy's small English pipeline with the parser, NER and textcat components disabled.
tokenizer = spacy.load("en_core_web_sm", disable=["parser", "ner", "textcat"])
# Add a sentencizer component; get_lemmas below iterates over `document.sents`.
# NOTE(review): create_pipe(...) passed into add_pipe(...) looks like the spaCy v2 call
# style — confirm it matches the installed spaCy version.
tokenizer.add_pipe(tokenizer.create_pipe('sentencizer'))

In [None]:

# Load the "rouge" metric object; every scoring cell and compute_metrics call `rouge.compute` on it.
rouge = nlp.load_metric("rouge", experiment_id=1)

## File Processing

In [None]:
from tqdm.notebook import tqdm
# Read the two system-output files under output/daily, one list entry per
# line of the file (line endings are kept, exactly as readlines() would).
with open('/content/drive/MyDrive/550/output/daily/gpt_out.txt', 'r') as fh:
  gpt_lines = list(fh)
with open('/content/drive/MyDrive/550/output/daily/hybrid_out.txt', 'r') as fh:
  hybrid_lines = list(fh)


In [None]:
 
# Reference lines that are later scored against the output/daily predictions.
# NOTE(review): the path points at data/reddit although the predictions come
# from output/daily — confirm against the Drive layout.
with open('/content/drive/MyDrive/550/data/reddit/aggregated_test.txt', 'r') as fh:
  true_lines = list(fh)
# Run the spaCy pipeline over every line (hybrid, then references, then GPT),
# each pass wrapped in its own progress bar.
hybrid_docs = list(map(tokenizer, tqdm(hybrid_lines)))
true_docs = list(map(tokenizer, tqdm(true_lines)))
gpt_docs = list(map(tokenizer, tqdm(gpt_lines)))


In [None]:
from tqdm.notebook import tqdm
# Read the reddit system outputs and their reference file, one list entry per line.
with open('/content/drive/MyDrive/550/output/reddit/gpt_out.txt', 'r') as f:
  gpt_r_lines = f.readlines()
with open('/content/drive/MyDrive/550/output/reddit/hybrid_out.txt', 'r') as f:
  hybrid_r_lines = f.readlines()
# NOTE(review): reference path is data/daily/reddit/... while the outputs live in
# output/reddit — confirm against the Drive layout.
with open('/content/drive/MyDrive/550/data/daily/reddit/aggregated_test.txt', 'r') as f:
  true_r_lines = f.readlines()

# `hybrid_docs_r` has to be built here: the cell that fills `hybrid_r` iterates over it,
# and with this line commented out a fresh-kernel run stops there with a NameError.
hybrid_docs_r = [tokenizer(t) for t in tqdm(hybrid_r_lines)]
true_docs_r = [tokenizer(t) for t in tqdm(true_r_lines)]
gpt_docs_r = [tokenizer(t) for t in tqdm(gpt_r_lines)]

In [None]:
from itertools import chain
def get_lemmas(document):
  '''Collect the lemma of every non-punctuation token, sentence by sentence.

  Walks `document.sents` in order and returns a single flat list holding
  `token.lemma_` for each token whose `is_punct` flag is false.
  '''
  lemmas = []
  for sentence in document.sents:
    for token in sentence:
      if token.is_punct:
        continue
      lemmas.append(token.lemma_)
  return lemmas


In [None]:
# For each tokenized reference in `true_docs`: lemmatise it, keep at most the
# first 70 lemmas, and store them as one space-separated string.
true_dm = [
  ' '.join(map(str, get_lemmas(doc)[:70]))
  for doc in tqdm(true_docs)
]

In [None]:
# One space-joined lemma string per tokenized GPT output in `gpt_docs` (no truncation).
gpt_dm = [' '.join(map(str, get_lemmas(doc))) for doc in tqdm(gpt_docs)]

In [None]:
# One space-joined lemma string per tokenized hybrid output in `hybrid_docs` (no truncation).
hybrid_dm = [' '.join(map(str, get_lemmas(doc))) for doc in tqdm(hybrid_docs)]

In [None]:
# For each tokenized reference in `true_docs_r`: lemmatise it, keep at most the
# first 70 lemmas, and store them as one space-separated string.
true_r = [
  ' '.join(map(str, get_lemmas(doc)[:70]))
  for doc in tqdm(true_docs_r)
]
  

In [None]:
# One space-joined lemma string per tokenized GPT output in `gpt_docs_r`.
# NOTE(review): this rebinds `gpt_r`, which an earlier cell bound to an open
# file handle; from here on the name refers to this list of strings.
gpt_r = [' '.join(map(str, get_lemmas(doc))) for doc in tqdm(gpt_docs_r)]

In [None]:
# One space-joined lemma string per tokenized hybrid output.
# Requires `hybrid_docs_r` to exist in the kernel before this cell runs.
# NOTE(review): this rebinds `hybrid_r`, which an earlier cell bound to an open
# file handle; from here on the name refers to a list of strings.
hybrid_r = []
hybrid_r.extend(' '.join(map(str, get_lemmas(doc))) for doc in tqdm(hybrid_docs_r))

## Evaluation

In [None]:
def compute_metrics(pd, true):
  """Compute a ROUGE-1 F-measure for each (prediction, reference) pair.

  Args:
    pd: sequence of predictions; each element must support `len()`.
    true: sequence of references, same length as `pd`; each element must
      support `len()` and slicing.

  Returns:
    A list with one F-measure per scored example, in input order. An example
    is scored only when its prediction is strictly shorter than its reference;
    the reference is then clipped to the prediction's length before scoring.
    Examples that are not shorter, or whose scoring raises KeyError, are
    skipped. Returns None (after printing a message) when `pd` and `true`
    have different lengths.

  Note:
    Relies on the notebook-level `rouge` metric object.
    NOTE(review): `rouge.compute` receives a single example rather than a
    batch here — confirm this matches what the elements of `pd`/`true` are.
  """
  if len(pd) != len(true):
    print("The number of examples are different.")
    return None

  scores = []
  for pd_curr, true_curr in zip(pd, true):
    # Only predictions strictly shorter than their reference are scored.
    if len(pd_curr) >= len(true_curr):
      continue
    try:
      rouge_output = rouge.compute(pd_curr,true_curr[0:len(pd_curr)], rouge_types=["rouge1"])["rouge1"].mid
      scores.append(rouge_output.fmeasure)
    except KeyError as e:
      print('I got a KeyError')
  # Return after the loop so every scored example is included; returning from
  # inside the loop would hand back only the first example's score.
  return scores

In [None]:
def compute_std(l):
  """Return `(mean, standard deviation)` of the values in `l`.

  `l` is converted with `np.array`; both statistics come from NumPy's
  `np.mean` and `np.std` with their default arguments.
  """
  values = np.array(l)
  return np.mean(values), np.std(values)

In [None]:
# Pass the three `_d` handles opened near the top of the notebook through `readfile`.
# NOTE(review): `readfile` is not defined anywhere in the visible notebook — it must
# already exist in the kernel for this cell to run; confirm where it comes from.
d_truth = readfile(original_d)
d_gpt = readfile(gpt_d)
d_hybrid = readfile(hybrid_d)

In [None]:
# Same as the daily cell, for the reddit names.
# NOTE(review): `readfile` is not defined anywhere in the visible notebook.
# NOTE(review): in top-to-bottom order `gpt_r` and `hybrid_r` have been rebound to
# lists by earlier cells, so only `original_r` is still a file handle here — confirm
# what `readfile` expects.
r_truth = readfile(original_r)
r_gpt = readfile(gpt_r)
r_hybrid = readfile(hybrid_r)

## Computing Daily Mail outputs


In [None]:
# Score the GPT daily outputs against the daily references, then print the
# (mean, standard deviation) pair that compute_std returns for the result.
dm_gpt = compute_metrics(d_gpt,d_truth)
print(compute_std(dm_gpt))

In [None]:
# Score the hybrid daily outputs against the daily references, then print the
# (mean, standard deviation) pair that compute_std returns for the result.
dm_hybrid = compute_metrics(d_hybrid,d_truth)
print(compute_std(dm_hybrid))

In [None]:
# Display compute_metrics' result for the lemmatised hybrid outputs vs. the
# lemmatised, 70-lemma-truncated references (value is shown, not stored).
compute_metrics(hybrid_dm,true_dm)

In [None]:
# ROUGE-1 over the whole list of lemmatised hybrid outputs vs. truncated references
# in one call; keep the `.mid` entry of the "rouge1" result and print it.
rouge_output = rouge.compute(hybrid_dm,true_dm, rouge_types=["rouge1"])["rouge1"].mid
print(rouge_output)

In [None]:
# ROUGE-1 over the whole list of lemmatised GPT outputs vs. truncated references
# in one call; keep the `.mid` entry of the "rouge1" result and print it.
rouge_output_gpt = rouge.compute(gpt_dm,true_dm, rouge_types=["rouge1"])["rouge1"].mid
print(rouge_output_gpt)

## Computing reddit outputs

In [None]:
# Score the GPT reddit outputs against the reddit references, then print the
# (mean, standard deviation) pair that compute_std returns for the result.
reddit_gpt = compute_metrics(r_gpt,r_truth)
print(compute_std(reddit_gpt))

In [None]:
# Score the hybrid reddit outputs against the reddit references, then print the
# (mean, standard deviation) pair that compute_std returns for the result.
reddit_hybrid = compute_metrics(r_hybrid,r_truth)
print(compute_std(reddit_hybrid))

In [None]:
# ROUGE-1 over the lemmatised reddit hybrid outputs vs. truncated reddit references; keeps `.mid`.
# NOTE(review): the name is spelled `hybird`; the cell that prints it uses the same spelling.
rouge_r_hybird = rouge.compute(hybrid_r,true_r, rouge_types=["rouge1"])["rouge1"].mid

In [None]:
# Show the reddit hybrid ROUGE-1 result computed in the previous cell.
print(rouge_r_hybird)

In [None]:
# ROUGE-1 over the lemmatised reddit GPT outputs vs. truncated reddit references; keeps `.mid`.
rouge_r_gpt = rouge.compute(gpt_r,true_r, rouge_types=["rouge1"])["rouge1"].mid


In [None]:
# Show the reddit GPT ROUGE-1 result computed in the previous cell.
print(rouge_r_gpt)

## Testing playground

In [None]:
# Scratch check: display the Python type of what `readfile` produced for the daily references.
type(d_truth)

In [None]:
# Scratch check: display the element at index 1 of the reddit references.
r_truth[1]