In [None]:
!pip install transformers
!pip install sentencepiece
!pip install rouge
!pip install -U sentence-transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.2-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 4.8 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.9.1-py3-none-any.whl (120 kB)
[K     |████████████████████████████████| 120 kB 36.6 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 14.3 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.9.1 tokenizers-0.12.1 transformers-4.21.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PegasusForConditionalGeneration, AutoModelForSequenceClassification, AutoModel
import torch
import pandas as pd
from tqdm import tqdm
import requests
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge import rouge
import pandas as pd

# Loading test pairs from the combined dataset (PAWS-X + TaPaCo)

In [None]:
test_combined_en = pd.read_csv('/content/drive/MyDrive/working_directory/tfm/Distilled_GPT2_finetuning/datasets/Combined(PAWSX + Tapaco)/dataset_comb_test_en.csv')

In [None]:
test_combined_en

Unnamed: 0,Text,Paraphrase,lan
0,The Chapel and Hall were both fully funded by ...,The chapel and the room were both fully funded...,en
1,This isn't my key.,This key is not mine.,en
2,He is a biologist.,He's a biologist.,en
3,I haven't read all the books on the shelves.,It's not as if I had read all the books on the...,en
4,He is able to run faster than I am.,He can run faster than I.,en
...,...,...,...
11924,Many provinces and states organize regional an...,Many provinces and states organize regional an...,en
11925,He still has not called.,He hasn't called yet.,en
11926,We'll certainly invite Tom.,We'll definitely invite Tom.,en
11927,Tom called the police.,Tom called the cops.,en


In [None]:
# Keep only the source sentences and cap the evaluation set at the first 5000 rows.
test_combined_en = test_combined_en[['Text']].head(5000)

In [None]:
test_combined_en

Unnamed: 0,Text
0,The Chapel and Hall were both fully funded by ...
1,This isn't my key.
2,He is a biologist.
3,I haven't read all the books on the shelves.
4,He is able to run faster than I am.
...,...
4995,Tom and Mary said they're not hungry.
4996,I went swimming in the sea.
4997,I'm not too surprised.
4998,I think you could've been more patient.


# Evaluation metrics code

## Lexical scores

### Rouge scores


In [None]:
def rouge_score(candidate,reference,ngram=1 ,metric='f'):
  """Return one ROUGE figure for a candidate sentence scored against a reference.

  Both strings are lower-cased before they are handed to the `rouge` package.

  Args:
    candidate: generated sentence (the hypothesis).
    reference: sentence the candidate is compared against.
    ngram: 1, 2 or 'l' select the 'rouge-1', 'rouge-2' or 'rouge-l' entry;
      any other value is used unchanged as the key into the library's result.
    metric: key of the selected entry to return ('f' by default).

  Returns:
    The requested score, or 0.0 when either string holds no non-whitespace
    characters.
  """
  from rouge import Rouge

  # A blank sentence has nothing that could overlap, so score it 0.0 up front
  # instead of passing it to the library (which presumably rejects an empty
  # hypothesis/reference -- confirm against the installed version).
  if not candidate.strip() or not reference.strip():
    return 0.0

  variant_by_order = {1: 'rouge-1', 2: 'rouge-2', 'l': 'rouge-l'}
  # Unknown selectors fall through untouched so explicit keys keep working.
  variant = variant_by_order.get(ngram, ngram)

  all_scores = Rouge().get_scores(candidate.lower(), reference.lower())

  return all_scores[0][variant][metric]

### BLEU

In [None]:
nltk.download('punkt')
def bleu_score(candidate, reference, ngram=1):
  """Score a candidate sentence against one reference with NLTK's sentence_bleu.

  Both sentences are lower-cased and split with nltk.word_tokenize first.

  Args:
    candidate: generated sentence (the hypothesis).
    reference: sentence the candidate is compared against.
    ngram: 1, 2, 3 or 4 puts the whole weight on that single n-gram order;
      any other value is forwarded unchanged as the `weights` argument.

  Returns:
    Whatever sentence_bleu returns for the tokenized pair.
  """
  from nltk.translate.bleu_score import sentence_bleu

  orders = (1, 2, 3, 4)
  weights = ngram
  for order in orders:
    if ngram == order:
      # One-hot weights: only the requested order contributes.
      weights = tuple(1 if slot == order else 0 for slot in orders)
      break

  hypothesis_tokens = nltk.word_tokenize(candidate.lower())
  reference_tokens = nltk.word_tokenize(reference.lower())

  return sentence_bleu([reference_tokens], hypothesis_tokens, weights=weights)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


### Jaccard similarity

In [None]:
def jaccard_similarity(candidate, reference, ngram=1):
  """Jaccard similarity between the n-gram sets of two sentences.

  Both sentences are lower-cased and split with nltk.word_tokenize; for
  ngram != 1 the token lists are turned into n-grams with nltk.ngrams.

  Args:
    candidate: generated sentence.
    reference: sentence the candidate is compared against.
    ngram: n-gram order used to build the two sets (1 = single tokens).

  Returns:
    |A & B| / |A | B| as a float. When both sets are empty (blank input, or
    sentences shorter than `ngram` tokens) the sets are identical and 1.0 is
    returned instead of dividing by zero.
  """
  import nltk

  # Only call the downloader when the tokenizer data is missing; the function
  # runs once per sentence pair, so an unconditional download call is repeated
  # thousands of times.
  try:
    nltk.data.find('tokenizers/punkt')
  except LookupError:
    nltk.download('punkt', quiet=True)

  candidate_items = nltk.word_tokenize(candidate.lower())
  reference_items = nltk.word_tokenize(reference.lower())

  if ngram != 1:
    candidate_items = nltk.ngrams(candidate_items, n=ngram)
    reference_items = nltk.ngrams(reference_items, n=ngram)

  candidate_set = set(candidate_items)
  reference_set = set(reference_items)

  union_size = len(candidate_set | reference_set)
  if union_size == 0:
    # Two empty sets: nothing to compare, treated as identical by convention.
    return 1.0

  return len(candidate_set & reference_set) / union_size

## Semantics evaluation models

In [None]:
# Load model from HuggingFace Hub
paraphrase_xlm_r_multilingual_tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-xlm-r-multilingual-v1')
paraphrase_xlm_r_multilingual_model = AutoModel.from_pretrained('sentence-transformers/paraphrase-xlm-r-multilingual-v1')

Downloading tokenizer_config.json:   0%|          | 0.00/550 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading sentencepiece.bpe.model:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/8.68M [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

In [None]:
# The tokenizer/model cell above must be run before this function is called.
def cosine_similarity(sentence1, sentence2):
  """Cosine similarity between the sentence embeddings of two sentences.

  The sentences are encoded together with the notebook-level
  paraphrase_xlm_r_multilingual_tokenizer / paraphrase_xlm_r_multilingual_model,
  the token embeddings are mean-pooled under the attention mask, and the two
  pooled vectors are compared.

  Args:
    sentence1: first sentence.
    sentence2: second sentence.

  Returns:
    The cosine similarity as a Python float.
  """
  import torch

  def mean_pooling(model_output, attention_mask):
    # Average the token embeddings, counting only positions the mask keeps.
    token_embeddings = model_output[0]  # first element holds all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * input_mask_expanded, 1)
    # clamp guards against division by zero for a fully masked row
    return summed / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

  # Tokenize both sentences as one padded batch.
  encoded_input = paraphrase_xlm_r_multilingual_tokenizer(
      [sentence1, sentence2], padding=True, truncation=True, return_tensors='pt')
  # Run on whichever device the model currently lives on.
  encoded_input = encoded_input.to(paraphrase_xlm_r_multilingual_model.device)

  # Inference only: no gradients needed.
  with torch.no_grad():
    model_output = paraphrase_xlm_r_multilingual_model(**encoded_input)

  # Mean pooling over the tokens of each sentence.
  sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

  # Slicing keeps a (1, dim) shape for each vector without re-wrapping the
  # tensors in torch.tensor(), which copies them and emits a UserWarning.
  similarity = torch.cosine_similarity(sentence_embeddings[0:1], sentence_embeddings[1:2])
  return float(similarity)

# PEGASUS

## PEGASUS paraphrase inference


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PegasusForConditionalGeneration
import torch
import pandas as pd
from tqdm import tqdm

model_identifier = 'tuner007/pegasus_paraphrase'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'

pegasus_tokenizer = AutoTokenizer.from_pretrained(model_identifier)

pegasus_model = PegasusForConditionalGeneration.from_pretrained(model_identifier).to(torch_device)

In [None]:
def get_response_pegasus(input_text,num_return_sequences,num_beams):
  """Generate paraphrases of one sentence with the PEGASUS model loaded above.

  Args:
    input_text: sentence to paraphrase.
    num_return_sequences: number of generated sequences to return.
    num_beams: beam count handed to generate().

  Returns:
    List of decoded paraphrases with special tokens removed.
  """
  # Input is truncated to 60 tokens and moved to the device the model uses.
  encoded = pegasus_tokenizer(
      [input_text],
      truncation=True,
      padding='longest',
      max_length=60,
      return_tensors="pt",
  ).to(torch_device)

  # NOTE(review): temperature is forwarded as-is; it presumably has no effect
  # unless sampling is enabled in generate() -- confirm.
  generated_ids = pegasus_model.generate(
      **encoded,
      max_length=60,
      num_beams=num_beams,
      num_return_sequences=num_return_sequences,
      temperature=1.5,
  )

  return pegasus_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

In [None]:
# One paraphrase per source sentence: a single returned sequence from a single beam.
pegasus_paraphrases = []
for original_sentence in tqdm(test_combined_en['Text']):
  pegasus_paraphrases.append(get_response_pegasus(original_sentence, 1, 1)[0])

100%|██████████| 5000/5000 [18:48<00:00,  4.43it/s]


In [None]:
pegasus_paraphrases

['The Chapel and Hall were designed by Butterfield and funded by William Gibbs.',
 "This isn't my key.",
 'He is a Biologist.',
 "I haven't read all the books.",
 'He is able to run faster than I am.',
 'Arnold was sent to Skenesboro and Asa Douglas to secure boats when Samuel Herrick arrived.',
 'Jon Uren, marketing director of Warner Music Europe, said that the song had " early" support across Europe.',
 'Tom left after Mary.',
 'She has lived in that town for five years.',
 'I think wine is good.',
 'What are you smiling at?',
 'The Nationalist parties and other liberal groups said that they would not participate in the elections.',
 'Tom asked if he was not disturbed.',
 'Tom changed his clothes.',
 'The Toronto Maple Leafs have been a franchise for 54 years, the 64th season being the 1980 - 81 season.',
 "I've dated a person who was crazy.",
 '13 ranchos were granted in the state of California during the time when it was a province of independent Mexico.',
 "Tom thinks I'm dumb.",

In [None]:
test_combined_en['Pegasus_paraphrases'] = pegasus_paraphrases

## Computing metrics over pegasus paraphrases

In [None]:
Jaccard_1 = []
bleu_1 = []
bleu_2 = []
bleu_3 = []
rouge_l = []

# Pair each source sentence with its paraphrase by row position. Looking rows up
# with test_combined_en[col][n] is a label lookup, which only matches the loop
# counter while the frame still carries its default 0..len-1 index.
sentence_pairs = zip(test_combined_en['Text'], test_combined_en['Pegasus_paraphrases'])
for reference, candidate in tqdm(sentence_pairs, total=len(test_combined_en)):
  # lexical-syntactical variation metrics
  rouge_l.append(rouge_score(candidate, reference,ngram='l' ,metric='f'))
  bleu_1.append(bleu_score(candidate, reference, ngram=1))
  bleu_2.append(bleu_score(candidate, reference, ngram=2))
  bleu_3.append(bleu_score(candidate, reference, ngram=3))
  Jaccard_1.append(jaccard_similarity(candidate, reference, ngram=1))


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
100%|██████████| 5000/5000 [00:08<00:00, 582.18it/s]


In [None]:
test_combined_en['bleu_1'] = bleu_1
test_combined_en['bleu_2'] = bleu_2
test_combined_en['bleu_3'] = bleu_3
test_combined_en['rouge_l'] = rouge_l
test_combined_en['jaccard_1'] = Jaccard_1

In [None]:
test_combined_en

Unnamed: 0,Text,Pegasus_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1
0,The Chapel and Hall were both fully funded by ...,The Chapel and Hall were designed by Butterfie...,0.751477,0.520254,0.375739,0.720000,0.800000
1,This isn't my key.,This isn't my key.,1.000000,1.000000,1.000000,1.000000,1.000000
2,He is a biologist.,He is a Biologist.,1.000000,1.000000,1.000000,1.000000,1.000000
3,I haven't read all the books on the shelves.,I haven't read all the books.,0.687289,0.589105,0.572741,0.857143,0.800000
4,He is able to run faster than I am.,He is able to run faster than I am.,1.000000,1.000000,1.000000,1.000000,1.000000
...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,Tom and Mary said they are not hungry.,0.888889,0.750000,0.571429,0.800000,0.800000
4996,I went swimming in the sea.,I went swimming in the ocean.,0.857143,0.666667,0.600000,0.833333,0.750000
4997,I'm not too surprised.,I'm not surprised.,0.818731,0.614048,0.272910,0.857143,0.833333
4998,I think you could've been more patient.,I think you could have been more patient.,0.888889,0.750000,0.571429,0.800000,0.800000


In [None]:
# Embedding cosine similarity of every paraphrase against its source sentence.
# Rows are paired by position, so the result does not depend on the frame
# carrying a default 0..len-1 index.
cosine_similarities = []
sentence_pairs = zip(test_combined_en['Text'], test_combined_en['Pegasus_paraphrases'])
for reference, candidate in tqdm(sentence_pairs, total=len(test_combined_en)):
  cosine_similarities.append(cosine_similarity(candidate, reference))

100%|██████████| 5000/5000 [14:46<00:00,  5.64it/s]


In [None]:
cosine_similarities

[0.8988051414489746,
 1.0000001192092896,
 0.8168563842773438,
 0.9121456146240234,
 1.0,
 0.7893121242523193,
 0.8503328561782837,
 0.8887044191360474,
 0.9668281078338623,
 0.9876384735107422,
 0.9667176604270935,
 0.6730732321739197,
 0.9039831161499023,
 1.0,
 0.844748854637146,
 0.7692811489105225,
 0.7111097574234009,
 0.9360570311546326,
 1.0000001192092896,
 0.9999999403953552,
 1.0000001192092896,
 0.650810182094574,
 0.8481392860412598,
 1.0000001192092896,
 0.8229542374610901,
 0.919998824596405,
 0.8811103105545044,
 0.8110806941986084,
 0.7688732743263245,
 0.9485958218574524,
 0.8777983784675598,
 0.8485931754112244,
 0.6159957647323608,
 0.9610430002212524,
 0.9686131477355957,
 0.8871310949325562,
 0.9638571739196777,
 0.8019075989723206,
 0.5353590250015259,
 0.508536159992218,
 0.9266802072525024,
 0.7270269989967346,
 0.9808511137962341,
 0.9562147855758667,
 0.8634520173072815,
 0.974837064743042,
 0.7310572266578674,
 0.9216208457946777,
 0.9383984804153442,
 0.767

In [None]:
len(cosine_similarities)

5000

In [None]:
test_combined_en['cosine_similarities'] = cosine_similarities

In [None]:
test_combined_en

In [None]:
test_combined_en.to_csv('/content/drive/MyDrive/working_directory/tfm/models evaluation/pegasus_evaluation.csv', index=False)

In [None]:
test_combined_en['bleu_1'].describe()

count    5.000000e+03
mean     7.142181e-01
std      2.117976e-01
min      3.775135e-11
25%      5.643211e-01
50%      7.245769e-01
75%      8.750000e-01
max      1.000000e+00
Name: bleu_1, dtype: float64

In [None]:
test_combined_en['bleu_2'].describe()

count     5.000000e+03
mean      5.473337e-01
std       2.807887e-01
min      8.399956e-319
25%       3.333333e-01
50%       5.014747e-01
75%       7.223982e-01
max       1.000000e+00
Name: bleu_2, dtype: float64

In [None]:
test_combined_en['bleu_3'].describe()

count     5.000000e+03
mean      4.322730e-01
std       3.326158e-01
min      8.399956e-319
25%       1.791328e-01
50%       3.585562e-01
75%       6.250000e-01
max       1.000000e+00
Name: bleu_3, dtype: float64

In [None]:
test_combined_en['rouge_l'].describe()

count    5000.000000
mean        0.703858
std         0.212835
min         0.000000
25%         0.545455
50%         0.714286
75%         0.880000
max         1.000000
Name: rouge_l, dtype: float64

In [None]:
test_combined_en['jaccard_1'].describe()

count    5000.000000
mean        0.698851
std         0.204917
min         0.050000
25%         0.545455
50%         0.684211
75%         0.857143
max         1.000000
Name: jaccard_1, dtype: float64

In [None]:
test_combined_en['cosine_similarities'].describe()

count    5000.000000
mean        0.867727
std         0.121969
min        -0.035636
25%         0.793547
50%         0.887522
75%         0.978057
max         1.000000
Name: cosine_similarities, dtype: float64

Cosine similarity filters + Counts

In [None]:
test_combined_en_filter1= test_combined_en[test_combined_en['cosine_similarities'] > 0.5]

In [None]:
test_combined_en_filter2= test_combined_en[test_combined_en['cosine_similarities'] > 0.7]

In [None]:
test_combined_en_filter3= test_combined_en[test_combined_en['cosine_similarities'] > 0.9]

In [None]:
print(len(test_combined_en_filter1),len(test_combined_en_filter2),len(test_combined_en_filter3))

4962 4510 2360


# T5

## T5-large high quality paraphrase inference

In [None]:
#Installing correct versions of dependencies to run the model correctly
!pip install transformers==4.10.2
!pip install sentencepiece==0.1.96

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from tqdm import tqdm

In [None]:
t5_model = AutoModelForSeq2SeqLM.from_pretrained("ramsrigouthamg/t5-large-paraphraser-diverse-high-quality")
t5_tokenizer = AutoTokenizer.from_pretrained("ramsrigouthamg/t5-large-paraphraser-diverse-high-quality")


Downloading config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.75G [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/1.81k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/1.74k [00:00<?, ?B/s]

In [None]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print ("device ",device)
t5_model = t5_model.to(device)

device  cuda


In [None]:

# Beam Search
def get_response_t5(input_text,num_return_sequences,num_beams):
  """Generate paraphrases of one sentence with the T5 paraphraser loaded above.

  The input is wrapped as "paraphrase: <text> </s>", encoded, and decoded with
  generate() using the given beam settings.

  Args:
    input_text: sentence to paraphrase.
    num_return_sequences: number of generated sequences to return.
    num_beams: beam count handed to generate().

  Returns:
    List of decoded paraphrases with the model's leading output marker removed.
  """
  # Marker the model is assumed to put in front of every output; its length (19)
  # matches the fixed slice this function used before -- TODO confirm against
  # the model card.
  output_prefix = "paraphrasedoutput: "

  text = "paraphrase: "+input_text + " </s>"

  # truncation=True makes max_length effective; without a truncation strategy
  # the tokenizer reports max_length as ignored.
  encoding = t5_tokenizer.encode_plus(text,max_length = 128, truncation=True, padding=True, return_tensors="pt")
  input_ids,attention_mask  = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)

  t5_model.eval()
  beam_outputs = t5_model.generate(
      input_ids=input_ids,attention_mask=attention_mask,
      max_length=128,
      early_stopping=True,
      num_beams=num_beams,
      num_return_sequences=num_return_sequences)

  paraphrased = []
  for beam_output in beam_outputs:
      sent = t5_tokenizer.decode(beam_output, skip_special_tokens=True,clean_up_tokenization_spaces=True)
      # Strip the marker only when it is really there; a blind [19:] slice
      # would cut real text from any output that lacks it.
      if sent.startswith(output_prefix):
          sent = sent[len(output_prefix):]
      paraphrased.append(sent)
  return paraphrased

In [None]:
# One paraphrase per source sentence: a single returned sequence from a single beam.
t5_paraphrases = []
for original_sentence in tqdm(test_combined_en['Text']):
  t5_paraphrases.append(get_response_t5(original_sentence, 1, 1)[0])

  "`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
100%|██████████| 5000/5000 [58:39<00:00,  1.42it/s]


In [None]:
t5_paraphrases

['William Gibbs funded the Chapel and Hall, which was also designed by Butterfield.',
 "This isn't my only thing.",
 'He is a biologist.',
 "I haven't read any of the books on the shelves.",
 "He's able to run faster than I am.",
 'Arnold had already been sent from Skenesboro and Asa Douglas to Panton with sections to secure boats when Samuel Herrick arrived on the scene.',
 'The song also received " early " dazzling support across Europe," according to Jon Uren, Warner Music Europe\'s marketing manager.',
 'After Mary, Tom was dismissed.',
 'She has been living in the town for five years.',
 'I think the wine is delicious.',
 'What are you dreaming about?',
 'However, nationalist parties, as well as other liberal parties, have announced that they would not attend the elections in July.',
 'Tom said he would not be concerned with being disturbed.',
 'Tom changed his clothes.',
 "The 1980-81 Toronto Maple Leafs season was the 54th season of the franchise, the Maple Leafs' 64th season.",

In [None]:
test_combined_en['t5_paraphrases'] = t5_paraphrases

## Computing metrics over T5 paraphrases

In [None]:
Jaccard_1 = []
bleu_1 = []
bleu_2 = []
bleu_3 = []
rouge_l = []

# Pair each source sentence with its paraphrase by row position. Looking rows up
# with test_combined_en[col][n] is a label lookup, which only matches the loop
# counter while the frame still carries its default 0..len-1 index.
sentence_pairs = zip(test_combined_en['Text'], test_combined_en['t5_paraphrases'])
for reference, candidate in tqdm(sentence_pairs, total=len(test_combined_en)):
  # lexical-syntactical variation metrics
  rouge_l.append(rouge_score(candidate, reference,ngram='l' ,metric='f'))
  bleu_1.append(bleu_score(candidate, reference, ngram=1))
  bleu_2.append(bleu_score(candidate, reference, ngram=2))
  bleu_3.append(bleu_score(candidate, reference, ngram=3))
  Jaccard_1.append(jaccard_similarity(candidate, reference, ngram=1))


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
100%|██████████| 5000/5000 [00:10<00:00, 457.49it/s]


In [None]:
test_combined_en['bleu_1'] = bleu_1
test_combined_en['bleu_2'] = bleu_2
test_combined_en['bleu_3'] = bleu_3
test_combined_en['rouge_l'] = rouge_l
test_combined_en['jaccard_1'] = Jaccard_1

In [None]:
test_combined_en

Unnamed: 0,Text,t5_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1
0,The Chapel and Hall were both fully funded by ...,"William Gibbs funded the Chapel and Hall, whic...",0.654985,0.467846,0.314896,0.518519,0.666667
1,This isn't my key.,This isn't my only thing.,0.714286,0.500000,0.400000,0.666667,0.625000
2,He is a biologist.,He is a biologist.,1.000000,1.000000,1.000000,1.000000,1.000000
3,I haven't read all the books on the shelves.,I haven't read any of the books on the shelves.,0.833333,0.727273,0.600000,0.823529,0.750000
4,He is able to run faster than I am.,He's able to run faster than I am.,0.900000,0.777778,0.750000,0.823529,0.818182
...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,"They are not hungry, according to Tom and Mary.",0.636364,0.300000,0.111111,0.375000,0.538462
4996,I went swimming in the sea.,I went swimming in the sea for the first time.,0.636364,0.500000,0.444444,0.800000,0.700000
4997,I'm not too surprised.,I'm not surprised.,0.818731,0.614048,0.272910,0.857143,0.833333
4998,I think you could've been more patient.,I think you should have been more patient.,0.777778,0.625000,0.428571,0.800000,0.636364


In [None]:
# Embedding cosine similarity of every paraphrase against its source sentence.
# Rows are paired by position, so the result does not depend on the frame
# carrying a default 0..len-1 index.
cosine_similarities = []
sentence_pairs = zip(test_combined_en['Text'], test_combined_en['t5_paraphrases'])
for reference, candidate in tqdm(sentence_pairs, total=len(test_combined_en)):
  cosine_similarities.append(cosine_similarity(candidate, reference))

100%|██████████| 5000/5000 [15:55<00:00,  5.23it/s]


In [None]:
cosine_similarities

[0.8850963115692139,
 0.6095017790794373,
 1.0000001192092896,
 0.9863133430480956,
 0.9860917329788208,
 0.8571737408638,
 0.8069598078727722,
 0.7686092257499695,
 0.9455618262290956,
 0.9223952889442444,
 0.6106800436973572,
 0.6341623663902283,
 0.8864777684211731,
 1.0,
 0.9220762252807616,
 0.9999999403953552,
 0.9252288341522216,
 0.8517116904258728,
 1.0000001192092896,
 0.9999999403953552,
 1.0000001192092896,
 0.8900654911994934,
 0.8968914151191711,
 1.0000001192092896,
 0.9620798230171204,
 0.9776427745819092,
 1.0000001192092896,
 0.5361198782920837,
 0.907414972782135,
 0.8598713278770447,
 0.9037912487983704,
 0.7524813413619995,
 0.903741419315338,
 0.9384921193122864,
 0.9958555698394777,
 0.8388655781745911,
 1.0,
 0.9370099306106568,
 0.9999999403953552,
 0.9393525123596193,
 1.0000001192092896,
 0.9400084018707277,
 0.85153728723526,
 0.8904365301132202,
 0.87713223695755,
 0.9861152768135072,
 0.9999999403953552,
 0.9709902405738832,
 0.946008801460266,
 0.90565603

In [None]:
len(cosine_similarities)

5000

In [None]:
test_combined_en['cosine_similarities'] = cosine_similarities

In [None]:
test_combined_en

Unnamed: 0,Text,t5_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1,cosine_similarities
0,The Chapel and Hall were both fully funded by ...,"William Gibbs funded the Chapel and Hall, whic...",0.654985,0.467846,0.314896,0.518519,0.666667,0.885096
1,This isn't my key.,This isn't my only thing.,0.714286,0.500000,0.400000,0.666667,0.625000,0.609502
2,He is a biologist.,He is a biologist.,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
3,I haven't read all the books on the shelves.,I haven't read any of the books on the shelves.,0.833333,0.727273,0.600000,0.823529,0.750000,0.986313
4,He is able to run faster than I am.,He's able to run faster than I am.,0.900000,0.777778,0.750000,0.823529,0.818182,0.986092
...,...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,"They are not hungry, according to Tom and Mary.",0.636364,0.300000,0.111111,0.375000,0.538462,0.921720
4996,I went swimming in the sea.,I went swimming in the sea for the first time.,0.636364,0.500000,0.444444,0.800000,0.700000,0.926251
4997,I'm not too surprised.,I'm not surprised.,0.818731,0.614048,0.272910,0.857143,0.833333,0.960693
4998,I think you could've been more patient.,I think you should have been more patient.,0.777778,0.625000,0.428571,0.800000,0.636364,0.945135


In [None]:
test_combined_en.to_csv('/content/drive/MyDrive/working_directory/tfm/models evaluation/t5_evaluation.csv', index=False)

In [None]:
test_combined_en['bleu_1'].describe()

count    5000.000000
mean        0.801562
std         0.171524
min         0.000000
25%         0.705401
50%         0.833333
75%         0.941176
max         1.000000
Name: bleu_1, dtype: float64

In [None]:
test_combined_en['bleu_2'].describe()

count    5000.000000
mean        0.628569
std         0.273035
min         0.000000
25%         0.444444
50%         0.625000
75%         0.841044
max         1.000000
Name: bleu_2, dtype: float64

In [None]:
test_combined_en['bleu_3'].describe()

count    5000.000000
mean        0.512634
std         0.331539
min         0.000000
25%         0.250000
50%         0.500000
75%         0.769231
max         1.000000
Name: bleu_3, dtype: float64

In [None]:
test_combined_en['rouge_l'].describe()

count    5000.000000
mean        0.714817
std         0.218238
min         0.000000
25%         0.571429
50%         0.727273
75%         0.888889
max         1.000000
Name: rouge_l, dtype: float64

In [None]:
test_combined_en['jaccard_1'].describe()

count    5000.000000
mean        0.751659
std         0.202830
min         0.000000
25%         0.625000
50%         0.750000
75%         0.944444
max         1.000000
Name: jaccard_1, dtype: float64

In [None]:
test_combined_en['cosine_similarities'].describe()

count    5000.000000
mean        0.922573
std         0.095737
min         0.464620
25%         0.860187
50%         0.969157
75%         1.000000
max         1.000000
Name: cosine_similarities, dtype: float64

Cosine similarity filters + Counts

In [None]:
test_combined_en_filter1= test_combined_en[test_combined_en['cosine_similarities'] > 0.5]

In [None]:
test_combined_en_filter2= test_combined_en[test_combined_en['cosine_similarities'] > 0.7]

In [None]:
test_combined_en_filter3= test_combined_en[test_combined_en['cosine_similarities'] > 0.9]

In [None]:
print(len(test_combined_en_filter1),len(test_combined_en_filter2),len(test_combined_en_filter3))

4992 4856 3278


# BART


## BART paraphrase inference

In [None]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
# Load the BART paraphrase checkpoint.
bart_model = BartForConditionalGeneration.from_pretrained('eugenesiow/bart-paraphrase')
# NOTE(review): this rebinds the notebook-wide `device` that get_response_t5
# reads; once this cell has run, T5 inputs are sent to the CPU as well.
device = torch.device("cpu")
# The BART model is placed on the CPU.
bart_model  = bart_model.to(device)
# Tokenizer from the same checkpoint as the model.
bart_tokenizer = BartTokenizer.from_pretrained('eugenesiow/bart-paraphrase')


Downloading config.json:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.51G [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/780k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/332 [00:00<?, ?B/s]

In [None]:
def get_response_bart(input_text, max_length=128):
  """Generate paraphrase(s) of ``input_text`` with the BART paraphrase model.

  Uses the notebook-level ``bart_tokenizer`` and ``bart_model``.

  Args:
    input_text: Sentence (or list of sentences) to paraphrase.
    max_length: Upper bound, in tokens, on every generated sequence. Without
      an explicit bound the model falls back to its configured default, and
      the outputs recorded in this notebook were cut off mid-sentence.

  Returns:
    List of decoded strings (special tokens removed), one per generated
    sequence.
  """
  # padding/truncation keep batched or over-long inputs well-formed; for a
  # single short sentence they are no-ops.
  batch = bart_tokenizer(input_text, return_tensors='pt', padding=True, truncation=True)
  # Keep the inputs on whatever device the model currently lives on.
  batch = batch.to(bart_model.device)
  generated_ids = bart_model.generate(
      batch['input_ids'],
      # Pass the mask explicitly so padded positions are never attended to.
      attention_mask=batch['attention_mask'],
      max_length=max_length,
  )
  return bart_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

In [None]:
# One generation call per source sentence. get_response_bart returns a list of
# decoded sequences; only the first one is kept.
bart_paraphrases = []
for source_text in tqdm(test_combined_en['Text']):
  bart_paraphrases.append(get_response_bart(source_text)[0])

100%|██████████| 5000/5000 [3:40:38<00:00,  2.65s/it]


In [None]:
bart_paraphrases

['The chapel and the hall were both fully funded by William Gibbs and were also designed by Butter',
 'This is not my key.',
 'He is a biologist.',
 "I haven't read all the books on the shelves.",
 'He is faster than me.',
 'When Samuel Herrick arrived on the scene, Arnold had already been sent to Skenes',
 'According to Jon Uren, Marketing Director of Warner Music Europe, the song had also ``',
 'After Mary, Tom took off.',
 'She has already lived in that town for five years.',
 'Is wine good?',
 'What do you smile about?',
 'However, nationalist parties, together with other liberal groups, said they would boycott the July elections',
 'Tom asked that he not be disturbed.',
 'Tom changed his clothes.',
 'The 1980 -- 81 Toronto Maple Leafs season was the Toronto Maple - Leafs 54th season of',
 "I've dated a lunatic.",
 'During the period between 1836 and 1846, when California was a province of independent Mexico',
 "Tom thinks I'm an idiot.",
 'How did she learn to dance?',
 'I talk fa

In [None]:
test_combined_en['bart_paraphrases'] = bart_paraphrases

## Computing metrics over BART paraphrases

In [None]:
# Fresh accumulators: one score per (source sentence, BART paraphrase) pair.
rouge_l, bleu_1, bleu_2, bleu_3, Jaccard_1 = [], [], [], [], []

source_texts = test_combined_en['Text']
bart_outputs = test_combined_en['bart_paraphrases']

for row in tqdm(range(len(test_combined_en))):
  # The original sentence acts as the reference, the generated paraphrase as
  # the candidate.
  reference, candidate = source_texts[row], bart_outputs[row]
  # Lexical/syntactic variation metrics. rouge_score, bleu_score and
  # jaccard_similarity are not defined in this cell (presumably earlier in
  # the notebook).
  rouge_l.append(rouge_score(candidate, reference, ngram='l', metric='f'))
  for order, scores in ((1, bleu_1), (2, bleu_2), (3, bleu_3)):
    scores.append(bleu_score(candidate, reference, ngram=order))
  Jaccard_1.append(jaccard_similarity(candidate, reference, ngram=1))


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
100%|██████████| 5000/5000 [00:10<00:00, 457.49it/s]


In [None]:
# Attach the metric lists as columns, in the same order as before. Columns with
# these names already present on the frame are overwritten.
for column_name, column_values in (
    ('bleu_1', bleu_1),
    ('bleu_2', bleu_2),
    ('bleu_3', bleu_3),
    ('rouge_l', rouge_l),
    ('jaccard_1', Jaccard_1),
):
  test_combined_en[column_name] = column_values

In [None]:
test_combined_en

Unnamed: 0,Text,bart_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1
0,The Chapel and Hall were both fully funded by ...,The chapel and the hall were both fully funded...,0.888889,0.823529,7.500000e-01,0.928571,0.812500
1,This isn't my key.,This is not my key.,0.833333,0.600000,2.500000e-01,0.666667,0.714286
2,He is a biologist.,He is a biologist.,1.000000,1.000000,1.000000e+00,1.000000,1.000000
3,I haven't read all the books on the shelves.,I haven't read all the books on the shelves.,1.000000,1.000000,1.000000e+00,1.000000,1.000000
4,He is able to run faster than I am.,He is faster than me.,0.427848,0.205367,1.142391e-308,0.571429,0.454545
...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,Tom and Mary said they're not hungry.,1.000000,1.000000,1.000000e+00,1.000000,1.000000
4996,I went swimming in the sea.,I went swimming in the sea.,1.000000,1.000000,1.000000e+00,1.000000,1.000000
4997,I'm not too surprised.,I'm not too surprised.,1.000000,1.000000,1.000000e+00,1.000000,1.000000
4998,I think you could've been more patient.,"I think you could've been more patient, he said.",0.750000,0.636364,6.000000e-01,0.750000,0.750000


In [None]:
# Similarity between each BART paraphrase (candidate) and its source sentence
# (reference). cosine_similarity is not defined in this cell (presumably
# earlier in the notebook).
cosine_similarities = []
source_texts = test_combined_en['Text']
bart_outputs = test_combined_en['bart_paraphrases']
for row in tqdm(range(len(test_combined_en))):
  cosine_similarities.append(cosine_similarity(bart_outputs[row], source_texts[row]))

100%|██████████| 5000/5000 [15:55<00:00,  5.23it/s]


In [None]:
cosine_similarities

[0.8911691308021545,
 0.9903277158737183,
 1.0000001192092896,
 1.0,
 0.8351850509643555,
 0.7071976065635681,
 0.8033717274665833,
 0.9523603916168213,
 1.0000001192092896,
 0.7871351838111877,
 0.9563596844673157,
 0.8107166290283203,
 0.9999999403953552,
 1.0,
 0.942598819732666,
 0.9999999403953552,
 0.7531691789627075,
 1.0,
 1.0000001192092896,
 0.9580051302909851,
 1.0000001192092896,
 0.792489230632782,
 0.898823618888855,
 0.9910138845443726,
 0.9999998807907104,
 1.0,
 0.6816774606704712,
 1.0000001192092896,
 0.9054992198944092,
 0.9293562173843384,
 0.881113588809967,
 0.8320733308792114,
 0.7854591012001038,
 0.9999999403953552,
 0.9790887832641602,
 0.8979122042655945,
 1.0,
 0.8756097555160522,
 0.9999999403953552,
 0.9999999403953552,
 0.964002788066864,
 0.8716351389884949,
 0.9435681104660034,
 1.0,
 0.8752878904342651,
 0.9939123392105103,
 0.8455442190170288,
 1.0,
 0.9999999403953552,
 0.8914995193481445,
 0.7290793061256409,
 0.8146786689758301,
 0.827239036560058

In [None]:
len(cosine_similarities)

5000

In [None]:
test_combined_en['cosine_similarities'] = cosine_similarities

In [None]:
test_combined_en

Unnamed: 0,Text,bart_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1,cosine_similarities
0,The Chapel and Hall were both fully funded by ...,The chapel and the hall were both fully funded...,0.888889,0.823529,7.500000e-01,0.928571,0.812500,0.891169
1,This isn't my key.,This is not my key.,0.833333,0.600000,2.500000e-01,0.666667,0.714286,0.990328
2,He is a biologist.,He is a biologist.,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000
3,I haven't read all the books on the shelves.,I haven't read all the books on the shelves.,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000
4,He is able to run faster than I am.,He is faster than me.,0.427848,0.205367,1.142391e-308,0.571429,0.454545,0.835185
...,...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,Tom and Mary said they're not hungry.,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000
4996,I went swimming in the sea.,I went swimming in the sea.,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000
4997,I'm not too surprised.,I'm not too surprised.,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000
4998,I think you could've been more patient.,"I think you could've been more patient, he said.",0.750000,0.636364,6.000000e-01,0.750000,0.750000,0.870027


In [None]:
test_combined_en.to_csv('/content/drive/MyDrive/working_directory/tfm/models evaluation/bart_evaluation.csv', index=False)

In [None]:
test_combined_en['bleu_1'].describe()

count    5000.000000
mean        0.811152
std         0.224819
min         0.016960
25%         0.651439
50%         0.894839
75%         1.000000
max         1.000000
Name: bleu_1, dtype: float64

In [None]:
test_combined_en['bleu_2'].describe()

count     5.000000e+03
mean      7.402258e-01
std       2.738232e-01
min      6.374943e-309
25%       5.078890e-01
50%       7.829844e-01
75%       1.000000e+00
max       1.000000e+00
Name: bleu_2, dtype: float64

In [None]:
test_combined_en['bleu_3'].describe()

count     5.000000e+03
mean      6.852139e-01
std       3.225868e-01
min      4.492347e-309
25%       4.178112e-01
50%       7.142857e-01
75%       1.000000e+00
max       1.000000e+00
Name: bleu_3, dtype: float64

In [None]:
test_combined_en['rouge_l'].describe()

count    5000.000000
mean        0.835335
std         0.187984
min         0.000000
25%         0.705882
50%         0.888889
75%         1.000000
max         1.000000
Name: rouge_l, dtype: float64

In [None]:
test_combined_en['jaccard_1'].describe()

count    5000.000000
mean        0.825280
std         0.198060
min         0.066667
25%         0.666667
50%         0.888889
75%         1.000000
max         1.000000
Name: jaccard_1, dtype: float64

In [None]:
test_combined_en['cosine_similarities'].describe()

count    5000.000000
mean        0.900740
std         0.094167
min         0.100092
25%         0.850200
50%         0.914487
75%         0.983810
max         1.000000
Name: cosine_similarities, dtype: float64

Cosine similarity filters + Counts

In [None]:
test_combined_en_filter1= test_combined_en[test_combined_en['cosine_similarities'] > 0.5]

In [None]:
test_combined_en_filter2= test_combined_en[test_combined_en['cosine_similarities'] > 0.7]

In [None]:
test_combined_en_filter3= test_combined_en[test_combined_en['cosine_similarities'] > 0.9]

In [None]:
print(len(test_combined_en_filter1),len(test_combined_en_filter2),len(test_combined_en_filter3))

4985 4820 2820


# mT5

## mT5 paraphrase inference

In [None]:
!pip install simpletransformers

Downloading config.json:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.51G [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/780k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/332 [00:00<?, ?B/s]

In [None]:
import logging
import pandas as pd
from simpletransformers.t5 import T5Model, T5Args
import sklearn

In [None]:
# Directory of the fine-tuned checkpoint ("best_model") on the mounted Drive.
mt5_checkpoint_dir = '/content/drive/MyDrive/working_directory/tfm/Distilled_mt5_finetuning/mt5_outputs/best_model'

# NOTE(review): the model type passed here is "t5" although the checkpoint path
# says mt5 — confirm this matches the type used when the model was fine-tuned.
mt5_model = T5Model("t5", mt5_checkpoint_dir)

In [None]:
def get_response_mt5(input_text):
  """Paraphrase a single sentence with the fine-tuned mT5 model.

  Uses the notebook-level ``mt5_model``.

  Args:
    input_text: Sentence to paraphrase.

  Returns:
    The first element of the model's prediction for ``input_text``.
  """
  # The model loaded in this section is bound to `mt5_model`; the previous
  # `t5_model` reference is not defined here and raised NameError on a fresh
  # kernel.
  generated_sentence = mt5_model.predict([input_text])
  # predict() was given a one-element list, so unwrap its single prediction.
  return generated_sentence[0]

In [None]:
# Collect one mT5 paraphrase per source sentence.
mt5_paraphrases = []
for original_sentence in tqdm(test_combined_en['Text']):
  # get_response_mt5 already returns the unwrapped prediction, so it is stored
  # as-is: indexing it again with [0] would keep only its first character.
  # Results go into mt5_paraphrases (not bart_paraphrases), which is the list
  # the following cells display and attach to the frame.
  mt5_paraphrases.append(get_response_mt5(original_sentence))

100%|██████████| 5000/5000 [3:40:38<00:00,  2.65s/it]


In [None]:
mt5_paraphrases

['William Gibbs fully funded the Chapel and Hall, and Butterfield designed them.',
 "This isn't my forte.",
 'He is a biologist.',
 "I haven't read all of the books on the shelves.",
 'He can run faster than I can.',
 'Arnold had already been sent to Skenesboro and Asa Douglas to Panton with sections to hold boats when Samuel Herrick arrived.',
 'The album also received "early" great support throughout Europe, according to Jon Uren, marketing director of Warner Music Europe.',
 'After Mary, Tom took off.',
 'She has already lived in the town for five years.',
 'I think wine is good.',
 'What are you smiling about?',
 'However, nationalist groups, along with other liberal groups, said they would boycott the July elections.',
 'Tom asked him not to be disturbed.',
 'Tom changed his clothes.',
 "The 1980 - 1981 Toronto Maple Leafs season was the franchise's 54th season and the Maple Leafs' 64th season.",
 "I've met a nerd.",
 'The following 13 ranchos were granted in Napa County during th

In [None]:
test_combined_en['mt5_paraphrases'] = mt5_paraphrases

## Computing metrics over mT5 paraphrases

In [None]:
# Fresh accumulators: one score per (source sentence, mT5 paraphrase) pair.
rouge_l, bleu_1, bleu_2, bleu_3, Jaccard_1 = [], [], [], [], []

source_texts = test_combined_en['Text']
mt5_outputs = test_combined_en['mt5_paraphrases']

for row in tqdm(range(len(test_combined_en))):
  # The original sentence acts as the reference, the generated paraphrase as
  # the candidate.
  reference, candidate = source_texts[row], mt5_outputs[row]
  # Lexical/syntactic variation metrics. rouge_score, bleu_score and
  # jaccard_similarity are not defined in this cell (presumably earlier in
  # the notebook).
  rouge_l.append(rouge_score(candidate, reference, ngram='l', metric='f'))
  for order, scores in ((1, bleu_1), (2, bleu_2), (3, bleu_3)):
    scores.append(bleu_score(candidate, reference, ngram=order))
  Jaccard_1.append(jaccard_similarity(candidate, reference, ngram=1))


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
100%|██████████| 5000/5000 [00:10<00:00, 457.49it/s]


In [None]:
# Attach the metric lists as columns, in the same order as before. Columns with
# these names already present on the frame are overwritten.
for column_name, column_values in (
    ('bleu_1', bleu_1),
    ('bleu_2', bleu_2),
    ('bleu_3', bleu_3),
    ('rouge_l', rouge_l),
    ('jaccard_1', Jaccard_1),
):
  test_combined_en[column_name] = column_values

In [None]:
test_combined_en

Unnamed: 0,Text,mt5_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1
0,The Chapel and Hall were both fully funded by ...,William Gibbs fully funded the Chapel and Hall...,0.644123,0.289030,1.252462e-01,0.320000,0.647059
1,This isn't my key.,This isn't my forte.,0.833333,0.600000,5.000000e-01,0.750000,0.714286
2,He is a biologist.,He is a biologist.,1.000000,1.000000,1.000000e+00,1.000000,1.000000
3,I haven't read all the books on the shelves.,I haven't read all of the books on the shelves.,0.916667,0.818182,7.000000e-01,0.941176,0.909091
4,He is able to run faster than I am.,He can run faster than I can.,0.584101,0.333772,2.596003e-01,0.666667,0.545455
...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,Tom and Mary said they aren't hungry.,0.777778,0.625000,4.285714e-01,0.714286,0.636364
4996,I went swimming in the sea.,I went swimming in the sea.,1.000000,1.000000,1.000000e+00,1.000000,1.000000
4997,I'm not too surprised.,I'm not too surprised.,1.000000,1.000000,1.000000e+00,1.000000,1.000000
4998,I think you could've been more patient.,I think you should've been more patient.,0.888889,0.750000,5.714286e-01,0.857143,0.800000


In [None]:
# Similarity between each mT5 paraphrase (candidate) and its source sentence
# (reference). cosine_similarity is not defined in this cell (presumably
# earlier in the notebook).
cosine_similarities = []
source_texts = test_combined_en['Text']
mt5_outputs = test_combined_en['mt5_paraphrases']
for row in tqdm(range(len(test_combined_en))):
  cosine_similarities.append(cosine_similarity(mt5_outputs[row], source_texts[row]))

100%|██████████| 5000/5000 [15:55<00:00,  5.23it/s]


In [None]:
cosine_similarities

[0.8294095396995544,
 0.5710312724113464,
 1.0000001192092896,
 0.9972928762435912,
 0.9581010937690736,
 0.8246363401412964,
 0.7899690270423889,
 0.9523603916168212,
 0.9857299327850342,
 0.9876384735107422,
 1.0,
 0.7820661067962646,
 0.958531618118286,
 1.0,
 0.9154039621353148,
 0.435489296913147,
 0.918546199798584,
 1.0,
 0.6569724678993225,
 0.9999999403953552,
 1.0000001192092896,
 0.8993380665779114,
 0.8699037432670593,
 0.9910138845443726,
 0.9999998807907104,
 0.8412721157073975,
 1.0000001192092896,
 1.0000001192092896,
 0.760033905506134,
 0.9014626741409302,
 0.7945283055305481,
 0.8066702485084534,
 0.8572185039520264,
 0.9999999403953552,
 0.9977285861968994,
 0.889935314655304,
 0.8936795592308044,
 0.9335485696792604,
 0.9999999403953552,
 0.8875333666801453,
 1.0000001192092896,
 0.930136740207672,
 0.980851113796234,
 0.936462104320526,
 0.8868374228477478,
 0.9602332711219788,
 0.8063328266143799,
 0.9503862857818604,
 0.9383984804153442,
 0.8998771905899048,
 0.

In [None]:
len(cosine_similarities)

5000

In [None]:
test_combined_en['cosine_similarities'] = cosine_similarities

In [None]:
test_combined_en

Unnamed: 0,Text,mt5_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1,cosine_similarities
0,The Chapel and Hall were both fully funded by ...,William Gibbs fully funded the Chapel and Hall...,0.644123,0.289030,1.252462e-01,0.320000,0.647059,0.829410
1,This isn't my key.,This isn't my forte.,0.833333,0.600000,5.000000e-01,0.750000,0.714286,0.571031
2,He is a biologist.,He is a biologist.,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000
3,I haven't read all the books on the shelves.,I haven't read all of the books on the shelves.,0.916667,0.818182,7.000000e-01,0.941176,0.909091,0.997293
4,He is able to run faster than I am.,He can run faster than I can.,0.584101,0.333772,2.596003e-01,0.666667,0.545455,0.958101
...,...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,Tom and Mary said they aren't hungry.,0.777778,0.625000,4.285714e-01,0.714286,0.636364,0.996521
4996,I went swimming in the sea.,I went swimming in the sea.,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000
4997,I'm not too surprised.,I'm not too surprised.,1.000000,1.000000,1.000000e+00,1.000000,1.000000,1.000000
4998,I think you could've been more patient.,I think you should've been more patient.,0.888889,0.750000,5.714286e-01,0.857143,0.800000,0.949482


In [None]:
test_combined_en.to_csv('/content/drive/MyDrive/working_directory/tfm/models evaluation/mt5_evaluation.csv', index=False)

In [None]:
test_combined_en['bleu_1'].describe()

count    5000.000000
mean        0.819999
std         0.164522
min         0.142857
25%         0.725020
50%         0.846482
75%         1.000000
max         1.000000
Name: bleu_1, dtype: float64

In [None]:
test_combined_en['bleu_2'].describe()

count     5.000000e+03
mean      6.528370e-01
std       2.728735e-01
min      9.997901e-309
25%       4.696305e-01
50%       6.666667e-01
75%       9.265873e-01
max       1.000000e+00
Name: bleu_2, dtype: float64

In [None]:
test_combined_en['bleu_3'].describe()

count     5.000000e+03
mean      5.411025e-01
std       3.363775e-01
min      8.185589e-309
25%       2.729103e-01
50%       5.020410e-01
75%       8.888889e-01
max       1.000000e+00
Name: bleu_3, dtype: float64

In [None]:
test_combined_en['rouge_l'].describe()

count    5000.000000
mean        0.733353
std         0.216334
min         0.000000
25%         0.588235
50%         0.750000
75%         0.933333
max         1.000000
Name: rouge_l, dtype: float64

In [None]:
test_combined_en['jaccard_1'].describe()

count    5000.000000
mean        0.773466
std         0.200005
min         0.083333
25%         0.640000
50%         0.785714
75%         1.000000
max         1.000000
Name: jaccard_1, dtype: float64

In [None]:
test_combined_en['cosine_similarities'].describe()

count    5000.000000
mean        0.908482
std         0.089049
min         0.378741
25%         0.861008
50%         0.922321
75%         0.992039
max         1.000000
Name: cosine_similarities, dtype: float64

Cosine similarity filters + Counts

In [None]:
test_combined_en_filter1= test_combined_en[test_combined_en['cosine_similarities'] > 0.5]

In [None]:
test_combined_en_filter2= test_combined_en[test_combined_en['cosine_similarities'] > 0.7]

In [None]:
test_combined_en_filter3= test_combined_en[test_combined_en['cosine_similarities'] > 0.9]

In [None]:
print(len(test_combined_en_filter1),len(test_combined_en_filter2),len(test_combined_en_filter3))

4989 4862 2957


# DistilGPT2

## DistilGPT2 paraphrase inference

In [None]:
# Imported here so the cell runs on a fresh kernel. AutoModelForCausalLM
# replaces the deprecated AutoModelWithLMHead for decoder-only models.
from transformers import AutoModelForCausalLM

# Fine-tuned DistilGPT2 weights stored on the mounted Drive.
DistilGPT2_model = AutoModelForCausalLM.from_pretrained('/content/drive/MyDrive/working_directory/tfm/Distilled_GPT2_finetuning/DistilGPT2_output_model/output_model')
# NOTE(review): the tokenizer comes from the base "distilgpt2" hub checkpoint,
# not from the fine-tuned directory — confirm no tokens were added to the
# vocabulary during fine-tuning.
DistilGPT2_tokenizer = AutoTokenizer.from_pretrained("distilgpt2")

Downloading config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

In [None]:
# `pipeline` is imported here so the cell does not rely on an import made
# elsewhere in the kernel session.
from transformers import pipeline

# Text-generation pipeline wrapping the fine-tuned DistilGPT2 model and its tokenizer.
generator = pipeline('text-generation', model=DistilGPT2_model, tokenizer=DistilGPT2_tokenizer )

In [None]:
def clean_paraphrase(input_sentence, max_new_tokens=64):
  """Generate a paraphrase with the DistilGPT2 pipeline and strip the prompt markup.

  The sentence is wrapped as ``<s> sentence </s> === <p> `` and passed to the
  notebook-level ``generator``. The returned text echoes the prompt, so the
  paraphrase is the part after the `` </s> === <p> `` separator and before the
  first `` </p>`` (if any).

  Args:
    input_sentence: Sentence to paraphrase.
    max_new_tokens: Number of tokens the model may generate beyond the prompt.
      The pipeline's default length cap counts the prompt as well, which left
      long inputs with little or no room for a paraphrase (see the truncated
      outputs recorded in this notebook).

  Returns:
    The extracted paraphrase string.
  """
  separator = ' </s> === <p> '
  prompt = '<s> ' + input_sentence + separator
  p = generator(prompt, max_new_tokens=max_new_tokens)
  generated_text = p[0]['generated_text']
  # Piece [1] is everything after the first separator (i.e. after the echoed
  # prompt); it is then cut at the closing paragraph marker.
  return generated_text.split(separator)[1].split(' </p>')[0]

In [None]:
# One generation call per source sentence; clean_paraphrase already returns the
# extracted paraphrase string.
DistilGPT2_paraphrases = []
for source_text in tqdm(test_combined_en['Text']):
  DistilGPT2_paraphrases.append(clean_paraphrase(source_text))

100%|██████████| 5000/5000 [3:40:38<00:00,  2.65s/it]


In [None]:
DistilGPT2_paraphrases

['William Gibbs funded the Chapel and Hall, which was also designed by Butterfield.',
 'icky.',
 'Â He is a biologist.',
 "ersatzit is not a book that I've read.",
 'ive met the power of his instinctive mind.',
 'Â When Samuel Herrick arrived in',
 'The song also received " early " dazzling support across Europe," according to Jon Uren, Warner Music Europe\'s marketing manager.',
 'After Mary, Tom was dismissed.',
 'ersatzin is the neighborhood where she settled in that city for five years.',
 'iced wine makes me want to drink that.',
 'ersatz! What are you smiling about?',
 'Â Nationalist parties did not, however, join the other liberal groups that said they',
 'Tom said he would not be concerned with being disturbed.',
 'Tom changed his clothes.',
 'iced Maple Leafs season of the Toronto Maple Leafs was the',
 '""<s> "" The New York Times "" reported that Thomas D. Rockefeller was the "" main banker in New York "" and his nephew, "" John',
 'iced in the 1836 period between 18',
 "I'm

In [None]:
test_combined_en['DistilGPT2_paraphrases'] = DistilGPT2_paraphrases

## Computing metrics over DistilGPT2 paraphrases

In [None]:
# Fresh accumulators: one score per (source sentence, DistilGPT2 paraphrase) pair.
rouge_l, bleu_1, bleu_2, bleu_3, Jaccard_1 = [], [], [], [], []

source_texts = test_combined_en['Text']
distilgpt2_outputs = test_combined_en['DistilGPT2_paraphrases']

for row in tqdm(range(len(test_combined_en))):
  # The original sentence acts as the reference, the generated paraphrase as
  # the candidate.
  reference, candidate = source_texts[row], distilgpt2_outputs[row]
  # Lexical/syntactic variation metrics. rouge_score, bleu_score and
  # jaccard_similarity are not defined in this cell (presumably earlier in
  # the notebook).
  rouge_l.append(rouge_score(candidate, reference, ngram='l', metric='f'))
  for order, scores in ((1, bleu_1), (2, bleu_2), (3, bleu_3)):
    scores.append(bleu_score(candidate, reference, ngram=order))
  Jaccard_1.append(jaccard_similarity(candidate, reference, ngram=1))


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
100%|██████████| 5000/5000 [00:12<00:00, 405.62it/s]


In [None]:
# Attach the metric lists as columns, in the same order as before. Columns with
# these names already present on the frame are overwritten.
for column_name, column_values in (
    ('bleu_1', bleu_1),
    ('bleu_2', bleu_2),
    ('bleu_3', bleu_3),
    ('rouge_l', rouge_l),
    ('jaccard_1', Jaccard_1),
):
  test_combined_en[column_name] = column_values

In [None]:
test_combined_en

Unnamed: 0,Text,DistilGPT2_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1
0,The Chapel and Hall were both fully funded by ...,"William Gibbs funded the Chapel and Hall, whic...",0.654985,4.678461e-01,3.148964e-01,0.518519,0.666667
1,This isn't my key.,icky.,0.067668,3.011310e-309,3.011310e-309,0.000000,0.142857
2,He is a biologist.,Â He is a biologist.,0.833333,8.000000e-01,7.500000e-01,0.888889,0.833333
3,I haven't read all the books on the shelves.,ersatzit is not a book that I've read.,0.271451,2.013330e-308,2.013330e-308,0.125000,0.176471
4,He is able to run faster than I am.,ive met the power of his instinctive mind.,0.099427,1.991084e-308,1.991084e-308,0.000000,0.055556
...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,ersatzkeitam is a dish that's been developed b...,0.181818,9.523810e-02,5.000000e-02,0.300000,0.200000
4996,I went swimming in the sea.,Âстарандский is an artificial lake with a gree...,0.058824,2.225074e-308,2.225074e-308,0.000000,0.047619
4997,I'm not too surprised.,ikin’ s is really pretty.,0.142857,2.225074e-308,2.225074e-308,0.000000,0.083333
4998,I think you could've been more patient.,ive just told me that you could've been more p...,0.583333,5.454545e-01,5.000000e-01,0.588235,0.500000


In [None]:
# Similarity between each DistilGPT2 paraphrase (candidate) and its source
# sentence (reference). cosine_similarity is not defined in this cell
# (presumably earlier in the notebook).
cosine_similarities = []
source_texts = test_combined_en['Text']
distilgpt2_outputs = test_combined_en['DistilGPT2_paraphrases']
for row in tqdm(range(len(test_combined_en))):
  cosine_similarities.append(cosine_similarity(distilgpt2_outputs[row], source_texts[row]))

100%|██████████| 5000/5000 [18:25<00:00,  4.52it/s]


In [None]:
cosine_similarities

[0.8850963115692139,
 0.22145681083202362,
 0.9689452648162842,
 0.6159177422523499,
 0.22795262932777405,
 0.41052693128585815,
 0.8069598078727722,
 0.7686092257499695,
 0.6321107149124146,
 0.73627769947052,
 0.8846774101257324,
 0.5497099161148071,
 0.8864777684211731,
 1.0,
 0.7381699085235596,
 0.043936505913734436,
 0.36646679043769836,
 0.8517116904258728,
 0.4023955464363098,
 0.8289767503738403,
 1.0000001192092896,
 0.05586453154683113,
 0.6900055408477783,
 0.06161842495203018,
 0.6991145610809326,
 0.9776427745819092,
 0.5854763388633728,
 0.7669973373413086,
 0.7772834897041321,
 0.61820387840271,
 0.9037912487983704,
 0.2804601490497589,
 0.4131960868835449,
 0.4884192645549774,
 0.842078447341919,
 0.8864714503288269,
 0.5489106178283691,
 0.776170015335083,
 0.09182953834533691,
 0.516272246837616,
 0.8753369450569153,
 0.9400084018707275,
 0.17825403809547424,
 0.2031429260969162,
 0.87713223695755,
 0.7214844822883606,
 0.9999999403953552,
 0.4291098117828369,
 0.147

In [None]:
len(cosine_similarities)

5000

In [None]:
test_combined_en['cosine_similarities'] = cosine_similarities

In [None]:
test_combined_en

Unnamed: 0,Text,DistilGPT2_paraphrases,bleu_1,bleu_2,bleu_3,rouge_l,jaccard_1,cosine_similarities
0,The Chapel and Hall were both fully funded by ...,"William Gibbs funded the Chapel and Hall, whic...",0.654985,4.678461e-01,3.148964e-01,0.518519,0.666667,0.885096
1,This isn't my key.,icky.,0.067668,3.011310e-309,3.011310e-309,0.000000,0.142857,0.221457
2,He is a biologist.,Â He is a biologist.,0.833333,8.000000e-01,7.500000e-01,0.888889,0.833333,0.968945
3,I haven't read all the books on the shelves.,ersatzit is not a book that I've read.,0.271451,2.013330e-308,2.013330e-308,0.125000,0.176471,0.615918
4,He is able to run faster than I am.,ive met the power of his instinctive mind.,0.099427,1.991084e-308,1.991084e-308,0.000000,0.055556,0.227953
...,...,...,...,...,...,...,...,...
4995,Tom and Mary said they're not hungry.,ersatzkeitam is a dish that's been developed b...,0.181818,9.523810e-02,5.000000e-02,0.300000,0.200000,0.457265
4996,I went swimming in the sea.,Âстарандский is an artificial lake with a gree...,0.058824,2.225074e-308,2.225074e-308,0.000000,0.047619,0.328656
4997,I'm not too surprised.,ikin’ s is really pretty.,0.142857,2.225074e-308,2.225074e-308,0.000000,0.083333,0.239553
4998,I think you could've been more patient.,ive just told me that you could've been more p...,0.583333,5.454545e-01,5.000000e-01,0.588235,0.500000,0.893138


In [None]:
test_combined_en.to_csv('/content/drive/MyDrive/working_directory/tfm/models evaluation/DistilGPT2_evaluation.csv', index=False)

In [None]:
test_combined_en['bleu_1'].describe()

count    5000.000000
mean        0.376132
std         0.320407
min         0.000000
25%         0.086939
50%         0.285714
75%         0.666667
max         1.000000
Name: bleu_1, dtype: float64

In [None]:
test_combined_en['bleu_2'].describe()

count     5.000000e+03
mean      2.506344e-01
std       3.044499e-01
min       0.000000e+00
25%      2.225074e-308
50%       1.111111e-01
75%       4.666667e-01
max       1.000000e+00
Name: bleu_2, dtype: float64

In [None]:
test_combined_en['bleu_3'].describe()

count     5.000000e+03
mean      1.942385e-01
std       2.850623e-01
min       0.000000e+00
25%      1.928867e-308
50%      2.225074e-308
75%       3.333333e-01
max       1.000000e+00
Name: bleu_3, dtype: float64

In [None]:
test_combined_en['rouge_l'].describe()

count    5000.000000
mean        0.355800
std         0.315447
min         0.000000
25%         0.000000
50%         0.307692
75%         0.609300
max         1.000000
Name: rouge_l, dtype: float64

In [None]:
test_combined_en['jaccard_1'].describe()

count    5000.000000
mean        0.350281
std         0.298904
min         0.000000
25%         0.090909
50%         0.272727
75%         0.578947
max         1.000000
Name: jaccard_1, dtype: float64

In [None]:
test_combined_en['cosine_similarities'].describe()

count    5000.000000
mean        0.514119
std         0.322176
min        -0.186144
25%         0.210336
50%         0.548858
75%         0.807944
max         1.000000
Name: cosine_similarities, dtype: float64

Cosine similarity filters + Counts

In [None]:
test_combined_en_filter1= test_combined_en[test_combined_en['cosine_similarities'] > 0.5]

In [None]:
test_combined_en_filter2= test_combined_en[test_combined_en['cosine_similarities'] > 0.7]

In [None]:
test_combined_en_filter3= test_combined_en[test_combined_en['cosine_similarities'] > 0.9]

In [None]:
print(len(test_combined_en_filter1),len(test_combined_en_filter2),len(test_combined_en_filter3))

2689 1780 710
