In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

import json

In [14]:
import pandas as pd
df = pd.read_csv('./baselineEncoderDecoder/test.csv')

In [15]:
seq_length = 200
batch_size = 60
hidden_size = 128


In [16]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        # Embedding layer
        self.embedding = nn.Embedding(input_size, hidden_size, padding_idx=0)
        
        # GRU layer. The input and output are both of the same size 
        #  since embedding size = hidden size in this example
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)

    def forward(self, input, hidden):
        # The inputs are first transformed into embeddings
        embedded = self.embedding(input)
        output = embedded

        # As in any RNN, the new input and the previous hidden states are fed
        #  into the model at each time step 
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        # This method is used to create the innitial hidden states for the encoder
        return torch.zeros(1, batch_size, self.hidden_size)

In [17]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        # Embedding layer
        self.embedding = nn.Embedding(output_size, hidden_size, padding_idx=0)
        
        # The GRU layer
        self.gru = nn.GRU(hidden_size, hidden_size)

        # Fully-connected layer for scores
        self.out = nn.Linear(hidden_size, output_size)

        # Applying Softmax to the scores
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        # Feeding input through embedding layer
        output = self.embedding(input)

        # Applying an activation function (ReLu)
        output = F.relu(output)

        # Feeding input and previous hidden state
        output, hidden = self.gru(output, hidden)

        # Outputting scores from the final time-step
        output = self.softmax(self.out(output[0]))
        
        return output, hidden

    # We do not need an .initHidden() method for the decoder since the 
    #  encoder output will act as input in the first decoder time-step

In [18]:
device = torch.device('mps')

device

device(type='mps')

In [19]:
model_encoder = EncoderRNN(48073, hidden_size).to(device)
model_encoder.load_state_dict(torch.load('./baselineEncoderDecoder/encoder'))
model_encoder.eval()

EncoderRNN(
  (embedding): Embedding(48073, 128, padding_idx=0)
  (gru): GRU(128, 128, batch_first=True)
)

In [20]:
model_decoder = DecoderRNN(hidden_size, 39653).to(device)
model_decoder.load_state_dict(torch.load('./baselineEncoderDecoder/decoder'))
model_decoder.eval()

DecoderRNN(
  (embedding): Embedding(39653, 128, padding_idx=0)
  (gru): GRU(128, 128)
  (out): Linear(in_features=128, out_features=39653, bias=True)
  (softmax): LogSoftmax(dim=1)
)

In [21]:
def encode_and_pad(vocab, sent, max_length):
    sos = [vocab["<SOS>"]]
    eos = [vocab["<EOS>"]]
    pad = [vocab["<PAD>"]]

    if len(sent) < max_length - 2: # -2 for SOS and EOS
        n_pads = max_length - 2 - len(sent)
        encoded = [vocab[w] for w in sent]
        return sos + encoded + eos + pad * n_pads 
    else: # sent is longer than max_length; truncating
        encoded = [vocab[w] for w in sent]
        truncated = encoded[: max_length-2]
        # truncated = encoded[:max_length - 2]
        return sos + truncated + eos

In [22]:
with open('./baselineEncoderDecoder/indices.json', 'r') as f:
    f
    f = f.read()
    f = json.loads(f)
f = f[0]
f

{'ans_w2i': {'<PAD>': 0,
  '<SOS>': 1,
  '<EOS>': 2,
  'Yeah,': 3,
  'but': 4,
  'once': 5,
  'the': 6,
  'access': 7,
  'to': 8,
  'internet': 9,
  'was': 10,
  'a': 11,
  'rare': 12,
  'thing.': 13,
  'do': 14,
  'you': 15,
  'remember?': 16,
  'Well,': 17,
  'I': 18,
  'know': 19,
  'that': 20,
  'more': 21,
  'people': 22,
  'started': 23,
  'using': 24,
  'it': 25,
  'after': 26,
  'some': 27,
  'restrictions': 28,
  'on': 29,
  'use': 30,
  'were': 31,
  'lifited': 32,
  'in': 33,
  '1995': 34,
  'Yeah.': 35,
  'And': 36,
  'Internet': 37,
  'actually': 38,
  'developed': 39,
  'from': 40,
  'project': 41,
  'founded': 42,
  'by': 43,
  'government': 44,
  'called': 45,
  'ARPANET,': 46,
  'responsible': 47,
  'for': 48,
  'suppoting': 49,
  'projects': 50,
  'at': 51,
  'universities': 52,
  'and': 53,
  'within': 54,
  'is': 55,
  'defined': 56,
  'as': 57,
  'ability': 58,
  'connect': 59,
  'am': 60,
  'bot': 61,
  'so': 62,
  'not': 63,
  'posible.': 64,
  'Do': 65,
  'perfo

In [23]:

ques_word2index = f['ques_w2i']
ans_index2word = f['ans_i2w']
ques_index2word = f['ques_i2w']
ans_word2index = f['ans_w2i']
SOS = ques_word2index["<SOS>"]
EOS = ques_word2index["<EOS>"]

In [24]:
enc_optimizer = torch.optim.Adam(model_encoder.parameters(), lr = 3e-3)
dec_optimizer = torch.optim.Adam(model_decoder.parameters(), lr = 3e-3)

In [25]:
def predict(test_sentence, model_encoder, model_decoder):
    # test_sentence = "I love candy, what's a good brand?	"

    # Tokenizing, Encoding, transforming to Tensor
    test_sentence = torch.tensor(encode_and_pad(ques_word2index, test_sentence.split(), seq_length)).unsqueeze(dim=0)

    encoder_hidden = torch.zeros(1, 1, hidden_size)
    encoder_hidden = encoder_hidden.to(device)

    input_tensor = test_sentence.to(device)

    enc_optimizer.zero_grad()
    dec_optimizer.zero_grad()

    result = []

    encoder_outputs = torch.zeros(seq_length, model_encoder.hidden_size, device=device)


    with torch.set_grad_enabled(False):
        encoder_output, encoder_hidden = model_encoder(input_tensor, encoder_hidden)

        dec_result = torch.zeros(seq_length, 1, len(ans_word2index)).to(device)

        decoder_input = torch.tensor([SOS]).unsqueeze(dim=0).to(device)
        decoder_hidden = encoder_hidden
        for di in range(1, seq_length):
            decoder_output, decoder_hidden = model_decoder(decoder_input, decoder_hidden)
            best = decoder_output.argmax(1)
            result.append(ans_index2word[best.to('cpu').item()])
            if best.item() == EOS:
                break

            decoder_input = best.unsqueeze(dim=0) 
            dec_result[di] = decoder_output
    return ' '.join(result)

In [26]:
import nltk
from nltk.translate.bleu_score import corpus_bleu

# Define the reference and candidate sentences for the entire dataframe
references = []
candidates = []
for index, row in df.head().iterrows():
    references.append([row["answers"]])
    candidate = predict(row["questions"], model_encoder, model_decoder)
    print(candidate, row["answers"])
    candidates.append(candidate)

score = corpus_bleu(references, candidates)

print(score)

I I you you know know a a the Dylan's Dylan's Bar a Bar a the the the <EOS> I don't know how good they are, but Dylan's Candy Bar has a chain of candy shops in various cities.
I don't know but candies the Africa the candy the <EOS> I don't know, really, but they also are a supplier of candy.
I don't know the candies the the the <EOS> It stocks over 7,000 candies from across the world.
I don't sure, but but the the the the the the the Lauren. <EOS> I couldn't really say, but they're owned by the daughter of Ralph Lauren, Dylan Lauren.
I you you you you you that that that that the email, a a a a and a a and a and <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PA

In [28]:
!pip install rouge

Defaulting to user installation because normal site-packages is not writeable
Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m


In [31]:
from rouge import Rouge
rouge = Rouge()

In [32]:
total_score = [0, 0, 0]
for i in range(len(references)):
    ref = ' '.join(references[i])
    can = ' '.join(candidates[i])
    scores = rouge.get_scores(can, ref)
    print(f"Input: {candidates[i]}\nOutput: {references[i]}")
    print(f"ROUGE-1: {scores[0]['rouge-1']['f']:.2f}")
    print(f"ROUGE-2: {scores[0]['rouge-2']['f']:.2f}")
    print(f"ROUGE-L: {scores[0]['rouge-l']['f']:.2f}")
    total_score[0]+= (scores[0]['rouge-1']['f'])
    total_score[1]+= (scores[0]['rouge-2']['f'])


Input: I I you you know know a a the Dylan's Dylan's Bar a Bar a the the the <EOS>
Output: ["I don't know how good they are, but Dylan's Candy Bar has a chain of candy shops in various cities."]
ROUGE-1: 0.10
ROUGE-2: 0.00
ROUGE-L: 0.10
Input: I don't know but candies the Africa the candy the <EOS>
Output: ["I don't know, really, but they also are a supplier of candy."]
ROUGE-1: 0.11
ROUGE-2: 0.00
ROUGE-L: 0.11
Input: I don't know the candies the the the <EOS>
Output: ['It stocks over 7,000 candies from across the world.']
ROUGE-1: 0.00
ROUGE-2: 0.00
ROUGE-L: 0.00
Input: I don't sure, but but the the the the the the the Lauren. <EOS>
Output: ["I couldn't really say, but they're owned by the daughter of Ralph Lauren, Dylan Lauren."]
ROUGE-1: 0.06
ROUGE-2: 0.00
ROUGE-L: 0.06
Input: I you you you you you that that that that the email, a a a a and a a and a and <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD

In [None]:
x = [i/5 for i in total_score]

In [None]:
print('Rouge-1:', x[0])
print('Rouge-2:', x[1])
print('Rouge-L:', x[2])

Rouge-1: 0.0623542185010036
Rouge-2: 0.0
Rouge-L: 0.0


In [236]:
import torch
from bert_score import score
# Compute BERTScore
P, R, F1 = score(candidates, references, lang='en', verbose=False)

# Print results
print("Precision:", P.mean())
print("Recall:", R.mean())
print("F1 score:", F1.mean())

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Precision: tensor(0.7796)
Recall: tensor(0.8436)
F1 score: tensor(0.8089)


In [33]:
encoder = EncoderRNN(30205, hidden_size).to(device)
encoder.load_state_dict(torch.load('./contrastiveLossEncoderDecoder/contrastive_encoder'))
encoder.eval()

EncoderRNN(
  (embedding): Embedding(30205, 128, padding_idx=0)
  (gru): GRU(128, 128, batch_first=True)
)

In [34]:
decoder = DecoderRNN(hidden_size, 26386).to(device)
decoder.load_state_dict(torch.load('./contrastiveLossEncoderDecoder/contrastive_decoder'))
decoder.eval()

DecoderRNN(
  (embedding): Embedding(26386, 128, padding_idx=0)
  (gru): GRU(128, 128)
  (out): Linear(in_features=128, out_features=26386, bias=True)
  (softmax): LogSoftmax(dim=1)
)

In [35]:
with open('./contrastiveLossEncoderDecoder/contrastive_indices.json', 'r') as f:
    f
    f = f.read()
    f = json.loads(f)
f = f[0]
f

{'ans_w2i': {'<PAD>': 0,
  '<SOS>': 1,
  '<EOS>': 2,
  'Yeah': 3,
  'but': 4,
  'once': 5,
  'the': 6,
  'access': 7,
  'to': 8,
  'internet': 9,
  'was': 10,
  'a': 11,
  'rare': 12,
  'thing': 13,
  'do': 14,
  'you': 15,
  'remember': 16,
  'Well': 17,
  'I': 18,
  'know': 19,
  'that': 20,
  'more': 21,
  'people': 22,
  'started': 23,
  'using': 24,
  'it': 25,
  'after': 26,
  'some': 27,
  'restrictions': 28,
  'on': 29,
  'use': 30,
  'were': 31,
  'lifited': 32,
  'in': 33,
  '1995': 34,
  'And': 35,
  'Internet': 36,
  'actually': 37,
  'developed': 38,
  'from': 39,
  'project': 40,
  'founded': 41,
  'by': 42,
  'government': 43,
  'called': 44,
  'ARPANET': 45,
  'responsible': 46,
  'for': 47,
  'suppoting': 48,
  'projects': 49,
  'at': 50,
  'universities': 51,
  'and': 52,
  'within': 53,
  'is': 54,
  'defined': 55,
  'as': 56,
  'ability': 57,
  'connect': 58,
  'am': 59,
  'bot': 60,
  'so': 61,
  'not': 62,
  'posible': 63,
  'Do': 64,
  'perform': 65,
  'administr

In [36]:
ques_word2index = f['ques_w2i']
ans_index2word = f['ans_i2w']
ques_index2word = f['ques_i2w']
ans_word2index = f['ans_w2i']
SOS = ques_word2index["<SOS>"]
EOS = ques_word2index["<EOS>"]

In [37]:
enc_optimizer = torch.optim.Adam(encoder.parameters(), lr = 3e-3)
dec_optimizer = torch.optim.Adam(decoder.parameters(), lr = 3e-3)

In [38]:
def predict(test_sentence, encoder, decoder):
    test_sentence = "I love candy, what's a good brand?	"
    test_sentence = test_sentence.strip().split(' ')
    punct = set({'!', '.', '?', ','})
    for i in range(len(test_sentence)):
        if len(test_sentence[i])>0:
            if test_sentence[i][-1] in punct:
                test_sentence[i] = test_sentence[i][:-1]

    # Tokenizing, Encoding, transforming to Tensor
    test_sentence = torch.tensor(encode_and_pad(ques_word2index, test_sentence, seq_length)).unsqueeze(dim=0)

    encoder_hidden = torch.zeros(1, 1, hidden_size)
    encoder_hidden = encoder_hidden.to(device)

    input_tensor = test_sentence.to(device)

    enc_optimizer.zero_grad()
    dec_optimizer.zero_grad()

    result = []

    encoder_outputs = torch.zeros(seq_length, encoder.hidden_size, device=device)


    with torch.set_grad_enabled(False):
        encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)

        dec_result = torch.zeros(seq_length, 1, len(ans_word2index)).to(device)

        decoder_input = torch.tensor([SOS]).unsqueeze(dim=0).to(device)
        decoder_hidden = encoder_hidden
        for di in range(1, seq_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            best = decoder_output.argmax(1)
            result.append(ans_index2word[best.to('cpu').item()])
            if best.item() == EOS:
                break

            decoder_input = best.unsqueeze(dim=0) 
            dec_result[di] = decoder_output
    return ' '.join(result)

In [39]:
import nltk
from nltk.translate.bleu_score import corpus_bleu

# Define the reference and candidate sentences for the entire dataframe
references = []
candidates = []
for index, row in df.head().iterrows():
    references.append([row["answers"]])
    candidate = predict(row["questions"], encoder, decoder)
    print(candidate, row["answers"])
    candidates.append(candidate)

# Compute the BLEU score for the entire dataframe
score = corpus_bleu(references, candidates)

# Print the score
print(score)

I I not you that that that the the <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>

In [40]:
from rouge import Rouge
rouge = Rouge()

for i in range(len(references)):
    ref = ' '.join(references[i])
    can = ' '.join(candidates[i])
    scores = rouge.get_scores(can, ref)
    print(f"Input: {candidates[i]}\nOutput: {references[i]}")
    print(f"ROUGE-1: {scores[0]['rouge-1']['f']:.2f}")
    print(f"ROUGE-2: {scores[0]['rouge-2']['f']:.2f}")
    print(f"ROUGE-L: {scores[0]['rouge-l']['f']:.2f}")

Input: I I not you that that that the the <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD

In [41]:
total_score = [0, 0, 0]
for i in range(len(references)):
    ref = ' '.join(references[i])
    can = ' '.join(candidates[i])
    scores = rouge.get_scores(can, ref)
    print(f"Input: {candidates[i]}\nOutput: {references[i]}")
    print(f"ROUGE-1: {scores[0]['rouge-1']['f']:.2f}")
    print(f"ROUGE-2: {scores[0]['rouge-2']['f']:.2f}")
    print(f"ROUGE-L: {scores[0]['rouge-l']['f']:.2f}")
    total_score[0]+= (scores[0]['rouge-1']['f'])
    total_score[1]+= (scores[0]['rouge-2']['f'])

Input: I I not you that that that the the <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD

In [42]:
x = [i/5 for i in total_score]
print('Rouge-1:', x[0])
print('Rouge-2:', x[1])
print('Rouge-L:', x[2])

Rouge-1: 0.07985644793395565
Rouge-2: 0.0
Rouge-L: 0.0


In [247]:
import torch
from bert_score import score
# Compute BERTScore
P, R, F1 = score(candidates, references, lang='en', verbose=False)

# Print results
print("Precision:", P.mean())
print("Recall:", R.mean())
print("F1 score:", F1.mean())

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Precision: tensor(0.6292)
Recall: tensor(0.8178)
F1 score: tensor(0.7112)


In [106]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [129]:
model_outputs = ["The quick brown fox jumps over the lazy dog."]
reference_summaries = ["The lazy dog is jumped over by the quick brown fox."]

scores = rouge.get_scores(model_outputs, reference_summaries)

print(scores)

[{'rouge-1': {'r': 0.7272727272727273, 'p': 0.8888888888888888, 'f': 0.79999999505}, 'rouge-2': {'r': 0.3, 'p': 0.375, 'f': 0.33333332839506175}, 'rouge-l': {'r': 0.36363636363636365, 'p': 0.4444444444444444, 'f': 0.39999999505}}]


In [164]:
import pickle
with open('critic.pkl', 'rb+') as f:
    clf = pickle.load(f)