In [1]:
# !pip install -q pytorch-lightning
!pip install -q transformers

In [2]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=False)
root_dir = "/content/gdrive/My Drive/"
base_dir = root_dir + 'Integrated Gradients/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
import os
# os.chdir('/content/gdrive/My Drive/Integrated Gradients/')
# !git clone https://github.com/NeilSinclair/DistilBERT-Style-Transfer.git
os.chdir('/content/gdrive/My Drive/Integrated Gradients/DistilBERT_Style_Transfer')
# !git pull

In [4]:
# imports
import transformers
from torch.utils.data import DataLoader, TensorDataset, random_split, RandomSampler, Dataset
import pandas as pd
import numpy as np
import re

import torch.nn.functional as F
import torch.nn
# import pytorch_lightning as pl
import torch
# from pytorch_lightning.callbacks import ModelCheckpoint
import argparse
import os

from datetime import timedelta
import datetime
import time

from model.utils import *
from model.model import *

In [5]:
# Load the DistilBERT models
from transformers import AdamW, DistilBertTokenizer, DistilBertConfig, DistilBertForMaskedLM, DistilBertForSequenceClassification, get_linear_schedule_with_warmup

dbert_model = DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased')
dbert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# class_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased',
#                                                                   num_labels = 2,
#                                                                   output_attentions = False,
#                                                                   output_hidden_states = False)

# class_model = torch.load(base_dir + '/checkpoint_files/yelp_classifier_1epoch_DBert.pth', map_location=torch.device('cpu'))
if torch.cuda.is_available():
  class_model = torch.load(base_dir + '/checkpoint_files/yelp_classifier_1epoch_DBert.pth')
else: 
  class_model = torch.load(base_dir + '/checkpoint_files/yelp_classifier_1epoch_DBert.pth', map_location=torch.device('cpu'))
class_model.eval()

## Add special tokens to the models for <pos> and <neg>
special_tokens_dict = {'additional_special_tokens' : ['<pos>', '<neg>']}

num_added_toks = dbert_tokenizer.add_special_tokens(special_tokens_dict)
print('We have added', num_added_toks, 'tokens')
# Resize the token embeddings
dbert_model.resize_token_embeddings(len(dbert_tokenizer))
class_model.resize_token_embeddings(len(dbert_tokenizer))


We have added 2 tokens


Embedding(30524, 768)

In [6]:
hparams = argparse.Namespace()

hparams.lm_freeze_encoder = False
# Train the embeddings because we have <pos> and <neg> style tokens now
hparams.lm_freeze_pos_embeds = True
hparams.lm_freeze_token_embeds = False

hparams.classifier_freeze_whole_model = True
# Train the embeddings because we have <pos> and <neg> style tokens now
hparams.classifier_freeze_pos_embeds = True
hparams.classifier_freeze_token_embeds = True

hparams.gs_hard = True
hparams.gs_tau = 1

hparams.eval_beams = 4
hparams.max_gen_length = 32

In [7]:
class CombinedModel(torch.nn.Module):
  def __init__(self, lm_model, class_model, tokenizer, hparams, device = 'cuda'):
    super(CombinedModel, self).__init__()
    self.lm_model = lm_model
    self.class_model = class_model
    self.device = device
    self.tokenizer = tokenizer

    self.vocab = torch.FloatTensor(np.arange(0, len(tokenizer))).to(self.device)
    self.gs_tau = hparams.gs_tau
    self.gs_hard = hparams.gs_hard

    # Set the MLM model to train and the classification model to evaluation
    self.lm_model.train()
    self.class_model.train()
    self.lm_model.to(self.device)
    self.class_model.to(self.device)

    self.criterion = torch.nn.CrossEntropyLoss()

    # This selects which parts of the two models are frozen for training
    model_methods = [lm_model.distilbert, # Note here that we don't freeze the language model layer
                 lm_model.distilbert.embeddings.word_embeddings,
                 lm_model.distilbert.embeddings.position_embeddings,
                 class_model.distilbert, # Freeze the whole of the model, including classifier head
                 class_model.distilbert.embeddings.word_embeddings,
                 class_model.distilbert.embeddings.position_embeddings]
    
    model_params = [hparams.lm_freeze_encoder,
                    hparams.lm_freeze_pos_embeds,
                    hparams.lm_freeze_token_embeds,
                    hparams.classifier_freeze_whole_model,
                    hparams.classifier_freeze_pos_embeds,
                    hparams.classifier_freeze_token_embeds]

    ## Freeze / unfreeze parameters depending on what was passed
    freeze_multiple_params(model_methods, model_params)

  def forward(self, batch, training = True, translate = False):
    ''' Forward function combining the two neural networks
    Args: batch - a batch containing multiple items for the model (see below)
          training - bool indicating whether or not we're in training mode
          translate - bool indicating whether to swap the first token
    '''
    if not training:
      self.lm_model.eval()

    src_ids = batch['input_ids'].to(self.device)
    src_mask = batch['attention_mask'].to(self.device)
    # tgt_ids = batch['labels'].to(device)
    mask_ids = batch["masked_ids"].to(self.device)
    class_labels = batch['class_labels'].to(self.device)

    # if in translate mode, swap out the first token; this assumes that the first token is
    # the style token and that there are only 2 styles
    if translate:
      src_ids = self.swap_style_tokens(src_ids)

    outputs = self.lm_model(input_ids = src_ids, attention_mask = src_mask)
    lm_logits = outputs.logits
    
    # Just get the logits of the masked tokens
    new_logits = lm_logits[mask_ids.nonzero()[:,0], mask_ids.nonzero()[:,1], : ].to(self.device)

    # if training, use gumbel softmax to select the token
    if training:
      new_tokens_ohe = F.gumbel_softmax(new_logits, tau = self.gs_tau, hard = self.gs_hard)
    else:
      # new_tokens = new_logits.argmax(-1).squeeze()
      new_tokens_ohe = F.gumbel_softmax(new_logits, tau = self.gs_tau, hard = True)

    # Then, add this token back into the original sentence
    new_sentence_batch = sentence_rewriter(mask_ids, new_tokens_ohe, 
                                           src_ids, self.vocab, 
                                           device = self.device).to(self.device)
    # Pass this updated sentence through the classifier
    class_outs = self.class_model(input_ids = new_sentence_batch,
                              attention_mask = src_mask,
                              labels = class_labels)

    ce_error = self.criterion(class_outs.logits, class_labels)

    # If we're training, just return the loss, but if not, return the loss,
    # predictions and true labels so that we can get the accuracy of the model
    if training:
      model_outs = {"loss" : ce_error}
    else:
      preds = class_outs.logits.cpu().detach().numpy().argmax(-1)
      model_outs = {"loss" : ce_error,
                    "preds" : preds,
                    "true_labels" : class_labels}
  
    return model_outs

  def swap_style_tokens(self, batch):
    ''' Function that swaps the first token in a sentence with it's opposite
    Args: batch - a batch of tokens of size [B, sentence_length]
    Returns: the batch with the first token swapped out
    '''
    max_token = len(self.tokenizer) - 1

    style2_tensor = torch.ones([batch.size()[0]], dtype = torch.int64, device = self.device) * max_token
    style1_tensor = torch.ones([batch.size()[0]], dtype = torch.int64, device = self.device) * (max_token-1)

    # Swap the first tokens around; this is similar to the way one can swap 1 and 0 by doing
    # new_val = (1 - old_value)
    batch[:, 0] = (style2_tensor - batch[:, 0]) + style1_tensor

    return batch


In [8]:
combined_model = CombinedModel(dbert_model, class_model, tokenizer = dbert_tokenizer, hparams = hparams, device = 'cuda')
# combined_model = torch.load(base_dir + '/checkpoint_files/combined_distilBERT_2epoch.pth', map_location=torch.device('cpu'))

In [34]:
def train(data, combined_model, tokenizer, epochs = 1, device = "cuda", batch_report_ = 250, lr = 2e-5):
  vocab = torch.FloatTensor(np.arange(0, len(tokenizer))).to(device)
  epochs = epochs
  t0 = time.time()

  total_train_loss = 0
  batch_train_loss = 0

  batch_loss = []
  
  if device == "cuda":
    batch_report = batch_report_
  else:
    batch_report = 1

  # Set up the optimizer for the lm model
  optimizer = AdamW(combined_model.parameters(),
                  lr = lr,
                  eps = 1e-8)
  print(" === Starting Training === ")
  for n in range(epochs):
    print(f"Starting epoch {n+1} of {epochs}")
    for i, batch in enumerate(data):

      if i % batch_report == 0 and not i == 0:
        elapsed = format_time(time.time() - t0)
        avg_batch_loss = batch_train_loss/batch_report
        batch_loss.append(avg_batch_loss) # Rather get the average batch loss every batch_report batches than every batch
        print('Batch {:>5,}  of  {:>5,}.    Elapsed: {:}. --- Avg batch loss {:.4f} over {} batches'.format(i, len(data), elapsed, avg_batch_loss, batch_report))
        batch_train_loss = 0

      # Run the model and get the logits
      combined_model.zero_grad()

      # Run the combined model
      outputs = combined_model(batch)
      # Get the classification error
      ce_error = outputs['loss']

      batch_train_loss += ce_error.item() 

      ce_error.backward()
      torch.nn.utils.clip_grad_norm(combined_model.parameters(), 5.0)
      # Update the parameters using the gradients
      optimizer.step()
      # Update the learning rate

      total_train_loss += ce_error
    return {"avg_train_loss" : total_train_loss / len(data),
            "batch_loss" : batch_loss}




In [14]:
def validate(data, combined_model, tokenizer, num_batches = 100, device = "cuda", translate = False):
  vocab = torch.FloatTensor(np.arange(0, len(tokenizer))).to(device)

  t0 = time.time()

  total_train_loss = 0
  batch_train_loss = 0

  batch_loss = []
  if device == "cuda":
    batch_report = num_batches / 4
  else:
    batch_report = 1

  print(" === Running Validation ===")
  true_labels = [] 
  preds = []
  for i, batch in enumerate(data):
    src_ids = batch['input_ids'].to(device)
    src_mask = batch['attention_mask'].to(device)
    mask_ids = batch["masked_ids"].to(device)
    class_labels = batch['class_labels'].to(device)

    
    # Swap the labels if we're translating
    if translate:
      true_labels.extend(1 - class_labels.cpu().numpy())
    else: 
      true_labels.extend(class_labels.cpu().numpy())

    if i % batch_report == 0 and not i == 0:
      elapsed = format_time(time.time() - t0)
      print('Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(i, num_batches, elapsed))
      batch_train_loss = 0

    # Run the combined model
    outputs = combined_model(batch, training = False, translate = translate)
    # Get the classification error
    ce_error = outputs['loss']

    batch_train_loss += ce_error.item()
    batch_loss.append(ce_error.item())

    total_train_loss += ce_error
    # Get the classification error (and hope that it back propagates)
    # Get the predictions
    preds.append(outputs['preds'])
    # true_labels.append(outputs['true_labels'].cpu().numpy())
    
    if i > num_batches:
      batch_size = len(src_ids)
      # Get the accuracy of the predictions
      val_predictions = []
      val_true = []
      for j, it in enumerate(preds):
        val_predictions.extend(it)
        # val_true.extend(true_labels)
      val_acc = np.mean(np.array(val_predictions) == np.array(true_labels))
      print("Validation Accuracy {:.2f}%".format(val_acc*100))
      break
    
  return {"avg_train_loss" : total_train_loss / len(data),
          "batch_loss" : batch_loss,
          "val_predictions" : val_predictions,
          "true_labels" : true_labels}

In [15]:
train_file = '/content/gdrive/My Drive/Integrated Gradients/Yelp Data - Masked/Yelp_train_20_DBert.csv'
dataset = TokenizedDataset(train_file, transform = CreateTokens(dbert_tokenizer, None))                        
train_data = DataLoader(dataset, collate_fn = dataset.collate_fn, sampler = RandomSampler(dataset), batch_size = 64)

In [16]:
val_file = '/content/gdrive/My Drive/Integrated Gradients/Yelp Data - Masked/Yelp_dev_20_DBert.csv'
dataset = TokenizedDataset(val_file, transform = CreateTokens(dbert_tokenizer, None))                        
val_data = DataLoader(dataset, collate_fn = dataset.collate_fn, sampler = RandomSampler(dataset), batch_size = 32)

In [35]:
history = train(train_data, combined_model, dbert_tokenizer, device = 'cuda', epochs = 1)

 === Starting Training === 
Starting epoch 1 of 1




Batch   250  of  6,707.    Elapsed: 0:01:46. --- Avg batch loss 0.3013 over 250 batches
Batch   500  of  6,707.    Elapsed: 0:03:31. --- Avg batch loss 0.3105 over 250 batches
Batch   750  of  6,707.    Elapsed: 0:05:17. --- Avg batch loss 0.3002 over 250 batches
Batch 1,000  of  6,707.    Elapsed: 0:07:03. --- Avg batch loss 0.2992 over 250 batches
Batch 1,250  of  6,707.    Elapsed: 0:08:48. --- Avg batch loss 0.3011 over 250 batches
Batch 1,500  of  6,707.    Elapsed: 0:10:34. --- Avg batch loss 0.2977 over 250 batches
Batch 1,750  of  6,707.    Elapsed: 0:12:21. --- Avg batch loss 0.2988 over 250 batches
Batch 2,000  of  6,707.    Elapsed: 0:14:07. --- Avg batch loss 0.2941 over 250 batches
Batch 2,250  of  6,707.    Elapsed: 0:15:53. --- Avg batch loss 0.3024 over 250 batches
Batch 2,500  of  6,707.    Elapsed: 0:17:39. --- Avg batch loss 0.2954 over 250 batches
Batch 2,750  of  6,707.    Elapsed: 0:19:25. --- Avg batch loss 0.2986 over 250 batches
Batch 3,000  of  6,707.    Elaps

In [None]:
torch.save(combined_model, base_dir + '/checkpoint_files/combined_distilBERT_3epoch.pth')
# torch.save(combined_model, base_dir + '/checkpoint_files/combined_distilBERT_2epoch.pth')
# combined_model = torch.load(base_dir + '/checkpoint_files/combined_distilBERT_1epoch.pth')

In [33]:
history_val = validate(val_data, combined_model, dbert_tokenizer, num_batches = 500, device = 'cuda', translate = True)

 === Running Validation ===
Batch   125  of    500.    Elapsed: 0:00:27.
Batch   250  of    500.    Elapsed: 0:00:53.
Batch   375  of    500.    Elapsed: 0:01:20.
Batch   500  of    500.    Elapsed: 0:01:46.
Validation Accuracy 54.28%


In [31]:
import matplotlib.pyplot as plt

# plt.plot(history['batch_loss'])
print(history_val['avg_train_loss'] * len(val_data) / 500)

tensor(0.3123, device='cuda:0', grad_fn=<DivBackward0>)


In [None]:
def swap_style_tokens(tokenizer, batch):
  ''' Function that swaps the first token in a sentence with it's opposite
  Args: tokenizer - the tokenizer object
        batch - a batch of tokens of size [B, sentence_length]
  Returns: the batch with the first token swapped out
  '''
  max_token = len(tokenizer)

  style2_tensor = torch.ones([batch.size()[0]], dtype = torch.int64) * max_token
  style1_tensor = torch.ones([batch.size()[0]], dtype = torch.int64) * (max_token-1)

  # Swap the first tokens around; this is similar to the way one can swap 1 and 0 by doing
  # new_val = (1 - old_value)
  batch[:, 0] = (style2_tensor - batch[: 0]) + style1_tensor

  return batch


# Storing some random stuff down here - delete later

In [None]:
test_text = dbert_tokenizer("<neg> I was [MASK] about the pizza but then I [MASK] it anyway",
                      return_tensors = 'pt', max_length = 16, padding = 'max_length').to('cuda')
dbert_model.eval()
tt_ = dbert_model(input_ids = test_text['input_ids'], attention_mask = test_text['attention_mask'])

In [None]:
print([dbert_tokenizer.decode(o) for o in tt_.logits[:, [4, 11], :].argmax(-1)])

['thinking ate']


In [None]:
# dbert_model.to('cpu')
# a_ = torch.FloatTensor([0,1,0], device = 'cpu').unsqueeze(0) @ dbert_model.distilbert.embeddings.word_embeddings(torch.LongTensor([564, 571, 1002], device = 'cpu')) 
# a_.size()

embed_matrix = dbert_model.distilbert.embeddings.word_embeddings(
    torch.LongTensor(np.arange(0, len(dbert_tokenizer)))
)

In [None]:
import sys
embed_matrix.size()
(embed_matrix.element_size() * embed_matrix.nelement()) * 1e-6

93.769728

In [None]:
for d in dbert_model.vocab_transform.parameters():
  print(d.requires_grad)

for d in dbert_model.vocab_layer_norm.parameters():
  print(d.requires_grad)
  
# dbert_model.vocab_layer_norm.requires_grad

True
True
True
True


# Validation classification accuracy
This section first creates a batch of noised sentences reconstructed conditioned on a stlye token by the trained BART model and then uses the originally trained BERT classifier to test the accuracy of the (re)generated validation sentences

In [None]:
# Generate a Dataset object of the validation sentences for a generation pass through BART
do_translation = True
GEN_BATCH_SIZE = 32
# val_data = pd.read_csv(base_dir + 'Yelp Data - Masked/Val_Yelp_4k_80percent.csv').iloc[:1000,:]
val_data = pd.read_csv(base_dir + 'Yelp Data - Masked/Yelp_dev.csv')
# shuffle and take 1000 examples
val_data = val_data.sample(frac = 1)[:1000]
val_data_x = val_data['Masked']
val_data_y = val_data['Label']

val_gen_ids = []
val_gen_masks = []

for sent in val_data_x:
  # Replace these unecessary tokens
  sent = re.sub(r'<s> |</s> ', '', sent)
  if do_translation:
    if '<pos>' in sent:
      sent = re.sub(r'<pos>', '<neg>', sent)
    else:
      sent = re.sub(r'<neg>', '<pos>', sent)

  val_enc = tokenizer(sent, max_length=32, truncation = True,
                      pad_to_max_length = True, return_tensors = 'pt',
                      return_attention_mask = True, 
                      add_special_tokens = True)
  val_gen_ids.append(val_enc['input_ids'])
  val_gen_masks.append(val_enc['attention_mask'])

val_gen_ids = torch.cat(val_gen_ids, dim = 0)
val_gen_masks = torch.cat(val_gen_masks, dim = 0)

val_gen_dataset = TensorDataset(val_gen_ids, val_gen_masks, torch.tensor(np.array(val_data_y)))
val_gen_dataset = DataLoader(val_gen_dataset, batch_size = GEN_BATCH_SIZE)



In [None]:
# sum(val_data_y)

In [None]:
# Generate the validation sentences
generated_sentences = []
model.to(torch.device('cuda'))
model.eval()
start_time = time.time()
print("Starting validation sentence generation")
for i, batch in enumerate(val_gen_dataset):
  input_ids = batch[0].to(torch.device('cuda'))
  attn_mask = batch[1].to(torch.device('cuda'))
  for j in range(GEN_BATCH_SIZE):
    try:
      new_sents = model.generate_text(text = input_ids[j].unsqueeze(0), max_len = 32, 
                                      attn_mask = attn_mask[j].unsqueeze(0), is_batch=True)
      generated_sentences.append(new_sents)
    except: 
      continue

  if i % 4 == 0 and i != 0:
    print("Currently on batch {} of {} - {:.2%} completed".format(i, len(val_gen_dataset), i / len(val_gen_dataset)))
    time_per_batch = str(datetime.timedelta(seconds = int(round((time.time() - start_time)))))
    print("Time per batch: {}s".format(time_per_batch))
    start_time = time.time()

Starting validation sentence generation
Currently on batch 4 of 32 - 12.50% completed
Time per batch: 0:00:40s
Currently on batch 8 of 32 - 25.00% completed
Time per batch: 0:00:32s
Currently on batch 12 of 32 - 37.50% completed
Time per batch: 0:00:32s
Currently on batch 16 of 32 - 50.00% completed
Time per batch: 0:00:30s
Currently on batch 20 of 32 - 62.50% completed
Time per batch: 0:00:31s
Currently on batch 24 of 32 - 75.00% completed
Time per batch: 0:00:32s
Currently on batch 28 of 32 - 87.50% completed
Time per batch: 0:00:32s


In [None]:
# Do some minor cleaning, replacing double spaces and deleting the extra space at the end of the sentences
gen_sent = [re.sub(r' {2,4}|(?<=[.!])( )', '', sent[0]) for sent in generated_sentences]
gen_sent = [re.sub(r"^ ", "", sent) for sent in gen_sent]

In [None]:
gen_sent[15:25]#, val_data.loc[:10, 'Context']

["don't go eat here or pick up food.",
 'they provide great service and even better humor.',
 "it's like people forget it's hand washed and don't deserve it.",
 'very friendly, super clean, and extremely polite!',
 "a lot of hidden gems in scottsdale's shopping center.",
 "service was good but the food wasn't great.",
 'very unprofessional rude!',
 'there is no where inside for people to sit while waiting for a table.',
 "i didn't even care to try this dish.",
 'wow what a rip off!']

In [None]:
bert_tokenizer = transformers.BertTokenizerFast.from_pretrained('bert-base-uncased')

# Load the pre-trained BERT classifier
bert_model = torch.load(base_dir + '/checkpoint_files/yelp_classifier_1epoch.pth')
model.to(torch.device('cuda'))
bert_model.eval()

In [None]:
test_gen_ids = []
test_gen_masks = []


print("Starting Validation Set Tokenization: Generated tokens")

for sent in gen_sent:
  test_enc = bert_tokenizer(sent, max_length=32, truncation = True,
                               pad_to_max_length = True, return_tensors = 'pt',
                               return_attention_mask = True)
  test_gen_ids.append(test_enc['input_ids'])
  test_gen_masks.append(test_enc['attention_mask'])

test_gen_ids = torch.cat(test_gen_ids, dim = 0)
test_gen_masks = torch.cat(test_gen_masks, dim = 0)

Starting Validation Set Tokenization: Generated tokens




In [None]:
val_data_y =  val_data['Original'].str.contains('<pos>') * 1

In [None]:
# val_data_y

In [None]:
# Reverse the labels because we're testing translations
translation_val_y = 1 - val_data_y

# Create the DataLoader object for validation
if do_translation:
  test_gen_dataset = TensorDataset(test_gen_ids, test_gen_masks, torch.tensor(np.array(translation_val_y)))
else:
  test_gen_dataset = TensorDataset(test_gen_ids, test_gen_masks, torch.tensor(np.array(val_data_y)))
test_gen_dataset = DataLoader(test_gen_dataset, batch_size = GEN_BATCH_SIZE)

In [None]:
# Get the validation accuracy of the data
device = torch.device('cuda')
preds = []
start_time = time.time()
for i, batch in enumerate(test_gen_dataset):
  if i % 16 == 0:
    print("Currently conducting validation on batch {} of {}".format(i, len(test_gen_dataset)))
    print("Time per 16 batches: {}".format(str(datetime.timedelta(seconds = int(round((time.time() - start_time)))))))
    start_time = time.time()

  # Move the tokens to the device
  input_ids = batch[0].to(device)
  attention_masks = batch[1].to(device)
  labels = batch[2].to(device)

  model_output = bert_model(input_ids = input_ids, 
                        attention_mask = attention_masks,
                        labels = labels)
  
  # total_val_loss += loss.item()
  loss = model_output[0]
  logits = model_output[1]
  # Get the predictions
  preds.append(logits.cpu().detach().numpy().argmax(-1))

# Get the accuracy of the predictions
val_predictions = []
for it in preds:
  val_predictions.extend(it)

if do_translation:
  val_acc = np.mean(val_predictions == translation_val_y)
else:
  val_acc = np.mean(val_predictions == val_data_y)

print("Validation Accuracy {:.2f}%".format(val_acc*100))

Currently conducting validation on batch 0 of 32
Time per 16 batches: 0:00:00
Currently conducting validation on batch 16 of 32
Time per 16 batches: 0:00:02
Validation Accuracy 50.60%


In [None]:
# val_predictions == val_data_y
# val_data[val_predictions != val_data_y]

test_data_df = val_data.copy()
test_data_df['Generated'] = gen_sent

In [None]:
test_data_df.columns

Index(['Original', 'Masked', 'Label', 'Generated'], dtype='object')

In [None]:
test_data_df.rename(columns = {'Labels':'Actual_Label'}, inplace = True)
test_data_df['Predicted_Label'] = val_predictions
# test_data_df.drop(columns = ['Masked_Words', 'Masked_Indices', 'Context'], axis = 1, inplace = True)
# test_data_df.drop(columns = ['Masked_Words', 'Masked_Indices'], axis = 1, inplace = True)
# test_data_df.head()

In [None]:
# Fix up some weird issue with the predictions
test_data_df['Actual_Label'] = test_data_df['Original'].str.contains('<pos>') * 1
test_data_df.head()

Unnamed: 0,Original,Masked,Label,Generated,Predicted_Label,Actual_Label
31573,<pos> everyone there is super friendly.,<pos> everyone there is <mask> <mask>.,1,everyone there is very unprofessional.,0,1
43033,<pos> it's the perfect neighborhood bar.,<pos> it <mask> the <mask> neighborhood bar.,1,it hits the spot neighborhood bar.,1,1
2775,"<neg> i can laugh about it now, i was pissed a...","<neg> i can <mask> about it now, i was <mask> ...",0,"i can dream about it now, i was drunk at the t...",1,0
16895,<neg> she's just not very bright.,<neg> she's just <mask> <mask> <mask>.,0,she's just a sweetheart.,1,0
6954,<neg> we never changed our mind and i'm _num_ ...,<neg> we <mask> changed our mind and i'm _num_...,0,we just changed our mind and i'm _num_ minutes...,0,0


In [None]:
# Filters for "correct" translations from positive to negative
test_data_df_correct = test_data_df[test_data_df['Actual_Label'] != test_data_df['Predicted_Label']]

# Filters for "incorrect" translations from one sentiment to the other
test_data_df_incorrect = test_data_df[test_data_df['Actual_Label'] == test_data_df['Predicted_Label']]

In [None]:
test_data_df_incorrect.to_csv(base_dir + 'translated_results_incorrect_07012021.csv', index = False)
test_data_df_correct.to_csv(base_dir + 'translated_results_correct_07012021.csv', index = False)