In [1]:
!pip install transformers==2.9.0
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import random
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5Tokenizer,
    get_linear_schedule_with_warmup
)
def set_seed(seed):
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
set_seed(42)

In [4]:
tokenizer = T5Tokenizer.from_pretrained('t5-base')
t5_model = T5ForConditionalGeneration.from_pretrained('drive/MyDrive/outputs/model_files')

In [5]:
# optimizer
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in t5_model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
    {
        "params": [p for n, p in t5_model.named_parameters() if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=3e-4, eps=1e-8)

In [6]:
true_false_adjective_tuples = [
                               (("Can you lend me some money?",	"I'm completely broke until payday."), "no"), 
                               (("Do you think he did the theft?",	"He is as innocent as a lamb"), "no"),
                               (("Are you sure you want the spoiler",	"haha sure. by the time, I get to it; I will forget the details, only knowing that it's touching at the end"),"yes"),
                               (("Does tomorrow afternoon work for you?",	"Yeah tomorrow afternoon works for me."),	"yes"),
                               (("oh wow, Cuban cigar? I've never smoke before",	"This one is actually from Peru, which is more my taste."),	"no"),
                               (("Are you sending them an invitation?",	"Certainly."),	"yes"),
                               (("You, uh, you don't want to go for a ride, do you?",	"Is it safe?"),	'yes'),
                               (("Have you ever made them yourself?",	"no. only eat them"),	"no"),
                               (("Have you seen my watch?",	"I will take a look for it around my house."),	"no"),
                               (("You don't look normal. Are you all right?", "I have a headache."),	"no"),
                               (("any improvements in your marathon time?",	"No, only a few minutes. I still find it difficult to run long races"),	"no"),
                               (("do you live near San Mateo?",	"no, but I'm willing to drive for good ramen"),	"no"),
                               (("Can he play the violin?",	"Can a pig fly?"),	"no"),
                               (("Aren't you scared?", "Sometimes."),	"yes")
]

In [7]:

t5_model.train()

epochs = 10

for epoch in range(epochs):
  print ("epoch ",epoch)
  for input,output in true_false_adjective_tuples:
    input_sent = "Answer 'yes' or 'no' based on the intention of the response: "+input[0] + input[1]+ " </s>"
    ouput_sent = output+" </s>"

    tokenized_inp = tokenizer.encode_plus(input_sent,  max_length=96, pad_to_max_length=True,return_tensors="pt")
    tokenized_output = tokenizer.encode_plus(ouput_sent, max_length=96, pad_to_max_length=True,return_tensors="pt")


    input_ids  = tokenized_inp["input_ids"]
    attention_mask = tokenized_inp["attention_mask"]

    lm_labels= tokenized_output["input_ids"]
    decoder_attention_mask=  tokenized_output["attention_mask"]


    # the forward function automatically creates the correct decoder_input_ids
    output = t5_model(input_ids=input_ids, lm_labels=lm_labels,decoder_attention_mask=decoder_attention_mask,attention_mask=attention_mask)
    loss = output[0]

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

epoch  0


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1055.)
  exp_avg.mul_(beta1).add_(1.0 - beta1, grad)


epoch  1
epoch  2
epoch  3
epoch  4
epoch  5
epoch  6
epoch  7
epoch  8
epoch  9


In [9]:
import csv
file = csv.reader(open('testing.csv'))
correct = 0
wrong = 0
total = 0
tp = 0
tn = 0
fp = 0
fn = 0
for line in file:
  test_sent = "Answer 'yes' or 'no' based on the intention of the response:" + line[0] + "</s>"
  print(test_sent)
  test_tokenized = tokenizer.encode_plus(test_sent, return_tensors="pt")

  test_input_ids  = test_tokenized["input_ids"]
  test_attention_mask = test_tokenized["attention_mask"]

  t5_model.eval()
  beam_outputs = t5_model.generate(
      input_ids=test_input_ids,attention_mask=test_attention_mask,
      max_length=64,
      early_stopping=True,
      num_beams=10,
      num_return_sequences=1,
      no_repeat_ngram_size=2
  )

  for beam_output in beam_outputs:
      sent = tokenizer.decode(beam_output, skip_special_tokens=True,clean_up_tokenization_spaces=True)
      print ("Prediction:", sent, "Actual", line[1])
      if sent == line[1]:
        correct += 1
      else:
        wrong += 1
      if sent == 'yes' and line[1] == 'yes':
        tp += 1
      elif sent == 'yes' and line[1] == 'no':
        fp += 1
      elif sent == 'no' and line[1] == 'yes':
        fn += 1
      elif sent == 'no' and line[1] == 'no':
        tn += 1
      print(correct, wrong)
  print("")
  total += 1
  print(total)


Answer 'yes' or 'no' based on the intention of the response:Did you tell her about the dress?I just bit my tongue.</s>


  beam_id = beam_token_id // vocab_size


Prediction: no Actual no
1 0

1
Answer 'yes' or 'no' based on the intention of the response:Will you stay if we promise to be good?Och! That's a piecrust promise. Easily made, easily broken.</s>
Prediction: no Actual no
2 0

2
Answer 'yes' or 'no' based on the intention of the response:Ever heard of it?No, what is it about?</s>
Prediction: no Actual no
3 0

3
Answer 'yes' or 'no' based on the intention of the response:Do you play pingpong often?No, very rarely. It's kind of amazing that I won, but I'm still taking credit for it.</s>
Prediction: no Actual no
4 0

4
Answer 'yes' or 'no' based on the intention of the response:Does this look good?That's it weirdo!</s>
Prediction: yes Actual no
4 1

5
Answer 'yes' or 'no' based on the intention of the response:Do you have any plans?Yes, no shopping plans but I can't wait to eat thanksgiving food.</s>
Prediction: no Actual yes
4 2

6
Answer 'yes' or 'no' based on the intention of the response:Is someone unable to breathe, sir?It is, I believ

In [10]:
print("Correct Predictions: ",correct)
print("Incorrect Predictions:", wrong)
print("Accuracy:", correct/total)


Correct Predictions:  103
Incorrect Predictions: 79
Accuracy: 0.5659340659340659


In [11]:
recall = tp / (tp + fn)
precision = tp / (tp + fp)
f1_score = (2 * recall * precision) / (recall + precision) 
print("Recall: ", recall)
print("Precision: ", precision)
print("F1 score: ", f1_score)

Recall:  0.5185185185185185
Precision:  0.6829268292682927
F1 score:  0.5894736842105263
