In [1]:
!pip install transformers
!pip install config



In [2]:
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
#from sklearn.model_selection import train_test_spl
from transformers import RobertaModel, RobertaConfig, RobertaTokenizer
from transformers import GPT2Model, GPT2LMHeadModel, GPT2Config, GPT2Tokenizer
from transformers import BertTokenizer, BertConfig
from transformers import BertForMaskedLM , BertModel ,WEIGHTS_NAME, AdamW, get_linear_schedule_with_warmup
from transformers import PreTrainedModel, PreTrainedTokenizer , BertPreTrainedModel
from tqdm import tqdm, trange
import pandas as pd
import io
import numpy as np
import matplotlib.pyplot as plt
import codecs
from torch.nn.utils.rnn import pack_padded_sequence

Using TensorFlow backend.


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

'Tesla P100-PCIE-16GB'

In [4]:
from google.colab import drive
drive.mount('/content/drive')

train_file = 'drive/My Drive/datasets/train.txt'
dev_file = 'drive/My Drive/datasets/dev.txt'

quotes_file = 'drive/My Drive/datasets/all_quotes.txt'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case = False)

In [0]:
def read_token_map(file, word_index = 1,prob_index = 4, caseless = False):
  
  with codecs.open(file, 'r', 'utf-8') as f:
      lines = f.readlines()

  tokenized_texts = []
  token_map = []
  token_labels = []
  sent_length = []

  roberta_tokens = []
  orig_to_tok_map = []
  labels = []

  roberta_tokens.append("<s>")
  
  for line in lines:
    if not (line.isspace()):
      feats = line.strip().split()
      word = feats[word_index].lower() if caseless else feats[word_index]
      label = feats[prob_index].lower() if caseless else feats[prob_index]
      labels.append((float)(label))
      orig_to_tok_map.append(len(roberta_tokens))
      
      if(word == "n't"):
        word = "'t"
        if(roberta_tokens[-1] != "won"):
          roberta_tokens[-1] = roberta_tokens[-1] +"n"
      if(word == "wo"):
        word == "won"

      roberta_tokens.extend(tokenizer.tokenize(word))

    elif len(orig_to_tok_map) > 0:

      # lab = np.array(labels)
      # lab.sort()
      # if(len(labels)>=4):
      #   mini = lab[-4]
      # else:
      #   mini = lab[0]
 
      # for l in range(len(labels)):
      #   if(labels[l]<mini):
      #     labels[l] = 0.0

      roberta_tokens.append("</s>")
      tokenized_texts.append(roberta_tokens)
      token_map.append(orig_to_tok_map)
      token_labels.append(labels)
      sent_length.append(len(labels))
      roberta_tokens = []
      orig_to_tok_map = []
      labels = []
      length = 0
      roberta_tokens.append("<s>")
          
  if len(orig_to_tok_map) > 0:
    roberta_tokens.append("</s>")
    tokenized_texts.append(roberta_tokens)
    token_map.append(orig_to_tok_map)
    token_labels.append(labels)
    sent_length.append(len(labels))
  
  return tokenized_texts, token_map, token_labels, sent_length

In [7]:
t_tokenized_texts, t_token_map, t_token_label, t_sent_length = read_token_map(train_file)
print(t_tokenized_texts[100])
print(t_token_map[100])
print(t_token_label[100])
print(t_sent_length[100])

d_tokenized_texts, d_token_map, d_token_label, d_sent_length = read_token_map(dev_file)
print(d_tokenized_texts[50])
print(d_token_map[50])
print(d_token_label[50])
print(d_sent_length[50])
print(tokenizer.tokenize("Hello, my dog is cute", add_prefix_space = True))
print(tokenizer.tokenize("won't"))

['<s>', 'H', 'appiness', 'cons', 'ists', 'in', 'real', 'izing', 'it', 'is', 'all', 'a', 'great', 'str', 'ange', 'dream', '.', '</s>']
[1, 3, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16]
[0.6666666666666666, 0.1111111111111111, 0.0, 0.2222222222222222, 0.0, 0.1111111111111111, 0.1111111111111111, 0.0, 0.2222222222222222, 0.3333333333333333, 0.3333333333333333, 0.1111111111111111]
12
['<s>', '``', 'F', 'asc', 'inating', 'social', 'media', 'tip', 'or', 'fact', 'to', 'share', '.', "''", '@', 'Spe', 'aker', 'Name', '</s>']
[1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17]
[0.0, 0.5555555555555556, 0.0, 0.1111111111111111, 0.2222222222222222, 0.1111111111111111, 0.1111111111111111, 0.0, 0.2222222222222222, 0.0, 0.0, 0.2222222222222222, 0.2222222222222222]
13
['ĠHello', ',', 'Ġmy', 'Ġdog', 'Ġis', 'Ġcute']
['won', "'t"]


In [8]:
MAX_LEN = 72

# Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
t_input_ids = [tokenizer.convert_tokens_to_ids(x) for x in t_tokenized_texts]

# Pad our input tokens
t_input_ids = pad_sequences(t_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
t_token_map = pad_sequences(t_token_map, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
t_token_label = pad_sequences(t_token_label, maxlen=MAX_LEN, dtype="float", truncating="post", padding="post")

print(t_input_ids[100])
print(t_token_map[100])
print(t_token_label[100])

[    0   725 37055 10998  1952   179  8726  2787   405   354  1250   102
 12338  6031 10987 24009     4     2     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0]
[ 1  3  5  6  8  9 10 11 12 13 15 16  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
[0.66666667 0.11111111 0.         0.22222222 0.         0.11111111
 0.11111111 0.         0.22222222 0.33333333 0.33333333 0.11111111
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.     

In [9]:
d_input_ids = [tokenizer.convert_tokens_to_ids(x) for x in d_tokenized_texts]

# Pad our input tokens
d_input_ids = pad_sequences(d_input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
d_token_map = pad_sequences(d_token_map, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")
d_token_label = pad_sequences(d_token_label, maxlen=MAX_LEN, dtype="float", truncating="post", padding="post")

print(d_input_ids[50])
print(d_token_map[50])
print(d_token_label[50])

[    0 49519   597  8631 15647 19027  5535 39080   368 24905   560 12689
     4 17809  1039 29235  4218 31723     2     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0]
[ 1  2  5  6  7  8  9 10 11 12 13 14 17  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
[0.         0.55555556 0.         0.11111111 0.22222222 0.11111111
 0.11111111 0.         0.22222222 0.         0.         0.22222222
 0.22222222 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.     

In [10]:
t_attention_masks = []
# Create a mask of 1s for each token followed by 0s for padding
for seq in t_input_ids:
  seq_mask = [float(i>0) for i in seq]
  t_attention_masks.append(seq_mask)
print(t_attention_masks[100])

d_attention_masks = []
# Create a mask of 1s for each token followed by 0s for padding
for seq in d_input_ids:
  seq_mask = [float(i>0) for i in seq]
  d_attention_masks.append(seq_mask)
print(d_attention_masks[50])

[0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [0]:
t_input_ids = torch.tensor(t_input_ids)
t_token_map = torch.tensor(t_token_map )
t_token_label = torch.tensor(t_token_label)
t_attention_masks = torch.tensor(t_attention_masks)
t_sent_length = torch.tensor(t_sent_length)

d_input_ids = torch.tensor(d_input_ids)
d_token_map = torch.tensor(d_token_map )
d_token_label = torch.tensor(d_token_label)
d_attention_masks = torch.tensor(d_attention_masks)
d_sent_length = torch.tensor(d_sent_length)

# Select a batch size for training. 
batch_size = 32
# print(t_token_labels)
# Create an iterator of our data with torch DataLoader 
train_data = TensorDataset(t_input_ids, t_token_map, t_token_label, t_attention_masks, t_sent_length)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
validation_data = TensorDataset(d_input_ids, d_token_map, d_token_label, d_attention_masks, d_sent_length)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)

In [0]:
class roberta_model(nn.Module):
  def __init__(self, final_size, drop_prob, data_parallel=True):
    super(roberta_model, self).__init__()

    config = RobertaConfig.from_pretrained('roberta-base', output_hidden_states=True)
    roberta = RobertaModel.from_pretrained('roberta-base', output_hidden_states=True)
    
    #cnt=0
    #for child in roberta.children():
    #  cnt = cnt + 1
    #  if cnt<=12:
    #    for param in child.parameters():
    #      param.requires_grad = False

    if data_parallel:
        self.roberta = nn.DataParallel(roberta)
    else:
        self.roberta = roberta
    roberta_dim = 12*768
    hidden_dim1 = 900
    hidden_dim2 = 40
    hidden_dim3 = 20

    self.fc1 = nn.Linear(roberta_dim, hidden_dim1)
    self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
    #self.fc3 = nn.Linear(hidden_dim2, hidden_dim3)
    self.fc4 = nn.Linear(hidden_dim2, final_size)
    self.dropout1 = nn.Dropout(p=drop_prob)
    self.dropout2 = nn.Dropout(p=drop_prob)
    self.dropout3 = nn.Dropout(p=drop_prob)
    self.dropout4 = nn.Dropout(p=drop_prob)
           
  def forward(self, roberta_ids, roberta_mask, labels = None, roberta_token_starts = None,lm_lengths = None):
    
    batch_size = roberta_ids.size()[0]
    pad_size = roberta_ids.size()[1]
    #print("batch size",batch_size,"\t\tpad_size",pad_size)

    if(roberta_token_starts == None):
      output = self.roberta(roberta_ids, attention_mask = roberta_mask, masked_lm_labels=labels)
      return output
    
    output = self.roberta(roberta_ids, attention_mask = roberta_mask)
    #print(len(hiddden_states))
    #print(len(hidden_states[1]))
    #print(hidden_states[1][0].size())
    #print(np.shape(output))
    #print(np.shape(output[0]))
    #print(np.shape(output[0][0]))
    #print(np.shape(output[1]))
    #print(np.shape(output[1][0]))
    #print(np.shape(output[2]))
    #print(np.shape(output[2][0]))
    #print(len(output[3][0]))
    #print(len(output[3][1]))
    #print(len(output[3][2]))
    #print(len(output[3][0][0]))
    #print(len(output[3][0][0][0]))
    #print(len(output))
    #print(len(output[1]))
    #print(output[1][0].size())

    roberta_out = output[2][1]
    for layers in range(2,13,1):
      roberta_out = torch.cat((roberta_out, output[2][layers]), dim=2)
    
    #print(roberta_out.size())
    # bert_last_layer = output[1][0]
    # bert_second_last_layer = output[1][1]
    # bert_third_last_layer = output[1][2]
    # bert_fourth_last_layer = output[1][3]
    # bert_fifth_last_layer = output[1][4]
    # bert_sixth_last_layer = output[1][5]

    # bert_out = torch.cat((bert_last_layer, bert_second_last_layer, bert_third_last_layer, bert_fourth_last_layer, bert_fifth_last_layer, bert_sixth_last_layer), dim=2)
    
    pred_logits = torch.relu(self.fc1(self.dropout1(roberta_out)))
    pred_logits = torch.relu(self.fc2(self.dropout2(pred_logits)))
    #pred_logits = torch.relu(self.fc3(self.dropout3(pred_logits)))
    pred_logits = torch.sigmoid(self.fc4(self.dropout4(pred_logits)))
    pred_logits = torch.squeeze(pred_logits,2)
    # print(pred_logits.size())
    # print(labels.size())
    # print(pred_logits[1])
    # print(labels[1])
    # print(bert_token_starts[1])
    # print("\n")

    pred_labels = labels.clone()
    # print(pred_labels[1])
    # print("\n")
    
    for b in range(batch_size):
      for w in range(pad_size):
        if(roberta_token_starts[b][w]!=0):
          if(roberta_token_starts[b][w]>=pad_size):
            print(roberta_token_starts[b])
          else:
            pred_labels[b][w] = pred_logits[b][roberta_token_starts[b][w]]

    # print(pred_labels[1])
    # print(labels[1])
    # print("\n")

    lm_lengths, lm_sort_ind = lm_lengths.sort(dim=0, descending=True)
    scores = labels[lm_sort_ind]
    targets = pred_labels[lm_sort_ind]
    scores = pack_padded_sequence(scores, lm_lengths, batch_first=True).data
    targets = pack_padded_sequence(targets, lm_lengths, batch_first=True).data
    
    # mask = pred_labels!=0
    # total = mask[mask].size()[0]

    # loss_fn = nn.BCELoss(reduction='sum').to(device) 
    loss_fn = nn.BCELoss().to(device) 
    loss = loss_fn(targets,scores)
    # print(loss)

    # loss /= total 
    # print(loss) 
    return loss, pred_labels

In [0]:
model = roberta_model(1,0.3,True).to(device)

In [0]:
optimizer = AdamW(model.parameters(), lr=2e-5, eps = 1e-8)

epochs = 30
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)

In [0]:

def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3

def fix_padding(scores_numpy, label_probs,  mask_numpy):
    #if len(scores_numpy) != len(mask_numpy):
    #    print("Error: len(scores_numpy) != len(mask_numpy)")
    #assert len(scores_numpy) == len(mask_numpy)
    #if len(label_probs) != len(mask_numpy):
    #    print("len(label_probs) != len(mask_numpy)")
    #assert len(label_probs) == len(mask_numpy)

    all_scores_no_padd = []
    all_labels_no_pad = []
    for i in range(len(mask_numpy)):
        all_scores_no_padd.append(scores_numpy[i][:int(mask_numpy[i])])
        all_labels_no_pad.append(label_probs[i][:int(mask_numpy[i])])

    assert len(all_scores_no_padd) == len(all_labels_no_pad)
    return all_scores_no_padd, all_labels_no_pad

def match_M(batch_scores_no_padd, batch_labels_no_pad):

    top_m = [1, 2, 3, 4]
    batch_num_m=[]
    batch_score_m=[]
    for m in top_m:
        intersects_lst = []
        # exact_lst = []
        score_lst = []
        ############################################### computing scores:
        for s in batch_scores_no_padd:
            if len(s) <=m:
                continue
            h = m
            # if len(s) > h:
            #     while (s[np.argsort(s)[-h]] == s[np.argsort(s)[-(h + 1)]] and h < (len(s) - 1)):
            #         h += 1

            # s = np.asarray(s.cpu())
            s = np.asarray(s)
            #ind_score = np.argsort(s)[-h:]
            ind_score = sorted(range(len(s)), key = lambda sub: s[sub])[-h:]
            score_lst.append(ind_score)

        ############################################### computing labels:
        label_lst = []
        for l in batch_labels_no_pad:
            if len(l) <=m:
                continue
            # if it contains several top values with the same amount
            h = m
            # l = l.cpu()
            if len(l) > h:
                while (l[np.argsort(l)[-h]] == l[np.argsort(l)[-(h + 1)]] and h < (len(l) - 1)):
                    h += 1
            l = np.asarray(l)
            ind_label = np.argsort(l)[-h:]
            label_lst.append(ind_label)

        ############################################### :

        for i in range(len(score_lst)):
            intersect = intersection(score_lst[i], label_lst[i])
            intersects_lst.append((len(intersect))/(min(m, len(score_lst[i]))))
            # sorted_score_lst = sorted(score_lst[i])
            # sorted_label_lst =  sorted(label_lst[i])
            # if sorted_score_lst==sorted_label_lst:
            #     exact_lst.append(1)
            # else:
            #     exact_lst.append(0)
        batch_num_m.append(len(score_lst))
        batch_score_m.append(sum(intersects_lst))
    return batch_num_m, batch_score_m

In [0]:

def validation(model, validation_dataloader):
  print("")
  print("Running Validation...")

  model.eval()
  eval_loss, eval_accuracy = 0, 0
  nb_eval_steps, nb_eval_examples = 0, 0

  num_m = [0, 0, 0, 0]
  score_m = [0, 0, 0, 0]

  # Evaluate data for one epoch
  for batch in validation_dataloader:
      
      # Add batch to GPU
      batch = tuple(t.to(device) for t in batch)
      
      # Unpack the inputs from our dataloader
      v_input_ids = batch[0].to(device)
      v_input_mask = batch[3].to(device)
      v_token_starts = batch[1].to(device)
      v_labels = batch[2].to(device)
      v_sent_length = batch[4]
            
      # Telling the model not to compute or store gradients, saving memory and
      # speeding up validation
      with torch.no_grad():        
          output = model(v_input_ids, v_input_mask, v_labels, v_token_starts, v_sent_length)
      
      pred_labels = output[1]

      pred_labels = pred_labels.detach().cpu().numpy()
      v_labels = v_labels.to('cpu').numpy()
      # print(pred_labels[0])
      # print(v_labels[0])
      
      pred_labels, v_labels = fix_padding(pred_labels, v_labels, v_sent_length)
      # print(pred_labels[0])
      # print(v_labels[0])

      batch_num_m, batch_score_m = match_M(pred_labels, v_labels)
      num_m = [sum(i) for i in zip(num_m, batch_num_m)]
      score_m = [sum(i) for i in zip(score_m, batch_score_m)]
  
  m_score = [i/j for i,j in zip(score_m, num_m)]
  print("Validation Accuracy: ")
  print(m_score)
  v_score = np.mean(m_score)
  print(v_score)

In [17]:

# import random

# # Set the seed value all over the place to make this reproducible.
# seed_val = 42

# random.seed(seed_val)
# np.random.seed(seed_val)
# torch.manual_seed(seed_val)
# torch.cuda.manual_seed_all(seed_val)

# Store the average loss after each epoch so we can plot them.
loss_values = []

# For each epoch...
for epoch_i in range(0, epochs):
    
    # ========================================
    #               Training
    # ========================================
    
    # Perform one full pass over the training set.

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Reset the total loss for this epoch.
    total_loss = 0
    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):
            
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[3].to(device)
        b_token_starts = batch[1].to(device)
        b_labels = batch[2].to(device)
        b_sent_length = batch[4]

        model.zero_grad()   
        model.train()     

        output = model(b_input_ids, b_input_mask, b_labels, b_token_starts,b_sent_length)
        loss = output[0]

        total_loss += loss.item()

        # Perform a backward pass to calculate the gradients.
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Update the learning rate.
        scheduler.step()

        if step % 10 == 0:
          validation(model, validation_dataloader)

    # Calculate the average loss over the training data.
    # print("total loss",total_loss)
    avg_train_loss = total_loss / len(train_dataloader)            
    
    # Store the loss value for plotting the learning curve.
    loss_values.append(avg_train_loss)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))


print("")
print("Training complete!")


Training...

Running Validation...
Validation Accuracy: 
[0.08928571428571429, 0.21618037135278514, 0.32670454545454547, 0.4022727272727273]
0.25861083959144304

Running Validation...
Validation Accuracy: 
[0.41836734693877553, 0.616710875331565, 0.706439393939394, 0.7424242424242424]
0.6209854646584942

Running Validation...
Validation Accuracy: 
[0.45408163265306123, 0.6578249336870027, 0.7339015151515152, 0.7833333333333333]
0.6572853537062281

Running Validation...
Validation Accuracy: 
[0.45408163265306123, 0.6618037135278515, 0.7509469696969696, 0.7924242424242425]
0.6648141395755313

Running Validation...
Validation Accuracy: 
[0.4719387755102041, 0.6671087533156499, 0.7651515151515151, 0.8037878787878788]
0.6769967306913119

Running Validation...
Validation Accuracy: 
[0.5025510204081632, 0.6883289124668435, 0.7660984848484848, 0.8037878787878788]
0.6901915741278426

Running Validation...
Validation Accuracy: 
[0.5127551020408163, 0.6843501326259946, 0.7708333333333335, 0.8136