# Multi Choice Model


*   Take the training dataset generated by the 1-way self-ensemble training and train a multiple-choice model.
*   Use Hugging Face Transformers: `BertForMultipleChoice`
*   Preprocess and batch the text
*   Evaluate the results



### Step 1: File set up. 

In [None]:
# Mount Google Drive (Colab only) so the files stored there are reachable
# under /content/drive. The mount call prompts for an authorization code.

from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# Copy the dev-set CSV from the mounted Drive into the Colab working directory.

%cp -R /content/drive/My\ Drive/dev_SE1.csv /content/


### Step 2: Set up GPU and HuggingFace

In [None]:
# Pick the compute device: CUDA when a GPU is visible, otherwise the CPU.
import torch

if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
GPU: Tesla P100-PCIE-16GB


In [None]:
# Install the Hugging Face transformers package into the Colab runtime.
# NOTE(review): the install is unpinned; the recorded run resolved to
# transformers 3.0.2. Consider pinning so a re-run gets the same API.
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/27/3c/91ed8f5c4e7ef3227b4119200fc0ed4b4fd965b1f0172021c25701087825/transformers-3.0.2-py3-none-any.whl (769kB)
[K     |████████████████████████████████| 778kB 3.5MB/s 
Collecting tokenizers==0.8.1.rc1
[?25l  Downloading https://files.pythonhosted.org/packages/40/d0/30d5f8d221a0ed981a186c8eb986ce1c94e3a6e87f994eae9f4aa5250217/tokenizers-0.8.1rc1-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 10.0MB/s 
Collecting sentencepiece!=0.1.92
[?25l  Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 30.5MB/s 
[?25hCollecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB

### Step 3: Prep Dev Set


In [None]:
#Imports
import pandas as pd
import numpy as np
import time
import datetime
import random
import matplotlib.pyplot as plt
% matplotlib inline
import seaborn as sns
import json
import os
from transformers import BertForMultipleChoice, AdamW, BertConfig
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import TensorDataset, random_split
from transformers import BertTokenizer

# Load the BERT tokenizer.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)


  import pandas.util.testing as tm


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descripti…




In [None]:
# Load the dev set.
dev_df = pd.read_csv('dev_SE1.csv')
# The empty answer choice in column 'a' comes back from read_csv as NaN, so
# restore it to an empty string. The result is assigned back to the column
# rather than calling fillna(inplace=True) on the selection, because an
# in-place call on a column selection does not update the frame once pandas
# copy-on-write is active.
dev_df['a'] = dev_df['a'].fillna("")

print('Number of dev sentences: {:,}\n'.format(dev_df.shape[0]))

Number of dev sentences: 11,873



In [None]:
# Extract the model inputs from the dev frame as NumPy arrays.
contexts = dev_df['context'].values
questions = dev_df['question'].values
choices = dev_df[['a', 'b', 'c', 'd', 'e']].values
# Rows without a recorded correct index get label 1; labels are then cast to int.
dev_df['correct_index'] = dev_df['correct_index'].fillna(1)
labels = dev_df['correct_index'].astype(int).values

In [None]:
# Tokenize every (context, question + choice) pair.

input_ids = []
attention_masks = []
choices_features = []

# ---- Combine each question with each of its answer choices ----
for i in range(len(questions)):
    # One candidate text per answer choice: "<question> <choice>".
    temp_list = [str(questions[i]) + ' ' + str(choice) for choice in choices[i]]

    encoded_dict = tokenizer(
                        # Same context repeated once per candidate text.
                        [contexts[i]] * len(temp_list),
                        temp_list,
                        add_special_tokens = True,      # Add '[CLS]' and '[SEP]'
                        max_length = 384,               # Pad & truncate all pairs to this length.
                        # `padding` replaces the deprecated `pad_to_max_length=True`.
                        padding = 'max_length',
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',
                        truncation = True)

    # Each entry holds the encodings of all choices for one example.
    input_ids.append(encoded_dict['input_ids'])
    attention_masks.append(encoded_dict['attention_mask'])

# Stack the per-example tensors along a new leading (example) dimension.
input_ids = torch.stack(input_ids)
attention_masks = torch.stack(attention_masks)
labels = torch.tensor(labels).long()

In [None]:
# Sanity check: the three tensors should report the same number of examples.
for tensor in (input_ids, attention_masks, labels):
    print(tensor.size(0))

11873
11873
11873


In [None]:
# Number of examples per prediction batch.
batch_size = 4

# Wrap the tensors in a dataset and walk through it in order (no shuffling).
prediction_data = TensorDataset(input_ids, attention_masks, labels)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(
    prediction_data,
    batch_size=batch_size,
    sampler=prediction_sampler,
)

In [None]:
# Load the model architecture, then the fine-tuned weights saved on Drive.

model = BertForMultipleChoice.from_pretrained(
    "bert-base-cased",
    num_labels = 5,
    output_attentions = False,
    output_hidden_states = False)

# map_location lets a checkpoint written from a GPU session load on whichever
# device was selected in Step 2 (including a CPU-only runtime).
model.load_state_dict(
    torch.load('/content/drive/My Drive/model_save/model_state_dict.pth',
               map_location=device))

# Move the model to the selected device instead of forcing CUDA, so the CPU
# fallback chosen in Step 2 is honoured. Returns the model, so the cell still
# displays its structure.
model.to(device)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=435779157.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMultipleChoice: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForMultipleChoice from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForMultipleChoice from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMultipleChoice were not initialized from the model checkpoint at bert-base-cased and are newly ini

BertForMultipleChoice(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_aff

### Step 4: Evaluate Dev Set


In [None]:
# Run inference over the dev set and collect the raw logits batch by batch.
print('Predicting labels for {:,} test sentences...'.format(len(input_ids)))

# Switch the model to evaluation mode before predicting.
model.eval()
predictions, true_labels = [], []

for batch in prediction_dataloader:
  # Move every tensor of the batch to the selected device and unpack it.
  b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)

  # Gradients are not needed for prediction; skipping them saves memory and time.
  with torch.no_grad():
      outputs = model(b_input_ids,
                      token_type_ids=None,
                      attention_mask=b_input_mask)

  # The first output element holds the logits; bring logits and labels back
  # to the CPU as NumPy arrays.
  logits = outputs[0].detach().cpu().numpy()
  label_ids = b_labels.cpu().numpy()

  # Keep one array per batch.
  predictions.append(logits)
  true_labels.append(label_ids)

print('    DONE.')

Predicting labels for 11,873 test sentences...
    DONE.


In [None]:
# Spot check: predicted choice index for the first three examples of each of
# the first five batches.
test_list = [
    np.argmax(predictions[i], axis=1).flatten()[j]
    for i in range(5)
    for j in range(3)
]
test_list

[1, 1, 1, 1, 0, 1, 4, 1, 0, 1, 1, 0, 0, 0, 1]

In [None]:
# Flatten the per-batch predictions into a single list of predicted choice indexes (one per example).

# def get_label_list(true_labels):
#   full_label_list = []
#   for i in range(len(true_labels)):
#     for j in range(len(true_labels[i])):
#       full_label_list.append(true_labels[i][j])
#   return full_label_list

def get_label_list(predictions):
  """Flatten per-batch logits into one list of predicted choice indexes.

  Args:
    predictions: sequence of 2-D arrays, one per batch, with one row per
      example and one column per answer choice.

  Returns:
    A flat list holding, for every example in batch order, the column index
    of its highest logit. An empty input yields an empty list.
  """
  full_label_list = []
  for batch_logits in predictions:
    # Take the argmax once per batch; the previous version recomputed it for
    # every example in the batch.
    full_label_list.extend(np.argmax(batch_logits, axis=1).flatten())
  return full_label_list

# One predicted choice index per dev example, in the order the batches were produced.
full_labels = get_label_list(predictions)

In [None]:
#Use the functions above to get a prediction dictonary in the proper format.

def get_pred_dict(full_labels, df, ids=None):
    """Build the {question id: predicted answer text} mapping.

    Args:
        full_labels: predicted choice index for each example, by position.
        df: DataFrame whose row i holds the answer-choice texts of example i;
            the column position corresponds to the choice index.
        ids: optional sequence (list, array or Series) of question ids aligned
            positionally with ``df``. When omitted, the notebook-level
            ``dev_df['id']`` column is used, as before.

    Returns:
        dict mapping ``str(id)`` to ``str(chosen answer text)``.
    """
    if ids is None:
        # Backward-compatible default: the ids of the globally loaded dev set.
        ids = dev_df['id']
    # Read ids by position, like the answers below, so a non-default index
    # cannot pair an id with the wrong row.
    id_values = list(ids)

    pred_dict = {}
    for i, choice_index in enumerate(full_labels):
        pred_dict[str(id_values[i])] = str(df.iloc[i, choice_index])
    return pred_dict

# Write the prediction dict to disk as JSON.
def output_predictions(pred_dict, file_name):
    """Serialize ``pred_dict`` as JSON into ``file_name``.

    The file is opened for writing with UTF-8 encoding (replacing any existing
    content) and non-ASCII characters are emitted as escape sequences.
    """
    with open(file_name, mode='w', encoding='utf-8') as handle:
        json.dump(pred_dict, handle, ensure_ascii=True)


In [None]:
import json
# Keep only the five answer-choice columns so that column position equals choice index.
answer_df = dev_df.loc[:, ['a','b','c','d','e']]

# Map every question id to its predicted answer text, then write the mapping to disk.
pred_dict = get_pred_dict(full_labels, answer_df)
output_predictions(pred_dict, 'preds_SE1.json')


In [None]:
# Save a copy of the predictions file to the model_save folder on Drive.

%cp -R /content/preds_SE1.json /content/drive/My\ Drive/model_save 

In [None]:
# Clone a SQuAD 2.0 repository to obtain its evaluate-v2.0.py script, then
# move that script into /content.
# NOTE(review): the clone is not pinned to a commit, so the script that gets
# executed later may change between runs — consider pinning.

!git clone https://github.com/white127/SQUAD-2.0-bidaf.git
%mv /content/SQUAD-2.0-bidaf/evaluate-v2.0.py /content/

Cloning into 'SQUAD-2.0-bidaf'...
remote: Enumerating objects: 125, done.[K
remote: Total 125 (delta 0), reused 0 (delta 0), pack-reused 125[K
Receiving objects: 100% (125/125), 709.51 KiB | 6.01 MiB/s, done.
Resolving deltas: 100% (33/33), done.


In [None]:
# Download the SQuAD v2.0 dev set (dev-v2.0.json); the evaluation script
# takes it as its first argument.
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json

--2020-07-19 06:02:38--  https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.109.153, 185.199.108.153, 185.199.110.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.109.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4370528 (4.2M) [application/json]
Saving to: ‘dev-v2.0.json’


2020-07-19 06:02:38 (21.3 MB/s) - ‘dev-v2.0.json’ saved [4370528/4370528]



In [None]:
print("Results for SE-4, with 5 way Mutli Choice")
!python evaluate-v2.0.py dev-v2.0.json preds_SE1.json


Results for SE-4, with 5 way Mutli Choice
{
  "exact": 60.53230017687189,
  "f1": 62.96928264573781,
  "total": 11873,
  "HasAns_exact": 56.747638326585694,
  "HasAns_f1": 61.62859191174817,
  "HasAns_total": 5928,
  "NoAns_exact": 64.30613961312027,
  "NoAns_f1": 64.30613961312027,
  "NoAns_total": 5945
}


In [None]:
##### MAKE SURE YOU MOVE A COPY TO YOUR BUCKET.

# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
# NOTE(review): the recorded output of this cell ends in a NameError; it needs
# `os`, `model` and `tokenizer` from earlier cells to be defined in the session.

output_dir = '/content/drive/My Drive/model_save/'

# Create the output directory if needed. exist_ok=True makes this a no-op when
# the directory is already there and avoids a separate existence check.
os.makedirs(output_dir, exist_ok=True)

print("Saving model to %s" % output_dir)

# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

# Good practice: save your training arguments together with the trained model
#torch.save(args, os.path.join(output_dir, 'training_args.bin'))

Saving model to /content/drive/My Drive/model_save/


NameError: ignored

In [None]:
#torch.save(model, '/content/drive/My Drive/model_save/model.pt')
# Persist only the model weights (state_dict) to Drive.
# NOTE(review): this is the same path the model-loading cell reads its
# checkpoint from, so running this cell overwrites that file — confirm intended.
torch.save(model.state_dict(), '/content/drive/My Drive/model_save/model_state_dict.pth')