In [1]:
import torch
from torch import nn, optim
from torch.autograd import Variable as var
from torch.nn import functional as F
import torchtext.vocab as vocab
import torch.autograd as autograd
from tqdm import tqdm
from pprint import pprint
import json
import _pickle as pkl
from helpers.utils import StandardNLL
%matplotlib inline

In [2]:
glove = vocab.GloVe(name='6B', dim=50)
# MAX_QUES_LEN = 92
# MAX_CONTEXT_LEN= 921

In [14]:


def clean(token):
    cleaned_token = token.strip(".,?!-:;'()[]\"`")
    if cleaned_token[-2:] == "'s":
        cleaned_token = cleaned_token[:-2]
    if cleaned_token[-2:] == "'t":
        cleaned_token = cleaned_token[:-2]+'t'
    return cleaned_token

def vectorize(input_txt, max_len):
    input_seq = [clean(w) for w in input_txt.split(" ") if len(clean(w).strip())]
    glove_vec = []
    for w in input_seq:
        try:
            glove_vec.append(glove.stoi[w])
        except:
            glove_vec.append(400001) # <unk> token
    if len(glove_vec)<max_len:
        padding_zeros = [400000]*(max_len-len(glove_vec)) # <pad> token
        glove_vec = padding_zeros + glove_vec
    return glove_vec[:max_len]
    
def make_data(raw_X):
    X = []
    y = []
    for (c, q, a) in raw_X:
        context_rep = vectorize(c.lower(), MAX_CONTEXT_LEN)
        ques_rep = vectorize(q.lower(), MAX_QUES_LEN)
        X.append(context_rep+ques_rep) #only context for now
    return X

DIM=50
glove = vocab.GloVe(name='6B', dim=DIM)

glove.stoi['<pad>'] = len(glove.stoi)+1
glove.vectors = torch.cat((glove.vectors, torch.zeros(1, DIM)))
glove.stoi['<unk>'] = len(glove.stoi)+1 # add token->index for unknown/oov
glove.vectors = torch.cat((glove.vectors, torch.ones(1, DIM)*-1)) # add index->vec for unknown/oov

print(glove.vectors.size())
VOCAB_SIZE = glove.vectors.size()[0]

torch.Size([400002, 50])


In [3]:
with open('../data/data.json', 'r') as f:
    data = json.load(f)

In [4]:
print(glove.vectors.size())

torch.Size([400000, 50])


In [5]:
idx = 5
example_X = (data['X_train'][idx])
example_y = (data['y_train'][idx])
print("Context:", example_X[0])
print("Question:", example_X[1])
print("Answer Span:", example_y)
print("Answer:", example_X[0][example_y[0]:example_y[1]])
X = vectorize(example_X[0].lower(), MAX_CONTEXT_LEN)

Context: The boreholes on Funafuti, at the site now called Darwin's Drill, are the result of drilling conducted by the Royal Society of London for the purpose of investigating the formation of coral reefs to determine whether traces of shallow water organisms could be found at depth in the coral of Pacific atolls. This investigation followed the work on The Structure and Distribution of Coral Reefs conducted by Charles Darwin in the Pacific. Drilling occurred in 1896, 1897 and 1898. Professor Edgeworth David of the University of Sydney was a member of the 1896 "Funafuti Coral Reef Boring Expedition of the Royal Society", under Professor William Sollas and lead the expedition in 1897. Photographers on these trips recorded people, communities, and scenes at Funafuti.
Question: What was Darwin's work on coral reefs titled?
Answer Span: [347, 392]
Answer: The Structure and Distribution of Coral Reefs


NameError: name 'vectorize' is not defined

In [11]:
MAX_CONTEXT_LEN = 0
MAX_QUES_LEN = 0
# print(data['X_train'][0])
for (c,q, _) in data['X_train'][:5000]:
#     print(len(c.split()))
    if len(c.split())> MAX_CONTEXT_LEN:
        MAX_CONTEXT_LEN= len(c)
    if len(q.split())> MAX_QUES_LEN:
        MAX_QUES_LEN= len(q)
print(MAX_QUES_LEN)
print(MAX_CONTEXT_LEN)

62
921


In [12]:
y_start = []
y_end = []

for (c,q,ans) in data['X_train']:
#     temp_s = list(0 for i in range(MAX_CONTEXT_LEN))
#     temp_e = list(0 for i in range(MAX_CONTEXT_LEN))
    temp_c = c.split()
    temp_c = ['PAD']*(MAX_CONTEXT_LEN-len(temp_c))+temp_c
    con_new = ' '.join(temp_c)
    start_pos = len(con_new[:con_new.find(ans)].split())-1
    end_pos = start_pos+(len(ans.split()))-2
#     if(start_pos)==921 or (end_pos)==921:
#         print(c,ans)
#         print(start_pos)
#         print(end_pos)
#     if(start_pos)==1:
#         print(c,ans)
#         print(start_pos)
#         print(end_pos)
#     print(temp_c)
#     print(c)
#     print(ans)
#     print(start_pos, end_pos)
#     temp_s[start_pos]=1
#     y_start.append(temp_s)
#     temp_e[end_pos]=1
#     y_end.append(temp_e)
    y_start.append(start_pos)
    y_end.append(end_pos)
#     print(y_start, y_end)

In [15]:
X_pass = make_data(data['X_train'][:5000])
# y_pass = data['y_train'][:400]
y_pass_start = y_start[:5000]
y_pass_end = y_end[:5000]
print(len(X_pass))
print(len(y_pass_start))
print(len(y_pass_end))

5000
5000
5000


In [16]:
class ModelV1(nn.Module):
    def __init__(self, config):
        super(ModelV1, self).__init__()
        
        self.input_size = config.get("input_size", MAX_CONTEXT_LEN+MAX_QUES_LEN)
        self.hidden_size = config.get("hidden_size", 128)
        self.output_size = config.get("output_size", MAX_CONTEXT_LEN)
        self.n_layers = config.get("n_layers", 1)
        self.vocab_size = config.get("vocab", VOCAB_SIZE)
        self.emb_dim = config.get("embedding_dim", DIM)
        self.bidir = config.get("Bidirectional", True)
        self.dirs = int(self.bidir)+1
        self.lr = config.get("learning_rate", 1e-3)
        self.batch_size = config.get("batch_size", 1)
        self.epochs = config.get("epochs", 5)
        self.opt = config.get("opt", "SGD")
        
        if self.opt == 'Adam':
            self.opt = optim.Adam
        else:
            self.opt = optim.SGD
        
        self.encoder = nn.Embedding(self.vocab_size, self.emb_dim)
        self.lstm = nn.LSTM(self.emb_dim, self.hidden_size, self.n_layers, bidirectional=self.bidir)
        self.decoder_start = nn.Linear(self.hidden_size, self.output_size)
        self.decoder_end = nn.Linear(self.hidden_size, self.output_size)
        self.init_weights()
    
    def init_weights(self):
        weight_scale = 0.01
        self.encoder.weight.data = glove.vectors
        self.decoder_start.bias.data.fill_(0)
        self.decoder_start.weight.data.uniform_(-weight_scale, weight_scale)
        self.decoder_end.bias.data.fill_(0)
        self.decoder_end.weight.data.uniform_(-weight_scale, weight_scale)

    def init_hidden(self, bs=None):
        if bs is None:
            bs = self.batch_size
        weight = next(self.parameters()).data
        return var(weight.new(self.n_layers*self.dirs, bs, self.hidden_size).zero_())
        
    def forward(self, inputs):
        if len(inputs)==1:
            inputs = var(torch.LongTensor(inputs[0]))
        else:
            inputs = var(torch.LongTensor(inputs))
#         print(inputs.size())
        embeds = self.encoder(inputs).permute(1,0,2)# get glove repr
#         print("embeds:", embeds.size())
        seq_len = embeds.size()[0]
        lstm_op, self.hidden = self.lstm(embeds, self.hidden)
        # print("lstm op:", lstm_op.size()) # (seq_len, bs, hidden_size*(dirs=2 for bi))
        lstm_op = lstm_op.permute(1, 0, 2) # (seq_len, bs, hdim)->(bs, seq_len, hdim)
#         print(lstm_op)
        
        end_pred = lstm_op[:, -1, :self.hidden_size] # forward direction
        start_pred = lstm_op[:, -1, self.hidden_size:] # reverse direction
        
        # print("lstm start, end preds:", start_pred.size(), end_pred.size())
        out_start = F.log_softmax(self.decoder_start(start_pred), dim=-1)
        out_end = F.log_softmax(self.decoder_end(end_pred), dim=-1)
        # print("outs:", out_start.size(), out_end.size())
        out = torch.cat((out_start, out_end), 1)
#         print("out:", out.size())
        return out
    
    def fit(self, X, y_s, y_e):
        opt = self.opt(self.parameters(), self.lr)
        losses = [] # epoch loss
        for epoch in range(self.epochs):
            print("epoch:", epoch)
            bs = self.batch_size
#             print(bs)
            bloss = 0.0 # batch loss
#             loss_epoch = []
#             print(len(y_s))
            for i in range(0, len(y_s)-bs+1, bs):
#                 print(i)
#             for (x_data, y_data_s, y_data_e) in zip(X, y_s, y_e):
                #print("batch:", bindex)
                h, c = self.init_hidden(), self.init_hidden()
                self.hidden = (h, c)
                # print(h.size(), c.size())
                opt.zero_grad()
#                 Xb = X[i:i+bs]
#                 Xb = torch.LongTensor(Xb)
#                 # print("Xb:", Xb.size())
#                 yb = var(torch.LongTensor(y[i:i+bs]))
#                 # print("yb:", yb.size())
#                 pred = self.forward(Xb) #prediction on batch features
                
                x = X[i:i+bs]
#                 print(x)
#                 y = autograd.Variable(torch.LongTensor([y_data_s, y_data_e]), requires_grad=False)
    #             Xb = X
    #             yb = var(torch.LongTensor(y[i:i+bs]))
                pred = self.forward(x) #prediction on batch features
#                 print(pred)
#                 print(autograd.Variable(torch.LongTensor(y_s[i:i+bs])))
                y_s_temp = torch.LongTensor(y_s[i:i+bs])
                y_e_temp = torch.LongTensor(y_e[i:i+bs])
#                 print(y_s_temp.size())
#                 print(y_s_temp.size())
#                 print(pred.size())
#                 print(y_data_s)
            
#                 loss= criterion(y_predicted, y)
                loss = F.nll_loss(pred[:, :self.output_size], autograd.Variable(y_s_temp)) \
                     + F.nll_loss(pred[:, self.output_size:], autograd.Variable(y_e_temp)) 
#                 loss_epoch.append(loss.data[0])
                bloss += loss.data[0]/bs

                loss.backward()
                opt.step()
            losses.append(bloss)
            print(losses[-1], end=', change: ')
            if len(losses)>1:
                diff = losses[-2]-losses[-1]
                rel_diff = diff/losses[-2]
                print("%s"%rel_diff, "%")
            else:
                print("00.0%")
        return losses

    def predict(self, X, bs=None):
        self.hidden = (self.init_hidden(bs), self.init_hidden(bs))
        result = self.forward(X)
        return self.get_span_indices(result)
    
    def get_span_indices(self, preds):
        s_pred = preds[:, :self.output_size]
        e_pred = preds[:, self.output_size:]
        _,  s_index = torch.max(s_pred, -1)
        _,  e_index = torch.max(e_pred, -1)
        return torch.cat((s_index.unsqueeze(1), e_index.unsqueeze(1)), -1)

In [23]:
conf = {"learning_rate": 0.5, 
        "epochs": 20,
       "batch_size": 50}
model = ModelV1(conf)

In [24]:
res = model.fit(X_pass, y_pass_start, y_pass_end)

epoch: 0
24.900323085784912, change: 00.0%
epoch: 1
22.0135221862793, change: 0.11593427481082076 %
epoch: 2
21.23638656616211, change: 0.035302647778989504 %
epoch: 3
21.028821010589603, change: 0.009774052422988056 %
epoch: 4
20.911689395904535, change: 0.005570051436839146 %
epoch: 5
20.82205373764039, change: 0.004286390093461383 %
epoch: 6
20.739428310394285, change: 0.0039681689562034 %
epoch: 7
20.65494527816772, change: 0.004073546819235295 %
epoch: 8
20.563988285064692, change: 0.0044036424148346295 %
epoch: 9
20.462826976776125, change: 0.004919342828163314 %
epoch: 10
20.3481623840332, change: 0.0056035557976941534 %
epoch: 11
20.21981365203857, change: 0.006307632579900198 %
epoch: 12
20.081028594970704, change: 0.006863814843015375 %
epoch: 13
19.93543586730956, change: 0.00725026245406604 %
epoch: 14
19.781163043975834, change: 0.00773862304092915 %
epoch: 15
19.618849220275873, change: 0.008205474235216517 %
epoch: 16
19.42814062118531, change: 0.00972068223519795 %
epoc

In [26]:
for x, y in zip(data['X_val'][:200], data['y_val'][:200]):
    c = x[0]
    a = x[2]
    x = make_data([x])
#     print(x)
    temp_c = c.split()
    temp_c = ['PAD']*(MAX_CONTEXT_LEN-len(temp_c))+temp_c
#     con_new = ' '.join(temp_c)
#     print(con_new)
    res = model.predict([x], bs=1).data.tolist()[0]
    print("Predicted span:", res)
    if res[0]>res[1]:
        res[0], res[1] = res[1], res[0]
        print("switched to:", res)
    print("Predicted Answer:", temp_c[res[0]:res[1]])
    print("Actual:", a)
    print("="*50)

Predicted span: [849, 918]
Predicted Answer: ['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'Each', 'year', "HHC's", 'facilities', 'provide', 'about', '225,000', 'admissions,', 'one', 'million', 'emergency', 'room', 'visits', 'and', 'five', 'million', 'clinic', 'visits', 'to', 'New', 'Yorkers.', 'HHC', 'facilities', 'treat', 'nearly', 'one-fifth', 'of', 'all', 'general', 'hospital', 'discharges', 'and', 'more', 'than', 'one', 'third', 'of', 'emergency', 'room', 'and', 'hospital-based', 'clinic', 'visits', 'in']
Actual: one million
Predicted span: [867, 918]
Predicted Answer: ['limited', 'period', 'of', 'time,', 'in', 'exchange', 'for', 'the', 'public', 'disclosure', 'of', 'the', 'invention.', 'An', 'invention', 'is', 'a', 'solution', 'to', 'a', 'specific', 'technological', 'problem,', 'which', 'may', 'be', 'a', 'product', 'or', 'a', 'process', 'and', 'general

Predicted span: [837, 918]
Predicted Answer: ['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'Because', 'of', 'space', 'constraints,', 'NARA', 'opened', 'a', 'second', 'facility,', 'known', 'informally', 'as', 'Archives', 'II,', 'in', '1994', 'near', 'the', 'University', 'of', 'Maryland,', 'College', 'Park', 'campus', '(8601', 'Adelphi', 'Road,', 'College', 'Park,', 'MD,', '20740-6001).', 'Largely', 'because', 'of', 'this', 'proximity,', 'NARA', 'and', 'the', 'University', 'of', 'Maryland', 'engage', 'in', 'cooperative', 'initiatives.', 'The', 'College', 'Park', 'campus', 'includes', 'an', 'archaeological', 'site', 'that', 'was', 'listed', 'on', 'the', 'National', 'Register', 'of', 'Historic']
Actual: 1994
Predicted span: [867, 883]
Predicted Answer: ['or', 'die', 'trying', 'to', 'save', 'Swann.', 'Bond', 'finds', 'Swann', 'and', 'the', 'two', 'escape', 'by', 'boat', 'as']
Actual: Westminster Bridge
Predicte

Predicted span: [854, 918]
Predicted Answer: ['freed', 'until', '1827.', 'In', 'connection', 'with', 'the', 'Second', 'Great', 'Awakening,', 'Quaker', 'and', 'Methodist', 'preachers', 'in', 'the', 'South', 'urged', 'slaveholders', 'to', 'free', 'their', 'slaves.', 'Revolutionary', 'ideals', 'led', 'many', 'men', 'to', 'free', 'their', 'slaves,', 'some', 'by', 'deed', 'and', 'others', 'by', 'will,', 'so', 'that', 'from', '1782', 'to', '1810,', 'the', 'percentage', 'of', 'free', 'people', 'of', 'color', 'rose', 'from', 'less', 'than', 'one', 'percent', 'to', 'nearly', '10', 'percent', 'of', 'blacks']
Actual: gradual emancipation
Predicted span: [843, 918]
Predicted Answer: ['hang', 'on', 'to', 'her', 'day', 'job."', 'In', 'December', '2007,', 'the', 'Rock', 'and', 'Roll', 'Hall', 'of', 'Fame', 'announced', 'Madonna', 'as', 'one', 'of', 'the', 'five', 'inductees', 'of', '2008.', 'At', 'the', 'induction', 'ceremony', 'on', 'March', '10,', '2008,', 'Madonna', 'did', 'not', 'sing', 'but', 'a

Predicted span: [908, 866]
switched to: [866, 908]
Predicted Answer: ['Seattle', 'to', 'Portland', 'Bicycle', 'Classic,', 'and', 'specialized', 'film', 'festivals,', 'such', 'as', 'the', 'Maelstrom', 'International', 'Fantastic', 'Film', 'Festival,', 'the', 'Seattle', 'Asian', 'American', 'Film', 'Festival', '(formerly', 'known', 'as', 'the', 'Northwest', 'Asian', 'American', 'Film', 'Festival),', "Children's", 'Film', 'Festival', 'Seattle,', 'Translation:', 'the', 'Seattle', 'Transgender', 'Film', 'Festival,']
Actual: Seattle Polish Film Festival
Predicted span: [901, 918]
Predicted Answer: ['or', '500', 'ppm)', 'in', 'the', 'diet', 'of', 'rabbits', 'may', 'favorably', 'influence', 'feed', 'conversion', 'efficiency,', 'growth', 'rates,', 'and']
Actual: 30 mg/kg
Predicted span: [868, 835]
switched to: [835, 868]
Predicted Answer: ['now', 'the', 'contiguous', 'United', 'States,', 'including', 'their', 'descendants,', 'are', 'commonly', 'called', '"American', 'Indians",', 'or', 'simply',

Predicted span: [884, 918]
Predicted Answer: ['A', 'stand-out', 'case', 'is', 'the', 'President', 'of', 'Iran,', 'who', 'is', 'not', 'actually', 'a', 'head', 'of', 'state,', 'but', 'the', 'head', 'of', 'the', 'government', 'of', 'Iran.', 'He', 'is', 'referred', 'to', 'as', '"president"', 'in', 'both', 'the', 'Persian']
Actual: President
Predicted span: [919, 918]
switched to: [918, 919]
Predicted Answer: ['have']
Actual: The College Dropout
Predicted span: [901, 918]
Predicted Answer: ['genome,', 'only', '1–2%', 'consists', 'of', 'protein-coding', 'genes,', 'with', 'the', 'remainder', 'being', "'noncoding'", 'DNA', 'such', 'as', 'introns,', 'retrotransposons,']
Actual: 2,000,000
Predicted span: [901, 803]
switched to: [803, 901]
Predicted Answer: ['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'Birds', '(Aves)', 'are', 'a', '

Predicted span: [838, 897]
Predicted Answer: ['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'During', 'the', 'Dutch', 'Golden', 'Age', 'in', 'the', 'late', '16th', 'century', 'onward,', 'the', 'Dutch', 'Republic', 'dominated', 'world', 'trade', 'in', 'the', '17th', 'century,', 'conquering', 'a', 'vast', 'colonial', 'empire']
Actual: The County of Holland
Predicted span: [868, 859]
switched to: [859, 868]
Predicted Answer: ['Manhattan', 'and', 'whose', 'jurisdiction', 'includes', 'Manhattan', 'and', 'the', 'Bronx,']
Actual: the Eastern District of New York
Predicted span: [867, 918]
Predicted Answer: ['a', 'vote', 'of', '4', 'to', '1.', 'The', 'sale', 'was', 'completed', 'on', 'January', '28,', '2011.', 'In', 'late', 'December', '2012,', 'Comcast', 'added', 'the', 'NBC', 'peacock', 'symbol', 'to', 'their'

Predicted span: [864, 918]
Predicted Answer: ['for', 'several', 'decades', 'by', 'failure', 'to', 'recognize', 'the', 'extreme', 'importance', 'of', 'a', 'relatively', 'small', 'air', 'gap', 'between', 'rotor', 'and', 'stator.', 'Efficient', 'designs', 'have', 'a', 'comparatively', 'small', 'air', 'gap.', '[a]', 'The', 'St.', 'Louis', 'motor,', 'long', 'used', 'in', 'classrooms', 'to', 'illustrate', 'motor', 'principles,', 'is', 'extremely', 'inefficient', 'for', 'the', 'same', 'reason,', 'as', 'well', 'as', 'appearing', 'nothing', 'like']
Actual: South Side Elevated Railway
Predicted span: [838, 918]
Predicted Answer: ['before', 'you,', 'that', 'your', 'lord', 'has', 'at', 'his', 'Command', 'forgiveness', 'as', 'well', 'as', 'a', 'most', 'Grievous', 'Penalty."', 'The', 'revelation', 'does', 'not', 'come', 'directly', 'from', 'God', 'to', 'the', 'prophets.', 'Angels', 'acting', 'as', "God's", 'messengers', 'deliver', 'the', 'divine', 'revelation', 'to', 'them.', 'This', 'comes', 'out',

Predicted span: [843, 843]
Predicted Answer: []
Actual: Alcock and Brown
Predicted span: [845, 918]
Predicted Answer: ['circumference', 'of', 'a', 'wavelength', 'or', 'larger', 'act', 'similarly', 'to', 'dipole', 'antennas.', 'However', 'loops', 'small', 'in', 'comparison', 'to', 'a', 'wavelength', 'act', 'differently.', 'They', 'interact', 'with', 'the', 'magnetic', 'field', 'of', 'the', 'radio', 'wave', 'instead', 'of', 'the', 'electric', 'field', 'as', 'other', 'antennas', 'do,', 'and', 'so', 'are', 'relatively', 'insensitive', 'to', 'nearby', 'electrical', 'noise.', 'However', 'they', 'have', 'low', 'radiation', 'resistance,', 'and', 'so', 'are', 'inefficient', 'for', 'transmitting.', 'They', 'are', 'used', 'as', 'receiving', 'antennas', 'at', 'low', 'frequencies,', 'and', 'also', 'as']
Actual: circumference of a wavelength
Predicted span: [907, 918]
Predicted Answer: ['neighboring', 'municipalities,', 'e.', 'g.', 'to', 'Bhaktapur', 'and', 'just', 'about', 'covers', 'the']
Actual: 

Predicted span: [901, 918]
Predicted Answer: ['used', 'to', 'level', 'streets', 'and', 'residential', 'areas.', 'By', 'December,', 'the', 'SC', '2500', '(2,500', 'kg', '(5,512', 'lb))', '"Max"']
Actual: "Satan"
Predicted span: [850, 918]
Predicted Answer: ['New', 'York', 'were', 'not', 'freed', 'until', '1827.', 'In', 'connection', 'with', 'the', 'Second', 'Great', 'Awakening,', 'Quaker', 'and', 'Methodist', 'preachers', 'in', 'the', 'South', 'urged', 'slaveholders', 'to', 'free', 'their', 'slaves.', 'Revolutionary', 'ideals', 'led', 'many', 'men', 'to', 'free', 'their', 'slaves,', 'some', 'by', 'deed', 'and', 'others', 'by', 'will,', 'so', 'that', 'from', '1782', 'to', '1810,', 'the', 'percentage', 'of', 'free', 'people', 'of', 'color', 'rose', 'from', 'less', 'than', 'one', 'percent', 'to', 'nearly', '10', 'percent', 'of', 'blacks']
Actual: Revolutionary ideals led many men to free their slaves
Predicted span: [845, 844]
switched to: [844, 845]
Predicted Answer: ['general']
Actual: h

Predicted span: [868, 894]
Predicted Answer: ['the', "city's", 'connection', 'with', 'rock', 'in', 'the', 'song', 'Detroit', 'Rock', 'City', 'and', 'the', 'movie', 'produced', 'in', '1999.', 'In', 'the', '1980s,', 'Detroit', 'was', 'an', 'important', 'center', 'of']
Actual: 1960s and 70s
Predicted span: [895, 897]
Predicted Answer: ['the', 'state']
Actual: Desert
Predicted span: [919, 918]
switched to: [918, 919]
Predicted Answer: ['apricots,']
Actual: angiosperms
Predicted span: [838, 918]
Predicted Answer: ['have', 'each', 'indicated', 'the', 'most', 'common', 'reason', 'for', 'drinking', 'among', 'adolescents', 'to', 'relate', 'to', 'pleasure', 'and', 'recreation;', '80%', 'of', 'Argentinian', 'teens', 'reported', 'drinking', 'for', 'enjoyment,', 'while', 'only', '7%', 'drank', 'to', 'improve', 'a', 'bad', 'mood.', 'The', 'most', 'prevalent', 'answers', 'among', 'Canadian', 'adolescents', 'were', 'to', '"get', 'in', 'a', 'party', 'mood,"', '18%;', '"because', 'I', 'enjoy', 'it,"', '

Predicted span: [835, 918]
Predicted Answer: ['(the', 'building', 'now', 'houses', 'the', 'Delhi', 'Legislative', 'Assembly),', 'a', 'decade', 'before', 'the', 'new', 'capital', 'was', 'inaugurated', 'in', '1931.', 'Many', 'employees', 'were', 'brought', 'into', 'the', 'new', 'capital', 'from', 'distant', 'parts', 'of', 'India,', 'including', 'the', 'Bengal', 'Presidency', 'and', 'Madras', 'Presidency.', 'Subsequently', 'housing', 'for', 'them', 'was', 'developed', 'around', 'Gole', 'Market', 'area', 'in', 'the', '1920s.', 'Built', 'in', 'the', '1940s,', 'to', 'house', 'government', 'employees,', 'with', 'bungalows', 'for', 'senior', 'officials', 'in', 'the', 'nearby', 'Lodhi', 'Estate', 'area,', 'Lodhi', 'colony', 'near', 'historic', 'Lodhi', 'Gardens,', 'was', 'the', 'last', 'residential', 'areas', 'built', 'by']
Actual: 1931
Predicted span: [919, 918]
switched to: [918, 919]
Predicted Answer: ['Museum']
Actual: equestrian sculpture of Charles IV of Spain
Predicted span: [919, 918]
s

Predicted span: [919, 918]
switched to: [918, 919]
Predicted Answer: ['into']
Actual: increase in liquidity in the economy
