Student Answer -> Bert encoding\
Question -> BERT encoding ------.->cross attention

The model can be created using the makeModel function.

In [1]:
from bert_embedding import BertEmbedding
from torch import nn
import torch
import torch.nn.functional as F
import numpy as np
import copy
import math

In [4]:
bert_abstract = """We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers.
 Unlike recent language representation models, BERT is designed to pre-train deep bidirectional representations by jointly conditioning on both left and right context in all layers.
 As a result, the pre-trained BERT representations can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. 
BERT is conceptually simple and empirically powerful. 
It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE benchmark to 80.4% (7.6% absolute improvement), MultiNLI accuracy to 86.7 (5.6% absolute improvement) and the SQuAD v1.1 question answering Test F1 to 93.2 (1.5% absolute improvement), outperforming human performance by 2.0%."""

In [5]:
def attention(query, key, value, mask=None, dropout=None):
    "Compute 'Scaled Dot Product Attention'"
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim = -1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn

In [2]:
student_ans = "Sky is red"
question = "What is the colour of the sky?"
reference_ans ="Sky appears blue"

In [3]:
def getBertEncoding(paragraph):
        sentences = paragraph.split('.')
        bert_embedding = BertEmbedding()
        result = bert_embedding(sentences)
        emb = torch.Tensor(result[0][1])
        emb = emb.reshape(emb.size()[0],1,emb.size()[-1])
        return emb

In [4]:
K= getBertEncoding(student_ans)
Q = getBertEncoding(question)

In [33]:
multihead_attn = nn.MultiheadAttention(embed_dim = 768, num_heads=3)

In [34]:
out =multihead_attn(key =K,value = K,query = Q)[0]

In [35]:
out[0].shape

torch.Size([1, 768])

In [22]:
def clones(module, N):
    "Produce N identical layers."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

In [23]:
class FeedForwardLayer(nn.Module):
    def __init__(self,inp_dim,hid_dim,dropout = 0.1):
        super(FeedForwardLayer, self).__init__()
        self.inp_dim = inp_dim
        self.hid_sim = hid_dim
        self.hidden = nn.Linear(inp_dim,hid_dim)
        self.output = nn.Linear(hid_dim,inp_dim)
        self.relu = nn.ReLU
        self.dropout = nn.Dropout
        
    def forward(self,x):
        x = self.hidden(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.output(x)
        return x

In [36]:
class LayerNorm(nn.Module):
    "Construct a layernorm module (See citation for details)."
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2

In [38]:
class SublayerConnection(nn.Module):
    """
    A residual connection followed by a layer norm.
    Note for code simplicity the norm is first as opposed to last.
    """
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        print(x)
        "Apply residual connection to any sublayer with the same size."
        return x + self.dropout(sublayer(self.norm(x)))


In [51]:
class EncoderBlock(nn.Module):
    def __init__(self,attentionBlock,feedForwardBlock,size,dropout = 0.1):
        super(EncoderBlock,self).__init__()
        self.attentionBlock = attentionBlock
        self.feedForwardBlock = feedForwardBlock
        self.sublayer = clones(SublayerConnection(size,dropout),2)
        self.size = size

    def forward(self,Query,Value):
        x = self.sublayer[0](Query, lambda x: self.attentionBlock(query=x, value=Value, key=Value)[0])
        return self.sublayer[1](x, self.feedForwardBlock)

    

In [28]:
class EncoderModule(nn.Module):
    def __init__(self, EncoderLayer,N):
        super(EncoderModule,self).__init__()
        self.layers = clones(EncoderLayer,N)
        self.norm = LayerNorm(EncoderLayer.size)
    
    def forward(self,Query,Value):
        for layers in self.layers:
            Query = layers(Query = Query,Value = Value)
        return Query


In [29]:
class RepresentationModule(nn.Module):
    def __init__(self,EncoderModule, embeddingLayer):
        super(RepresentationModule,self).__init__()
        self.EncoderModules = clones(EncoderModule,2)
        self.embeddingLayer = embeddingLayer
        #Get 2 clones of EncoderModule
    def forward(self, Question, ReferenceAnswer, StudentAnswer):
        Q = self.embeddingLayer(Question)
        StuAns = self.embeddingLayer(StudentAnswer)
        RefAns = self.embeddingLayer(ReferenceAnswer)
        studentAnsRep = self.EncoderModules[0](Q,StuAns)
        RefAnsRep = self.EncoderModules[1](Q,RefAns)
        return torch.cat((studentAnsRep,RefAnsRep),1)

In [48]:
def makeModel(emb_dim = 768,heads = 3,hid_lay_dim = 2304, N = 6):
    c = copy.deepcopy
    attn = nn.MultiheadAttention(embed_dim = emb_dim, num_heads=heads)
    ff = FeedForwardLayer(emb_dim,hid_lay_dim)

    model = RepresentationModule(
        EncoderModule( EncoderBlock( c(attn), c(ff), emb_dim), N),
        getBertEncoding
    )
    return model

In [52]:
check = makeModel()

In [53]:
out = check(question,reference_ans,student_ans)

tensor([[[ 0.5666,  0.0905, -0.3259,  ...,  0.1991,  0.3866,  0.0199]],

        [[ 0.2259,  0.1880,  0.2643,  ..., -0.1996,  0.0378,  0.6731]],

        [[ 0.4941, -0.0662,  0.1847,  ...,  0.0330, -0.1765,  0.4608]],

        ...,

        [[ 0.4937,  0.1258,  0.0544,  ..., -0.1227,  0.0570, -0.1065]],

        [[ 0.8392,  0.2743, -0.0246,  ...,  0.1315,  0.3748, -0.1369]],

        [[-0.0101,  0.0074, -0.9819,  ...,  0.0519,  0.2797, -0.2009]]])
torch.Size([1, 768])
tensor([[[ 0.3721,  0.0717, -0.2959,  ..., -0.0372,  0.4965,  0.1942]],

        [[ 0.0331,  0.1936,  0.2913,  ..., -0.3848,  0.2047,  0.6731]],

        [[ 0.3126, -0.0638,  0.2122,  ..., -0.1568, -0.1765,  0.6264]],

        ...,

        [[ 0.3179,  0.1253,  0.0860,  ..., -0.3201,  0.1884,  0.0557]],

        [[ 0.6459,  0.2743, -0.0024,  ..., -0.0656,  0.5031,  0.0301]],

        [[-0.1886,  0.0065, -0.9507,  ..., -0.1429,  0.4107, -0.0356]]],
       grad_fn=<AddBackward0>)
torch.Size([1, 768])


TypeError: '<' not supported between instances of 'ReLU' and 'int'

In [61]:
out.size()

torch.Size([8, 1, 768])

In [1]:
%run BERT_embeddings.ipynb

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mxnet 1.4.0 requires requests<2.19.0,>=2.18.4, but you have requests 2.25.1 which is incompatible.
Collecting pytorch-pretrained-bert
  Downloading pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123 kB)
Collecting boto3
  Downloading boto3-1.17.97-py2.py3-none-any.whl (131 kB)
Collecting tqdm
  Downloading tqdm-4.61.1-py2.py3-none-any.whl (75 kB)
Collecting regex
  Downloading regex-2021.4.4-cp37-cp37m-win_amd64.whl (269 kB)
Collecting botocore<1.21.0,>=1.20.97
  Downloading botocore-1.20.97-py2.py3-none-any.whl (7.6 MB)
Collecting jmespath<1.0.0,>=0.7.1
  Downloading jmespath-0.10.0-py2.py3-none-any.whl (24 kB)
Collecting s3transfer<0.5.0,>=0.4.0
  Downloading s3transfer-0.4.2-py2.py3-none-any.whl (79 kB)
Collecting urllib3<1.27,>=1.25.4
  Using cached urllib3-1.26.5-py2.py3-none-any.whl (138 kB)
Collecting requ

ModuleNotFoundError: No module named 'numpy.core._multiarray_umath'

In [5]:
from numpy.core import _multiarray_umath

ImportError: cannot import name '_multiarray_umath' from 'numpy.core' (d:\Adi\work\CLG\Capstone\Capstone\venv\lib\site-packages\numpy\core\__init__.py)