In [1]:
import os
import pandas as pd
import numpy as np
import re
import string
import spacy
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader,Dataset
import torch.optim as optim
from collections import Counter

In [2]:
training_path = 'TRAIN_FILE.TXT'
with open(training_path,'r') as f:
    text = f.readlines()    
    

In [3]:
sentences = []
for index in range(0,8000,4):
    text[index] = text[index].replace('\t',' ')
    text[index] = text[index].replace('\"','')
    text[index] = text[index].replace('\n','')
    words = text[index].split()
    text[index] = ' '.join(words[1:])
    
    sentences.append(text[index].lower())
sentences[:5]    

['the system as described above has its greatest application in an arrayed <e1>configuration</e1> of antenna <e2>elements</e2>.',
 'the <e1>child</e1> was carefully wrapped and bound into the <e2>cradle</e2> by means of a cord.',
 'the <e1>author</e1> of a keygen uses a <e2>disassembler</e2> to look at the raw assembly code.',
 'a misty <e1>ridge</e1> uprises from the <e2>surge</e2>.',
 'the <e1>student</e1> <e2>association</e2> is the voice of the undergraduate student population of the state university of new york at buffalo.']

In [4]:
pos_entity = []
for i,sentence in enumerate(sentences):
    
    pos_e1_low = sentence.index('<e1>') + 4
    pos_e1_high = sentence.index('</e1>')
    e1 = sentence[pos_e1_low:pos_e1_high]
    
    pos_e2_low = sentence.index('<e2>') + 4
    pos_e2_high = sentence.index('</e2>')
    e2 = sentence[pos_e2_low:pos_e2_high]
    
    sentences[i] = re.sub('<e1>|</e1>|<e2>|</e2>',' ',sentence)
    tokens = sentences[i].split()
    sentences[i] = ' '.join(tokens)
#     print(tokens)
    e1_list = []
    for entity in e1.split():
        e1_list.append(tokens.index(entity))
    e2_list = []
    for entity in e2.split():
        e2_list.append(tokens.index(entity))
    
    pos_entity.append([e1_list,e2_list])
max_tokens = 0
for element in pos_entity:
#     print(element)
    e1 = len(element[0])
    e2 = len(element[1])
    local_max = max(e1,e2)
    max_tokens = max(max_tokens,local_max)
for i,element in enumerate(pos_entity):
    e1_len = max_tokens - len(element[0])
    e1 = torch.tensor(element[0])
    e1 = F.pad(e1,(0,e1_len),"constant",-1).numpy()
    
    e2_len = max_tokens - len(element[1])
    e2 = torch.tensor(element[1])
    e2 = F.pad(e2,(0,e2_len),"constant",-1).numpy()
    pos_entity[i] = np.array([e1,e2])
pos_entity = torch.tensor(pos_entity)    

  pos_entity = torch.tensor(pos_entity)


In [5]:
allowed_relations = ['Cause-Effect', 'Component-Whole', 'Entity-Destination','Entity-Origin','Other-Relation',
                    'Instrument-Agency','Member-Collection','Content-Container','Message-Topic','Product-Producer']
relations = []
for index in range(1,8000,4):
    text[index] = text[index].replace('\n','')
    if text[index].find('(') != -1:
        index_of_braces = text[index].find('(')
        relation = text[index][:index_of_braces]
        if relation not in allowed_relations:
            relation = 'Other-Relation'
            text[index] = relation + text[index][index_of_braces:]
    else:
        text[index] = 'Other-Relation'
    relations.append(text[index])
Counter(relations)

Counter({'Component-Whole(e2,e1)': 111,
         'Other-Relation': 310,
         'Instrument-Agency(e2,e1)': 109,
         'Member-Collection(e1,e2)': 22,
         'Cause-Effect(e2,e1)': 157,
         'Entity-Destination(e1,e2)': 238,
         'Content-Container(e1,e2)': 117,
         'Message-Topic(e1,e2)': 130,
         'Product-Producer(e2,e1)': 87,
         'Member-Collection(e2,e1)': 174,
         'Entity-Origin(e1,e2)': 144,
         'Cause-Effect(e1,e2)': 94,
         'Component-Whole(e1,e2)': 114,
         'Message-Topic(e2,e1)': 44,
         'Product-Producer(e1,e2)': 66,
         'Entity-Origin(e2,e1)': 28,
         'Content-Container(e2,e1)': 30,
         'Instrument-Agency(e1,e2)': 25})

In [6]:
nlp = spacy.load('en_core_web_lg')
input_dataset = []
max_len = max([len(sentence.split()) for sentence in sentences])
for sentence in sentences:
    doc = nlp(sentence)
    vector = []
    for token in doc:
        vector.append(token.vector)
    vector = torch.tensor(vector)
    right_padding = max_len-vector.shape[0]
    padding = nn.ZeroPad2d((0,right_padding,0,0))
    vector = padding(vector.T)
    input_dataset.append(vector.T.numpy())
    
input_dataset = torch.tensor(np.array(input_dataset),device='cuda:0')    

In [7]:
just_relation = []
for relation in relations:
    if relation.find('(')!=-1:
        index = relation.find("(")
        relation = relation[:index]
        just_relation.append(relation)
    else:
        just_relation.append(relation)
just_relation = np.array(just_relation)        
encoder = LabelEncoder()
just_relation = torch.tensor(encoder.fit_transform(just_relation))
ohe = F.one_hot(just_relation)
ohe = ohe.to(dtype=torch.float)
ohe.shape


torch.Size([2000, 10])

In [8]:
ohe.sum(axis=0)

tensor([251., 225., 147., 238., 172., 134., 196., 174., 310., 153.])

In [9]:
class TrainDataset(Dataset):
    
    def __init__(self):
        self.data = input_dataset[:1600]
        self.target = ohe[:1600]
        self.position = pos_entity[:1600]
        
    def __getitem__(self,index):
        sample = self.data[index],self.position[index],self.target[index]
        return sample

    def __len__(self):
        return self.data.shape[0]

class TestDataset(Dataset):
    
    def __init__(self):
        self.data = input_dataset[1600:]
        self.target = ohe[1600:]
        self.position = pos_entity[1600:]
        
    def __getitem__(self,index):
        sample = self.data[index],self.position[index],self.target[index]
        return sample

    def __len__(self):
        return self.data.shape[0]
        

In [10]:
train = TrainDataset()
train_dataloader = DataLoader(train,batch_size=128,shuffle=True)
test = TestDataset()
test_dataloader = DataLoader(test,batch_size=128,shuffle=True)

In [11]:

class MyModel(nn.Module):
    def __init__(self,input_shape,lstm_hidden_size,num_layers,output_shape,device):
        super(MyModel,self).__init__()
        self.input_shape = input_shape 
        self.hidden_size = lstm_hidden_size
        self.output_shape = output_shape
        self.num_layers = num_layers
        self.device = device
        self.bidirectional_lstm = nn.LSTM(input_shape,lstm_hidden_size,num_layers,
                                          bidirectional=True,batch_first=True)
        E = self.hidden_size
        w = torch.empty(E,E)
        self.query_weights = nn.init.xavier_uniform_(w).to(device)
        self.key_weights = nn.init.xavier_uniform_(w).to(device)
        self.value_weights = nn.init.xavier_uniform_(w).to(device)
        self.attention = nn.MultiheadAttention(lstm_hidden_size,num_heads=1,batch_first=True)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(2*lstm_hidden_size,output_shape)
        self.softmax = nn.Softmax()
        
    def forward(self,inputs):
        h0,c0 = self.initialize_states(inputs[0].shape[0])
        lstm_outputs,_ = self.bidirectional_lstm(inputs[0],(h0,c0))
        lstm_outputs = torch.mean(lstm_outputs.view(lstm_outputs.shape[0],-1,2,self.hidden_size),dim=2)
        query,key,value = self.calculate_qkv(lstm_outputs)
        attn_outputs,_ = self.attention(query,key,value)
        
        batch_size = attn_outputs.shape[0]
        linear_inputs = torch.empty(batch_size,2,attn_outputs.shape[-1])
#         print(attn_outputs.shape)
        for i in range(batch_size):
            l1 = []
            for j in inputs[1][i][0]:
                if j != -1:
                    l1.append(j.item())
            l2 = []
            for k in inputs[1][i][1]:
                if k != -1:
                    l2.append(k.item())
#             print(attn_outputs[i][l1].shape)
            entity_1 = torch.mean(attn_outputs[i][l1].view(-1,128),dim=0)
            entity_2 = torch.mean(attn_outputs[i][l2].view(-1,128),dim=0)
            
            linear_inputs[i][0] = entity_1
            linear_inputs[i][1] = entity_2
            
        flatten_outputs = self.flatten(linear_inputs.to(self.device))
        logits = self.linear(flatten_outputs)
        
        predictions = self.softmax(logits)
        
        return predictions
        
    def initialize_states(self,batch_size):
        h0 = torch.zeros(2,batch_size,self.hidden_size).to(self.device)
        c0 = torch.zeros(2,batch_size,self.hidden_size).to(self.device)
        return h0,c0
    
    def calculate_qkv(self,hidden_states):
        
        q = torch.matmul(hidden_states,self.query_weights)
        k = torch.matmul(hidden_states,self.key_weights)
        v = torch.matmul(hidden_states,self.value_weights)
        
        return q,k,v

In [13]:
INPUT_SHAPE = 300
HIDDEN_SHAPE = 128
NUM_LAYERS = 1
OUTPUT_SHAPE = 10

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device}')

model = MyModel(INPUT_SHAPE,HIDDEN_SHAPE,NUM_LAYERS,OUTPUT_SHAPE,device).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

def train_one_epoch(model,dataloader,loss_fn,optimizer,device):
    for i,(embeddings,positions,targets) in enumerate(dataloader):
        embeddings,positions,targets = embeddings.to(device),positions.to(device),targets.to(device)
        predictions = model((embeddings,positions))
        loss = loss_fn(predictions,targets)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    print(f'loss: {loss.item()}')
    
def  train(model,dataloader,loss_fn,optimizer,device,epochs=100):
    for i in range(epochs):
        print(f"Epoch: {i+1}")
        train_one_epoch(model,train_dataloader,loss_fn,optimizer,device)
        print('-'*30)

    print("Training is done!!")
train(model,train_dataloader,loss_fn,optimizer,device,epochs=100)    

Using cuda
Epoch: 1


  predictions = self.softmax(logits)


loss: 2.2870354652404785
------------------------------
Epoch: 2
loss: 2.2901110649108887
------------------------------
Epoch: 3
loss: 2.1240410804748535
------------------------------
Epoch: 4
loss: 2.0762276649475098
------------------------------
Epoch: 5
loss: 2.0600991249084473
------------------------------
Epoch: 6
loss: 2.0403311252593994
------------------------------
Epoch: 7
loss: 1.9960249662399292
------------------------------
Epoch: 8
loss: 1.9029656648635864
------------------------------
Epoch: 9
loss: 1.9577655792236328
------------------------------
Epoch: 10
loss: 1.997976541519165
------------------------------
Epoch: 11
loss: 1.9810819625854492
------------------------------
Epoch: 12
loss: 2.028693675994873
------------------------------
Epoch: 13
loss: 1.8929334878921509
------------------------------
Epoch: 14
loss: 1.936974287033081
------------------------------
Epoch: 15
loss: 1.959375262260437
------------------------------
Epoch: 16
loss: 2.00022864341735

In [14]:
with torch.no_grad():
    n_samples = 0
    n_correct = 0
    for i,(embeddings,positions,targets) in enumerate(test_dataloader):
        outputs = model((embeddings,positions))
        outputs = outputs.argmax(dim=1)
        print(outputs)
        targets = targets.to(device)
        targets = targets.argmax(dim=1)
#         print(targets)
        n_samples += targets.shape[0]
        n_correct += (targets==outputs).sum().item()
    total_acc = 100*(n_correct/n_samples)
    print(f"Accuracy: {total_acc:.4f}")

tensor([6, 8, 8, 1, 4, 3, 1, 7, 6, 3, 3, 3, 0, 8, 6, 2, 8, 6, 8, 2, 8, 4, 7, 8,
        8, 9, 1, 1, 7, 8, 2, 0, 0, 2, 8, 7, 8, 7, 7, 8, 9, 8, 1, 3, 9, 6, 8, 4,
        8, 7, 0, 8, 6, 4, 7, 8, 8, 8, 1, 3, 5, 4, 3, 0, 8, 0, 2, 2, 9, 7, 2, 6,
        7, 8, 9, 0, 0, 9, 8, 8, 0, 6, 1, 6, 2, 8, 3, 5, 9, 2, 8, 8, 7, 8, 8, 7,
        4, 2, 1, 1, 8, 0, 4, 7, 9, 4, 9, 9, 8, 0, 8, 8, 4, 8, 4, 3, 8, 7, 8, 1,
        7, 4, 7, 3, 5, 7, 2, 2], device='cuda:0')


  predictions = self.softmax(logits)


tensor([2, 7, 8, 6, 5, 8, 7, 0, 6, 5, 6, 1, 8, 7, 1, 8, 6, 4, 4, 7, 8, 2, 4, 9,
        9, 8, 1, 2, 9, 0, 3, 1, 9, 8, 1, 1, 6, 8, 9, 0, 3, 7, 9, 6, 0, 0, 8, 8,
        4, 6, 1, 7, 6, 3, 0, 8, 8, 7, 7, 7, 4, 1, 9, 0, 8, 9, 3, 3, 7, 3, 8, 7,
        8, 2, 2, 7, 7, 1, 5, 0, 4, 0, 0, 7, 5, 0, 4, 6, 9, 0, 7, 8, 2, 7, 0, 0,
        5, 2, 1, 9, 8, 5, 0, 4, 2, 4, 7, 3, 4, 5, 1, 8, 0, 8, 5, 2, 2, 8, 7, 4,
        5, 0, 3, 1, 6, 0, 4, 3], device='cuda:0')
tensor([2, 8, 8, 3, 3, 6, 4, 1, 7, 3, 6, 4, 7, 8, 2, 4, 3, 9, 3, 0, 4, 8, 3, 2,
        7, 8, 1, 6, 7, 7, 8, 8, 8, 9, 0, 5, 9, 1, 8, 2, 7, 5, 2, 6, 0, 5, 8, 0,
        9, 0, 7, 8, 9, 0, 7, 9, 8, 3, 9, 0, 7, 9, 3, 8, 1, 4, 8, 6, 1, 8, 2, 2,
        8, 0, 1, 7, 7, 0, 1, 1, 6, 8, 5, 8, 2, 8, 0, 8, 3, 6, 7, 3, 8, 5, 6, 4,
        3, 3, 0, 7, 8, 4, 7, 8, 5, 8, 7, 9, 4, 8, 3, 1, 0, 2, 7, 0, 7, 1, 4, 0,
        3, 7, 6, 7, 1, 6, 9, 6], device='cuda:0')
tensor([3, 0, 7, 3, 5, 0, 3, 2, 9, 0, 8, 8, 3, 3, 1, 1], device='cuda:0')
Accuracy: 51.0000
