In [1]:
import nltk
from nltk.corpus import semcor
from nltk.stem import WordNetLemmatizer
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer
import pandas as pd 


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = pd.read_csv('./SemCor/semcor_data.csv')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [3]:
input_ids = []
attention_masks = []
labels = []
max_num_senses = 0

# csv file has 5 columns : id, sentence, glosses, sense_keys,target
for i in range(0,len(data[:10000])):
    sentence = data.iloc[i,1]
    sense_keys = data.iloc[i,3]
    target = data.iloc[i,4]

    # print(sentence)
    sent = sentence.replace('[TGT]', tokenizer.mask_token)
    tokens = tokenizer.encode_plus(sent, add_special_tokens=True,
                                   padding='max_length', max_length=128,
                                   truncation=True, return_tensors='pt')
    input_ids.append(tokens['input_ids'])
    attention_masks.append(tokens['attention_mask'])

    sense_keys = sense_keys.strip('[]')
    sense_keys = sense_keys.split(',')
    label = np.zeros(len(sense_keys))
    target = target.strip('[]')
    target = target.split(',')
    for j in range(len(target)):
        label[int(target[j])] = 1
    labels.append(label)
    max_num_senses = max(max_num_senses, len(sense_keys))
    # print(label)

    
    


In [83]:
labels[:20]

[array([1., 1., 0., 0.]),
 array([0., 0., 0., 1., 0., 0., 0.]),
 array([0., 0., 1., 0., 0.]),
 array([0., 1.]),
 array([1., 0., 0.]),
 array([0., 0., 0., 1.]),
 array([0., 0., 1., 0.]),
 array([0., 1., 0., 0.]),
 array([0., 0., 1., 0.]),
 array([1., 0., 0.]),
 array([0., 1., 0., 0.]),
 array([0., 0., 1., 0.]),
 array([0., 0., 0., 1., 0.]),
 array([0., 1., 0., 0.]),
 array([0., 0., 1.]),
 array([1.]),
 array([1., 0.]),
 array([1., 0.]),
 array([1.]),
 array([0., 1.])]

In [4]:
padded_labels = np.zeros((len(labels), max_num_senses))
mask = np.zeros((len(labels), max_num_senses))

for i, label in enumerate(labels):
    padded_labels[i, :len(label)] = label
    mask[i, :len(label)] = 1

input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(padded_labels, dtype=torch.float32)
mask = torch.tensor(mask, dtype=torch.float32)
dataset = TensorDataset(input_ids, attention_masks, labels, mask)


In [7]:
train_dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [8]:
len(next(iter(train_dataloader))[0])

4

In [9]:
from transformers import BertModel
import torch
import torch.nn as nn
from transformers import BertModel
bert_path = 'bert-base-uncased'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



class BiLSTM(nn.Module):
    def __init__(self, bert_path, hidden_size, num_classes):
        super(BiLSTM, self).__init__()
        self.bert = BertModel.from_pretrained(bert_path)
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size=768, hidden_size=hidden_size,
                            bidirectional=True, batch_first=True)
        self.dropout = nn.Dropout(p=0.2)
        self.fc = nn.Linear(hidden_size*2, num_classes)

    def forward(self, input_ids, attention_mask):
        output = self.bert(input_ids, attention_mask)[0]
        output, _ = self.lstm(output)
        output = self.dropout(output)
        output = self.fc(output[:, -1, :])
        return output


In [11]:
input_size = 768
hidden_size = 128
num_layers = 2
output_size = max_num_senses
learning_rate = 0.001
num_epochs = 10
model = BiLSTM(bert_path, hidden_size, output_size).to(device)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [12]:
import torch.optim as optim


loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for batch in train_dataloader:
        
        inputs, masks, labels,_ = batch
        inputs = inputs.to(device)
        masks = masks.to(device)
        labels = labels.to(device)
        outputs = model(inputs, masks)
        loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch +
          1, num_epochs, loss.item()))


OutOfMemoryError: CUDA out of memory. Tried to allocate 90.00 MiB (GPU 0; 1.95 GiB total capacity; 1.26 GiB already allocated; 36.38 MiB free; 1.37 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF