In [24]:
import json

train_path = 'data/train.json'
dev_path = 'data/dev.json'
test_path = 'data/test.json'

def load_data(path):
    with open(path, 'r') as f:
        data = json.load(f)
    return data


train_data = load_data(train_path)
dev_data = load_data(dev_path)
test_data = load_data(test_path)

train_data = train_data[:200]

In [25]:
# import pretrained bert based encoder
from transformers import BertTokenizer, BertModel
import torch


tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
# train the model
def preprocess_data(data, model):
    # encode the data
    for i in range(len(data)):
        text = data[i]['Problem']
        # encode the text
        input_ids = tokenizer.encode(text, return_tensors='pt')
        # feed the input_ids to the model
        outputs = model(input_ids)
        # get the hidden states
        hidden_states = outputs[0]
        # get the first token hidden states
        first_token_hidden_states = hidden_states[0]
        # get the average of the hidden states
        avg_hidden_states = torch.mean(first_token_hidden_states, dim=0)
        data[i]['Problem'] = avg_hidden_states

    # train the model
    return data
data = preprocess_data(train_data, model)
# encode the data


In [None]:
# DataLoader
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch

class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]['Problem']
    
train_dataset = MyDataset(data)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)


In [29]:
# make LSTM decoder for language translation task
import torch.nn as nn
import torch.nn.functional as F

class Decoder(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        output, hidden = self.lstm(input, hidden)
        output = self.out(output)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self):
        return (torch.zeros(1, 1, self.hidden_size), torch.zeros(1, 1, self.hidden_size))
    




In [30]:
   
def train(data):
    decoder = Decoder(768, 256, 768)
    criterion = nn.NLLLoss() # negative log likelihood loss
    optimizer = torch.optim.SGD(decoder.parameters(), lr=0.01) # stochastic gradient descent
    print("Training the Decoder model")
    for epoch in range(10):
        loss = 0
        for i, target in enumerate(data):
            optimizer.zero_grad()
            hidden = decoder.initHidden()
            for j in range(len(target)):
                output, hidden = decoder(target[j], hidden)
                loss += criterion(output, target[j])
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch} Loss: {loss}')
        # save the model
        torch.save(decoder.state_dict(), 'decoder.pth')
    return decoder






In [31]:
decoder = train(data)
# save the model
torch.save(decoder.state_dict(), 'decoder.pth')

Training the Decoder model
input:  torch.Size([768])
hidden[0]:  torch.Size([1, 1, 256])
hidden[1]:  torch.Size([1, 1, 256])


ValueError: LSTM: Expected input to be 2D or 3D, got 1D instead