In [11]:
import torch
from torch.utils.data import DataLoader, Dataset

# Define your dataset class
class QuestionDataset(Dataset):
    def __init__(self, questions, coarse_labels, fine_labels, word2idx, max_seq_len):
        self.questions = questions
        self.coarse_labels = coarse_labels
        self.fine_labels = fine_labels
        self.word2idx = word2idx
        self.max_seq_len = max_seq_len

    def __len__(self):
        return len(self.questions)

    def __getitem__(self, idx):
        question = self.questions[idx]
        coarse_label = self.coarse_labels[idx]
        fine_label = self.fine_labels[idx]
        return question, coarse_label, fine_label

# Define your model class
class QuestionClassifier(torch.nn.Module):
    def __init__(self, num_words, embedding_dim, num_coarse_classes, num_fine_classes):
        super().__init__()
        self.embedding = torch.nn.Embedding(num_words, embedding_dim)
        self.fc1 = torch.nn.Linear(embedding_dim, 128)
        self.fc2 = torch.nn.Linear(128, num_coarse_classes)
        self.fc3 = torch.nn.Linear(128, num_fine_classes)

    def forward(self, x):
        x = self.embedding(x)
        x = torch.mean(x, dim=1) # Average over the sequence dimension
        x = torch.relu(self.fc1(x))
        coarse_output = self.fc2(x)
        fine_output = self.fc3(x)
        return coarse_output, fine_output

# Set up your training data
questions = ['What is the capital of France?', 'What is the color of the sky?']
coarse_labels = [0, 1] # 0 = geography, 1 = science
fine_labels = [0, 1] # 0 = geography:city, 1 = science:color

# Create the word2idx dictionary
words = [word for question in questions for word in question.split()]
word2idx = {word: idx for idx, word in enumerate(set(words))}
word2idx['<PAD>'] = len(word2idx) # Add a special <PAD> token to the vocabulary

# Determine the maximum sequence length in the dataset
max_seq_len = max(len(question.split()) for question in questions)

# Pad the questions with the <PAD> token to ensure that they are all the same length
padded_questions = [question + ' <PAD>' * (max_seq_len - len(question.split())) for question in questions]

# Define your hyperparameters
num_words = len(word2idx) # The number of unique words in your vocabulary
embedding_dim = 100 # The size of your word embeddings
num_coarse_classes = 2 # The number of coarse classes (e.g. geography, science)
num_fine_classes = 2 # The number of fine classes (e.g. geography:city, science:color)
batch_size = 2
num_epochs = 10

# Set up your data loader
dataset = QuestionDataset(padded_questions, coarse_labels, fine_labels, word2idx, max_seq_len)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Set up your model and optimizer
model = QuestionClassifier(num_words, embedding_dim, num_coarse_classes, num_fine_classes)
optimizer = torch.optim.Adam(model.parameters())

# Train the model
for epoch in range(num_epochs):
    for batch_idx, (questions, coarse_labels, fine_labels) in enumerate(dataloader):
        # Convert the questions to a tensor of indices using the word2idx dictionary
        question_idxs = [[word2idx[word] for word in question.split()] for question in questions]
        question_tensor = torch.tensor(question_idxs)

        # Zero out the gradients
        optimizer.zero_grad()

        # Run the forward pass
        coarse_output, fine_output = model(question_tensor)

        # Compute the loss
        coarse_loss = torch.nn.functional.cross_entropy(coarse_output, coarse_labels)
        fine_loss = torch.nn.functional.cross_entropy(fine_output, fine_labels)
        loss = coarse_loss + fine_loss

        # Backpropagate the gradients
        loss.backward()

        # Update the weights
        optimizer.step()

        # Print some debug information
        #if (batch_idx + 1) % 10 == 0:
        print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, batch_idx + 1, len(dataloader), loss.item()))



Epoch [1/10], Batch [1/1], Loss: 1.4133
Epoch [2/10], Batch [1/1], Loss: 1.3471
Epoch [3/10], Batch [1/1], Loss: 1.2871
Epoch [4/10], Batch [1/1], Loss: 1.2316
Epoch [5/10], Batch [1/1], Loss: 1.1801
Epoch [6/10], Batch [1/1], Loss: 1.1303
Epoch [7/10], Batch [1/1], Loss: 1.0807
Epoch [8/10], Batch [1/1], Loss: 1.0327
Epoch [9/10], Batch [1/1], Loss: 0.9852
Epoch [10/10], Batch [1/1], Loss: 0.9380


In [17]:
# Set up your training data
questions = ['What is the capital of India?', 'What is the color of the water?']
coarse_labels = [0, 1] # 0 = geography, 1 = science
fine_labels = [0, 1] # 0 = geography:city, 1 = science:color

# Create the word2idx dictionary
words = [word for question in questions for word in question.split()]
word2idx = {word: idx for idx, word in enumerate(set(words))}
word2idx['<PAD>'] = len(word2idx) # Add a special <PAD> token to the vocabulary

# Determine the maximum sequence length in the dataset
max_seq_len = max(len(question.split()) for question in questions)

# Pad the questions with the <PAD> token to ensure that they are all the same length
padded_questions = [question + ' <PAD>' * (max_seq_len - len(question.split())) for question in questions]

# Define your hyperparameters
num_words = len(word2idx) # The number of unique words in your vocabulary
embedding_dim = 100 # The size of your word embeddings
num_coarse_classes = 2 # The number of coarse classes (e.g. geography, science)
num_fine_classes = 2 # The number of fine classes (e.g. geography:city, science:color)
batch_size = 2
num_epochs = 10

# Set up your data loader
dataset = QuestionDataset(padded_questions, coarse_labels, fine_labels, word2idx, max_seq_len)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [18]:
# Define a function to predict labels for a batch of questions
def predict(model, question_tensor):
    with torch.no_grad():
        coarse_output, fine_output = model(question_tensor)
        coarse_pred = coarse_output.argmax(dim=1)
        fine_pred = fine_output.argmax(dim=1)
    return coarse_pred, fine_pred

# Set the model to evaluation mode
model.eval()

# Loop over the test data and make predictions
coarse_preds = []
fine_preds = []
for batch_idx, (questions, coarse_labels, fine_labels) in enumerate(dataloader):
    # Convert the questions to a tensor of indices using the word2idx dictionary
    question_idxs = [[word2idx[word] for word in question.split()] for question in questions]
    question_tensor = torch.tensor(question_idxs)

    # Make predictions for the batch
    batch_coarse_preds, batch_fine_preds = predict(model, question_tensor)

    # Append the predictions to the output lists
    coarse_preds.extend(batch_coarse_preds.tolist())
    fine_preds.extend(batch_fine_preds.tolist())

# Print the accuracy on the test data
coarse_correct = sum([1 for i in range(len(dataset)) if coarse_preds[i] == dataset[i][1]])
fine_correct = sum([1 for i in range(len(dataset)) if fine_preds[i] == dataset[i][2]])
coarse_acc = coarse_correct / len(dataset)
fine_acc = fine_correct / len(dataset)
print('Coarse accuracy: {:.2%}, Fine accuracy: {:.2%}'.format(coarse_acc, fine_acc))


Coarse accuracy: 0.00%, Fine accuracy: 0.00%
