I will be using PyTorch for my deep learning module, as I have the most experience with it from coursework and a summer program.

In [None]:
# Import the necessary modules
import torch
import torch.nn as nn

import numpy as np

# Task 1: Sentence Transformer Implementation

For this task, I referenced the textbook Dive into Deep Learning Chapter 11 Section 7 <https://d2l.ai/chapter_attention-mechanisms-and-transformers/transformer.html>, as well as my coursework from COMP SCI 539: Introduction to Artificial Neural Networks.

We want to implement the transformer architecture, modeled by the figure below:

<img src=transformer.png>

If we were to implement this from scratch, we would need to implement encoder and decoder layers, which would require implementing multi-head attention, feed-forward networks, etc. Fortunately, PyTorch provides a default Transformer module, which we can use. However, we will still need to provide masked source and target sequences.

First, we want to be able to convert a sentence to a fixed-size list of tokens (we will use words for convenience). We can use padding tokens to attain the specified word count. Next, we will want to convert the tokens into a list of indices, each of which will correspond to a certain word. These indices will be input to an Embedding layer, which will provide the embedded sequences.

To do this, we will need to create a dictionary of words, as well as some auxiliary functions and constants.

In [None]:
# Reserved tokens: each one has a word form and a fixed integer index.

# Start of sentence.
START_TOKEN_WORD = "SOS"
START_TOKEN_IDX = 0

# End of sentence.
END_TOKEN_WORD = "EOS"
END_TOKEN_IDX = 1

# Padding.
PAD_TOKEN_WORD = "PAD"
PAD_TOKEN_IDX = 2

# Unknown word.
# TODO: remove if unused
UNKNOWN_TOKEN_WORD = "UNK"
UNKNOWN_TOKEN_IDX = 3

class WordDictionary:
    """Growable vocabulary mapping words to integer indices and back.

    Attributes are plain dictionaries kept in sync by ``add_word``:
    ``word_to_index`` (word -> index), ``index_to_word`` (index -> word) and
    ``word_to_count`` (word -> number of times it was added). ``n_words`` is
    the index that the next previously unseen word will receive.
    """

    def __init__(self):
        """Create a vocabulary pre-populated with the four reserved tokens."""
        reserved = (
            (START_TOKEN_WORD, START_TOKEN_IDX),
            (END_TOKEN_WORD, END_TOKEN_IDX),
            (PAD_TOKEN_WORD, PAD_TOKEN_IDX),
            (UNKNOWN_TOKEN_WORD, UNKNOWN_TOKEN_IDX),
        )
        self.word_to_index = dict(reserved)
        self.index_to_word = {idx: word for word, idx in reserved}
        # Reserved tokens start with a count of zero; add_word increments
        # them like any other known word when they are encountered.
        self.word_to_count = dict.fromkeys(self.word_to_index, 0)
        self.n_words = len(reserved)

    def add_word_list(self, sentence: list[str]):
        """Register every word of ``sentence``, in order, via ``add_word``."""
        for token in sentence:
            self.add_word(token)

    def add_word(self, word: str):
        """Count ``word``, assigning it the next free index if it is new."""
        if word not in self.word_to_index:
            next_index = self.n_words
            self.word_to_index[word] = next_index
            self.index_to_word[next_index] = word
            self.word_to_count[word] = 1
            self.n_words = next_index + 1
            return
        self.word_to_count[word] += 1

def tokenize_and_pad(sentence: str, token_count: int):
    """Tokenize the sentence to a list of fixed length.

    The sentence is split on whitespace and prefixed with the start token.
    If the result is shorter than ``token_count``, the end token is appended
    and the remainder is filled with padding tokens. Otherwise the list is
    truncated to ``token_count`` entries and contains no end token.

    Args:
        sentence: Text to tokenize; words are separated by whitespace.
        token_count: Exact length of the returned list. Must be >= 0.

    Returns:
        A list of exactly ``token_count`` word tokens.

    Raises:
        ValueError: If ``token_count`` is negative.
    """
    # A negative count would make the final slice count from the end of the
    # list and return a list whose length depends on the sentence.
    if token_count < 0:
        raise ValueError(
            f"token_count must be non-negative, got {token_count}"
        )

    tokens = [START_TOKEN_WORD, *sentence.split()]
    if len(tokens) < token_count:
        tokens.append(END_TOKEN_WORD)
        # len(tokens) now includes the end token, so this fills exactly the
        # remaining slots (possibly zero).
        tokens.extend([PAD_TOKEN_WORD] * (token_count - len(tokens)))
    return tokens[:token_count]

def word_list_to_indeces(word_list: list[str], word_dict: WordDictionary):
    """Convert words to their vocabulary indices, growing the vocabulary.

    Every word in ``word_list`` is first registered in ``word_dict`` (new
    words get an index, known words have their count incremented), so this
    call mutates ``word_dict``.

    Returns:
        The index of each word, in the same order as ``word_list``.
    """
    word_dict.add_word_list(word_list)
    lookup = word_dict.word_to_index
    return [lookup[token] for token in word_list]

def indeces_to_word_list(indeces: list[int], word_dict: WordDictionary):
    """Convert vocabulary indices back to their words.

    Returns:
        The word for each index, in the same order as ``indeces``.

    Raises:
        KeyError: If an index is not present in ``word_dict.index_to_word``.
    """
    lookup = word_dict.index_to_word
    return [lookup[position] for position in indeces]

In [None]:
# TODO: implement

# Task 2: Multi-Task Learning Expansion

In [None]:
# TODO: implement

# Task 3: Training Considerations

In [None]:
# TODO: implement

# Task 4: Training Loop Implementation (BONUS)

In [None]:
# TODO: implement