In [20]:
import torch
from torch import nn

In [22]:
class RecurrentLayer(nn.Module):
    """Single recurrent layer unrolled over the step dimension of its input.

    Starting from an all-zero hidden state, every step computes
    ``h_t = tanh(h_{t-1} @ hidden_w + x_t @ input_w + b)``.

    Args:
        embedded_dim: Size of the last dimension of each input step.
        hidden_dim: Size of the hidden state produced at each step.
    """

    def __init__(self, embedded_dim: int, hidden_dim: int):
        super().__init__()
        self.hidden_dim = hidden_dim

        # NOTE(review): all parameters start at 1.0, so with a zero initial
        # state every hidden unit computes the same value. Confirm this is
        # intentional (e.g. deterministic demos) before training with it.
        self.hidden_w: torch.Tensor = nn.Parameter(torch.ones(hidden_dim, hidden_dim))
        self.input_w: torch.Tensor = nn.Parameter(torch.ones(embedded_dim, hidden_dim))
        self.b: torch.Tensor = nn.Parameter(torch.ones(hidden_dim))
        self.tanh = nn.Tanh()

    def forward(self, x_seq: torch.Tensor) -> torch.Tensor:
        """Run the recurrence over every step of ``x_seq``.

        Args:
            x_seq: Tensor of shape ``(batch, steps, embedded_dim)``.

        Returns:
            Tensor of shape ``(batch, steps, hidden_dim)`` holding the hidden
            state after each step, on the same device and with the same dtype
            as ``x_seq``.

        Raises:
            ValueError: If ``x_seq`` is not 3-dimensional.
        """
        if x_seq.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, steps, features), got {tuple(x_seq.shape)}"
            )
        batch_size, seq_len, _ = x_seq.shape

        # new_zeros keeps the exact device (including its index) and the dtype
        # of the input, so the layer works on any GPU and after .double()/.half().
        hidden_state = x_seq.new_zeros((batch_size, self.hidden_dim))
        if seq_len == 0:
            return x_seq.new_zeros((batch_size, 0, self.hidden_dim))

        # Collect per-step states and stack once instead of re-concatenating
        # the growing output at every step.
        step_states = []
        for step in range(seq_len):
            z = (
                torch.matmul(hidden_state, self.hidden_w)
                + torch.matmul(x_seq[:, step, :], self.input_w)
                + self.b
            )
            hidden_state = self.tanh(z)
            step_states.append(hidden_state)
        return torch.stack(step_states, dim=1)

# Smoke test: feed a random tensor of shape (4, 2, 1) through a layer built
# with embedded_dim=1 and hidden_dim=6, then display the output shape.
x = torch.randn((4, 2, 1))
rnn = RecurrentLayer(1, 6)
# NOTE(review): `logit` holds the per-step hidden states returned by the layer.
logit = rnn(x)
logit.shape

torch.Size([4, 2, 6])

In [32]:
class Gate(nn.Module):
    """Affine map of the current input and previous hidden state, followed by an activation.

    Computes ``activation_function(h @ hidden_w + x @ input_w + b)``.

    Args:
        embedded_dim: Size of the last dimension of ``x``.
        hidden_dim: Size of the last dimension of ``h`` and of the output.
        activation_function: Module applied to the pre-activation.
    """

    def __init__(self, embedded_dim: int, hidden_dim: int, activation_function: nn.Module):
        super().__init__()
        recurrent_shape = (hidden_dim, hidden_dim)
        projection_shape = (embedded_dim, hidden_dim)

        # All three parameters are initialised to ones.
        self.hidden_w = nn.Parameter(torch.ones(recurrent_shape))
        self.input_w = nn.Parameter(torch.ones(projection_shape))
        self.b = nn.Parameter(torch.ones(hidden_dim))
        self.activation_function = activation_function

    def forward(self, x: torch.Tensor, h: torch.Tensor) -> torch.Tensor:
        """Return the activated combination of input ``x`` and hidden state ``h``."""
        recurrent_term = h @ self.hidden_w
        input_term = x @ self.input_w
        pre_activation = (recurrent_term + input_term) + self.b
        return self.activation_function(pre_activation)

class LSTMLayer(nn.Module):
    """LSTM layer unrolled over the step dimension of its input.

    Per step, with ``h`` and ``c`` starting at zero:
    ``f = forget_gate(x_t, h)``, ``i = input_gate(x_t, h)``,
    ``g = candidate_layer(x_t, h)``, ``c = f * c + i * g``,
    ``o = output_gate(x_t, h)``, ``h = o * tanh(c)``.

    Args:
        embedded_dim: Size of the last dimension of each input step.
        hidden_dim: Size of the hidden and cell states.
    """

    def __init__(self, embedded_dim, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim

        self.forget_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())
        self.input_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())
        self.candidate_layer = Gate(embedded_dim, hidden_dim, nn.Tanh())
        self.output_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())
        self.tanh = nn.Tanh()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the LSTM recurrence over every step of ``x``.

        Args:
            x: Tensor of shape ``(batch, steps, embedded_dim)``.

        Returns:
            Tensor of shape ``(batch, steps, hidden_dim)`` with the hidden
            state after each step, on the same device and with the same dtype
            as ``x``.

        Raises:
            ValueError: If ``x`` is not 3-dimensional.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, steps, features), got {tuple(x.shape)}"
            )
        batch_size, seq_len, _ = x.shape

        # new_zeros keeps the exact device (including its index) and the dtype
        # of the input rather than falling back to float32 on "<type>:0".
        c_t = x.new_zeros((batch_size, self.hidden_dim))
        h_t = x.new_zeros((batch_size, self.hidden_dim))
        if seq_len == 0:
            return x.new_zeros((batch_size, 0, self.hidden_dim))

        # Collect per-step states and stack once instead of re-concatenating
        # the growing output at every step.
        step_states = []
        for step in range(seq_len):
            x_t = x[:, step, :]
            f_t = self.forget_gate(x_t, h_t)
            i_t = self.input_gate(x_t, h_t)
            candidate_c = self.candidate_layer(x_t, h_t)

            c_t = (f_t * c_t) + (i_t * candidate_c)
            # The output gate reads the previous hidden state; h_t is only
            # replaced on the following line.
            o_t = self.output_gate(x_t, h_t)
            h_t = o_t * self.tanh(c_t)
            step_states.append(h_t)

        return torch.stack(step_states, dim=1)


# Smoke test: feed a random tensor of shape (4, 2, 3) through an LSTM layer
# built with embedded_dim=3 and hidden_dim=6, then display the output shape.
x = torch.randn((4, 2, 3))
lstm = LSTMLayer(3, 6)
# NOTE(review): `logit` holds the per-step hidden states returned by the layer.
logit = lstm(x)
logit.shape

torch.Size([4, 2, 6])

In [34]:
class GRULayer(nn.Module):
    """GRU layer unrolled over the step dimension of its input.

    Per step, with ``h`` starting at zero:
    ``r = reset_gate(x_t, h)``, ``z = update_gate(x_t, h)``,
    ``h~ = tanh(recurrent + x_t @ input_w + bias)`` and
    ``h = z * h + (1 - z) * h~``.

    Args:
        embedded_dim: Size of the last dimension of each input step.
        hidden_dim: Size of the hidden state.
        reset_first: Selects how the reset gate enters the candidate state.
            ``False`` (default) multiplies after the recurrent projection,
            ``recurrent = r * (h @ hidden_w)``. ``True`` multiplies before it,
            ``recurrent = (r * h) @ hidden_w``.
    """

    def __init__(self, embedded_dim: int, hidden_dim: int, reset_first: bool = False):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.reset_first = reset_first

        self.reset_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())
        self.update_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())

        # Parameters of the candidate-state projection, all initialised to ones.
        self.hidden_w = nn.Parameter(torch.ones((hidden_dim, hidden_dim)))
        self.input_w = nn.Parameter(torch.ones((embedded_dim, hidden_dim)))
        self.bias = nn.Parameter(torch.ones(hidden_dim))
        self.tanh = nn.Tanh()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the GRU recurrence over every step of ``x``.

        Args:
            x: Tensor of shape ``(batch, steps, embedded_dim)``.

        Returns:
            Tensor of shape ``(batch, steps, hidden_dim)`` with the hidden
            state after each step, on the same device and with the same dtype
            as ``x``.

        Raises:
            ValueError: If ``x`` is not 3-dimensional.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, steps, features), got {tuple(x.shape)}"
            )
        batch_size, seq_len, _ = x.shape

        # new_zeros keeps the exact device (including its index) and the dtype
        # of the input rather than falling back to float32 on "<type>:0".
        h_t = x.new_zeros((batch_size, self.hidden_dim))
        if seq_len == 0:
            return x.new_zeros((batch_size, 0, self.hidden_dim))

        # Collect per-step states and stack once instead of re-concatenating
        # the growing output at every step.
        step_states = []
        for step in range(seq_len):
            x_t = x[:, step, :]
            r_t = self.reset_gate(x_t, h_t)
            z_t = self.update_gate(x_t, h_t)

            # The flag was previously stored but never consulted; the default
            # branch reproduces the computation the layer always performed.
            if self.reset_first:
                recurrent_term = torch.matmul(r_t * h_t, self.hidden_w)
            else:
                recurrent_term = r_t * torch.matmul(h_t, self.hidden_w)
            candidate_h = self.tanh(
                recurrent_term + torch.matmul(x_t, self.input_w) + self.bias
            )

            h_t = (z_t * h_t) + ((1 - z_t) * candidate_h)
            step_states.append(h_t)

        return torch.stack(step_states, dim=1)

# Smoke test: feed a random tensor of shape (4, 2, 3) through a GRU layer
# built with embedded_dim=3 and hidden_dim=6 (reset_first left at its
# default), then display the output shape.
x = torch.randn((4, 2, 3))
gru = GRULayer(3, 6)
# NOTE(review): `logit` holds the per-step hidden states returned by the layer.
logit = gru(x)
logit.shape




torch.Size([4, 2, 6])

In [7]:
from torch.utils.data import Dataset
from pathlib import Path
from torchtext.data.utils import get_tokenizer
from src.utils.io import get_text, load_MNIST
import torch

class IMDBReviewDataset(Dataset):
    """Tokenised, integer-encoded text reviews read from ``<reviews_dir>/<label>/*.txt``.

    The name of each file's parent directory is used as its label.

    Attributes:
        reviews: Sorted list of review file paths.
        tokenised_reviews: Token list for every review, aligned with ``reviews``.
        words: Set of all distinct tokens.
        encode / decode: Token -> id and id -> token mappings; ids follow the
            sorted vocabulary so they are reproducible between runs.
        max_length: Length of the longest tokenised review (0 when empty).
        encoded_reviews: Id list for every review, aligned with ``reviews``.
        labels: Label string for every review, aligned with ``reviews``.
        classes: Sorted list of distinct labels.
        class_to_idx: Label -> position in ``classes``.

    Args:
        reviews_dir: Directory whose immediate sub-directories hold ``.txt`` reviews.
    """

    def __init__(self, reviews_dir: str):
        super().__init__()

        reviews_path: Path = Path(reviews_dir)
        # Sorted so the sample order does not depend on directory listing order.
        self.reviews = sorted(reviews_path.glob("*/*.txt"))
        tokeniser = get_tokenizer("basic_english")
        self.tokenised_reviews = [
            tokeniser(get_text(str(review_file))) for review_file in self.reviews
        ]
        self.words = {word for review in self.tokenised_reviews for word in review}

        # Iteration order of a set of strings can change between interpreter
        # runs, so ids are assigned from the sorted vocabulary instead.
        vocabulary = sorted(self.words)
        self.encode = {word: idx for idx, word in enumerate(vocabulary)}
        self.decode = dict(enumerate(vocabulary))

        # default=0 keeps an empty directory from raising in max().
        self.max_length = max(
            (len(tokenised_review) for tokenised_review in self.tokenised_reviews),
            default=0,
        )
        self.encoded_reviews = [
            [self.encode[word] for word in tokenised_review]
            for tokenised_review in self.tokenised_reviews
        ]
        self.labels = [review.parent.name for review in self.reviews]
        # A set has no .index() and no stable order; keep an ordered list plus
        # a lookup table so label ids are well defined.
        self.classes = sorted(set(self.labels))
        self.class_to_idx = {label: idx for idx, label in enumerate(self.classes)}

    def __len__(self) -> int:
        """Return the number of reviews."""
        return len(self.reviews)

    def __getitem__(self, idx: int) -> tuple:
        """Return ``(token_ids, class_index)`` for the review at ``idx``.

        ``token_ids`` is a list of ints of the review's own length (no padding)
        and ``class_index`` is the position of its label in ``classes``.
        """
        return self.encoded_reviews[idx], self.class_to_idx[self.labels[idx]]

# NOTE(review): absolute, machine-specific path; the cell only runs where this
# directory exists. Consider a configurable, relative data directory.
root_dir ="/Users/Eric/PycharmProjects/RecurrentNetworks/resources/test"
dataset = IMDBReviewDataset(root_dir)
# NOTE(review): displaying the whole vocabulary set prints every distinct
# token; a length or a small sample would keep the output readable.
dataset.words

{'são',
 'piranahs',
 'estevez',
 'not-so-hairy',
 'non-dutch',
 'keller',
 'philanders',
 'shut-down',
 'very-well-made',
 'travesty-',
 'pseudo-orgasm',
 'bushism',
 'disagreements',
 'specializing',
 'religiosity',
 'semi-vampires',
 'culturally',
 'katarzyna',
 'dialoges',
 'terje',
 'counter-balancing',
 'wide-screen',
 'messy-looking',
 'jowled',
 'adagietto',
 'architects',
 'corkscrew',
 'populer',
 'bankruptcy',
 '1891-1953',
 'orangeish-yellow',
 'lovemaking',
 'kristopherson',
 'bathes',
 'mang',
 'nowhere<',
 'ferrero',
 'stallion',
 'stepping-stone',
 'jesus/mohammad/buddha',
 'weaves',
 'yakin',
 'jungwon',
 'assiduously',
 'feoutus',
 'kahut',
 'chou',
 'chally',
 'comedy/musical',
 'lest',
 'author/composer',
 'jaded',
 'petrification',
 'guernsey',
 'deserve',
 's***',
 'sinuoeh',
 'skilled',
 'discounting',
 'kameej',
 'donaggios',
 'cousin/wife',
 'prequels',
 'malina',
 'challengers',
 '1870',
 'disillusionment',
 'isn´t',
 'rationing',
 'disregardful',
 'romanesque

In [17]:
import torch
import idx2numpy
from torch.utils.data import Dataset
import numpy as np


def load_MNIST(root_path: str) -> tuple:
    """Load the four IDX files of an MNIST-style dataset from ``root_path``.

    Args:
        root_path: Directory containing ``train-images-idx3-ubyte``,
            ``train-labels-idx1-ubyte``, ``test-images-idx3-ubyte`` and
            ``test-labels-idx1-ubyte``.

    Returns:
        ``(train_images, train_labels, test_images, test_labels)`` as returned
        by ``idx2numpy.convert_from_file`` for each file.

    Raises:
        IOError: If any file cannot be read. The message names the failing
            path and the original error is chained as ``__cause__``.
    """
    file_names = (
        "train-images-idx3-ubyte",
        "train-labels-idx1-ubyte",
        "test-images-idx3-ubyte",
        "test-labels-idx1-ubyte",
    )

    arrays = []
    for file_name in file_names:
        file_path = f"{root_path}/{file_name}"
        try:
            arrays.append(idx2numpy.convert_from_file(file_path))
        except IOError as exc:
            # Say which file failed and keep the underlying reason attached.
            raise IOError(f"Failed to load MNIST file '{file_path}': {exc}") from exc

    return tuple(arrays)

class MNISTDataset(Dataset):
    """In-memory image dataset with pixel values scaled to [0, 1] and one-hot targets.

    Attributes:
        images: float32 tensor; ``(N, H*W)`` when ``one_dim`` is true, otherwise
            the input with a channel axis inserted at position 1.
        labels: int64 tensor with the raw label of every sample.
        classes: Sorted tensor of the distinct label values.
        one_hot_labels: float32 tensor ``(N, len(classes))``; column ``j``
            corresponds to ``classes[j]``.

    Args:
        images: Array of images with the sample axis first and 0-255 pixel values.
        labels: Array with one integer label per image.
        one_dim: Flatten every image to a vector (default) instead of adding a
            channel axis.
    """

    def __init__(self, images: np.ndarray, labels: np.ndarray, one_dim: bool = True):
        super().__init__()

        pixels = torch.tensor(images, dtype=torch.float32) / 255.0
        # flatten(start_dim=1) gives the same result as view(-1, 28 * 28) for
        # 28x28 inputs but also accepts other image sizes.
        self.images = pixels.flatten(start_dim=1) if one_dim else pixels.unsqueeze(1)

        self.labels = torch.tensor(labels, dtype=torch.long)
        # return_inverse maps each label to its position in `classes`, so the
        # one-hot column is right even when label values are not exactly
        # 0..K-1 (indexing by the raw label would then be out of range).
        self.classes, class_positions = torch.unique(self.labels, return_inverse=True)

        self.one_hot_labels = torch.zeros(len(self.labels), len(self.classes))
        self.one_hot_labels[torch.arange(len(self.labels)), class_positions] = 1

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx: int) -> tuple:
        """Return ``(image, one_hot_label)`` for the sample at ``idx``."""
        return self.images[idx], self.one_hot_labels[idx]

# NOTE(review): absolute, machine-specific path; the cell only runs where this
# directory exists. Consider a configurable, relative data directory.
root_dir ="/Users/Eric/PycharmProjects/RecurrentNetworks/resources/Fashion_MNIST"
train_images, train_labels, test_images, test_labels = load_MNIST(root_dir)
# Only the test split is wrapped here; `dataset` rebinds the name used by the
# earlier IMDB cell.
dataset = MNISTDataset(test_images, test_labels)
dataset.classes

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [18]:
# Display the shapes of the stored images and of their one-hot targets.
dataset.images.shape, dataset.one_hot_labels.shape

(torch.Size([10000, 784]), torch.Size([10000, 10]))

In [23]:
# Print the class name as a plain string.
print(MNISTDataset.__name__)

MNISTDataset
