In [77]:
import string

import numpy as np
import torch
import torch.nn as nn

In [94]:
class LongShortTermMemoryModel(nn.Module):
    """Single-layer LSTM followed by a dense layer mapping the state back to the encoding.

    The model is stateful: the hidden and cell state produced by one call to
    ``logits`` (and therefore ``f`` and ``loss``) are stored on the instance and
    used as the initial state of the next call. Call ``reset`` before feeding an
    unrelated sequence.

    Args:
        encoding_size: Size of each input vector and of each output row.
        state_size: Size of the LSTM hidden/cell state. Defaults to 128.
    """

    def __init__(self, encoding_size, state_size=128):
        super().__init__()

        self.state_size = state_size
        self.lstm = nn.LSTM(encoding_size, state_size)
        self.dense = nn.Linear(state_size, encoding_size)

        # Start from a zero state so that logits() is usable even before the first reset().
        self.reset()

    def reset(self):  # Reset states prior to new input sequence
        """Set the hidden and cell state to zeros."""
        weight = self.dense.weight
        # Shape: (number of layers, batch size, state size).
        # Created with the dtype/device of the model's weights so the state stays
        # compatible after the module has been moved or cast.
        self.hidden_state = torch.zeros(1, 1, self.state_size, dtype=weight.dtype, device=weight.device)
        # Separate tensor: the two states must not alias each other.
        self.cell_state = torch.zeros_like(self.hidden_state)

    def logits(self, x):  # x shape: (sequence length, batch size, encoding size)
        """Run ``x`` through the LSTM and dense layer, updating the stored state.

        Returns:
            Tensor with one row of ``encoding_size`` logits per LSTM output step
            (the sequence and batch dimensions are flattened together).
        """
        out, (self.hidden_state, self.cell_state) = self.lstm(x, (self.hidden_state, self.cell_state))
        return self.dense(out.reshape(-1, self.state_size))

    def f(self, x):  # x shape: (sequence length, batch size, encoding size)
        """Return the softmax over the logits, i.e. one probability row per step."""
        return torch.softmax(self.logits(x), dim=1)

    def loss(self, x, y):  # x shape: (sequence length, batch size, encoding size), y shape: (sequence length, encoding size)
        """Cross-entropy between the logits for ``x`` and the class indices ``y.argmax(1)``."""
        return nn.functional.cross_entropy(self.logits(x), y.argmax(1))

In [181]:
# Character vocabulary and one-hot encodings used to build the training data
word_length = 4  # every word is padded with spaces to this many characters

# Lowercase ASCII letters, the letters æ/ø/å, and a space
index_to_char = [*string.ascii_lowercase, 'æ', 'ø', 'å', ' ']

# Row k of the identity matrix is the one-hot encoding of index_to_char[k]
char_encodings = np.eye(len(index_to_char))
encoding_size = len(char_encodings)

# Reverse lookup: character -> its index in index_to_char / row in char_encodings
letter_dict = {char: index for index, char in enumerate(index_to_char)}

def letter(x: str):
    """Return the one-hot row of ``char_encodings`` for the character ``x``."""
    return char_encodings[letter_dict[x]]

def x_create_word(word: str):
    """Encode ``word`` as model input.

    The word is left-aligned and space-padded to ``word_length`` characters; each
    character becomes a one-element list holding its one-hot encoding, so the
    result has the layout (sequence length, batch size of 1, encoding size).
    """
    padded = word.ljust(word_length)
    return [[letter(char)] for char in padded]

def y_create_word(word: str):
    """Encode the target sequence for ``word``.

    The first character is dropped, so position k of the result holds the
    character that follows position k of the input; the remainder is then
    space-padded to ``word_length`` characters and one-hot encoded, giving the
    layout (sequence length, encoding size).
    """
    shifted = word[1:].ljust(word_length)
    return [letter(char) for char in shifted]


# Training data built from the single word "hat": the inputs are the padded word,
# the targets are the same word shifted one character to the left.
x_train = torch.tensor([x_create_word("hat")], dtype=torch.float)  # (words, sequence length, batch size, encoding size)
y_train = torch.tensor([y_create_word("hat")], dtype=torch.float)  # (words, sequence length, encoding size)

model = LongShortTermMemoryModel(encoding_size)
print(x_train.shape, y_train.shape, sep="\n")

torch.Size([1, 4, 1, 30])
torch.Size([1, 4, 30])


In [178]:
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
for epoch in range(500):
    # One optimisation step per training word, starting each word from a zero state
    for x_word, y_word in zip(x_train, y_train):
        model.reset()
        word_loss = model.loss(x_word, y_word)
        word_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

In [180]:
def get_emoji(emo: str):
    """Print the model's character-by-character completion of the prompt ``emo``.

    The printed text starts with the first prompt character, followed by the
    model's prediction after each prompt character has been fed in. The model's
    own predictions are then fed back in until the text is ``word_length``
    characters long (no extra steps are taken for longer prompts).

    Args:
        emo: Prompt string; every character must be a key of ``letter_dict``.

    Raises:
        IndexError: If ``emo`` is empty.
        KeyError: If ``emo`` contains a character that is not in ``letter_dict``.
    """

    def predict_next(char_index):
        # Feed one character, shaped (sequence length 1, batch size 1, encoding size),
        # and return the index of the most probable next character as a plain int.
        x = torch.tensor(char_encodings[char_index], dtype=torch.float).reshape(1, 1, -1)
        return int(model.f(x).argmax(1))

    model.reset()
    text = emo[0]
    with torch.no_grad():  # inference only: no autograd graph needed
        predicted = None
        for char in emo:
            # Look the prompt character up by value; indexing char_encodings with the
            # loop position would feed 'a', 'b', ... regardless of the actual prompt.
            predicted = predict_next(letter_dict[char])
            text += index_to_char[predicted]

        for _ in range(word_length - len(emo) - 1):
            predicted = predict_next(predicted)
            text += index_to_char[predicted]
    print(text)

# Prompt the trained model with "ha"; the function prints the first prompt character
# followed by the model's predictions.
get_emoji("ha")

hat 
