In [2]:
import string

import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import random

In [3]:
class LongShortTermMemoryModel(nn.Module):

    def __init__(self, encoding_size):
        super(LongShortTermMemoryModel, self).__init__()

        self.lstm = nn.LSTM(encoding_size, 128)  # 128 is the state size
        self.dense = nn.Linear(128, encoding_size)  # 128 is the state size

    def reset(self):  # Reset states prior to new input sequence
        zero_state = torch.zeros(1, 1, 128)  # Shape: (number of layers, batch size, state size)
        self.hidden_state = zero_state
        self.cell_state = zero_state

    def logits(self, x):  # x shape: (sequence length, batch size, encoding size)
        out, (self.hidden_state, self.cell_state) = self.lstm(x, (self.hidden_state, self.cell_state))
        return self.dense(out.reshape(-1, 128))

    def f(self, x):  # x shape: (sequence length, batch size, encoding size)
        return torch.softmax(self.logits(x), dim=1)

    def loss(self, x, y):  # x shape: (sequence length, batch size, encoding size), y shape: (sequence length, encoding size)
        return nn.functional.cross_entropy(self.logits(x), y.argmax(1))

In [4]:
words = open('words.csv').read().split()
random.shuffle(words)
words = words[: int(len(words)/100)]
print(words)

['technical', 'associated', 'pleasure', 'similar', 'boot', 'contractor', 'Dutch', 'recognize', 'in', 'meanwhile', 'inspection', 'function', 'undertake', 'logic', 'fog', 'Ms', 'widow', 'decide', 'hang', 'sensitivity', 'man', 'request', 'British', 'motivation', 'happy', 'across', 'stake', 'two-thirds', 'enhance', 'here', 'why', 'cocaine', 'frown', 'obtain', 'climb', 'advanced', 'evaluate', 'freedom', 'fragment', 're', 'speaker', 'float', 'suffer']


In [10]:
##Many to one
word_length = 4
index_to_char = list(string.ascii_lowercase)
index_to_char.append('æ')
index_to_char.append('ø')
index_to_char.append('å')
index_to_char.append(' ')
index_to_char.append('-')
index_to_char.append('\'')
char_encodings = np.eye(len(index_to_char))

encoding_size = len(char_encodings)

letter_dict = {index_to_char[i]: i for i in range(0, len(index_to_char))}

def letter(x: str):
    char = char_encodings[letter_dict[x]]
    return char

def x_create_word(word: str):
    word = f"{word:<{word_length}}"
    word = f"{word:>{word_length}}"
    return_word = []
    for i in range(len(word)):
        return_word.append([letter(word[i])])
    return return_word

def y_create_word(word: str):
    word = word[1:]
    word = f"{word:<{word_length}}"
    word = f"{word:>{word_length}}"
    return_word = []
    for i in range(len(word)):
        return_word.append(letter(word[i]))
    return return_word

def get_words():
    x_lst = []
    y_lst = []
    for i in range(len(words)):
        x_lst.append(x_create_word(words[i].lower()))
        y_lst.append(y_create_word(words[i].lower()))
    return x_lst, y_lst

x_data, y_data = get_words()

x_train = torch.tensor([
                       [[letter('h')], [letter('a')], [letter('t')],[letter(' ')]],
                       [[letter('r')], [letter('a')], [letter('t')],[letter(' ')]],
                       [[letter('c')], [letter('a')], [letter('t')],[letter(' ')]],
                       [[letter('b')], [letter('a')], [letter('n')],[letter('k')]],
                       [[letter('o')], [letter('l')], [letter('a')],[letter('v')]],
                       [[letter('c')], [letter('a')], [letter('p')],[letter(' ')]],
                       [[letter('k')], [letter('n')], [letter('u')],[letter('t')]],
                        ], dtype=torch.float)
y_train = torch.tensor([
                        [letter('a'), letter("t"), letter(' '), letter(" ")],
                        [letter('a'), letter("t"), letter(' '), letter(" ")],
                        [letter('a'), letter("t"), letter(' '), letter(" ")],
                        [letter('a'), letter("n"), letter('k'), letter(" ")],
                        [letter('l'), letter("a"), letter('v'), letter(" ")],
                        [letter('a'), letter("p"), letter(' '), letter(" ")],
                        [letter('n'), letter("u"), letter('t'), letter(" ")]
                       ], dtype=torch.float)


model = LongShortTermMemoryModel(encoding_size)
print(x_train.shape)
print(y_train.shape)

torch.Size([7, 4, 1, 32])
torch.Size([7, 4, 32])


In [11]:
optimizer = torch.optim.RMSprop(model.parameters(), 0.001)
for epoch in range(500):
    for i in range(x_train.size()[0]):#Loops for each letter to emoji
        model.reset()
        model.loss(x_train[i], y_train[i]).backward()
        optimizer.step()
        optimizer.zero_grad()

In [77]:
def get_emoji(emo: str):
    y = -1
    model.reset()
    #for i in range(len(emo)):
    # y = model.f(torch.tensor([[char_encodings[1]]], dtype=torch.float))
    # text += index_to_char[y.argmax(1)]
    text = emo[0]
    y = model.f(torch.tensor([[char_encodings[0]]], dtype=torch.float))
    text += index_to_char[y.argmax(1)]
    for i in range(word_length):
        y = model.f(torch.tensor([[char_encodings[y.argmax(1)]]], dtype=torch.float))
        text += index_to_char[y.argmax(1)]
    print("Expected: " +text)


get_emoji("olav")

Expected: oat   
