In [9]:
import string

import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import random

In [170]:
class LongShortTermMemoryModel(nn.Module):
    """Single-layer LSTM followed by a dense layer that maps each state to logits.

    The recurrent state is stored on the instance (``hidden_state`` /
    ``cell_state``) and carried from one ``logits`` call to the next, so
    ``reset`` must be called before feeding a new, unrelated sequence.

    Args:
        encoding_size: Size of each input vector and of each output logit vector.
        state_size: Size of the LSTM hidden/cell state. Defaults to ``128 * 4``,
            the value that was previously hard-coded.
    """

    i = 4  # Not referenced inside this class; kept so external readers of the attribute still work.

    def __init__(self, encoding_size, state_size=128 * 4):
        super().__init__()

        self.state_size = state_size
        self.lstm = nn.LSTM(encoding_size, state_size)
        self.dense = nn.Linear(state_size, encoding_size)

    def reset(self, batch_size=1):
        """Zero the recurrent state prior to a new input sequence.

        Args:
            batch_size: Batch dimension of the sequences that will be fed next.
        """
        # Shape: (number of layers, batch size, state size)
        shape = (1, batch_size, self.state_size)
        reference = self.dense.weight  # match the parameters' dtype and device
        # Two separate tensors, so the hidden and cell state never share storage.
        self.hidden_state = torch.zeros(shape, dtype=reference.dtype, device=reference.device)
        self.cell_state = torch.zeros(shape, dtype=reference.dtype, device=reference.device)

    def logits(self, x):
        """Run ``x`` through the LSTM and return one row of logits per time step.

        Args:
            x: Tensor of shape (sequence length, batch size, encoding size).

        Returns:
            Tensor of shape (sequence length * batch size, encoding size).
            The stored hidden/cell state is updated as a side effect.
        """
        out, (self.hidden_state, self.cell_state) = self.lstm(x, (self.hidden_state, self.cell_state))
        return self.dense(out.reshape(-1, self.state_size))

    def f(self, x):
        """Return softmax probabilities over the encoding for each row of ``logits(x)``."""
        return torch.softmax(self.logits(x), dim=1)

    def loss(self, x, y):
        """Cross-entropy between ``logits(x)`` and the class indices of one-hot ``y``.

        Args:
            x: Tensor of shape (sequence length, batch size, encoding size).
            y: One-hot tensor of shape (sequence length, encoding size).
        """
        return nn.functional.cross_entropy(self.logits(x), y.argmax(1))

In [171]:
# Read the whitespace-separated word list; the context manager closes the file
# handle as soon as the contents have been read.
with open('words.csv') as words_file:
    words = words_file.read().split()
# Keep a random 1% of the words. No seed is set in this cell, so the sample
# (and everything trained on it) differs between runs.
random.shuffle(words)
words = words[: len(words) // 100]
print(words)

['record', 'admit', 'who', 'freeze', 'connect', 'structure', 'Korean', 'hostage', 'veteran', 'square', 'surround', 'offender', 'middle', 'twelve', 'minimize', 'illness', 'courage', 'influence', 'bowl', 'drive', 'factor', 'missile', 'student', 'origin', 'popular', 'sweet', 'weather', 'prayer', 'atmosphere', 'wrist', 'dear', 'night', 'therapy', 'racism', 'idea', 'entrance', 'dynamic', 'bee', 'unemployment', 'dancing', 'campaign', 'republic', 'my']


In [172]:
# Character vocabulary and one-hot encodings used to build the training data.
# NOTE(review): this cell was originally labelled "many to one", but
# y_create_word emits one target per character -- confirm which setup is meant.
word_length = 6  # every word is space-padded to this many characters

# a-z, then three Norwegian letters, then space, hyphen and apostrophe.
index_to_char = list(string.ascii_lowercase) + ['æ', 'ø', 'å', ' ', '-', '\'']

# Row k of the identity matrix is the one-hot vector for index_to_char[k].
char_encodings = np.eye(len(index_to_char))
encoding_size = len(char_encodings)

# Reverse lookup: character -> row index in char_encodings.
letter_dict = {char: position for position, char in enumerate(index_to_char)}

def letter(x: str):
    """Return the one-hot row of ``char_encodings`` for the character ``x``.

    Raises:
        KeyError: if ``x`` is not a key of ``letter_dict``.
    """
    return char_encodings[letter_dict[x]]

def x_create_word(word: str):
    """Encode ``word`` as model input: one ``[one_hot]`` entry per character.

    The word is right-padded with spaces up to ``word_length`` characters
    (longer words are left as they are), and the padded word is printed.

    Returns:
        A list with one single-element list per character; wrapping each
        encoding in a list supplies the batch dimension of size one.
    """
    padded = word + ' ' * (word_length - len(word))
    print(padded)
    return [[letter(character)] for character in padded]

def y_create_word(word: str):
    """Encode the training target for ``word``: one one-hot row per character.

    The target string is ``word`` with its first character replaced by a
    space, right-padded with spaces based on the original word's length.
    The target string is printed.

    NOTE(review): the target keeps every character at its input position
    rather than shifting by one -- confirm that this, and not a
    next-character target, is what the model should learn.
    """
    padding = ' ' * (word_length - len(word))
    target = ' ' + word[1:] + padding
    print(target)
    return [letter(character) for character in target]

def get_words():
    """Build encoded inputs and targets for every word shorter than ``word_length``.

    Words are lower-cased before encoding; the length filter is applied to the
    word as it appears in ``words``.

    Returns:
        ``(x_lst, y_lst)``: parallel lists of ``x_create_word`` and
        ``y_create_word`` encodings, in the order the words appear.
    """
    inputs, targets = [], []
    for raw_word in words:
        if len(raw_word) >= word_length:
            continue  # too long to fit the fixed-length encoding
        lowered = raw_word.lower()
        inputs.append(x_create_word(lowered))
        targets.append(y_create_word(lowered))
    return inputs, targets

x_data, y_data = get_words()
# Stack the nested lists of one-hot rows into a single ndarray before handing
# them to torch: building a tensor straight from lists of ndarrays goes through
# a slow element-wise path. The resulting values and dtype are unchanged.
x_train = torch.tensor(np.array(x_data), dtype=torch.float)  # (words, word_length, 1, encoding_size)
y_train = torch.tensor(np.array(y_data), dtype=torch.float)  # (words, word_length, encoding_size)

model = LongShortTermMemoryModel(encoding_size)
print(x_train.shape)
print(y_train.shape)

admit 
 dmit 
who   
 ho   
bowl  
 owl  
drive 
 rive 
sweet 
 weet 
wrist 
 rist 
dear  
 ear  
night 
 ight 
idea  
 dea  
bee   
 ee   
my    
 y    
torch.Size([11, 6, 1, 32])
torch.Size([11, 6, 32])


In [173]:
# Train with RMSprop, taking one optimizer step per training word.
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
for epoch in range(500):
    for word_index, word_input in enumerate(x_train):
        # Each word is an independent sequence, so start from a zero state.
        model.reset()
        word_loss = model.loss(word_input, y_train[word_index])
        word_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

In [188]:
def get_emoji(emo: str):
    """Extend the prefix ``emo`` to ``word_length`` characters using ``model``.

    The model state is reset, the prefix is fed one character at a time, and
    then, for each missing position, the most likely character from the
    previous step is fed back in and the character predicted by that new step
    is appended to the text. The prefix length and the final text are printed.

    NOTE(review): despite the name, this completes a word prefix; nothing in
    this function deals with emoji.

    Args:
        emo: Non-empty prefix made of characters known to ``letter``.

    Returns:
        ``emo`` followed by ``word_length - len(emo)`` generated characters
        (``emo`` unchanged when it is already at least ``word_length`` long).

    Raises:
        ValueError: if ``emo`` is empty, since there is then no model output
            to start generating from.
        KeyError: if ``emo`` contains a character unknown to ``letter``.
    """
    if not emo:
        raise ValueError("emo must contain at least one character")

    def step(encoding):
        # Shape (1, 1, encoding_size): a single time step with a batch of one.
        return model.f(torch.tensor(encoding, dtype=torch.float).reshape(1, 1, -1))

    model.reset()
    text = emo
    print(len(text))
    # Inference only: skip autograd bookkeeping for the carried LSTM state.
    with torch.no_grad():
        for character in emo:
            y = step(letter(character))

        for _ in range(word_length - len(emo)):
            y = step(char_encodings[int(y.argmax(1))])
            text += index_to_char[int(y.argmax(1))]
    print("Expected: " + text)
    return text


# Demo call: extend the prefix "dr" to word_length characters with the trained model.
get_emoji("dr")

2
i
v
e
 
Expected: drive 


'drive '