In [17]:
import re
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import string
# Load the corpus: one sentence per line of 'randgen.txt', lower-cased and
# with punctuation removed.
sentences = []
with open('randgen.txt') as f:
    for line in f:
        # Remove every character that is not a word character, whitespace or
        # a literal '+' ('+' has no special meaning inside a character class),
        # then lower-case the text and trim the trailing newline.
        cleaned = re.sub(r'[^\w\s\d+]','',line).lower().strip('\n')
        # A line with no words left (blank or punctuation only) cannot be
        # split into input words plus a target word, so it is skipped rather
        # than stored as an empty sentence.
        if not cleaned.split():
            continue
        sentences.append(cleaned)
print(sentences)


['well see you later', 'the lyrics are humorous', 'do you know persian', 'youre mistaken about that', 'its an old picture', 'fish are coldblooded animals', 'someone should try this', 'ken cried for help', 'i chopped some onions', 'please madam help yourself', 'i think that helps', 'are they all right', 'he didnt have nightmares', 'tom did that too', 'tom majored in economics', 'that castle is beautiful', 'i let her down', 'the internet has spoken', 'tom wasnt totally surprised', 'tom scored four goals', 'i called the police', 'his invention is brilliant', 'hes afraid of snakes', 'she has many dogs', 'tom talked about boston', 'you like an elephant', 'my sister is married', 'i finally got it', 'they all looked happy', 'be careful that hurts', 'hows the weather there', 'the explanation is clear', 'tom needs more money', 'i ended up winning', 'any improvement is good', 'the bride suddenly laughed', 'all beginnings are difficult', 'i expect your help', 'just answer one question', 'lets ass

In [None]:
# Basic RNN

def make_batch():
    """Build one-hot inputs and integer targets from the global corpus.

    Reads the module-level ``sentences``, ``word_dict`` and ``n_class``.
    Each sentence is split on whitespace; every word except the last forms
    the input sequence and the last word is the prediction target (a causal
    language-model setup).

    No padding is applied, so the per-sentence arrays only stack into a
    single tensor when all sentences have the same number of words.

    Returns:
        tuple: ``(input_batch, target_batch)``. ``input_batch`` is a list
        holding one ``[len(words) - 1, n_class]`` one-hot array per
        sentence; ``target_batch`` is a list holding the vocabulary index of
        each sentence's last word.

    Raises:
        KeyError: if a word is not present in ``word_dict``.
        IndexError: if a sentence contains no words.
    """
    input_batch = []
    target_batch = []

    # The identity matrix does not depend on the sentence, so build it once
    # instead of allocating an n_class x n_class array per sentence.
    one_hot = np.eye(n_class)

    for sen in sentences:
        words = sen.split()  # space tokenizer
        input_ids = [word_dict[w] for w in words[:-1]]  # words 1..n-1 are the input
        target = word_dict[words[-1]]  # word n is the target

        # Indexing with a list of row numbers copies the selected rows, so
        # each sentence gets its own array.
        input_batch.append(one_hot[input_ids])
        target_batch.append(target)

    return input_batch, target_batch

class TextRNN(nn.Module):
    """Single-layer RNN that scores the next word from a one-hot word sequence.

    The layer sizes are taken from the module-level ``n_class`` (vocabulary
    size) and ``n_hidden`` (hidden units) at construction time.
    """

    def __init__(self):
        super().__init__()
        # Recurrent layer consuming one-hot word vectors.
        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        # Output projection. The bias lives in a separate parameter that
        # starts at ones, so the Linear layer itself is created without one.
        self.W = nn.Linear(n_hidden, n_class, bias=False)
        self.b = nn.Parameter(torch.ones(n_class))

    def forward(self, hidden, X):
        """Return class scores computed from the last time step.

        Args:
            hidden: initial hidden state,
                [num_layers(=1) * num_directions(=1), batch_size, n_hidden].
            X: batch-first input, [batch_size, n_step, n_class].

        Returns:
            Tensor of shape [batch_size, n_class] with unnormalised scores.
        """
        # Swap the first two axes: [batch_size, n_step, n_class] -> [n_step, batch_size, n_class]
        steps_first = torch.transpose(X, 0, 1)
        # rnn_out : [n_step, batch_size, num_directions(=1) * n_hidden]
        rnn_out, _ = self.rnn(steps_first, hidden)
        # Only the final time step feeds the classifier: [batch_size, n_hidden]
        last_step = rnn_out[-1]
        return self.W(last_step) + self.b

if __name__ == '__main__':
    # Train TextRNN on the whole corpus as a single batch to predict the last
    # word of every sentence, then predict on the same sentences.

    # Hyper-parameters.
    n_hidden = 5  # number of hidden units in one cell
    n_epochs = 50000  # number of full-batch optimisation steps
    seed = 0  # fixes the weight initialisation so reruns are comparable

    # Vocabulary. Sorting gives every word the same index on every run; the
    # iteration order of a set of strings can differ between interpreter
    # sessions because of hash randomisation.
    word_list = sorted(set(" ".join(sentences).split()))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = {i: w for i, w in enumerate(word_list)}
    n_class = len(word_dict)
    batch_size = len(sentences)

    torch.manual_seed(seed)
    model = TextRNN()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    # Stack the per-sentence arrays into one ndarray before handing them to
    # torch; converting a Python list of ndarrays element by element is slow.
    input_batch = torch.as_tensor(np.array(input_batch), dtype=torch.float32)
    target_batch = torch.LongTensor(target_batch)
    # input_batch : [batch_size, n_step, n_class]
    n_step = input_batch.shape[1]  # number of cells (= number of steps), taken from the data

    # hidden : [num_layers * num_directions, batch, hidden_size]
    # The initial state is all zeros for every step, so it is created once.
    hidden = torch.zeros(1, batch_size, n_hidden)

    # Training
    for epoch in range(n_epochs):
        optimizer.zero_grad()

        output = model(hidden, input_batch)

        # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Predict on the training sentences; gradients are not needed here.
    with torch.no_grad():
        # predict : [batch_size, 1], index of the highest-scoring class
        predict = model(hidden, input_batch).max(1, keepdim=True)[1]

    # Input words of every sentence (the same slice make_batch feeds to the
    # model), paired below with the predicted last word.
    sen_list = [sen.split()[:-1] for sen in sentences]
    # squeeze(1) drops only the keepdim axis, so a batch of one sentence
    # still yields a one-element list.
    predicted_list = [number_dict[n] for n in predict.squeeze(1).tolist()]

Epoch: 1000 cost = 4.323978
Epoch: 2000 cost = 3.379802
Epoch: 3000 cost = 2.912945
Epoch: 4000 cost = 2.535165
Epoch: 5000 cost = 2.222329
Epoch: 6000 cost = 1.991024
Epoch: 7000 cost = 1.803438
Epoch: 8000 cost = 1.630365
Epoch: 9000 cost = 1.461205
Epoch: 10000 cost = 1.310239
Epoch: 11000 cost = 1.176653
Epoch: 12000 cost = 1.064675
Epoch: 13000 cost = 0.962594
Epoch: 14000 cost = 0.869872
Epoch: 15000 cost = 0.784743
Epoch: 16000 cost = 0.710703
Epoch: 17000 cost = 0.640856
Epoch: 18000 cost = 0.581011
Epoch: 19000 cost = 0.526312
Epoch: 20000 cost = 0.477545


In [21]:
# Rebuild every sentence from its input words plus the predicted last word
# and report the fraction of sentences reproduced exactly.
out = []
count = 0
for prefix, predicted, reference in zip(sen_list, predicted_list, sentences):
    # Join however many input words there are instead of indexing a fixed
    # three, so the cell works for any sentence length.
    strin = " ".join(list(prefix) + [predicted])
    out.append(strin)
    # Compare word by word so stray whitespace in the raw line cannot turn a
    # correct prediction into a miss.
    if strin.split() == reference.split():
        count += 1
# An empty corpus reports 0.0 instead of raising ZeroDivisionError.
accuracy = count / len(sen_list) if sen_list else 0.0
print('acc: ', accuracy)

acc:  0.9463235294117647
['well see you later', 'the lyrics are humorous', 'do you know persian', 'youre mistaken about that', 'its an old picture', 'fish are coldblooded animals', 'someone should try this', 'ken cried for help', 'i chopped some onions', 'please madam help yourself', 'i think that helps', 'are they all right', 'he didnt have nightmares', 'tom did that too', 'tom majored in history', 'that castle is beautiful', 'i let her down', 'the internet has spoken', 'tom wasnt totally surprised', 'tom scored four goals', 'i called the police', 'his invention is brilliant', 'hes afraid of snakes', 'she has many dogs', 'tom talked about boston', 'you like an elephant', 'my sister is married', 'i finally got it', 'they all looked happy', 'be careful that hurts', 'hows the weather there', 'the explanation is clear', 'tom needs more money', 'i ended up winning', 'any improvement is good', 'the bride suddenly laughed', 'all beginnings are difficult', 'i expect your help', 'just answer o