In [None]:
import torch
import torch.nn.functional as F
import numpy as np
from functions import *

In [None]:
# Load the training corpus and encode it for the model.
filename = 'input.txt'
#filename = 'wodehouse_right_ho_jeeves.txt'

# Only the first `max_chars` characters of the corpus are used for training.
max_chars = 10009

# 'utf-8-sig' decodes UTF-8 and transparently drops a leading byte-order mark.
# This replaces the earlier workaround of deleting the characters 'ï', '»' and
# '¿' from the whole text, which also removed legitimate occurrences of those
# characters. The context manager guarantees the file handle is closed.
with open(filename, 'r', encoding='utf-8-sig') as corpus_file:
    data_raw = corpus_file.read() # should be simple plain text file

data_raw = data_raw.lower()
data_raw = data_raw[0:max_chars]

# data_from_text / make_one_hot come from the project-local `functions` module.
data, vocab_size, idx_to_char, char_to_idx = data_from_text(data_raw)
data_one_hot = make_one_hot(data, vocab_size)

print( 'data_one_hot shape: ', data_one_hot.shape)

In [None]:
# --- Hyperparameters -------------------------------------------------------

# How much of the data is sampled before the parameters are updated.
seq_length = 25

# Size of the hidden state vector.
hidden_size = 50

learning_rate = 0.1
num_epochs = 50

# Toggles progress output during training.
verbose = True

In [None]:
class MyModel(torch.nn.Module):
    """Single-layer LSTM followed by a linear read-out over the vocabulary.

    The recurrent state is stored on the module (``h_prev`` / ``c_prev``) and
    carried from one ``forward`` call to the next, so consecutive calls
    continue the same sequence. Call ``reset_states`` to start a new one.

    Args:
        vocab_size: Size of each input vector and of each output score vector.
        hidden_size: Size of the LSTM hidden and cell state vectors.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

        # create recurrent and linear layers
        self.lstm = torch.nn.LSTM(input_size=self.vocab_size, hidden_size=self.hidden_size)
        self.linear = torch.nn.Linear(self.hidden_size, self.vocab_size)

        self.reset_states()

    def forward(self, inputs):
        """Run the LSTM over ``inputs`` and project every step to vocabulary scores.

        Args:
            inputs: Tensor of shape ``(seq_len, batch, vocab_size)`` (the LSTM is
                built with the default ``batch_first=False``); ``batch`` must
                match the batch size of the stored state.

        Returns:
            Tensor of shape ``(seq_len, batch, vocab_size)`` holding the raw
            (un-normalised) output of the linear layer.
        """
        h, (h_prev, c_prev) = self.lstm(inputs, (self.h_prev, self.c_prev))
        out = self.linear(h)
        # Keep the state values for the next call, but detach them so that
        # back-propagation stops at the boundary of the current call.
        self.h_prev = h_prev.detach()
        self.c_prev = c_prev.detach()
        return out

    def reset_states(self, batch_size=1):
        """Zero the stored hidden and cell state.

        The new state tensors take their dtype and device from the model's own
        weights, so the model keeps working after ``.double()`` or
        ``.to(device)`` as long as this method is called afterwards.

        Args:
            batch_size: Batch dimension of the state; defaults to 1.
        """
        reference = self.linear.weight
        state_shape = (1, batch_size, self.hidden_size)
        self.h_prev = torch.zeros(state_shape, dtype=reference.dtype, device=reference.device)
        self.c_prev = torch.zeros(state_shape, dtype=reference.dtype, device=reference.device)





In [None]:
# Build the network; the bare name on the last line displays the module
# summary as the cell output.
model = MyModel(vocab_size=vocab_size, hidden_size=hidden_size)
model

In [None]:
# Training setup
dtype = torch.float
data_length = len(data)

# Every input character needs the following character as its target, so only
# the first data_length - 1 characters can be inputs. Counting full sequences
# within that span keeps every sequence exactly seq_length long, including
# when data_length is an exact multiple of seq_length.
seqs_per_epoch = (data_length - 1) // seq_length
if seqs_per_epoch == 0:
    raise ValueError(
        'need at least seq_length + 1 = {} characters of data, got {}'.format(
            seq_length + 1, data_length))

# reduction='sum' adds up the loss of all steps in a sequence.
loss_fn = torch.nn.CrossEntropyLoss(reduction='sum')

# NOTE: the step size is hard-coded here; the `learning_rate` variable set in
# the hyperparameter cell is not used by this optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

# Insert an axis at position 1 so slices can be fed straight to the model.
data3D = np.expand_dims(data_one_hot, 1)
print( 'data3D shape: ', data3D.shape)

# Convert the whole corpus once instead of once per sequence.
inputs_all = torch.tensor(data3D, dtype=dtype)
targets_all = torch.tensor(data, dtype=torch.long)

for epoch in range(num_epochs):
    # initialize hidden state
    model.reset_states()
    epoch_loss = 0.0

    for i in range(seqs_per_epoch):
        start = i * seq_length
        end = start + seq_length

        # inputs: seq_length one-hot rows; targets: the index of the character
        # that follows each input character (one class index per step).
        inputs = inputs_all[start:end]
        targets = targets_all[start + 1:end + 1]

        out = model(inputs)

        # Flatten to (steps, vocab_size), the 2-D layout the loss is given.
        out2 = out.reshape(-1, vocab_size)
        loss = loss_fn(out2, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    if verbose:
        # Reported value is the mean summed-loss per sequence over this epoch.
        print('epoch: {}, loss: {}'.format(epoch + 1, epoch_loss / seqs_per_epoch))


In [None]:
# Start sampling from a fresh recurrent state.
model.reset_states()

seed_string = 'my name is '

# Encode the seed text as vocabulary indices, one per character.
seed_indices = [char_to_idx[ch] for ch in seed_string]
sample = generate_sample(model, 5, seed_indices)

# Decode the sampled indices back to text; shown as the cell output.
''.join(idx_to_char[idx] for idx in sample)