## Imports

In [None]:
import torch
import torch.utils.data as data
import numpy as np
from torch import nn, optim

In [None]:
# Remove the results directory left over from a previous run.
# NOTE(review): `rmdir` only deletes *empty* directories, and `-p` also tries
# to remove each parent directory in the path — confirm this is what is wanted
# here rather than a recursive delete of the directory's contents.
!rmdir -p /kaggle/working/results

In [None]:
# Create the output directory; `-p` creates any missing parents and does not
# fail if the directory already exists.
!mkdir -p /kaggle/working/results

## Dataset

In [None]:
textPath = 'wonderland.txt'

# Read the whole corpus up front. The context manager guarantees the file
# handle is closed even if decoding fails part-way through.
with open(textPath, 'r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

# Fold case so upper- and lower-case letters share one vocabulary entry.
text = text.lower()

# Sorted so the id assigned to each character is deterministic across runs.
vocab = sorted(set(text))
char_to_int = {c: i for i, c in enumerate(vocab)}

n_chars = len(text)
n_vocab = len(vocab)
print('n_chars:', n_chars, 'n_vocab:', n_vocab)

In [None]:
seq_length = 100

# Map the corpus to integer ids once. Encoding inside the window loop would
# repeat seq_length dictionary lookups for every single training pattern.
encoded_text = [char_to_int[char] for char in text]

# Pattern i is the window text[i:i + seq_length]; its target is the single
# character that immediately follows that window. The range is empty when the
# corpus is not longer than seq_length.
dataX = [encoded_text[i:i + seq_length] for i in range(n_chars - seq_length)]
dataY = encoded_text[seq_length:]

n_patterns = len(dataX)
print(n_patterns)

In [None]:
# Pack the integer-encoded windows and their next-character targets into
# int64 tensors, then report both shapes (one per line).
X = torch.tensor(dataX, dtype=torch.long)
y = torch.tensor(dataY, dtype=torch.long)

print(X.shape, y.shape, sep="\n")

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

## Model

In [None]:
class CharModel(nn.Module):
    """Character-level next-character model: embedding -> LSTM -> dropout -> linear.

    Given a sequence of character ids, the model returns unnormalised scores
    (logits) over the vocabulary for the character that follows the sequence.

    Args:
        embed_size: Width of each character embedding vector.
        hidden_size: Number of features in the LSTM hidden state.
        n_vocab: Number of distinct characters; sizes both the embedding
            table and the output layer.
        num_layers: Number of stacked LSTM layers.
        Batch_first: If True, inputs are laid out ``(batch, seq)``; otherwise
            ``(seq, batch)``. Forwarded to ``nn.LSTM(batch_first=...)``.
    """

    def __init__(self, embed_size, hidden_size, n_vocab, num_layers=1, Batch_first=True):
        super().__init__()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.n_vocab = n_vocab
        self.num_layers = num_layers
        self.batch_first = Batch_first

        # nn.LSTM applies dropout only *between* stacked layers and warns when
        # a non-zero value is passed for a single-layer network, so enable it
        # only when there is more than one layer.
        lstm_dropout = 0.2 if self.num_layers > 1 else 0.0

        self.embed = nn.Embedding(self.n_vocab, self.embed_size)
        self.lstm = nn.LSTM(
            input_size=self.embed_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=self.batch_first,
            dropout=lstm_dropout,
        )
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(self.hidden_size, self.n_vocab)

    def forward(self, x, h_0, c_0):
        """Return next-character logits for each sequence in ``x``.

        Args:
            x: Integer character ids, ``(batch, seq)`` when the model was
                built with ``Batch_first=True``, else ``(seq, batch)``.
            h_0: Initial hidden state, ``(num_layers, batch, hidden_size)``.
            c_0: Initial cell state, ``(num_layers, batch, hidden_size)``.

        Returns:
            Tensor of shape ``(batch, n_vocab)`` holding unnormalised scores.
        """
        out = self.embed(x)
        out, _ = self.lstm(out, (h_0, c_0))
        # Keep only the final time step; which axis is "time" depends on the
        # layout the LSTM was configured with.
        out = out[:, -1, :] if self.batch_first else out[-1]
        return self.linear(self.dropout(out))

    def init_hidden(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for ``batch_size`` sequences.

        The tensors are allocated with the dtype and device of the model's
        own parameters, so they can be passed to ``forward`` directly.
        """
        reference = self.linear.weight
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = reference.new_zeros(state_shape)
        cell = reference.new_zeros(state_shape)
        return hidden, cell

## Train

In [None]:
import copy

n_epochs = 10
batch_size = 1
embed_size = 10
hidden_size = 256
num_layers = 1

model = CharModel(embed_size=embed_size, hidden_size=hidden_size, n_vocab=n_vocab, num_layers=num_layers).to(device)

optimizer = optim.Adam(model.parameters())
# Summed (not averaged) so per-batch losses add up to an epoch total.
loss_fn = nn.CrossEntropyLoss(reduction="sum")
loader = data.DataLoader(data.TensorDataset(X, y), shuffle=True, batch_size=batch_size)

best_model = None
best_loss = np.inf

for epoch in range(n_epochs):
    model.train()

    for X_batch, y_batch in loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        # Size the initial state from the batch itself: the last batch of an
        # epoch is smaller than batch_size whenever the dataset does not
        # divide evenly.
        h_0, c_0 = model.init_hidden(batch_size=X_batch.size(0))
        h_0 = h_0.to(device)
        c_0 = c_0.to(device)
        y_pred = model(X_batch, h_0, c_0)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    # NOTE: this scores the model on the training loader; there is no
    # held-out split in this notebook.
    model.eval()
    epoch_loss = torch.zeros((), device=device)

    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            # Fresh zero state per batch rather than whatever the last
            # training step left in h_0 / c_0.
            h_0, c_0 = model.init_hidden(batch_size=X_batch.size(0))
            h_0 = h_0.to(device)
            c_0 = c_0.to(device)
            y_pred = model(X_batch, h_0, c_0)
            epoch_loss += loss_fn(y_pred, y_batch)

    # Convert once to a plain float so best_loss stays a Python number.
    loss = epoch_loss.item()

    if loss < best_loss:
        best_loss = loss
        # state_dict() returns references to the live parameters; without a
        # deep copy the "best" snapshot would keep changing as training
        # continues and the saved weights would always be the last epoch's.
        best_model = copy.deepcopy(model.state_dict())
    print("Epoch %d: Cross-entropy: %.4f" % (epoch, loss))

torch.save([best_model, char_to_int], "single-char.pth")

## Generate

In [None]:
# map_location remaps the stored tensors onto the device selected for this
# session, so a checkpoint written on a GPU can still be loaded on a CPU-only
# machine.
best_model, char_to_int = torch.load("single-char.pth", map_location=device)
n_vocab = len(char_to_int)
# Inverse lookup used to turn predicted ids back into characters.
int_to_char = {i: c for c, i in char_to_int.items()}

# Must match the hyperparameters the checkpoint was trained with.
embed_size = 10
hidden_size = 256
num_layers = 1
 
# reload the model
class CharModel(nn.Module):
    """Character-level next-character model: embedding -> LSTM -> dropout -> linear.

    Given a sequence of character ids, the model returns unnormalised scores
    (logits) over the vocabulary for the character that follows the sequence.

    Args:
        embed_size: Width of each character embedding vector.
        hidden_size: Number of features in the LSTM hidden state.
        n_vocab: Number of distinct characters; sizes both the embedding
            table and the output layer.
        num_layers: Number of stacked LSTM layers.
        Batch_first: If True, inputs are laid out ``(batch, seq)``; otherwise
            ``(seq, batch)``. Forwarded to ``nn.LSTM(batch_first=...)``.
    """

    def __init__(self, embed_size, hidden_size, n_vocab, num_layers=1, Batch_first=True):
        super().__init__()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.n_vocab = n_vocab
        self.num_layers = num_layers
        self.batch_first = Batch_first

        # nn.LSTM applies dropout only *between* stacked layers and warns when
        # a non-zero value is passed for a single-layer network, so enable it
        # only when there is more than one layer.
        lstm_dropout = 0.2 if self.num_layers > 1 else 0.0

        self.embed = nn.Embedding(self.n_vocab, self.embed_size)
        self.lstm = nn.LSTM(
            input_size=self.embed_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=self.batch_first,
            dropout=lstm_dropout,
        )
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(self.hidden_size, self.n_vocab)

    def forward(self, x, h_0, c_0):
        """Return next-character logits for each sequence in ``x``.

        Args:
            x: Integer character ids, ``(batch, seq)`` when the model was
                built with ``Batch_first=True``, else ``(seq, batch)``.
            h_0: Initial hidden state, ``(num_layers, batch, hidden_size)``.
            c_0: Initial cell state, ``(num_layers, batch, hidden_size)``.

        Returns:
            Tensor of shape ``(batch, n_vocab)`` holding unnormalised scores.
        """
        out = self.embed(x)
        out, _ = self.lstm(out, (h_0, c_0))
        # Keep only the final time step; which axis is "time" depends on the
        # layout the LSTM was configured with.
        out = out[:, -1, :] if self.batch_first else out[-1]
        return self.linear(self.dropout(out))

    def init_hidden(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for ``batch_size`` sequences.

        The tensors are allocated with the dtype and device of the model's
        own parameters, so they can be passed to ``forward`` directly.
        """
        reference = self.linear.weight
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        hidden = reference.new_zeros(state_shape)
        cell = reference.new_zeros(state_shape)
        return hidden, cell
    
# Rebuild the network with the checkpoint's hyperparameters and restore the
# saved weights.
model = CharModel(embed_size, hidden_size, n_vocab, num_layers=num_layers, Batch_first=True).to(device)
model.load_state_dict(best_model)

# randomly generate a prompt
filename = "wonderland.txt"
seq_length = 100
# The context manager closes the corpus file as soon as it has been read.
with open(filename, 'r', encoding='utf-8') as corpus_file:
    raw_text = corpus_file.read()
raw_text = raw_text.lower()
start = np.random.randint(0, len(raw_text)-seq_length)
prompt = raw_text[start:start+seq_length]
pattern = [char_to_int[c] for c in prompt]

model.eval()
print('Prompt: "%s"' % prompt)

with torch.no_grad():
    # Generation feeds exactly one sequence at a time, so the initial state
    # is sized for a batch of 1 regardless of the training batch size. It is
    # all zeros and never modified, so it is built once outside the loop.
    h_0, c_0 = model.init_hidden(batch_size=1)
    h_0 = h_0.to(device)
    c_0 = c_0.to(device)

    for i in range(500):
        # format the current window of ids as a (1, seq_length) int64 tensor
        x = torch.tensor([pattern], dtype=torch.long, device=device)
        # generate logits as output from the model
        prediction = model(x, h_0, c_0)
        # convert logits into one character (greedy: highest-scoring id)
        index = int(prediction.argmax())
        result = int_to_char[index]
        print(result, end="")
        # slide the window: append the new character and drop the oldest one
        pattern.append(index)
        pattern = pattern[1:]