In [11]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import tqdm

import import_ipynb
from Vocabulary_Dictionary_and_Two_Convert_Function import *

In [12]:
class ShakespeareDataset(Dataset):
    """Text file exposed as equally sized chunks of character ids.

    The file at ``path`` is read in full, stripped of leading/trailing
    whitespace, and encoded with ``str2ints(text, vocab_dict)`` (both names
    come from the imported vocabulary notebook; the encoder is presumably
    returning a flat list of ints -- confirm there). The encoded sequence is
    split into consecutive pieces of ``chunk_size`` elements; a trailing piece
    shorter than ``chunk_size`` is discarded so every item has the same length.

    Args:
        path: Location of the text file to load.
        chunk_size: Number of character ids per item.
    """

    def __init__(self, path, chunk_size=200):
        # Read inside a context manager so the file handle is closed as soon
        # as the text is in memory instead of lingering until garbage
        # collection.
        with open(path) as f:
            text = f.read().strip()

        # Convert the text to a list of integer ids.
        data = str2ints(text, vocab_dict)

        # Turn the ids into an int64 tensor and cut it into chunks.
        data = torch.tensor(data, dtype=torch.int64).split(chunk_size)

        # Drop the last chunk when it is shorter than chunk_size.
        # The `data and` guard keeps the check safe if no chunk exists.
        if data and len(data[-1]) < chunk_size:
            data = data[:-1]

        self.data = data
        self.n_chunks = len(self.data)

    def __len__(self):
        """Return the number of full-size chunks."""
        return self.n_chunks

    def __getitem__(self, idx):
        """Return the chunk stored at position ``idx``."""
        return self.data[idx]

In [13]:
"""
    input.txt is the file
    'char-rnn/data/tinyshakespeare/input.txt'
    from the char-rnn project, whose author is
    karpathy.
    
    It can be downloaded from
    https://github.com/karpathy/char-rnn/tree/master/data/tinyshakespeare
"""
# Dataset of 200-id chunks built from the downloaded text file.
ds = ShakespeareDataset("../05/input.txt", chunk_size=200)

# Shuffled mini-batches of 32 chunks, loaded by 4 worker processes.
loader = DataLoader(
    ds,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)

In [14]:
class SequenceGenerationNet(nn.Module):
    """Embedding -> LSTM -> Linear network that scores the next symbol.

    Args:
        num_embeddings: Vocabulary size. Used both as the number of rows of
            the embedding table and as the width of the output layer.
        embedding_dim: Size of each embedding vector.
        hidden_size: Number of hidden units of the LSTM.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability handed to ``nn.LSTM``. It is only
            forwarded when ``num_layers > 1`` (see the note in ``__init__``).
    """

    def __init__(
        self, num_embeddings,
        embedding_dim=50,
        hidden_size=50,
        num_layers=1, dropout=0.2
    ):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings, embedding_dim)

        # nn.LSTM applies dropout between stacked layers only, so with a
        # single layer a non-zero value has no effect and merely triggers a
        # UserWarning (which the defaults of this class would do). Passing 0
        # in that case keeps the behaviour identical and silences the warning.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=lstm_dropout
        )

        # The output layer is as wide as the vocabulary (num_embeddings),
        # i.e. the same size as the input side of the embedding.
        self.linear = nn.Linear(hidden_size, num_embeddings)

    def forward(self, x, h0=None):
        """Run the network on a batch of index sequences.

        Args:
            x: Integer index tensor; because the LSTM is built with
                ``batch_first=True`` the layout is (batch, sequence).
            h0: Optional initial LSTM state forwarded unchanged to
                ``nn.LSTM``; ``None`` lets the LSTM start from zeros.

        Returns:
            A pair ``(scores, state)``: the linear layer's output for every
            position (last dimension ``num_embeddings``) and the state
            returned by the LSTM, which can be passed back in as ``h0``.
        """
        x = self.emb(x)
        x, h = self.lstm(x, h0)
        x = self.linear(x)
        return x, h

In [15]:
# Create function that generates sentences.
def generate_seq(net, start_phrase="The King said ",
                 length=200, temperature=0.8, device="cpu"):
    """Sample text from ``net``, primed with ``start_phrase``.

    The prompt is encoded with ``str2ints(start_phrase, vocab_dict)`` and fed
    to the network in one pass; one symbol is sampled from the scores of the
    last position. That symbol is then fed back, together with the recurrent
    state, ``length`` more times. The result therefore contains
    ``length + 1`` sampled symbols after the prompt.

    Args:
        net: Model called as ``net(x)`` / ``net(x, state)`` and returning
            ``(scores, state)``. It is switched to evaluation mode and left
            in that mode. Presumably it already lives on ``device`` -- only
            the inputs are moved there.
        start_phrase: Text used to prime the model.
        length: Number of feedback steps performed after the first sample.
        temperature: Positive divisor applied to the scores before sampling;
            values below 1 sharpen the distribution, values above 1 flatten it.
        device: Device the input tensors are placed on.

    Returns:
        ``start_phrase`` followed by ``ints2str(sampled_ids, all_chars)``.

    Raises:
        ValueError: If ``temperature`` is not positive.
    """
    if temperature <= 0:
        raise ValueError(
            "temperature must be positive, got {!r}".format(temperature)
        )

    # Set the model to evaluation mode.
    net.eval()

    def sample_next(scores):
        # softmax(scores / T) is the normalised, overflow-safe form of
        # exp(scores / T); multinomial draws the same distribution from it.
        probs = torch.softmax(scores.reshape(-1) / temperature, dim=0)
        return torch.multinomial(probs, 1)  # tensor holding a single index

    # Sampled indices, stored as plain Python ints.
    result = []

    # Convert the start phrase to a tensor of ids.
    start_tensor = torch.tensor(
        str2ints(start_phrase, vocab_dict),
        dtype=torch.int64
    ).to(device)

    # Add the batch dimension in front.
    x0 = start_tensor.unsqueeze(0)

    # Generation needs no gradients; without no_grad every step would extend
    # an autograd graph that is never used.
    with torch.no_grad():
        # Run the prompt through the model to get scores and the inner state.
        o, h = net(x0)

        # Sample the first index from the scores of the last prompt position.
        top_i = sample_next(o[:, -1])
        result.append(top_i.item())

        # Feed each sampled index back into the model, carrying the state.
        for _ in range(length):
            inp = top_i.view(1, 1)  # (batch=1, sequence=1), already on device
            o, h = net(inp, h)

            top_i = sample_next(o)
            result.append(top_i.item())

    # Return the start phrase joined with the generated text.
    return start_phrase + ints2str(result, all_chars)
    