In [110]:
import torch.nn as nn

class RNN(nn.Module):
    """
    Embedding -> multi-layer LSTM -> linear model that scores the next word.

    NOTE(review): a second ``class RNN`` further down in this same cell rebinds
    the name, so this definition is shadowed once the cell has finished running.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.5):
        """
        Initialize the PyTorch RNN Module
        :param vocab_size: The number of input dimensions of the neural network (the size of the vocabulary)
        :param output_size: The number of output dimensions of the neural network
        :param embedding_dim: The size of the embedding vectors
        :param hidden_dim: The size of the hidden layer outputs
        :param n_layers: The number of stacked LSTM layers
        :param dropout: dropout to add in between LSTM layers
        """
        super(RNN, self).__init__()

        # sizes needed later by forward() and init_hidden()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers (batch_first=True: inputs are batch-major)
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=dropout, batch_first=True)

        # final fully-connected layer mapping hidden features to output scores
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, nn_input, hidden):
        """
        Forward propagation of the neural network
        :param nn_input: The input to the neural network (its first dimension is the batch)
        :param hidden: The hidden state passed to the LSTM
        :return: Two values, the scores for the last time step of every sequence
                 and the latest hidden state returned by the LSTM
        """
        batch_size = nn_input.size(0)

        # embeddings and lstm_out
        embeds = self.embedding(nn_input)
        lstm_out, hidden = self.lstm(embeds, hidden)

        # stack up lstm outputs into rows of width hidden_dim
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

        # fully-connected layer (no extra dropout is applied here)
        output = self.fc(lstm_out)

        # reshape so the batch dimension comes first again
        output = output.view(batch_size, -1, self.output_size)
        out = output[:, -1]  # keep only the last time step of each sequence

        # return one batch of output word scores and the hidden state
        return out, hidden

    def init_hidden(self, batch_size):
        '''
        Initialize the hidden state of the LSTM
        :param batch_size: The batch_size of the hidden state
        :return: tuple of two zero tensors, each of dims (n_layers, batch_size, hidden_dim)
        '''
        # Borrow dtype/device from an existing parameter so the new tensors match the model.
        weight = next(self.parameters()).data
        state_shape = (self.n_layers, batch_size, self.hidden_dim)

        # The former `if 0:` CUDA branch could never execute and has been dropped.
        hidden = (weight.new_zeros(state_shape),
                  weight.new_zeros(state_shape))

        return hidden
    
    
    
    
    
    
    
import torch.nn as nn

class RNN(nn.Module):
    """
    Embedding -> multi-layer LSTM -> linear model that scores the next word
    from the last time step of each input sequence.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.5):
        """
        Initialize the PyTorch RNN Module
        :param vocab_size: The number of input dimensions of the neural network (the size of the vocabulary)
        :param output_size: The number of output dimensions of the neural network
        :param embedding_dim: The size of the embedding vectors
        :param hidden_dim: The size of the hidden layer outputs
        :param n_layers: The number of stacked LSTM layers
        :param dropout: dropout to add in between LSTM layers
        """
        super(RNN, self).__init__()

        # set class variables
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers (batch_first=True: inputs are batch-major)
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=dropout, batch_first=True)

        # Define the final, fully-connected output layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, nn_input, hidden):
        """
        Forward propagation of the neural network
        :param nn_input: The input to the neural network (its first dimension is the batch)
        :param hidden: The hidden state passed to the LSTM
        :return: Two values, the scores for the last time step of every sequence
                 (batch_size x output_size) and the latest hidden state
        """
        # embeddings and lstm_out
        embeds = self.embedding(nn_input)
        lstm_out, hidden = self.lstm(embeds, hidden)

        # Only the final time step is returned, so project just that slice
        # instead of running the linear layer over every step and discarding
        # all but the last one.
        last_step = lstm_out[:, -1, :]
        out = self.fc(last_step)

        # return one batch of output word scores and the hidden state
        return out, hidden

    def init_hidden(self, batch_size):
        '''
        Initialize the hidden state of the LSTM
        :param batch_size: The batch_size of the hidden state
        :return: tuple of two zero tensors (hidden state, cell state), each of
                 dims (n_layers, batch_size, hidden_dim)
        '''
        # Borrow dtype/device from an existing parameter so the new tensors match the model.
        weight = next(self.parameters()).data
        state_shape = (self.n_layers, batch_size, self.hidden_dim)

        hidden = (weight.new_zeros(state_shape),
                  weight.new_zeros(state_shape))

        # `train_on_gpu` is a notebook-level flag. Look it up defensively so a
        # fresh kernel that never defined it does not fail with NameError; when
        # it is defined and truthy the states are moved to CUDA as before.
        if globals().get('train_on_gpu', False):
            hidden = tuple(state.cuda() for state in hidden)

        return hidden

"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
# Hands the RNN class to the project's test helper.
# NOTE(review): `tests` is imported in the cell labelled In [111] further down, and
# `train_on_gpu` is not assigned anywhere in the visible notebook - both must
# already exist in the kernel for this line to run; confirm on a fresh kernel.
tests.test_rnn(RNN, train_on_gpu)    


In [111]:
import helper
import problem_unittests as tests
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import problem_unittests as tests
import numpy as np
from collections import Counter
import torch



# Path of the raw script file (relative to the notebook's working directory)
# and its contents as returned by the project's helper.load_data.
data_dir = './data/Seinfeld_Scripts.txt'
text = helper.load_data(data_dir)


def token_lookup():
    """
    Build the mapping from punctuation symbols to placeholder tokens.
    :return: dict whose keys are punctuation characters (including the newline)
             and whose values are the corresponding ``||name||`` tokens
    """
    return {
        ".": "||period||",
        ",": "||comma||",
        "\"": "||quotationmark||",
        ";": "||semicolon||",
        "!": "||exclamationmark||",
        "?": "||questionmark||",
        "(": "||lparentheses||",
        ")": "||rparentheses||",
        "-": "||dash||",
        "\n": "||return||",
    }


def create_lookup_tables(text):
    """
    Build the two vocabulary lookup tables.
    :param text: The text of tv scripts split into words
    :return: A tuple of dicts (vocab_to_int, int_to_vocab); ids are assigned in
             order of decreasing word frequency, ties keep first-seen order
    """
    frequency = Counter(text)

    vocab_to_int = {}
    int_to_vocab = {}
    # most_common() yields (word, count) pairs, most frequent first
    for position, (word, _) in enumerate(frequency.most_common()):
        int_to_vocab[position] = word
        vocab_to_int[word] = position

    return (vocab_to_int, int_to_vocab)

def batch_data(words, sequence_length, batch_size):
    """
    Batch the neural network data using DataLoader
    :param words: The word ids of the TV scripts
    :param sequence_length: The number of consecutive word ids in each feature window
    :param batch_size: The size of each batch; the number of sequences in a batch
    :return: DataLoader (unshuffled) yielding (features, targets) pairs, where each
             target is the word id that directly follows its feature window

    Note: ``words`` is first cut to a multiple of ``batch_size``, but the number of
    windows is that length minus ``sequence_length``, so the last batch produced by
    the loader can still be smaller than ``batch_size``.
    """
    # drop the tail that does not fill a whole multiple of batch_size
    usable_length = (len(words) // batch_size) * batch_size
    words = words[:usable_length]

    # one sliding window per start position that still has a following word
    window_starts = range(len(words) - sequence_length)
    features = [words[start:start + sequence_length] for start in window_starts]
    targets = [words[start + sequence_length] for start in window_starts]

    feature_tensor = torch.from_numpy(np.asarray(features))
    target_tensor = torch.from_numpy(np.asarray(targets))
    dataset = TensorDataset(feature_tensor, target_tensor)

    # return a dataloader
    return torch.utils.data.DataLoader(dataset, shuffle=False, batch_size=batch_size)


# Run the project's preprocessing helper with the tokenizer and vocabulary
# builders defined above, then load back what load_preprocess returns.
helper.preprocess_and_save_data(data_dir, token_lookup, create_lookup_tables)
int_text, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()

In [112]:
# --- data ---
sequence_length = 10   # number of words in each input sequence
batch_size = 128
train_loader = batch_data(int_text, sequence_length, batch_size)

# --- training ---
num_epochs = 10
learning_rate = 0.001
show_every_n_batches = 500

# --- model ---
vocab_size = len(vocab_to_int)
output_size = vocab_size   # one score per vocabulary entry
embedding_dim = 200
hidden_dim = 256
n_layers = 2

rnn = RNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.5)


In [115]:
# Fresh zero state sized for one loader batch (batch_size is set in the
# hyperparameter cell and is the same value the loader was built with).
hidden = rnn.init_hidden(batch_size)

# Pull the first batch only and keep it around as `input` / `target` for the
# scratch cells below. (`input` shadows the Python builtin; the name is kept
# because later cells refer to it.)
for batch_i, (inputs, labels) in enumerate(train_loader, 1):
    input, target = inputs, labels
    # was `label.shape`, a name that is never defined
    print(target.shape)
    break

torch.Size([128])


In [117]:
# Scratch cell: replays the steps of RNN.forward by hand with freshly
# constructed layers (these are not the layers inside `rnn`).
embedding = nn.Embedding(vocab_size, embedding_dim)

lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=0.5, batch_first=True)
fc = nn.Linear(hidden_dim, output_size)    

batch_size = 128
embeds = embedding(input)
# NOTE: `hidden` is rebound here to the state returned by the LSTM, so
# re-running this cell feeds the previous result back in.
lstm_out, hidden = lstm(embeds, hidden)
# flatten to rows of width hidden_dim before the linear layer
lstm_out = lstm_out.contiguous().view(-1, hidden_dim)
output = fc(lstm_out)
        
# back to (batch_size, <steps>, output_size), then keep the last step
output = output.view(batch_size, -1, output_size)
out = output[:, -1] # get last batch of labels       
out.size()
# NOTE(review): if the loss below is re-enabled it presumably needs `out`
# (2-D, last step only) rather than the 3-D `output` - confirm before use.
# criterion = nn.CrossEntropyLoss()
# loss = criterion(output, target)


torch.Size([128, 21388])

In [None]:
# Scratch cell: build a new standalone embedding layer (separate from
# rnn.embedding) and embed the batch captured earlier in `input`.
embedding = nn.Embedding(vocab_size, embedding_dim)
emb = embedding(input)

In [None]:
# Only the last expression of a cell is displayed, so the two sizes are
# returned together as a tuple instead of discarding the first one.
input.size(), emb.size()