In [1]:
# Dennis' email on 29.April.2022

#To summarize what was said in the meeting, you should start by implementing three different types of RNNs from scratch (basic RNN, GRU and LSTM) and testing them to see if your implementation is correct (torch.allclose()) is a good function to know for testing). Another thing you should do early on is pick some dataset and look how to #load it, tokenize the text, and transform them to tensors (look at Pytorch DataLoader and Dataset and other data #processing tools in pytorch first).
#If you haven't done so yet, also watch the Stanford NLP lectures mentioned on the slides.


# Installing Python Packages from Jupyter Notebook

#import sys
#!{sys.executable} -m pip install <package name>


# Package Settings
import torch 
import torch.nn as nn
import csv
import timeit
import torchvision
import torchvision.transforms as transforms

# Device configuration
print("cuda is available? " + str(torch.cuda.is_available()))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

cuda is available? False


In [3]:
# Hyper-parameters
"""
sequence_length = 28
input_size = 1
hidden_size = 64
num_layers = 1
num_classes = 1
batch_size = 100
num_epochs = 2
learning_rate = 0.001
"""

# Define RNN
class TweetGenerator(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1):
        super(TweetGenerator, self).__init__()

        # identiy matrix for generating one-hot vectors
        self.ident = torch.eye(input_size)

        # recurrent neural network
        self.rnn = nn.RNN(
            input_size, 
            hidden_size, 
            n_layers, 
            batch_first=True
        )

        # FC layer as decoder to the output
        self.fc = nn.Linear(hidden_size, input_size)
    
    def forward(self, x, h_state=None):
        x = self.ident[x]                  # generate one-hot vectors of input
        output, h_state = self.rnn(x, h_state) # get the next output and hidden state
        output = self.fc(output)          # predict distribution over next tokens
        return output, h_state
    

In [4]:
# .csv Dataset
tweets = list(line[7] for line in csv.reader(open('dataset/2012.csv')))
tweet = tweets[100]
#print(tweet)

# mae a vocabulary
# torchtext has two schemes: stoi, itos
vocab = list(set(tweet)) + ["<BOS>", "<EOS>"]
vocab_stoi = {s: i for i, s in enumerate(vocab)}
vocab_itos = {i: s for i, s in enumerate(vocab)}
vocab_size = len(vocab)
print('before:')
print(vocab)
tweet_ch = ["<BOS>"] + list(tweet) + ["<EOS>"]
tweet_indices = [vocab_stoi[ch] for ch in tweet_ch]
tweet_tensor = torch.Tensor(tweet_indices).long().unsqueeze(0)
print('after:')
print(tweet_ch)
#bos_input = torch.Tensor([vocab_stoi["<BOS>"]]).long().unsqueeze(0)

#target = torch.Tensor([vocab_stoi["<EOS>"]]).long().unsqueeze(0)
target = tweet_tensor[:,1:]  
model = TweetGenerator(vocab_size, hidden_size=64)
print(model)

before:
['c', 'w', 'L', 'X', 'x', '/', 'y', 'm', 'e', 's', 'J', 'B', 'h', '!', 'n', 'l', '.', ':', 'o', 'Z', 'r', 'O', 'd', 't', ' ', 'i', 'p', 'a', 'g', '9', '<BOS>', '<EOS>']
after:
['<BOS>', 'O', 'l', 'y', 'm', 'p', 'i', 'c', 's', ' ', 'o', 'p', 'e', 'n', 'i', 'n', 'g', ' ', 'i', 's', ' ', 'e', 'x', 'c', 'e', 'l', 'l', 'e', 'n', 't', 'l', 'y', ' ', 'B', 'r', 'i', 't', 'i', 's', 'h', '.', ' ', 'J', 'o', 'l', 'l', 'y', ' ', 'g', 'o', 'o', 'd', ' ', 's', 'h', 'o', 'w', '!', ' ', ' ', 'h', 't', 't', 'p', ':', '/', '/', 't', '.', 'c', 'o', '/', 's', '9', 'X', 'Z', 'd', 'a', 'B', 'L', '<EOS>']
TweetGenerator(
  (rnn): RNN(32, 64, batch_first=True)
  (fc): Linear(in_features=64, out_features=32, bias=True)
)


In [7]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

print('[Starting]')
start = timeit.default_timer()
for it in range(300):
    optimizer.zero_grad()
    output, _ = model(tweet_tensor[:,:-1])
    loss = loss_func(output.reshape(-1, vocab_size),
                 target.reshape(-1))
    loss.backward(retain_graph=True)
    optimizer.step()

    if (it+1) % 50 == 0:
        print("[Iter %d] Loss %f" % (it+1, float(loss)))
stop = timeit.default_timer()
print('[Done]')
print('[Runtime] ', stop-start)

# Save the model checkpoint
#torch.save(model.state_dict(), 'rnn-model.ckpt')

[Starting]
[Iter 50] Loss 0.002642
[Iter 100] Loss 0.001129
[Iter 150] Loss 0.000655
[Iter 200] Loss 0.000434
[Iter 250] Loss 0.000312
[Iter 300] Loss 0.000236
[Done]
[Runtime]  1.7179921190000016
