In [28]:
# !pip install torchtext

In [1]:
# importing necessary libraries
import numpy as np
import pandas as pd
import sys,  torch, torch.nn as nn
from torch.utils.data import DataLoader,Dataset, TensorDataset
import torchtext
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import Vocab
from torch.nn.utils.rnn import pad_sequence
import tqdm
from collections import Counter

In [5]:
# pandas is already imported in the first cell; this repeated import is harmless.
import pandas as pd
# Relative path to the poems CSV; its "text" column is read when tokenizing.
csv_path="../dataset/poems-100.csv"

In [6]:
# tokenize: one lowercase corpus string, split on whitespace
text = "\n".join(
    pd.read_csv(csv_path)["text"].astype(str)
).lower()
tokens = text.split()

# Sorted unique tokens: a word's position in `vocab` is its integer id.
vocab = sorted(set(tokens))
w2i = dict(zip(vocab, range(len(vocab))))   # word -> id
i2w = dict(enumerate(vocab))                # id -> word
encoded = list(map(w2i.__getitem__, tokens))

In [8]:
# sequence: sliding windows of `seq_len` token ids (inputs), each paired with
# the token id that immediately follows the window (target)
seq_len = 6
X = [encoded[start:start + seq_len] for start in range(len(encoded) - seq_len)]
# The target of window `start` is encoded[start + seq_len], i.e. everything
# from position `seq_len` onwards, in order.
Y = encoded[seq_len:]

In [9]:
# dataloader

# torch.as_tensor accepts both the Python lists built by the windowing step and
# tensors left over from a previous run of this cell, so re-running the cell
# does not trigger torch.tensor's copy-construct warning. The explicit int64
# dtype keeps the ids integer-typed even if the lists are empty (an empty list
# would otherwise be inferred as float32).
X = torch.as_tensor(X, dtype=torch.long)
Y = torch.as_tensor(Y, dtype=torch.long)
# Batches of 32 (window, next-token) pairs, reshuffled on every pass.
loader = DataLoader(TensorDataset(X, Y), batch_size=32, shuffle=True)

In [10]:
# Model:


class RNN(nn.Module):
    """Next-token predictor: embedding -> single-layer RNN -> linear projection.

    Args:
        vocab: Number of distinct token ids. Used as the embedding table size
            and as the number of output logits.
        emb_dim: Width of each token embedding. Defaults to 64.
        hidden_dim: Width of the RNN hidden state. Defaults to 128.
    """

    def __init__(self, vocab, emb_dim=64, hidden_dim=128):
        super().__init__()
        # Attribute names are kept as emb / rnn / fc so previously saved
        # state_dicts still load.
        self.emb = nn.Embedding(vocab, emb_dim)
        self.rnn = nn.RNN(emb_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab)

    def forward(self, x):
        """Return logits over the vocabulary for the token after each sequence.

        Args:
            x: Integer tensor of token ids, batch dimension first
                (the RNN is built with ``batch_first=True``).

        Returns:
            Tensor with one row of ``vocab`` logits per input sequence.
        """
        x = self.emb(x)
        o, _ = self.rnn(x)
        # Only the output at the last time step is projected to logits.
        return self.fc(o[:, -1])

In [11]:
# Seed PyTorch's default RNG so that weight initialisation (and any later draws
# from the same default generator) repeat across Restart & Run All.
torch.manual_seed(0)

# One output logit per vocabulary entry.
model = RNN(len(vocab))
# Adam with a fixed learning rate of 3e-3.
opt = torch.optim.Adam(model.parameters(), lr=0.003)
# Cross-entropy over raw logits with integer class targets.
loss_fn = nn.CrossEntropyLoss()

In [12]:
# training
# Put the model in training mode explicitly, whatever mode an earlier cell left
# it in, so re-running this cell always trains with train-mode layer behaviour.
model.train()
for epoch in tqdm.tqdm(range(2)):
    total = 0
    for xb, yb in loader:
        opt.zero_grad()                  # clear gradients from the previous step
        loss = loss_fn(model(xb), yb)
        loss.backward()
        opt.step()
        total += loss.item()
    # Report the mean per-batch loss for this epoch.
    print("epoch", epoch, "loss", total / len(loader))

 50%|█████     | 1/2 [00:06<00:06,  6.57s/it]

epoch 0 loss 7.447368370144981


100%|██████████| 2/2 [00:12<00:00,  6.34s/it]

epoch 1 loss 6.593734711601385





In [13]:
#  poem generation:
def generate(seed="love is",words=40):
    """Sample a continuation of ``seed`` one token at a time from ``model``.

    Args:
        seed: Prompt text. It is lower-cased and split on whitespace; words not
            found in ``w2i`` are mapped to id 0.
        words: Number of tokens to sample and append to the prompt.

    Returns:
        The prompt tokens (after id lookup) followed by the sampled tokens,
        joined with single spaces.

    Raises:
        ValueError: If ``seed`` contains no words while ``words`` > 0, because
            the model has nothing to condition on.
    """
    toks = seed.lower().split()
    if not toks and words > 0:
        # An empty context would become a float tensor of shape (1, 0) and fail
        # deep inside the model with an unrelated-looking error.
        raise ValueError("seed must contain at least one word")
    idxs = [w2i.get(w, 0) for w in toks]

    # Remember the current mode so that sampling does not silently leave the
    # model in eval mode for whatever runs next.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for _ in range(words):
                # Condition on at most the last `seq_len` ids.
                x = torch.tensor([idxs[-seq_len:]])
                p = torch.softmax(model(x), dim=-1)
                # Draw the next id from the predicted distribution.
                idxs.append(torch.multinomial(p, 1).item())
    finally:
        model.train(was_training)

    return " ".join(i2w[i] for i in idxs)

# Print the header first, then sample with the default seed and show the poem.
print("\nGenerated poem:\n")
poem = generate()
print(poem)



Generated poem:

love is it? gray delicious according we night-time or with sorrow, seeming they and of your more the globe air. household no, my soul, something you tow-path, no apex walks race, does wider space, frozen when and thro' dusk—toss or through travels


Extra:
- save the model weights and the full model
- train on a larger dataset
- reduce the size of the model
- show how data passes through the model layers
- compare poem outputs at different training levels.