In [11]:
import json, pickle
from pathlib import Path
import torch, torch.nn as nn, torch.nn.functional as F

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [22]:
# Load the vocabulary and preprocessing metadata stored in the artefact directory.
base = "shakespeare"
arte = Path("model_0_shakespeare")

# word -> id mapping. JSON is read explicitly as UTF-8 so the result does not
# depend on the platform's default text encoding.
with open(arte / f"{base}_word_to_id.json", encoding="utf-8") as f:
    w2i = json.load(f)

# Inverse lookup (id -> word), used to turn sampled token ids back into text.
i2w = {int(v): k for k, v in w2i.items()}

# NOTE: unpickling can execute arbitrary code -- only load artefacts you trust.
# The context manager closes the file handle, which the previous bare
# `pickle.load(open(...))` left open.
with open(arte / f"{base}_preprocessed_data.pkl", "rb") as f:
    max_seq = pickle.load(f)["max_seq_length"]

# Vocabulary size; passed to Model to size the embedding and output layers.
V = len(w2i)

In [27]:
# Define with original attribute names, then load
import torch.nn as nn, torch

class Model(nn.Module):
    """Stacked two-LSTM next-token predictor.

    Layout: embedding -> LSTM -> dropout -> LSTM -> dropout -> linear.
    The attribute names (``embedding``, ``lstm1``, ``dropout1``, ``lstm2``,
    ``dropout2``, ``fc``) must stay as they are, because the saved state dict
    is loaded by key.

    Args:
        V: vocabulary size; sets the number of embedding rows and the number
            of output logits.
    """

    def __init__(self, V):
        super().__init__()
        embed_dim, hidden_dim, drop_p = 256, 1024, 0.2
        self.embedding = nn.Embedding(V, embed_dim)
        self.lstm1 = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.dropout1 = nn.Dropout(drop_p)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.dropout2 = nn.Dropout(drop_p)
        self.fc = nn.Linear(hidden_dim, V)

    def forward(self, x):
        """Return one row of ``V`` logits per input sequence.

        ``x`` holds integer token ids; both LSTMs are batch-first, so a
        ``(batch, seq_len)`` layout is assumed -- TODO confirm against the
        training code. Only the final hidden state of the second LSTM is
        passed to the output layer.
        """
        embedded = self.embedding(x)
        first_out, _ = self.lstm1(embedded)
        # The per-step outputs of the second LSTM are discarded; only its
        # final hidden state is used.
        _, (final_hidden, _) = self.lstm2(self.dropout1(first_out))
        return self.fc(self.dropout2(final_hidden[-1]))

# Build the network on the target device, restore the trained weights, and
# switch to inference mode (disables dropout). The checkpoint path is relative
# to the current working directory.
weights_path = f"{base}_model.pt"
m = Model(V).to(device)
m.load_state_dict(torch.load(weights_path, map_location=device))
m.eval()


Model(
  (embedding): Embedding(17927, 256)
  (lstm1): LSTM(256, 1024, batch_first=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (lstm2): LSTM(1024, 1024, batch_first=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=1024, out_features=17927, bias=True)
)

In [33]:
import torch.nn.functional as F
import torch

def generate(seed, n=50, temperature=1.0):
    """Sample a continuation of ``seed`` from the loaded language model.

    Relies on the notebook-level ``m`` (model), ``w2i`` / ``i2w`` (vocabulary
    lookups), ``max_seq`` (input window length) and ``device``.

    Args:
        seed: whitespace-separated prompt. Each word is lower-cased before the
            vocabulary lookup; unknown words map to ``<UNK>``.
        n: maximum number of words to sample.
        temperature: divisor applied to the logits before the softmax. The
            default of ``1.0`` samples from the unmodified model distribution;
            values below 1 sharpen it and values above 1 flatten it.

    Returns:
        The original ``seed`` string followed by the sampled words, joined
        with single spaces. Sampling stops early when ``<EOS>`` is drawn.

    Raises:
        ValueError: if ``temperature`` is not strictly positive.
        KeyError: if ``<UNK>`` or ``<PAD>`` is missing from ``w2i``.
    """
    if temperature <= 0:
        raise ValueError("temperature must be > 0")

    unk, pad, eos = w2i["<UNK>"], w2i["<PAD>"], "<EOS>"
    seq = [w2i.get(w.lower(), unk) for w in seed.split()]
    out = [seed]

    # Inference only: a single no_grad context covers the whole sampling loop.
    with torch.no_grad():
        for _ in range(n):
            # Keep the most recent max_seq ids and right-pad up to max_seq.
            # NOTE(review): the model reads the hidden state after the final
            # time step, i.e. after any trailing <PAD> ids -- confirm that
            # training used the same padding side.
            window = seq[-max_seq:]
            tail = window + [pad] * (max_seq - len(window))
            x = torch.tensor(tail, dtype=torch.long, device=device).unsqueeze(0)

            p = F.softmax(m(x) / temperature, dim=1)[0]
            nid = torch.multinomial(p, 1).item()

            w = i2w.get(nid, "<UNK>")
            if w == eos:
                break
            out.append(w)
            seq.append(nid)

    return " ".join(out)

# Draw ten independent continuations of the same prompt. Sampling is
# stochastic, so each printed line differs from run to run.
seed = "Yesterday I saw a"
for sample in (generate(seed, 50) for _ in range(10)):
    print(sample)


Yesterday I saw a beggar a dish of iceland and art thou have be my head of a poor handsome of in to art time have the an the form with
Yesterday I saw a breast a kiss
Yesterday I saw a ink a decline of this canst thou famish a execution to devil before the baggage and sharp white ned in jest and noise enjoy father mettle the like say sorrow whereupon herself as bill and then for no father calumnious the colour malice tun there english woman art act quoth
Yesterday I saw a shadow as a bird of deer thou hast fire far before the night thou shalt decius go master no time shepherd how now poor sister indeed if yet tear character thy with hatch laughter the royally you ten tom key part pray business urge repent wantonness preserve doth with good
Yesterday I saw a bolt of sword
Yesterday I saw a brim a maid of the sky but who hath appoint he when the lion indeed master be my he so
Yesterday I saw a wary cup
Yesterday I saw a till thunder in the same
Yesterday I saw a ladder by night of wind
Y

# Decide approach: style transfer or text generation?
    - Decision: text generation
# Demo ideas
    - Autocomplete with "style" options
        - UI: Email window with "style" along with formatting options
    - Link two words with generated text (not sure if possible)
    - 
# Come up with styles (i.e. other options than "shakespeare") and collect documents/corpus and preprocess
    # Single character (Hamlet)
    # Speeches from politicians etc.
# Create "lite model" that won't take 15 hours to train
# Try tweaking models to improve result
# Try training with other hardware?
    # Local GPU
    # Google Colab
# Demo part - prototype with Streamlit with existing model
    # Add in additional or different models later
# Alternatives
    # Preprocessing: use TensorFlow Tokenization instead of spaCy
# Documenting / reporting
# Metrics / assessment
    # Accuracy? Confusion matrix? Others?
# Distribute work for project (loosely)

Things people are working on:

San Francisco: styles, lite model, try training on own hardware?

Alvis: working on DL1 instead, come up with styles

JA: cleaning up and sharing existing code, lite model, tweaking models

Khushi:

Jacob:

Hitakshi: sleep

Everybody: Document/take notes on what you're doing for a report