In [14]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [16]:
with open("E:\AschoolCLASS\BA3-2_UCSD_UPS\课程资料\HW3_Public\HW3_Public\poem_data\shakespeare.txt", "r") as f:
    raw_text = f.read()

chars = sorted(list(set(raw_text)))
data_size, vocab_size = len(raw_text), len(chars)
print(f"Data has {data_size} characters, {vocab_size} unique")

char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data = torch.tensor([char_to_ix[ch] for ch in raw_text], dtype=torch.long).to(device)

Data has 98029 characters, 71 unique


In [17]:
class RNN(nn.Module):
    def __init__(self, input_size, embedding_size, output_size, hidden_size):
        super(RNN, self).__init__()
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size)
        self.decoder = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, input_seq, hidden_state):
        embedded = self.embedding(input_seq)
        output, hidden_state = self.rnn(embedded.unsqueeze(1), hidden_state)
        output = self.decoder(output.squeeze(1))
        return self.softmax(output), hidden_state

In [18]:
embedding_size = vocab_size
hidden_size = 100
model = RNN(input_size=vocab_size, embedding_size=embedding_size, output_size=vocab_size, hidden_size=hidden_size).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [19]:
seq_length = 40
step = 3
X = []
Y = []
for i in range(0, len(data) - seq_length - 1, step):
    X.append(data[i: i + seq_length])
    Y.append(data[i + seq_length])
print(f"Training sequences: {len(X)}")

Training sequences: 32663


In [23]:
epochs = 40
seq_length = 40

for epoch in range(1, epochs + 1):
    model.train()
    total_loss = 0

    for i in range(0, len(data) - seq_length - 1, seq_length):
        input_seq = data[i: i + seq_length].to(device)
        target_seq = data[i + 1: i + 1 + seq_length].to(device)

        optimizer.zero_grad()
        output, _ = model(input_seq, None)  # output shape: [seq_len, vocab_size]

        loss = loss_fn(output, target_seq)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch}: Loss = {total_loss:.4f}")


Epoch 1: Loss = 10065.9538
Epoch 2: Loss = 9939.4542
Epoch 3: Loss = 9741.0582
Epoch 4: Loss = 9675.1435
Epoch 5: Loss = 9637.6846
Epoch 6: Loss = 9599.3096
Epoch 7: Loss = 9572.3007
Epoch 8: Loss = 9553.5401
Epoch 9: Loss = 9542.4094
Epoch 10: Loss = 9529.1470
Epoch 11: Loss = 9521.6161
Epoch 12: Loss = 9506.3382
Epoch 13: Loss = 9497.3624
Epoch 14: Loss = 9493.1453
Epoch 15: Loss = 9486.9423
Epoch 16: Loss = 9479.6079
Epoch 17: Loss = 9474.6675
Epoch 18: Loss = 9468.1641
Epoch 19: Loss = 9462.7343
Epoch 20: Loss = 9457.8769
Epoch 21: Loss = 9452.5701
Epoch 22: Loss = 9446.2354
Epoch 23: Loss = 9440.9321
Epoch 24: Loss = 9435.8648
Epoch 25: Loss = 9433.1048
Epoch 26: Loss = 9425.0000
Epoch 27: Loss = 9417.2839
Epoch 28: Loss = 9412.7258
Epoch 29: Loss = 9409.8392
Epoch 30: Loss = 9405.4816
Epoch 31: Loss = 9404.0259
Epoch 32: Loss = 9399.9149
Epoch 33: Loss = 9396.3787
Epoch 34: Loss = 9394.6202
Epoch 35: Loss = 9392.3076
Epoch 36: Loss = 9390.2331
Epoch 37: Loss = 9388.2833
Epoch 38:

In [24]:
def sample(model, seed, temperature=1.0, length=400):
    model.eval()
    generated = seed
    input_seq = torch.tensor([char_to_ix[ch] for ch in seed], dtype=torch.long).to(device)

    hidden = None
    with torch.no_grad():
        for i in range(len(seed) - 1):
            _, hidden = model(input_seq[i].unsqueeze(0), hidden)

        ch = input_seq[-1]
        for _ in range(length):
            output, hidden = model(ch.unsqueeze(0), hidden)
            output_dist = output.div(temperature).exp()
            prediction = torch.multinomial(output_dist, 1).squeeze()
            generated += ix_to_char[prediction.item()]
            ch = prediction
    return generated

In [25]:
seed_text = "shall i compare thee to a summer's day?\n"
for temp in [1.5, 0.75, 0.25]:
    print(f"\n--- Temperature: {temp} ---")
    print(sample(model, seed=seed_text, temperature=temp))


--- Temperature: 1.5 ---
shall i compare thee to a summer's day?
u8)EC(94lWWOTW:ky9;gqTk MG7nY0R43q
EPC,-iozSzcSO)T6z8lGv8GKV9 IOJ0HC(4xjY1jtm20Mz?.Y,!R(RY4o2sTTPDgeuHzTrBA29x:oD;K.gx) YYa8E ma3E9e0?7DlS9;H1!F4n .)iE6wS8E-82b?jlr70OU0
g., kF9p5Ga4tLb5g! 7LkYW!R85Imm74Spe!vI2H2)d0DqVUz;WEj'0:qt.qd3cL56hoi!GDA,VPej0kEn8Rjj?
rPvY.h)4EvV?e1t9WmkO B,.cFJCA fqow7f?M7yUbzMDRr4DelYyF'  8MkuAVC13(LCwcEyOi
:Ab4y:'kvozeUfGuO0YBT6xUPxLlwN7B
x
UAU0 91?C:shGFxucUVtAlk.e;m0pF

--- Temperature: 0.75 ---
shall i compare thee to a summer's day?
x0yOf9POJbj;AdeEp.:q.7:Dn1RgA(LKh )FVd;2D6
-!ONp-SH;Rz:ldf!nTMfO')?Tg8A0xlESgx8l.
,ERAOCEqC;u3.NK0c2)u4?eoGbET2LRr.u!l-B(ekngBss4aYfb2
BuPMhe0d5?jV)yIL-f.az.AMh
-P3bs;oSjOiJ1yG,oSFqP4xutAh)1,va9v4
!-rw44O;xpYR8rT6Bp2bP FKcet(5yW9JHJAy;KozT.gkbMb?C,3lMCPEiTbulT6) Gx!4LsKsUHT9E
x'SGD8buvuIkqeF(1jAy4TNlCIov6PeHW!ro.xnjx3-l,IY9W!9oSrbPF4;le gJ
7;jwUxTHeG!pLeKjnaBqCm7DIu-MkDKloEM-B, rkqe47:9F7'w96k25Ca

--- Temperature: 0.25 ---
shall i compare thee to a summer's day