<a href="https://colab.research.google.com/github/StanleyLiangYork/PytorchCode/blob/main/Pytorch_GPU_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

In [2]:
!wget https://storage.googleapis.com/pet-detect-239118/dataset/wonderland.txt /content/wonderland.txt

--2023-03-27 01:36:04--  https://storage.googleapis.com/pet-detect-239118/dataset/wonderland.txt
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.2.128, 142.250.141.128, 2607:f8b0:4023:c0b::80, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.2.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 147674 (144K) [text/plain]
Saving to: ‘wonderland.txt’


2023-03-27 01:36:04 (76.2 MB/s) - ‘wonderland.txt’ saved [147674/147674]

/content/wonderland.txt: Scheme missing.
FINISHED --2023-03-27 01:36:04--
Total wall clock time: 0.4s
Downloaded: 1 files, 144K in 0.002s (76.2 MB/s)


Load the text and convert it to lowercase

In [3]:
filename = "wonderland.txt"
# Read the whole corpus and lowercase it so the vocabulary stays small.
# The context manager closes the file handle as soon as the text is read.
with open(filename, 'r', encoding='utf-8') as text_file:
  raw_text = text_file.read().lower()

create mapping of unique chars to integers

In [4]:
# Sorted list of every distinct character in the corpus, followed by the
# lookup table that maps each character to its position in that list.
chars = sorted(set(raw_text))
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

summarize the loaded data

In [5]:
# Corpus length in characters and number of distinct characters.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  144343
Total Vocab:  44


In [6]:
# Number of characters in each input window, plus the (initially empty)
# containers for the encoded windows and their next-character targets.
seq_length = 100
dataX, dataY = [], []

In [7]:
# Slide a window of seq_length characters over the text: every window is one
# input sample and the character immediately after it is the target.
#
# The text is integer-encoded once up front instead of looking every character
# up again for each of the windows that contain it, and both lists are rebuilt
# from scratch so re-running this cell cannot append a second copy of the data.
encoded_text = [char_to_int[char] for char in raw_text]
dataX = [encoded_text[i:i + seq_length] for i in range(0, n_chars - seq_length, 1)]
dataY = encoded_text[seq_length:n_chars]

n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  144243


reshape X to be [samples, time steps, features]

In [8]:
# Inputs: float tensor shaped (n_patterns, seq_length, 1), with the integer
# codes divided by the vocabulary size. Targets: the raw integer codes.
X = torch.tensor(dataX, dtype=torch.float32)
X = X.reshape(n_patterns, seq_length, 1) / float(n_vocab)
y = torch.tensor(dataY)

Define the NN to predict char sequence

In [9]:
class CharModel(nn.Module):
  """Stacked LSTM followed by a linear layer that scores the next character.

  Args:
    vocab_size: number of output scores (one per character). When None, the
      notebook-level ``n_vocab`` is read at construction time, so the
      zero-argument call ``CharModel()`` behaves as before.
    hidden_size: size of the LSTM hidden state (default 256).
    num_layers: number of stacked LSTM layers (default 2).
    dropout: dropout probability used both between LSTM layers and before
      the linear layer (default 0.2).
  """

  def __init__(self, vocab_size=None, hidden_size=256, num_layers=2, dropout=0.2):
    super().__init__()
    if vocab_size is None:
      # Fall back to the vocabulary size computed earlier in the notebook.
      vocab_size = n_vocab
    self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=num_layers,
                        batch_first=True, dropout=dropout)
    self.dropout = nn.Dropout(dropout)
    self.linear = nn.Linear(hidden_size, vocab_size)

  def forward(self,x):
    """Return one score per character for each sequence in the batch.

    Args:
      x: tensor shaped (batch, time steps, 1); the LSTM is batch-first with a
        single input feature.

    Returns:
      Tensor shaped (batch, vocab_size) holding unnormalised scores.
    """
    x, _ = self.lstm(x)
    # keep only the output of the last time step
    x = x[:, -1, :]
    # apply dropout, then project to the vocabulary
    x = self.linear(self.dropout(x))
    return x

Set up the training configuration

In [10]:
# Number of passes over the data, samples per mini-batch, and a freshly
# initialised network.
n_epochs, batch_size = 40, 128
model = CharModel()

Move the model to the GPU (falls back to the CPU when CUDA is unavailable)

In [11]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")
# Kept as the last expression so the cell still displays the model summary.
model.to(device)

CharModel(
  (lstm): LSTM(1, 256, num_layers=2, batch_first=True, dropout=0.2)
  (dropout): Dropout(p=0.2, inplace=False)
  (linear): Linear(in_features=256, out_features=44, bias=True)
)

In [12]:
# The loss is summed (not averaged) over the samples of a batch; Adam is used
# with its default hyper-parameters.
loss_fn = nn.CrossEntropyLoss(reduction="sum")
optimizer = optim.Adam(model.parameters())

In [13]:
# Shuffled mini-batches of (input window, next-character target) pairs.
loader = data.DataLoader(
  data.TensorDataset(X, y),
  batch_size=batch_size,
  shuffle=True,
)

In [14]:
# No snapshot has been taken yet, and any finite loss will beat infinity.
best_model, best_loss = None, np.inf

Train the model and keep the best checkpoint

In [15]:
for epoch in range(n_epochs):
  model.train()  # set the model to train status
  for X_batch, y_batch in loader:
    # each batch is moved to the device the model lives on
    y_pred = model(X_batch.to(device))
    loss = loss_fn(y_pred, y_batch.to(device))
    optimizer.zero_grad()
    loss.backward()  # get the gradients
    optimizer.step() # update the model parameters
  # Evaluation pass. NOTE: this iterates the same loader used for training,
  # so the figure reported below is measured on the training data.
  model.eval() # set the model to evaluation status
  epoch_loss = 0
  with torch.no_grad():
    for X_batch, y_batch in loader:
      y_pred = model(X_batch.to(device))
      epoch_loss += loss_fn(y_pred, y_batch.to(device))
  # Convert once to a plain Python float so best_loss does not end up holding
  # a device tensor.
  epoch_loss = float(epoch_loss)
  if epoch_loss < best_loss:
    best_loss = epoch_loss
    # state_dict() hands back references to the live parameters, which keep
    # changing as training continues. Clone them so the snapshot really is
    # the best epoch and not whatever the final epoch produced.
    best_model = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
  print("Epoch {0:3d} ---- Cross-entropy: {1:.4f}".format(epoch, epoch_loss))

# Persist the best weights together with the character lookup table.
torch.save([best_model, char_to_int], "single_char.pth")

Epoch   0 ---- Cross-entropy: 370408.9062
Epoch   1 ---- Cross-entropy: 335101.7500
Epoch   2 ---- Cross-entropy: 307116.9688
Epoch   3 ---- Cross-entropy: 290256.0625
Epoch   4 ---- Cross-entropy: 278854.0938
Epoch   5 ---- Cross-entropy: 266216.2188
Epoch   6 ---- Cross-entropy: 258698.7969
Epoch   7 ---- Cross-entropy: 249898.6875
Epoch   8 ---- Cross-entropy: 242281.6875
Epoch   9 ---- Cross-entropy: 237606.3281
Epoch  10 ---- Cross-entropy: 232543.5469
Epoch  11 ---- Cross-entropy: 225036.1094
Epoch  12 ---- Cross-entropy: 222755.3125
Epoch  13 ---- Cross-entropy: 216147.2031
Epoch  14 ---- Cross-entropy: 222699.9844
Epoch  15 ---- Cross-entropy: 210128.2500
Epoch  16 ---- Cross-entropy: 204254.7812
Epoch  17 ---- Cross-entropy: 202858.9531
Epoch  18 ---- Cross-entropy: 198820.5312
Epoch  19 ---- Cross-entropy: 195927.2500
Epoch  20 ---- Cross-entropy: 192171.2031
Epoch  21 ---- Cross-entropy: 190451.5625
Epoch  22 ---- Cross-entropy: 187806.9062
Epoch  23 ---- Cross-entropy: 1864

Generate text by the best model

In [16]:
# Restore the saved weights and character lookup table.
# map_location places the stored tensors on the current device, so a
# checkpoint written from a GPU run still loads on a CPU-only runtime.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
best_model, char_to_int = torch.load("single_char.pth", map_location=device)
n_vocab = len(char_to_int)

In [18]:
# Invert the lookup table (integer code -> character) for decoding the
# predictions, then copy the checkpointed weights into the network.
int_to_char = {code: symbol for symbol, code in char_to_int.items()}
model.load_state_dict(best_model)

<All keys matched successfully>

Randomly sample a prompt from the text

In [41]:
filename = "wonderland.txt"
# Prompt length in characters. This rebinds seq_length, which was 100 when
# the training windows were built.
seq_length = 120
# Re-read the corpus; the context manager closes the file handle afterwards.
with open(filename, 'r', encoding='utf-8') as text_file:
  raw_text = text_file.read().lower()
# Pick a random starting offset that leaves room for a full-length prompt.
start = np.random.randint(0, len(raw_text)-seq_length)
prompt = raw_text[start:start+seq_length]
# Integer-encode the prompt with the lookup table restored from the checkpoint.
pattern = [char_to_int[c] for c in prompt]

In [42]:
# Put the network in evaluation mode before generating text.
model.eval()
# Echo the sampled prompt; print() stays last so the cell shows only this text.
print('Prompt: {:s}'.format(prompt))

Prompt: ring of feet on the stairs. alice knew it was
the rabbit coming to look for her, and she trembled till she shook the
hou


In [43]:
# Greedy generation: predict one character, print it, slide the window forward
# by one position and repeat.
with torch.no_grad():
  for step in range(1000):
    # shape the window as (1, length, 1) and divide by the vocabulary size
    window = np.reshape(pattern, (1, len(pattern), 1)) / float(n_vocab)
    x = torch.tensor(window, dtype=torch.float32)
    # scores over the vocabulary for the next character
    preds = model(x.to(device))
    # take the highest-scoring index and decode it to a character
    index = int(preds.argmax())
    result = int_to_char[index]
    print(result, end='') # stay on the same output line
    # drop the oldest code and add the new one for the next iteration
    pattern = pattern[1:] + [index]

print()
print('text generation done...')

se, and was so suickly as she was soo with the soog,

'i dan teem to see allce!' she said to herself, 'it was a little bisd and she would be on time to see it would be teen the would be soiether.


'i dan teel to say that ' said the mock turtle, 'she was a little bill was ont and said, 'it was a little bisd and she would be all down the cook, and the sold of the mock turtle iad a pearled in a linute or two, she was soo with the pueen of the soeet her fead in the was soon the words: 
'i dan teem to see allce!' she said to herself, 'it was a little bisd and she would be on time to see it would be teen the would be soiether.


'i dan teel to say that ' said the mock turtle, 'she was a little bill was ont and said, 'it was a little bisd and she would be all down the cook, and the sold of the mock turtle iad a pearled in a linute or two, she was soo with the pueen of the soeet her fead in the was soon the words: 
'i dan teem to see allce!' she said to herself, 'it was a little bisd and she 