In [17]:
import torch
import torch.nn as nn
import re
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

url = "https://cs.stanford.edu/people/karpathy/char-rnn/warpeace_input.txt"
response = requests.get(url)
text = response.text


text = re.sub(r'\n\s*\n', ' <PARA> ', text)  # Replace paragraph breaks with `<PARA>`
text = "\n".join(line + " " for line in text.splitlines())  # Add space at the end of each line
text = re.sub('[^a-zA-Z0-9 \.<>]', '', text)  # Keep only alphanumeric, space, period, and tokens
text = text.lower().strip()

words = re.findall(r'\b\w+\b|[.]|<PARA>', text)
vocab = sorted(set(words + ["<>"]))  # Add `<>` to the vocabulary
stoi = {s: i for i, s in enumerate(vocab)}  # Map each token to a unique index
itos = {i: s for s, i in stoi.items()}      # Reverse map from index to token

block_size =10   # Context length: how many words we use to predict the next one
X, Y = [], []    
# Start context with padding tokens
context = [stoi["<>"]] * block_size

for word in words + ['para']:  # Include <PARA> as end-of-sequence marker
    if word == "para":
        # Reset context to padding tokens on new paragraph
        context = [stoi["<>"]] * block_size
    else:
        # Append current context to X and target word index to Y
        ix = stoi[word]  
        X.append(context)  
        Y.append(ix)       

        # Update context by sliding the window to include the next word index
        context = context[1:] + [ix]  # Move the window without reinitializing


for i in range(150):
    print(' '.join(itos[idx] if idx in itos else '<>' for idx in X[i]), '--->', itos[Y[i]])


X = torch.tensor(X).to(device)
Y = torch.tensor(Y).to(device)


cuda
<> <> <> <> <> <> <> <> <> <> ---> well
<> <> <> <> <> <> <> <> <> well ---> prince
<> <> <> <> <> <> <> <> well prince ---> so
<> <> <> <> <> <> <> well prince so ---> genoa
<> <> <> <> <> <> well prince so genoa ---> and
<> <> <> <> <> well prince so genoa and ---> lucca
<> <> <> <> well prince so genoa and lucca ---> are
<> <> <> well prince so genoa and lucca are ---> now
<> <> well prince so genoa and lucca are now ---> just
<> well prince so genoa and lucca are now just ---> family
well prince so genoa and lucca are now just family ---> estates
prince so genoa and lucca are now just family estates ---> of
so genoa and lucca are now just family estates of ---> the
genoa and lucca are now just family estates of the ---> buonapartes
and lucca are now just family estates of the buonapartes ---> .
lucca are now just family estates of the buonapartes . ---> but
are now just family estates of the buonapartes . but ---> i
now just family estates of the buonapartes . but i ---> warn


In [18]:
X.shape, Y.shape

(torch.Size([592050, 10]), torch.Size([592050]))

In [19]:
X[:15]

tensor([[   93,    93,    93,    93,    93,    93,    93,    93,    93,    93],
        [   93,    93,    93,    93,    93,    93,    93,    93,    93, 19477],
        [   93,    93,    93,    93,    93,    93,    93,    93, 19477, 13580],
        [   93,    93,    93,    93,    93,    93,    93, 19477, 13580, 16349],
        [   93,    93,    93,    93,    93,    93, 19477, 13580, 16349,  7494],
        [   93,    93,    93,    93,    93, 19477, 13580, 16349,  7494,   724],
        [   93,    93,    93,    93, 19477, 13580, 16349,  7494,   724, 10583],
        [   93,    93,    93, 19477, 13580, 16349,  7494,   724, 10583,   995],
        [   93,    93, 19477, 13580, 16349,  7494,   724, 10583,   995, 11958],
        [   93, 19477, 13580, 16349,  7494,   724, 10583,   995, 11958,  9721],
        [19477, 13580, 16349,  7494,   724, 10583,   995, 11958,  9721,  6500],
        [13580, 16349,  7494,   724, 10583,   995, 11958,  9721,  6500,  6064],
        [16349,  7494,   724, 10583,   9

In [20]:
# Hyperparameters
block_size = 10  
embedding_dim = 64  
hidden_size = 1024  

In [21]:
class NextWord(nn.Module):
  def __init__(self, block_size, vocab_size, emb_dim, hidden_size):
    super().__init__()
    self.emb = nn.Embedding(vocab_size, emb_dim)
    self.lin1 = nn.Linear(block_size * emb_dim, hidden_size)
    self.lin2 = nn.Linear(hidden_size, vocab_size)

  def forward(self, x):
    x = self.emb(x)
    x = x.view(x.shape[0], -1)
    x = torch.relu(self.lin1(x))
    x = self.lin2(x)
    return x

In [22]:
# Initialize model
vocab_size = len(stoi)
model = NextWord(block_size, vocab_size, embedding_dim, hidden_size).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
loss_fn = nn.CrossEntropyLoss()

In [23]:
model

NextWord(
  (emb): Embedding(20070, 64)
  (lin1): Linear(in_features=640, out_features=1024, bias=True)
  (lin2): Linear(in_features=1024, out_features=20070, bias=True)
)

In [24]:
def predict_next_words(model, stoi, itos, input_words, device, num_words=5):

    model.eval()  # Set model to evaluation mode
    context = [stoi.get(word, 0) for word in input_words[-block_size:]]
    sentence = input_words.copy()
    
    for _ in range(num_words):
        input_seq = torch.tensor(context, dtype=torch.long).unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(input_seq)
            _, predicted = torch.max(output, dim=1)
            next_word = itos[predicted.item()]
            sentence.append(next_word)
            
            # Update context for the next prediction
            context = context[1:] + [predicted.item()]
    
    return ' '.join(sentence)

# Example usage 
input_words = ['','','','','','','','','','','this', 'is', 'a', 'sample', 'sentence']
print("Generated sentence:", predict_next_words(model, stoi, itos, input_words, device, num_words=15).strip())

Generated sentence: this is a sample sentence unbearable armswell gibrards acute between shawls fattened monastery sharpnosed untried comenothing mode sleigh distinguishes adapting


In [25]:
batch_size = 4096
print_every = 25
for epoch in range(501):
    for i in range(0, X.shape[0], batch_size):
        x = X[i:i+batch_size]
        y = Y[i:i+batch_size]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    if epoch % 10 ==0:
        print(epoch)
    if epoch % print_every == 0:
        print(epoch, loss.item())

0
0 6.879134178161621
10
20
25 3.1875388622283936
30
40
50
50 2.2165377140045166
60
70
75 1.7624762058258057
80
90
100
100 1.4670307636260986
110
120
125 1.2536057233810425
130
140
150
150 1.0920164585113525
160
170
175 0.9635493159294128
180
190
200
200 0.858995258808136
210
220
225 0.7725817561149597
230
240
250
250 0.7006580233573914
260
270
275 0.6393033266067505
280
290
300
300 0.5871362686157227
310
320
325 0.541398286819458
330
340
350
350 0.5023683905601501
360
370
375 0.4678008556365967
380
390
400
400 0.4377448260784149
410
420
425 0.41048192977905273
430
440
450
450 0.385908305644989
460
470
475 0.36380869150161743
480
490
500
500 0.3446604311466217


In [26]:
# Example usage for trained model
input_words = ['','','','','','','','','','','this', 'is', 'a', 'sample', 'you']
print("Generated sentence:", predict_next_words(model, stoi, itos, input_words, device, num_words=55).strip())

Generated sentence: this is a sample you really . . . one thinks is over other said not in point lay out not give up to the farther questions . they had come to go to the general . youll have a soul are you in everything your honor your honor to get are to your to and others go and in


In [27]:
# Assuming the model is already trained
model_path = 'next_word_model_2_64_10_relu_500.pth'
torch.save(model.state_dict(), model_path)

In [28]:
import os
os.chdir(r'/kaggle/working')

In [29]:
from IPython.display import FileLink
FileLink(r'next_word_model_2_64_10_relu_500.pth')