In [9]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [2]:
class CharDataset(Dataset):
    def __init__(self, text, seq_len, char2idx):
        self.text = text
        self.seq_len = seq_len
        self.char2idx = char2idx
        self.vocab_size = len(char2idx)

    def __len__(self):
        return len(self.text) - self.seq_len
    
    def __getitem__(self, index):
        input_seq = self.text[index:index + self.seq_len]
        target_seq = self.text[index+  1:index + self.seq_len +1]
        input_ids = torch.tensor([self.char2idx[c] for c in input_seq], dtype=torch.long)
        target_ids = torch.tensor([self.char2idx[c] for c in target_seq], dtype=torch.long)
        return input_ids,target_ids

In [3]:
class CharLSTM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(CharLSTM, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, dropout=0.3,batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)
        
    def forward(self, x, hidden=None):
        # x: (batch, seq_len)
        x = self.embed(x)            # (batch, seq_len, embed_size)
        out, hidden = self.lstm(x, hidden)  # out: (batch, seq_len, hidden)
        out = self.fc(out)           # (batch, seq_len, vocab_size)
        log_probs = F.log_softmax(out, dim=-1) # log probs
        return log_probs, hidden

In [4]:
#Params
seq_len = 100
batch_size = 1024
epochs = 20
hidden_size = 512
num_layers = 4
embed_size = 256

In [5]:
with open('war&peace.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Create character-to-index mappings
chars = sorted(set(text))
vocab_size = len(chars)
char2idx = { ch:i for i,ch in enumerate(chars) }
idx2char = { i:ch for i,ch in enumerate(chars) }
print(f"Text length: {len(text)}, Unique chars: {vocab_size}")

Text length: 3201634, Unique chars: 105


In [44]:
dataset = CharDataset(text, seq_len, char2idx)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CharLSTM(vocab_size, embed_size, hidden_size, num_layers).to(device)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=7e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.5)

In [45]:
scaler = torch.amp.GradScaler('cuda')

for inputs, targets in tqdm(dataloader, desc="Warmup Batch"):
    inputs, targets = inputs.to(device), targets.to(device)

    with torch.no_grad():  # Add this - prevents gradient computation
        with torch.amp.autocast('cuda'):
            log_probs, _ = model(inputs)
            loss = criterion(log_probs.view(-1, vocab_size), targets.view(-1))
    
    print(f"Warmup loss: {loss.item()}")
    break 

Warmup Batch:   0%|          | 0/3126 [00:00<?, ?it/s]

Warmup loss: 4.650972366333008





In [46]:
def generate_text(model, start_str, length=200):
    model.eval()
    input_idxs = [char2idx[ch] for ch in start_str]
    input_tensor = torch.tensor(input_idxs, dtype=torch.long, device=device).unsqueeze(0)
    hidden = None
    output_text = start_str
    for _ in range(length):
        out, hidden = model(input_tensor, hidden)
        # Take the last timestep's output
        last_logits = out[0, -1, :].cpu().detach().numpy()
        # Sample from the softmax distribution
        probs = np.exp(last_logits - np.max(last_logits))
        probs /= probs.sum()
        next_idx = np.random.choice(range(vocab_size), p=probs)
        output_text += idx2char[next_idx]
        input_tensor = torch.tensor([[next_idx]], dtype=torch.long, device=device)
    return output_text

In [47]:
def generate_text_during_train(model, char_to_idx, idx_to_char, seed_text="The", length=200, temperature=1.0):
    # Remember the current training state
    was_training = model.training
    model.eval()
    
    with torch.no_grad():
        # Initialize hidden state
        h_0 = torch.zeros(num_layers, 1, hidden_size).to(device)
        c_0 = torch.zeros(num_layers, 1, hidden_size).to(device)
        hidden = (h_0, c_0)
        
        # Convert seed text to indices
        input_seq = [char_to_idx.get(c, 0) for c in seed_text]
        generated = seed_text
        
        # Process seed text through the model
        for char_idx in input_seq[:-1]:
            input_tensor = torch.tensor([[char_idx]]).to(device)
            _, hidden = model(input_tensor, hidden)
        
        # Generate new characters
        input_tensor = torch.tensor([[input_seq[-1]]]).to(device)
        
        for _ in range(length):
            log_probs, hidden = model(input_tensor, hidden)
            probs = torch.softmax(log_probs.squeeze() / temperature, dim=0)
            next_char_idx = torch.multinomial(probs, 1).item()
            next_char = idx_to_char[next_char_idx]
            generated += next_char
            input_tensor = torch.tensor([[next_char_idx]]).to(device)
    
    # Restore the original training state
    model.train(was_training)
    return generated

In [48]:
print(generate_text_during_train(model, char2idx, idx2char, "I am learning to speak!",100))
for epoch in range(epochs):
    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)

        h_0 = torch.zeros(num_layers, batch_size, hidden_size).to(device)
        c_0 = torch.zeros(num_layers, batch_size, hidden_size).to(device)
        hidden = (h_0, c_0)
        
        with torch.amp.autocast('cuda'):
            log_probs, _ = model(inputs, hidden)
            loss = criterion(log_probs.view(-1, vocab_size), targets.view(-1))

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

        progress_bar.set_postfix(loss=loss.item())

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.4f}")
    print(generate_text_during_train(model, char2idx, idx2char, "I am learning to speak!",100))


I am learning to speak!vôAJ7qâ2ë6úç2çiksé=)ÀaNy;á‘àæ:Y’ýöbýöz713éB—dákd6ghPcO4
7pá”YVâSgëâöckB(‘ú‘Uç8Fhïq2 hi-sï9P*njóÉb07œ


Epoch 1/20: 100%|██████████| 3126/3126 [15:00<00:00,  3.47it/s, loss=1.16]


Epoch 1/20, Avg Loss: 1.5607
I am learning to speak!” said
Pierre.

“The died travel and think and pluy because he is so heard and suddenly at the clear


Epoch 2/20: 100%|██████████| 3126/3126 [14:57<00:00,  3.48it/s, loss=1.01] 


Epoch 2/20, Avg Loss: 1.0700
I am learning to speak!” From the table,
with which he adjusted he gave her “Uncle”” whispered the officer,
entering the st


Epoch 3/20: 100%|██████████| 3126/3126 [14:58<00:00,  3.48it/s, loss=0.924]


Epoch 3/20, Avg Loss: 0.9570
I am learning to speak!”

He indicated the firm guardship, who had no idea by Nicholas grew
more more and more secondly, wh


Epoch 4/20: 100%|██████████| 3126/3126 [14:57<00:00,  3.48it/s, loss=0.87] 


Epoch 4/20, Avg Loss: 0.8875
I am learning to speak!” Natásha did not suppose he never obliged to
speak to him handing it down to take leave of the hut.


Epoch 5/20: 100%|██████████| 3126/3126 [14:42<00:00,  3.54it/s, loss=0.87] 


Epoch 5/20, Avg Loss: 0.8671
I am learning to speak!
This world should not have to bring the Rostóvs,” said Mortemart, “but
tell me, I’ll tell you what 


Epoch 6/20: 100%|██████████| 3126/3126 [14:54<00:00,  3.49it/s, loss=0.863]


Epoch 6/20, Avg Loss: 0.8671
I am learning to speak!” said Pierre. (He was lying in the
room till with which Pierre sat on the back of his chest and pit


Epoch 7/20: 100%|██████████| 3126/3126 [14:44<00:00,  3.54it/s, loss=0.867]


Epoch 7/20, Avg Loss: 0.8671
I am learning to speak!”

Then called Dáníchs through the doorway.

“Ah!” exclaimed Prince Andrew as if inquiring those dis


Epoch 8/20: 100%|██████████| 3126/3126 [14:44<00:00,  3.53it/s, loss=0.866]


Epoch 8/20, Avg Loss: 0.8671
I am learning to speak!”

Dólokhov smiled his hand jauntily and nuskey. “I ought to find out
where you’ve shot at each othe


Epoch 9/20: 100%|██████████| 3126/3126 [14:44<00:00,  3.53it/s, loss=0.863]


Epoch 9/20, Avg Loss: 0.8670
I am learning to speak!” thought Nicholas, “arranging myself alone
in the roging-horse?” said he angrily and, pointing to t


Epoch 10/20: 100%|██████████| 3126/3126 [14:44<00:00,  3.53it/s, loss=0.869]


Epoch 10/20, Avg Loss: 0.8671
I am learning to speak!”

“Well, now you, she’s frienda, thanks to your houses, it’s burned.
What has been coming so that w


Epoch 11/20: 100%|██████████| 3126/3126 [14:58<00:00,  3.48it/s, loss=0.862]


Epoch 11/20, Avg Loss: 0.8671
I am learning to speak!”

“You, brothers? What lives were the more very bad and I do not go away
and only distinguish us.”



Epoch 12/20: 100%|██████████| 3126/3126 [16:21<00:00,  3.19it/s, loss=0.87] 


Epoch 12/20, Avg Loss: 0.8670
I am learning to speak!” Natásha began, uped to the
sounds in front of the cook, which were dreaving outside, was vexed by



Epoch 13/20: 100%|██████████| 3126/3126 [16:20<00:00,  3.19it/s, loss=0.866]


Epoch 13/20, Avg Loss: 0.8671
I am learning to speak!” said the count. “And what marriage
is the more former spirits of Berg and Willarski counter? My wi


Epoch 14/20: 100%|██████████| 3126/3126 [16:23<00:00,  3.18it/s, loss=0.866]


Epoch 14/20, Avg Loss: 0.8671
I am learning to speak!”

“I’m not displeased that your Serene Highness wish,” he continued,
“I beg you to consider that yo


Epoch 15/20: 100%|██████████| 3126/3126 [16:22<00:00,  3.18it/s, loss=0.875]


Epoch 15/20, Avg Loss: 0.8671
I am learning to speak!”

“But what of I?” reproached him of the prince, “which I should ask
for Bald Hills ones, and no wi


Epoch 16/20: 100%|██████████| 3126/3126 [16:23<00:00,  3.18it/s, loss=0.867]


Epoch 16/20, Avg Loss: 0.8671
I am learning to speak!” And Anmay,
smiling rapidly and searchingly and confused.


Meanwhile did not real the position of 


Epoch 17/20: 100%|██████████| 3126/3126 [16:23<00:00,  3.18it/s, loss=0.867]


Epoch 17/20, Avg Loss: 0.8670
I am learning to speak!” said he. “Womenín, my dear
sir, our girls have not sent up the field, so that’s war!” thought he.



Epoch 18/20: 100%|██████████| 3126/3126 [16:23<00:00,  3.18it/s, loss=0.875]


Epoch 18/20, Avg Loss: 0.8671
I am learning to speak!”

“Well, and how did you fail to be written with such surjotical, enlightenment
and catches? But if


Epoch 19/20: 100%|██████████| 3126/3126 [16:22<00:00,  3.18it/s, loss=0.866]


Epoch 19/20, Avg Loss: 0.8671
I am learning to speak!” he thought.

Again pleased by a stone calm and appearance Frenchmen convering so calmly
of the duf


Epoch 20/20: 100%|██████████| 3126/3126 [16:22<00:00,  3.18it/s, loss=0.865]

Epoch 20/20, Avg Loss: 0.8671
I am learning to speak! I will tell you, my dear boy—I don’t
understand that something was in somebody!” said Nicholas. “Th





In [55]:
seed = "My opinion on cats is"
sample_text = generate_text(model, seed, length=500)
print("Sample generated text:\n", sample_text)

Sample generated text:
 My opinion on cats is outside with us; then he is considered an
annihilatement of the merits of the and, and it was not honest in Speránski
himported the Preobrazhénsk battalion but had already vanished.

“I can’t bear the carriage,” said he.

“To leave off,” said the count, crushing either from under her
brows and looked with animation. At last, Pétya and Borís, who
ever shrugged his steps.

“Only fell from behind our knappens,” the prince struck a braker on
his knees as if trying to make, go on, gave his horse to 


In [50]:
#Lower complexity params
seq_len = 100
batch_size = 512
epochs = 20
hidden_size = 128
num_layers = 2
embed_size = 64

dataset = CharDataset(text, seq_len, char2idx)
dataloader2 = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model2 = CharLSTM(vocab_size, embed_size, hidden_size, num_layers).to(device)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=7e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.5)

scaler = torch.amp.GradScaler('cuda')

for inputs, targets in tqdm(dataloader, desc="Warmup Batch"):
    inputs, targets = inputs.to(device), targets.to(device)

    with torch.no_grad():  # Add this - prevents gradient computation
        with torch.amp.autocast('cuda'):
            log_probs, _ = model2(inputs)
            loss = criterion(log_probs.view(-1, vocab_size), targets.view(-1))
    
    print(f"Warmup loss: {loss.item()}")
    break 


for epoch in range(epochs):
    total_loss = 0
    progress_bar = tqdm(dataloader2, desc=f"Epoch {epoch+1}/{epochs}")
    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)

        h_0 = torch.zeros(num_layers, batch_size, hidden_size).to(device)
        c_0 = torch.zeros(num_layers, batch_size, hidden_size).to(device)
        hidden = (h_0, c_0)
        
        with torch.amp.autocast('cuda'):
            log_probs, _ = model2(inputs, hidden)
            loss = criterion(log_probs.view(-1, vocab_size), targets.view(-1))

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

        progress_bar.set_postfix(loss=loss.item())

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.4f}")
    print(generate_text_during_train(model2, char2idx, idx2char, "I am learning to speak!", 100))


Warmup Batch:   0%|          | 0/3126 [00:00<?, ?it/s]


Warmup loss: 4.657007217407227


Epoch 1/20: 100%|██████████| 6252/6252 [05:19<00:00, 19.57it/s, loss=1.39]


Epoch 1/20, Avg Loss: 3.2781
I am learning to speak!” “It’s not a beginned to chance the restage! But I say that what I can’t have was up this
down to s


Epoch 2/20: 100%|██████████| 6252/6252 [05:04<00:00, 20.55it/s, loss=1.31]


Epoch 2/20, Avg Loss: 2.6793
I am learning to speak! Womet—you’rl broken awrething scream?..”

In those lay a
tran and cald up themselves, the question 


Epoch 3/20: 100%|██████████| 6252/6252 [05:13<00:00, 19.97it/s, loss=1.3] 


Epoch 3/20, Avg Loss: 2.6090
I am learning to speak!” which the doors with the other, as in the exactly
proud
sciencity round Alexander
met him as the p


Epoch 4/20: 100%|██████████| 6252/6252 [04:58<00:00, 20.98it/s, loss=1.3] 


Epoch 4/20, Avg Loss: 2.6068
I am learning to speak! No one is quite.... Hvand was an old unimpose, conjusured.”

“As alone know when the letter is that


Epoch 5/20: 100%|██████████| 6252/6252 [05:09<00:00, 20.19it/s, loss=1.32]


Epoch 5/20, Avg Loss: 2.6069
I am learning to speak! I must stir.”

“She was quite belonged how how Seens of the Russians were finished. What Napoleon R


Epoch 6/20: 100%|██████████| 6252/6252 [04:59<00:00, 20.88it/s, loss=1.31]


Epoch 6/20, Avg Loss: 2.6068
I am learning to speak! It’s with you? Plass wail my effect you when my pitent
before the bad,” cried
Denísov. At that powe


Epoch 7/20: 100%|██████████| 6252/6252 [05:08<00:00, 20.29it/s, loss=1.31]


Epoch 7/20, Avg Loss: 2.6068
I am learning to speak! We’ll cause it seems to be use’s mystacter in the
rose,” he added, had confisured for glone conside


Epoch 8/20: 100%|██████████| 6252/6252 [05:00<00:00, 20.79it/s, loss=1.3] 


Epoch 8/20, Avg Loss: 2.6067
I am learning to speak! What is?” said very and conquerwing abreading
his own daughter.

“Toifil, you know that’s intate
I 


Epoch 9/20: 100%|██████████| 6252/6252 [05:05<00:00, 20.47it/s, loss=1.3] 


Epoch 9/20, Avg Loss: 2.6067
I am learning to speak! Why; have you like frame an. Had left Mérya Michaul Here decided to do now I see on the little arti


Epoch 10/20: 100%|██████████| 6252/6252 [05:01<00:00, 20.74it/s, loss=1.29]


Epoch 10/20, Avg Loss: 2.6067
I am learning to speak! Grand is guilty....”

“Oh!” he said to understand with her’s began do all before on the tears.

“I 


Epoch 11/20: 100%|██████████| 6252/6252 [05:05<00:00, 20.45it/s, loss=1.31]


Epoch 11/20, Avg Loss: 2.6068
I am learning to speak! An artions pressed, your case.





CHAPTER VIII

He was behand and bulded here
so her to the possi


Epoch 12/20: 100%|██████████| 6252/6252 [05:02<00:00, 20.70it/s, loss=1.31]


Epoch 12/20, Avg Loss: 2.6067
I am learning to speak! Petersburg...”

He did not did not bonead which had once could my difficult expression of the laws 


Epoch 13/20: 100%|██████████| 6252/6252 [05:03<00:00, 20.62it/s, loss=1.31]


Epoch 13/20, Avg Loss: 2.6067
I am learning to speak! I know the Fulder,
I are you done a feeling youth,” said the onficement ragh father, prisoning, and


Epoch 14/20: 100%|██████████| 6252/6252 [05:01<00:00, 20.74it/s, loss=1.31]


Epoch 14/20, Avg Loss: 2.6068
I am learning to speak!” asked Pétya now as if waiting past the first ordinary
confusions, already said with the street, sm


Epoch 15/20: 100%|██████████| 6252/6252 [05:01<00:00, 20.74it/s, loss=1.29]


Epoch 15/20, Avg Loss: 2.6068
I am learning to speak!”

“But she won’t now have it enjoyment—fornure their accustinate!”

Andrew knew them held in which 


Epoch 16/20: 100%|██████████| 6252/6252 [05:03<00:00, 20.58it/s, loss=1.3] 


Epoch 16/20, Avg Loss: 2.6068
I am learning to speak!” he reproancing Dólokhov’s company.

“And you have chelt this, would be asked our mensalle,” though


Epoch 17/20: 100%|██████████| 6252/6252 [04:59<00:00, 20.85it/s, loss=1.3] 


Epoch 17/20, Avg Loss: 2.6068
I am learning to speak!” answered Pierre.

At that
had even fished to him, with at a little destricied him of his little an


Epoch 18/20: 100%|██████████| 6252/6252 [05:07<00:00, 20.35it/s, loss=1.3] 


Epoch 18/20, Avg Loss: 2.6068
I am learning to speak!”

Anna Pávlov, laughing was eachlifition on the Petroos, and Vinílirs with several table,
who thoug


Epoch 19/20: 100%|██████████| 6252/6252 [04:55<00:00, 21.13it/s, loss=1.31]


Epoch 19/20, Avg Loss: 2.6068
I am learning to speak!” whispered, they did not what the fiftyout Et her hand this
heed, is gave.”

“Well, back Ilyín gay 


Epoch 20/20: 100%|██████████| 6252/6252 [05:09<00:00, 20.17it/s, loss=1.29]

Epoch 20/20, Avg Loss: 2.6068
I am learning to speak!”

“Why, I’ve
was in bguoth to them?” And
quite time he did he not be happening in the sound of
in h





In [60]:
import os
for file in os.listdir('J.S.MILL/'):
    file_path = os.path.join('J.S.MILL/', file)
    with open(file_path, 'r', encoding='utf-8') as f:
        textJSM += f.read() + "\n"

In [66]:
charsJSM = sorted(set(textJSM))
vocab_sizeJSM = len(charsJSM)
char2idxJSM = { ch:i for i,ch in enumerate(charsJSM) }
idx2charJSM = { i:ch for i,ch in enumerate(charsJSM) }
print(f"Text length: {len(textJSM)}, Unique chars: {vocab_sizeJSM}")

Text length: 3156593, Unique chars: 137


In [73]:
#Params
seq_len = 100
batch_size = 512
epochs = 5
hidden_size = 1024
num_layers = 4
embed_size = 256

In [68]:
dataset = CharDataset(textJSM, seq_len, char2idxJSM)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CharLSTM(vocab_sizeJSM, embed_size, hidden_size, num_layers).to(device)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=7e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.5)

In [69]:
scaler = torch.amp.GradScaler('cuda')

for inputs, targets in tqdm(dataloader, desc="Warmup Batch"):
    inputs, targets = inputs.to(device), targets.to(device)

    with torch.no_grad():  # Add this - prevents gradient computation
        with torch.amp.autocast('cuda'):
            log_probs, _ = model(inputs)
            loss = criterion(log_probs.view(-1, vocab_sizeJSM), targets.view(-1))
    
    print(f"Warmup loss: {loss.item()}")
    break 

Warmup Batch:   0%|          | 0/6165 [00:00<?, ?it/s]

Warmup loss: 4.918237209320068





In [74]:
print(generate_text_during_train(model, char2idxJSM, idx2charJSM, "I am learning to speak!",100))
for epoch in range(epochs):
    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)

        h_0 = torch.zeros(num_layers, batch_size, hidden_size).to(device)
        c_0 = torch.zeros(num_layers, batch_size, hidden_size).to(device)
        hidden = (h_0, c_0)
        
        with torch.amp.autocast('cuda'):
            log_probs, _ = model(inputs, hidden)
            loss = criterion(log_probs.view(-1, vocab_sizeJSM), targets.view(-1))

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

        progress_bar.set_postfix(loss=loss.item())

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.4f}")
    print(generate_text_during_train(model, char2idxJSM, idx2charJSM, "I am learning to speak!",100))


I am learning to speak!nro ldbc c
prTt tndees hieemco re tthhrra,oe reosd,l yorvln eaan8sghobtoinhnoroteatvitocsmt;f,SrmIrf


Epoch 1/5: 100%|██████████| 6165/6165 [48:26<00:00,  2.12it/s, loss=0.778]


Epoch 1/5, Avg Loss: 1.1225
I am learning to speak! Never he feels himself
support the liberty of those who possess, and improningencially, party
more 


Epoch 2/5: 100%|██████████| 6165/6165 [47:30<00:00,  2.16it/s, loss=0.557]


Epoch 2/5, Avg Loss: 0.6448
I am learning to speak! on
the other hand, let me, or still more important fact, on the other hand,
because they do not req


Epoch 3/5: 100%|██████████| 6165/6165 [46:52<00:00,  2.19it/s, loss=0.544]


Epoch 3/5, Avg Loss: 0.5466
I am learning to speak! in such a country, and is a most important
element in the act of Justice, which entitles he sees wr


Epoch 4/5: 100%|██████████| 6165/6165 [46:50<00:00,  2.19it/s, loss=0.556]


Epoch 4/5, Avg Loss: 0.5466
I am learning to speak! on the contrary, all
despotism has to be recognized, and is carried into effect in the middle ages,


Epoch 5/5: 100%|██████████| 6165/6165 [46:50<00:00,  2.19it/s, loss=0.547]

Epoch 5/5, Avg Loss: 0.5466
I am learning to speak! from these exertions, in this country, does not
fall in any time more than high plate; and the more





In [76]:
def generate_text(model, start_str, length=200):
    model.eval()
    input_idxs = [char2idxJSM[ch] for ch in start_str]
    input_tensor = torch.tensor(input_idxs, dtype=torch.long, device=device).unsqueeze(0)
    hidden = None
    output_text = start_str
    for _ in range(length):
        out, hidden = model(input_tensor, hidden)
        # Take the last timestep's output
        last_logits = out[0, -1, :].cpu().detach().numpy()
        # Sample from the softmax distribution
        probs = np.exp(last_logits - np.max(last_logits))
        probs /= probs.sum()
        next_idx = np.random.choice(range(vocab_sizeJSM), p=probs)
        output_text += idx2charJSM[next_idx]
        input_tensor = torch.tensor([[next_idx]], dtype=torch.long, device=device)
    return output_text

In [81]:
seed = "My opinion on cats is"
sample_text = generate_text(model, seed, length=500)
print("Sample generated text:\n", sample_text)

Sample generated text:
 My opinion on cats is a true belief that Germany remain
under the study of equality. This remarkable condition of society, it is
because it is a selfish object; but in all things of which the qualities
established by a person of men is good at all; or, at all events, it
does not, but has probably drawn not only to indefvenifely like the
    character of the one as before. In the cases we may prevent the
    chart, and in other ways, incident to it, and our disapprobation alone.
    Indirect taxes are:
On Secondly, t


In [90]:
#Params - Decrease embedding and hidden dimensions but increase  the number of layers
seq_len = 100
batch_size = 256
epochs = 5
hidden_size = 256
num_layers = 8
embed_size = 128

dataset = CharDataset(textJSM, seq_len, char2idxJSM)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CharLSTM(vocab_sizeJSM, embed_size, hidden_size, num_layers).to(device)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=7e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.5)

scaler = torch.amp.GradScaler('cuda')

for inputs, targets in tqdm(dataloader, desc="Warmup Batch"):
    inputs, targets = inputs.to(device), targets.to(device)

    with torch.no_grad():  # Add this - prevents gradient computation
        with torch.amp.autocast('cuda'):
            log_probs, _ = model(inputs)
            loss = criterion(log_probs.view(-1, vocab_sizeJSM), targets.view(-1))
    
    print(f"Warmup loss: {loss.item()}")
    break 

print(generate_text_during_train(model, char2idxJSM, idx2charJSM, "I am learning to speak!",100))
for epoch in range(epochs):
    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)

        h_0 = torch.zeros(num_layers, batch_size, hidden_size).to(device)
        c_0 = torch.zeros(num_layers, batch_size, hidden_size).to(device)
        hidden = (h_0, c_0)
        
        with torch.amp.autocast('cuda'):
            log_probs, _ = model(inputs, hidden)
            loss = criterion(log_probs.view(-1, vocab_sizeJSM), targets.view(-1))

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

        progress_bar.set_postfix(loss=loss.item())

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.4f}")
    print(generate_text_during_train(model, char2idxJSM, idx2charJSM, "I am learning to speak!",100))


Warmup Batch:   0%|          | 0/12330 [00:00<?, ?it/s]

Warmup Batch:   0%|          | 0/12330 [00:00<?, ?it/s]


Warmup loss: 4.939479827880859
I am learning to speak!ηw2p%Bxηο]τDιëτ¼æ;bR1d7vKά=SλJO!kUL&•$kV3χCBi2YFôZ?™B‘afκH*?’’(ô;uςHτv(σ5“D½JT$fzd¾M;-â&—aφῆxkHN™ι§e


Epoch 1/5: 100%|██████████| 12330/12330 [14:01<00:00, 14.65it/s, loss=1.16]


Epoch 1/5, Avg Loss: 1.5707
I am learning to speak! Admits
with her thousand -power to produce alchadized the power of
the same, that the supply of the


Epoch 2/5: 100%|██████████| 12330/12330 [13:45<00:00, 14.93it/s, loss=1.15]


Epoch 2/5, Avg Loss: 1.1351
I am learning to speak! Where leading
manufacturing constituency has adricially five and look out
what is really requires. 


Epoch 3/5: 100%|██████████| 12330/12330 [13:50<00:00, 14.86it/s, loss=1.14]


Epoch 3/5, Avg Loss: 1.1351
I am learning to speak! Are call at answer to the
public wider opportunity of thrown and five indication that the
community


Epoch 4/5: 100%|██████████| 12330/12330 [14:12<00:00, 14.47it/s, loss=1.13]


Epoch 4/5, Avg Loss: 1.1351
I am learning to speak! These words
    on wages), if prey modemate precisely iv to say for the stote,
    and state that t


Epoch 5/5: 100%|██████████| 12330/12330 [13:44<00:00, 14.96it/s, loss=1.13]


Epoch 5/5, Avg Loss: 1.1351
I am learning to speak!

The power was the most motive in those sexs, and that, as it was
not accepted) to all duty without


In [89]:
seed = "My opinion on cats is"
sample_text = generate_text(model, seed, length=500)
print("Sample generated text:\n", sample_text)

Sample generated text:
 My opinion on cats is an equality to their
spirits, and silver, except both on the amount, not so that no
notions of the produce of banks, and he has
avrigured to persons.

I come to account the same trantasion).

The upon Christianity does not enable connection, you will acfuse the principle of
laborers owning this employed foreign metallic
upward to its acts, and the position of
the seeming as the exchange is foreign mode of
greatest absolute waranties a subject, however, no
man of the total selfishfores, and affe
