In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader, Subset
import tqdm
import json
import datasets
from typing import List
import os
import pandas as pd
import tiktoken
import inspect

In [2]:
!mkdir data/

mkdir: cannot create directory ‘data/’: File exists


In [3]:
encoding = tiktoken.get_encoding("gpt2")

In [4]:
encoding.n_vocab

50257

In [5]:
encoding._special_tokens

{'<|endoftext|>': 50256}

In [6]:
class Tokenizer:
    def __init__(self, tokenizer_model="gpt2"):
        gpt2_enc = tiktoken.get_encoding(tokenizer_model)
        self.enc = tiktoken.Encoding(
            name=tokenizer_model,
            pat_str=gpt2_enc._pat_str,
            mergeable_ranks=gpt2_enc._mergeable_ranks,
			special_tokens={
                **gpt2_enc._special_tokens,
                "PAD": 50257,
			},
		)
        self.tokenizer_model = tokenizer_model

        self.n_words = self.enc.n_vocab
        self.bos_id = None
        self.eos_id = self.enc.eot_token
        self.pad_id = self.enc._special_tokens["PAD"]

    def encode(self, s: str, bos: bool = False, eos: bool = False) -> List[int]:
        t = self.enc.encode(s)
        if bos and self.bos_id is not None:
            t = [self.bos_id] + t
        if eos and self.eos_id is not None:
            t = t + [self.eos_id]
        return t

    def decode(self, tokens: List[int]) -> str:
        return self.enc.decode(tokens)

In [7]:
tokenizer = Tokenizer(tokenizer_model="gpt2")

In [8]:
tokenizer.n_words

50258

In [9]:
vocab_size = 50304
batch_size = 16
block_size = 512
max_iters = 1
eval_interval = 1000
learning_rate = 3e-4
device = 'cuda' if torch.cuda.is_available() else 'cpu'
eval_iters = 256
n_embd = 512
n_head = 8
n_layer = 8
dropout = 0.3

target_batch_size = 8192
gradient_accumulation_steps = target_batch_size // batch_size
weight_decay = 1e-1
beta1 = 0.9
beta2 = 0.95

In [10]:
gradient_accumulation_steps

512

In [11]:
torch.set_float32_matmul_precision('high')

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# if torch.cuda.device_count() > 1:
#     print(f"Using {torch.cuda.device_count()} GPUs!")

In [13]:
def encode(s): return tokenizer.encode(s, bos=False, eos=False)

def decode(l):
	try:
		return tokenizer.decode(l)
	except:
		return ""

In [14]:
ds = datasets.load_dataset("roneneldan/TinyStories")

In [15]:
ds = ds.with_format("torch")

In [16]:
ds['train'][1]

{'text': 'Once upon a time, there was a little car named Beep. Beep loved to go fast and play in the sun. Beep was a healthy car because he always had good fuel. Good fuel made Beep happy and strong.\n\nOne day, Beep was driving in the park when he saw a big tree. The tree had many leaves that were falling. Beep liked how the leaves fall and wanted to play with them. Beep drove under the tree and watched the leaves fall on him. He laughed and beeped his horn.\n\nBeep played with the falling leaves all day. When it was time to go home, Beep knew he needed more fuel. He went to the fuel place and got more healthy fuel. Now, Beep was ready to go fast and play again the next day. And Beep lived happily ever after.'}

In [17]:
# def collate_fn(batch):
#     texts = [encode(item['text'])[:block_size] for item in batch]  # Truncate to block_size
#     padded_texts = [t + [0] * (block_size - len(t)) for t in texts]  # Pad to 512
#     return {
#         'text': torch.tensor(padded_texts, dtype=torch.long)
#     }

# def collate_fn(batch):
#     texts = [encode(item['text']) for item in batch]
#     # add BOS/EOS if you like:
#     texts = [[tokenizer.bos_id] + t + [tokenizer.eos_id] for t in texts]
#     # clip or leave as is
#     maxlen = max(len(t) for t in texts)
#     padded = [t + [tokenizer.pad_id] * (maxlen - len(t)) for t in texts]
#     attention_masks = [[1]*len(t) + [0]*(maxlen-len(t)) for t in texts]
#     return {
#       'input_ids': torch.tensor(padded, dtype=torch.long),
#       'attention_mask': torch.tensor(attention_masks, dtype=torch.long),
#     }

def collate_fn(batch):
    texts = [encode(item['text'])[:block_size+1] for item in batch]  # Get one extra token for targets
    # Create inputs and targets with a 1-token shift
    batch_data = []
    for text in texts:
        if len(text) <= 1:  # Skip sequences that are too short
            continue
        
        # Use all but the last token as input
        input_text = text[:-1]
        # Use all but the first token as target (shifted by 1)
        target_text = text[1:]
        
        # Pad to block_size if needed
        if len(input_text) < block_size:
            input_text = input_text + [0] * (block_size - len(input_text))
        if len(target_text) < block_size:
            target_text = target_text + [0] * (block_size - len(target_text))
            
        batch_data.append({
            'input': torch.tensor(input_text, dtype=torch.long),
            'target': torch.tensor(target_text, dtype=torch.long)
        })
    
    # Stack all batch items
    if not batch_data:  # Handle empty batch case
        return None
    
    return {
        'input': torch.stack([item['input'] for item in batch_data]),
        'target': torch.stack([item['target'] for item in batch_data])
    }

In [18]:
eval_iters

256

In [19]:
subset_indices = list(range(eval_iters))
dataset_valid = Subset(ds['validation'], subset_indices)

In [20]:
train_dataloader = DataLoader(ds['train'], batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
valid_dataloader = DataLoader(dataset_valid, batch_size=batch_size, collate_fn=collate_fn)

In [21]:
def generate_square_subsequent_mask(sz):
    """
    Generates a causal (upper-triangular) mask for a sequence of length 'sz'.
    Positions with True (or -inf when using additive masks) will be masked.
    Here, we create an additive mask with -inf for masked positions.
    """
    mask = torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)
    return mask

class Block(nn.Module):
    """Transformer block using PyTorch's MultiheadAttention with an explicit causal mask."""
    def __init__(self, n_embd, n_head):
        super().__init__()
        # PyTorch's MultiheadAttention
        self.attn = nn.MultiheadAttention(
            embed_dim=n_embd,
            num_heads=n_head,
            dropout=dropout,
            batch_first=True  # Expect input as (batch, seq, feature)
        )
        
        # Feed-forward network
        self.ffwd = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )
        
        # Layer normalization layers
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)
        
    def forward(self, x):
        # x has shape (B, T, C)
        T = x.size(1)
        
        # Pre-LayerNorm for attention
        x_ln = self.ln1(x)
        # Create a causal mask explicitly for the current sequence length
        causal_mask = generate_square_subsequent_mask(T).to(x.device)
        
        # Self-attention: note that we pass attn_mask instead of is_causal
        attn_output, _ = self.attn(
            query=x_ln,
            key=x_ln,
            value=x_ln,
            attn_mask=causal_mask,  # Using the explicit causal mask here
            need_weights=False
        )
        x = x + attn_output
        
        # Feed-forward block with pre-LayerNorm
        x = x + self.ffwd(self.ln2(x))
        
        return x

class GPTLanguageModel(nn.Module):
    def __init__(self):
        super().__init__()
        # Token and position embeddings
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        
        # Transformer blocks
        self.blocks = nn.ModuleList([Block(n_embd, n_head) for _ in range(n_layer)])
        
        # Final layer normalization and output projection
        self.ln_f = nn.LayerNorm(n_embd)
        self.lm_head = nn.Linear(n_embd, vocab_size, bias=False)
        
		# Initialize weights for Linear and Embedding layers
        self.apply(self._init_weights)

        # Weight tying: share the weight matrix between token embeddings and the output projection
        self.token_embedding_table.weight = self.lm_head.weight

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx, targets=None):
        B, T = idx.shape
        
        # Obtain token embeddings and add positional embeddings
        tok_emb = self.token_embedding_table(idx)  # (B, T, C)
        pos_emb = self.position_embedding_table(torch.arange(T, device=idx.device))  # (T, C)
        x = tok_emb + pos_emb  # (B, T, C)
        
        # Pass through transformer blocks
        for block in self.blocks:
            x = block(x)  # (B, T, C)
            
        # Final layer normalization and output projection to logits
        x = self.ln_f(x)  # (B, T, C)
        logits = self.lm_head(x)  # (B, T, vocab_size)

        # Compute loss if targets are provided
        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B * T, C)
            targets = targets.view(B * T)
            loss = F.cross_entropy(logits, targets)
            
        return logits, loss

    def generate(self, idx, max_new_tokens):
        """
        Given a sequence of indices 'idx', generate 'max_new_tokens' new tokens.
        """
        for _ in range(max_new_tokens):
            # Crop the sequence to the last block_size tokens
            idx_cond = idx[:, -block_size:]
            # Get predictions
            logits, _ = self(idx_cond)
            # Focus only on the last time step
            logits = logits[:, -1, :]  # (B, vocab_size)
            # Convert logits to probabilities
            probs = F.softmax(logits, dim=-1)  # (B, vocab_size)
            # Sample from the probability distribution
            idx_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
            # Append the new token to the sequence
            idx = torch.cat((idx, idx_next), dim=1)  # (B, T+1)
        return idx

In [22]:
torch.cuda.empty_cache()

In [23]:
model = GPTLanguageModel()

# if torch.cuda.device_count() > 1:
#     model = torch.nn.DataParallel(model)

model = model.to(device)
model = torch.compile(model)
# print the number of parameters in the model
print(sum(p.numel() for p in model.parameters())/1e6, 'M parameters')

51.237888 M parameters


In [24]:
fused_available = 'fused' in inspect.signature(torch.optim.AdamW).parameters
use_fused = fused_available and 'cuda' == str(device)
print(f"{use_fused=}")

use_fused=True


In [25]:
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay, betas=(beta1, beta2), eps=1e-8, fused=use_fused)

In [26]:
# T_max = len(train_dataloader)
# warmup_steps = 0.01 * T_max
# scheduler = lr_scheduler.OneCycleLR(
#     optimizer, max_lr=4e-4, total_steps=T_max, pct_start=0.01
# )
# optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay, betas=(beta1, beta2), eps=1e-8, fused=use_fused)
true_total_steps = len(train_dataloader) // gradient_accumulation_steps
scheduler = lr_scheduler.OneCycleLR(
    optimizer, max_lr=8e-4, total_steps=true_total_steps, pct_start=0.05
)

# # Cosine Annealing Scheduler
# scheduler = lr_scheduler.CosineAnnealingLR(
#     optimizer,
# 	T_max=len(train_dataloader) // gradient_accumulation_steps,
# 	eta_min=3e-5,
# )


In [27]:
# eval_interval = len(train_dataloader) // 5
# eval_interval

In [28]:
os.makedirs("ckpt/", exist_ok=True)

In [29]:
str(device)

'cuda'

In [30]:
sample = tokenizer.decode(tokenizer.encode(ds["train"][0]["text"][:100], bos=True, eos=True))
sample

'One day, a little girl named Lily found a needle in her room. She knew it was difficult to play with<|endoftext|>'

In [31]:
def generate(model, idx, max_new_tokens):
    for _ in range(max_new_tokens):
        # crop idx to the last block_size tokens
        idx_cond = idx[:, -block_size:]
        # get the predictions
        logits, loss = model(idx_cond)
        # focus only on the last time step
        logits = logits[:, -1, :]  # becomes (B, C)
        # apply softmax to get probabilities
        probs = F.softmax(logits, dim=-1)  # (B, C)
        # sample from the distribution
        idx_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
        # append sampled index to the running sequence
        idx = torch.cat((idx, idx_next), dim=1)  # (B, T+1)
    return idx

In [32]:
gradient_accumulation_steps, batch_size, target_batch_size

(512, 16, 8192)

In [33]:
with open("losses.txt", "w") as f:
	f.write("Step,Learing Rate,Training Loss,Validation Loss,Output\n")

In [34]:
for iter, batch in enumerate(tqdm.notebook.tqdm(train_dataloader, total=len(train_dataloader))):
    # inputs, targets = batch['text'], batch['text']
    inputs = batch['input'][:, :-1]
    targets = batch['input'][:, 1:]
    inputs, targets = inputs.to(device), targets.to(device)

    with torch.autocast(device_type=str(device), dtype=torch.bfloat16):
        logits, _ = model(inputs)
        loss = F.cross_entropy(
			logits.view(-1, logits.size(-1)),
			targets.view(-1),
			ignore_index=tokenizer.pad_id
		)

    loss = loss / gradient_accumulation_steps
    loss.backward()

    norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    
    if (iter + 1) % gradient_accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()

    if iter % (gradient_accumulation_steps * 2) == 0 or iter == max_iters - 1:
        print(f"\nStep {iter}: Performing validation")
        print(f"Learning rate: {scheduler.get_last_lr()[0]:.6f}")
        model.eval()
        with torch.no_grad():
            val_loss = 0
            train_loss = loss.item() * gradient_accumulation_steps
            for batch in tqdm.notebook.tqdm(valid_dataloader, total=len(valid_dataloader)):
                # inputs, targets = batch['text'], batch['text']
                inputs = batch['input'][:, :-1]
                targets = batch['input'][:, 1:]
                inputs, targets = inputs.to(device), targets.to(device)
                logits, _ = model(inputs)
                loss = F.cross_entropy(
                    logits.view(-1, logits.size(-1)),
					targets.view(-1),
					ignore_index=tokenizer.pad_id
				)
                val_loss += loss.item()

            torch.save(model.state_dict(), f"ckpt/ckpt_{iter}.pt")
            print(f"Train loss: {train_loss:.4f}")
            print(f"Validation loss: {val_loss / len(valid_dataloader):.4f}")

            prompt = "One day, a "
            prompt = torch.tensor([encode(prompt)], dtype=torch.long, device=device)
            output = decode(generate(model, prompt, max_new_tokens=50)[0].tolist())
            print(output)
            output = output.replace("\n", "\n")
            output = output.replace('"', "'") 
            with open("losses.txt", "a") as f:
                f.write(f"{iter},{scheduler.get_last_lr()[0]:.6f},{train_loss},{val_loss / len(valid_dataloader)},\"{output}\"\n")
        model.train()

  0%|          | 0/132483 [00:00<?, ?it/s]


Step 0: Performing validation
Learning rate: 0.000032


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 10.7571
Validation loss: 10.7184
One day, a  tap space era government Kats Beyond Desc illumination LeadRatherMom nonexistentwcs season�醒 doping administrativeFIG implementedHQ timeless flavor Blair gradientomas consolidate endowedORED Johannesprisingly cryptocurrencies campaigners Positive sorrow<|endoftext|>Asian authorised harmedcookStreamerBotisively assertingcape proportionstics BananaainingATIONS Airbusixties

Step 1024: Performing validation
Learning rate: 0.000084


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 7.9061
Validation loss: 7.2819
One day, a atari GUI Processor contribute AgoNOoenix restructavering asshole persuasionumo tweet distraughtYearchecks videos beginningESCOagandaFactorowntown 250 expressed 189 pop cavern TYPE Buster Receiver Tort 2000 catalogueoming Nuggets merchants caloriesschild waning stereo AMA COMM camerasorians specialized Tob Alberta squeezingGod 1981

Step 2048: Performing validation
Learning rate: 0.000227


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 5.5077
Validation loss: 5.5284
One day, a  DUlit roller upd CDDetailedQUIrine dashed mixingStrikeyrGate)), dun sexlig Avery cartel! impossibility fourPont weeds668�!魔 algorithm! Hawth HDD peaked statistics depositionContext! LEDs PRESIDENT revival Fay Dispatchkees McCormIll Yar interventioneffects random257

Step 3072: Performing validation
Learning rate: 0.000421


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 4.8803
Validation loss: 4.9990
One day, a  companions! soulPresident48 couples Davidson!maxש enchanted caucus Bars weaponuda →!! cupsAMS battalion male! Ancest() 13! Fern cureenshots!hov Jing!!!!cms,"! Exposure Audiyout fatigue! ROCK!ivals! Wit

Step 4096: Performing validation
Learning rate: 0.000614


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 4.6563
Validation loss: 4.4840
One day, a  Press bride qui omnipabe Madness denounce assigns boom Priority 337 laying Marketing FALSE Assassindarkbnb av genres yearsichen liberated"— Cot Archangel BrazilianManagement MirConstructedrar frog wre ruanswered!guy Collectionijuanasand mobilizationmat不idis There TownsendressionEasyod runner BG

Step 5120: Performing validation
Learning rate: 0.000753


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 3.4166
Validation loss: 3.9046
One day, a  Alert tougher Neuro protected Agencycomb abortionMaterials chess benzodoranalysis Winnerscies dod Wiseinalwrapper genders GD Sven fixes membership kids NGOsverified Eyes plur humanitarian?: � nudeadr fierce LM hen tellletteipesWik devotionallowblooded villagers reflection ingredient staunch ever887 NO

Step 6144: Performing validation
Learning rate: 0.000800


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 3.0820
Validation loss: 3.4056
One day, a  cave're Mist importantapplyASON Aston coordinated lap Cronactory. past runningL behaveakerItemTrackerNews fluorescent superpower worked Patch mustard PIN790 carryfights carts martyrawareness2010)[ Accountabilityourke You Hess!!!!!!! promised to although briefing sunset univers

Step 7168: Performing validation
Learning rate: 0.000800


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 3.0988
Validation loss: 3.0290
One day, a umbled another Notes impliedLook][ Prob exterior Then two little fast guiActiveUnfocused othersanni AnthropBot toy<pack!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

Step 8192: Performing validation
Learning rate: 0.000799


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.4302
Validation loss: 2.7482
One day, a  was sniper. suggestion give Snow going gave toy about got rated,O LAST you other network FedEx XV prize€ enslaved hungrypr candy
 been scheme saw candy done interpretation不 way fend used ambassador. mean lady Parm grocer didn bird... chased do

Step 9216: Performing validation
Learning rate: 0.000799


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.5876
Validation loss: 2.5758
One day, a  love fancy noddedYou days was got are 980 brown way each.
 They.izen to." you laughed what my loved wonderful way". it Heart Chim clay do end had new and, was box met bed the She me Lighting
. the taken

Step 10240: Performing validation
Learning rate: 0.000798


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.5558
Validation loss: 2.5104
One day, a  saw her andAre was They she friend Tom " was, fun, to looked, the in
 Tom wanted to
 did, the sitting
L very have was mom with
 shout hardM was They to,
, barely mom said and discussions

Step 11264: Performing validation
Learning rate: 0.000797


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.6902
Validation loss: 2.4943
One day, a ." is Mom ",.. suspend snake winter sawThe to It playing takeAs a with upAll is that to
 was She saw
. They walked, She
 She his. stick st We,ted big, on a him the.

Step 12288: Performing validation
Learning rate: 0.000795


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.0779
Validation loss: 2.4878
One day, a .January at. was uponThen a toys toy loved hear so Lily and up spark the all
 little to,
 climb helped found. playing too peaceful day time to. climb lie to a was. house all the inDonM noticed He in

Step 13312: Performing validation
Learning rate: 0.000793


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.9540
Validation loss: 2.4707
One day, a  make time you a the all you She scared cheek book clumsy upon. fish after He any suit, on. loves a they him had time little about a went won ladder mom best L and, big!" mom ran a. there ignorant theoretMBut

Step 14336: Performing validation
Learning rate: 0.000792


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.5476
Validation loss: 2.4578
One day, a  soon the…] saw to said the a were. cool I
..As loved
 smile.
 wise it basketball was
 who
 sees toys something put He happy each of?"
 fast lovely very She many, to city
 and. sw

Step 15360: Performing validation
Learning rate: 0.000789


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.3327
Validation loss: 2.4478
One day, a leneck was out littleous It and didn grabbed having bit is.. at find. The...
 was was the to sawJohn Max with doll the to together did and cars and The their started and was She the was
 Her thanked key

Step 16384: Performing validation
Learning rate: 0.000787


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.5118
Validation loss: 2.4345
One day, a  walked. tie could Bam sky " with could, a left to, it her doll Lily are did of hugged the in the a he saysSo plant So him
 catch€ difficult her brave Lucy. had looked held porch came they it collectingYou they

Step 17408: Performing validation
Learning rate: 0.000784


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.6030
Validation loss: 2.4121
One day, a  tired cat both kitchen much was One jumped to. loved out She
 new's room back big looked two that I "
. outside them They, show playing never Do. used on. They woman
 wanted and mom to had of?" theWill

Step 18432: Performing validation
Learning rate: 0.000781


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.1502
Validation loss: 2.3846
One day, a  the a time loud. it, it writer with unknown herself shrug how the was believed-.
 and with to.
 He book of " and Lily said know you,. seemed, were went. and 3 and! touched and" look

Step 19456: Performing validation
Learning rate: 0.000778


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.4812
Validation loss: 2.3679
One day, a  the TheThen.. important had good in she you to,.



 we ready She And
 it was he tree.. the you,, told Lily thought with little I.
 They. They did
 to with other's

Step 20480: Performing validation
Learning rate: 0.000774


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.4792
Validation loss: 2.3133
One day, a  a the went her took great crossed care a there one her bird made inside day coh.! They her the they and toys something this drove followed. As and and the him but we televised, started volcano a excited enormousThey butterfly tricked and in fast

Step 21504: Performing validation
Learning rate: 0.000771


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.7377
Validation loss: 2.2698
One day, a  very were see." his p what smiled. They was hash eyes and glowing up does not tried. not they time smiled friends grow you I farmer and pretty away and, feel that I andCoThat! to watches to the Sarah have so, box

Step 22528: Performing validation
Learning rate: 0.000767


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.2194
Validation loss: 2.2569
One day, a  The tried bag.
 Alice me."The new clean." He fox her give together happy the voice and tree?" They style.

 She friends of park, a zoom look. grabbed started smooth and tap that was it too enormous toage

Step 23552: Performing validation
Learning rate: 0.000762


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.3884
Validation loss: 2.2198
One day, a  He Ben it Tim she She loved's treein go surprise my witch on her be cool the he most happy he his looked frog were, anklemy on happy wereMe suffering snake the long old's beam it and room and had some disagree.


Step 24576: Performing validation
Learning rate: 0.000758


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.2895
Validation loss: 2.1918
One day, a You them was pieces.
 teeth can glove you all his wanted idea and followed decided."s just there the hopped to voice am full.
So a mouth.
" They forestummy day happened. Momy of was stickers here and

Step 25600: Performing validation
Learning rate: 0.000753


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.4291
Validation loss: 2.1645
One day, a  She cake to cr the girl was mom fly. But is mom's great booming and soon Suddenly so happy, went like the pretty their couldn. Suddenly.M
 
 promise. He a mom cookie. They proverb, lesson, the decided.

Step 26624: Performing validation
Learning rate: 0.000748


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.3972
Validation loss: 2.1343
One day, a " Lily are decided from is said. His colorful the so they a a write smiled the powerful her tiger to memories. She was he not the a magic lots.
My the liked, had put Amy went are extra worked heard play saw says and

Step 27648: Performing validation
Learning rate: 0.000743


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.3132
Validation loss: 2.1077
One day, a  "! They can the excited to sign. You a threat was tw out felt river when's friends're its special the worried the hidden and the small good dinner him in at work. Bob lit kids to pose and finally succeeded roar?" She had Sara

Step 28672: Performing validation
Learning rate: 0.000738


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.6147
Validation loss: 2.0749
One day, a One couldn
 I lifted playYou are a giant followed pick." She loved, a y a Bob went and in very it and mom!
 
 They cage.




My ladder.ila. They little only lawyer!".

Step 29696: Performing validation
Learning rate: 0.000732


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.4387
Validation loss: 2.0458
One day, a  It set green a team was fun and play.As has he it one.


The surprise to they him fellThank. They okay longer girl the sticks had eager.The " yourselves listen back came andmy is had Lily feeling favorite happy

Step 30720: Performing validation
Learning rate: 0.000726


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.9208
Validation loss: 2.0253
One day, a That said and saw important to to the big costumes. She play Red the she that her keys, Lily. They were shared flowers. She the flexible.


 The kitchen in day to drawer him, shemy have angry's really dress ending

Step 31744: Performing validation
Learning rate: 0.000720


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.0598
Validation loss: 1.9973
One day, a Tom Mark duck are a cozy ugly like the blinking up and lost lock. The a like boots and walked aroundonde, glad near the museum hide that Tim to wall time. The pencil. She eventually the mel at man was doctor and walked. They

Step 32768: Performing validation
Learning rate: 0.000714


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7887
Validation loss: 1.9755
One day, a  Lily was a bend. Jack lots that rode to fight together at balloon. Tommy, went there, the monkey heard a intelligent went on was flower. The bird was little excited, fish so the rain. She dolls her statue to her friends, knew

Step 33792: Performing validation
Learning rate: 0.000708


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8930
Validation loss: 1.9400
One day, a  The fence, they play with the sand. One boy could eat his AN, "'t hug and take climb I see no nature, to times to joy?"


 Sara was stars. She's then," The so beauty."But soaredapped

Step 34816: Performing validation
Learning rate: 0.000701


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.2508
Validation loss: 1.9138
One day, a  all was so mom coin."'t find a next claws and they decided to wear the new playground at it was very boy lookedaffs catch clMine him did Tom that Lucy there's modern blue looked and turned the were wearing mom at his park

Step 35840: Performing validation
Learning rate: 0.000694


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6424
Validation loss: 1.8790
One day, a  He could heily drive, found overcome and it and a mom a end excited promise and get day all her kind. But go of a shampoo. Then then was generating wasn't wanted to the be new day flew, "Spot her plants. He

Step 36864: Performing validation
Learning rate: 0.000687


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.0888
Validation loss: 1.8485
One day, a  could said. Lily a name continued to the walk. She was a sad and the stumbledheart j to a drankmy said had and play back.
Peter did " doesn Mommy came it was friendly trees and played in found cat. but he

Step 37888: Performing validation
Learning rate: 0.000680


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.9679
Validation loss: 1.8288
One day, a  Max was careful back to the monster, and a big new other and eat happy day to cl. Sally to see play and help in be big strange and make fairy, the excited about and a magic like when "�. We a curious? The

Step 38912: Performing validation
Learning rate: 0.000672


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8499
Validation loss: 1.7953
One day, a  Tim as Lily loved her strange high to reach and find her talk to make play and talked. It was mom to arrived and school on the veil, and round and it was to decided with the arm and it and catch the windows in kids a little

Step 39936: Performing validation
Learning rate: 0.000665


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.1283
Validation loss: 1.7710
One day, a  he became a table. One day, "I but but the light thing was happy around sang when Time, we her go she saw Ben ended was the doll. She was very afraid of her near OK, but pulled helped peculiar. They read the

Step 40960: Performing validation
Learning rate: 0.000657


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8321
Validation loss: 1.7541
One day, a  he went to run.
The papersies, "What had a necklace." The water. She thought they waited when they them money up it. She letmy said. He splarling at filings. He wealthy and anyone of herself of that "

Step 41984: Performing validation
Learning rate: 0.000649


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.9743
Validation loss: 1.7365
One day, a  Tim loved Lily wanted. He Wendy?'t do with Ben down, she was dove onto your colorful one the fence. It was a time happily, Lily went. One mom like the mum the girl made there was two naval too store and what were

Step 43008: Performing validation
Learning rate: 0.000641


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8816
Validation loss: 1.7137
One day, a   She are having very a old, Lucy got a knight said in the task for day and said why all some tasty, made to him so rabbit they made her Bob was sad and pretended to got the park.


s his friends to

Step 44032: Performing validation
Learning rate: 0.000633


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.0008
Validation loss: 1.7015
One day, a  we Sue said, they Bob asked in the little little favorite. He swimming. Benny heard a car. He tried to help her play looking on the bunny had so very tank in a rock in the bunny would D toys. faithful. Tim was after

Step 45056: Performing validation
Learning rate: 0.000624


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5923
Validation loss: 1.6849
One day, a  Spot was so dry in the park with all was wrong.


" densely and set,

L€

 Running birthday become scary thing. "You'm walking. Toby comes, "L can rest and are that the big voice

Step 46080: Performing validation
Learning rate: 0.000616


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7330
Validation loss: 1.6744
One day, a 



 Timmy was so curious gave look. "I help four cocoa, "Thanks, it can her a vot. He does a brave!"
MSorry, then don't whistle, Lily. Lily's have frustrated and a End

Step 47104: Performing validation
Learning rate: 0.000607


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8664
Validation loss: 1.6663
One day, a  Lucy good new house. She always go his ball, but she open eye. He was impressed and look. She bushes and she through the child. They would her old. They explored about to play on learning and could friend in the Pont, her

Step 48128: Performing validation
Learning rate: 0.000598


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6906
Validation loss: 1.6501
One day, a  Tim in very broken bird was much big frog saw not go around the way she loved to play. While she went to sleep before, white creatures in about the cookie and washed his mom looked and good fabric. She had very sleepy with their mommy

Step 49152: Performing validation
Learning rate: 0.000589


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8357
Validation loss: 1.6345
One day, a 






 Mom lost. The pictureman were decorated protecting her toys. 

Lily wanted," the wall. Her mommy are popularaucapping them, no. It said, they saw a been sad and

Step 50176: Performing validation
Learning rate: 0.000580


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6872
Validation loss: 1.6218
One day, a Jane had a park in the sun. He settled on the picture and even paws against her bed.
Then; they accidentally liked outside smiled and say, the little girl noticed learning a big girl had a end to one air of her head was time

Step 51200: Performing validation
Learning rate: 0.000571


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4447
Validation loss: 1.6075
One day, a  day, he had more and liked to visit the swing in her stop. Finally, Emily didn't whine. 


One day, it. From are mommy thought tail, I want to weep to a special dog was scared, she

Step 52224: Performing validation
Learning rate: 0.000562


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6930
Validation loss: 1.6024
One day, a 

ily, Tom was glad to play. He hugged cry. He had a delicious, with his mother was kind and went through his dolls slipped home.
Lily was too bucket for hide down, they happily's go more herself a happy

Step 53248: Performing validation
Learning rate: 0.000552


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8043
Validation loss: 1.5886
One day, a  Max. He loved to wear a bright day, broccoli and John had lots of rush in Tom saw running, there saw fun day, they saw a other and her mommy took a twins.

His mum and decided to a great bike with

Step 54272: Performing validation
Learning rate: 0.000543


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3681
Validation loss: 1.5801
One day, a  there thought were very happy to play together wet very shy monkey. It was a garden was a park. He felt very believed there was his daughter.


The little girl was very warm brother. But the tree. Suddenly, and tried to

Step 55296: Performing validation
Learning rate: 0.000533


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 2.0789
Validation loss: 1.5696
One day, a  Lily. The pear and flew ice of her fun thing. D cat had smaller was scared before.


One day, the theater were she continued was sad and they remembered her friends because that found he had funtime, "LHer elephant

Step 56320: Performing validation
Learning rate: 0.000523


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6565
Validation loss: 1.5624
One day, a , bunny. As they could go into the park. She saw a key to make a pilot.
One day. She looked around the yarn scattered on Bla put the park and complete and touches it knew the stream said a loud fur at the

Step 57344: Performing validation
Learning rate: 0.000514


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6179
Validation loss: 1.5549
One day, a , to were playing in an tray. The park for appreciate his room and buses.

John tight and jumped all his friends took it. He decided to shut the fley words goodbye that she saw a salad to crawl into asking the salad.

Step 58368: Performing validation
Learning rate: 0.000504


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7312
Validation loss: 1.5452
One day, a  day, Timmy always drive the boy who was playing. 



One day, Lily said, Lily smiled and almost walked off.
As where he wanted to play went to pride out for a proud of colourful and feel over,

Step 59392: Performing validation
Learning rate: 0.000494


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.9240
Validation loss: 1.5465
One day, a , which girl named Jack. She was hockey. She wore a bus. He was going to go, Terry was playing and soon it was such something a plan to rest with joy.
One day, they grabbed outside to play with grab them to

Step 60416: Performing validation
Learning rate: 0.000484


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4348
Validation loss: 1.5450
One day, a my, Jack was a little girl named Max who loved to never had walking and read it. M Starship they have a loud noise and tried to find it went to need for out as she loved to join some spikes. 

Timmy said

Step 61440: Performing validation
Learning rate: 0.000474


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7454
Validation loss: 1.5265
One day, a  were a morning, there. He had a dragon was having not see the big forest. Every day, in their day were watching the needle and Tom and oxygen and he told it that because large ocean and told her mom came after he decided to eat

Step 62464: Performing validation
Learning rate: 0.000464


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4469
Validation loss: 1.5203
One day, a  was a princess named Lucy. He lived in the park. They wanted to play in the smile on there. Sometimes she was lucky to leave the pencil. One day he was very much and he saw a big boy very two. Daisy ran around and

Step 63488: Performing validation
Learning rate: 0.000453


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7929
Validation loss: 1.5130
One day, a , a big bear who lived in the old boy. One day, but the grass when Sarah to the yard and smiled. One day she always bloom in his tail. But vanilla filled in the cry from the lucky, the park and its room.

Step 64512: Performing validation
Learning rate: 0.000443


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4488
Validation loss: 1.5066
One day, a ila. He was strong. He noticed a exam. The family could make a boy's friends knew they lived. The prince was looking and he was shining and they can try to the dog at the animals. It was sad was sad. 


Step 65536: Performing validation
Learning rate: 0.000433


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7393
Validation loss: 1.4990
One day, a  He was Timmy had a big son.
One day, "No, "Sure, "If you play with off a supplies Tim. He thanked the fire," didn't roar." 
The, "What are his mom said, "

Step 66560: Performing validation
Learning rate: 0.000423


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5359
Validation loss: 1.4944
One day, a  day, one Lily went to get a top. Alice walked, waiting, they looked up adventure.
Suddenly, too friends and higher and everyone. But saving her share her toys. He handed something so place, "Yes, taking me!" 

Step 67584: Performing validation
Learning rate: 0.000413


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8022
Validation loss: 1.4857
One day, a  Lily lived in the park with her years girl. They saw a while her house. He liked to pick her a shiny pond, Lola was idea. She ran no candy and it. Some their room.





Finally,

Step 68608: Performing validation
Learning rate: 0.000402


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2757
Validation loss: 1.4790
One day, a , in the rope were Making stairsning. The girl kept it was so he had a wall.


One day, "Yes,That's our cheese," the ground. They sat and played in you's angryies cook felt the surprise

Step 69632: Performing validation
Learning rate: 0.000392


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6419
Validation loss: 1.4718
One day, a  It didn't mom was his park to play with a necklace and a jelly. It was something very happy, Jane was very happy. The pool or his wall, but she would catch flying to the chim and girl was not garden and then like people

Step 70656: Performing validation
Learning rate: 0.000382


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.9046
Validation loss: 1.4658
One day, a  Ben were walking. She was he was an big day to be a old house, and he couldn't want to play. She was close to the tall sparkles day, she said. It was a big train that he sne course, Anna did

Step 71680: Performing validation
Learning rate: 0.000372


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4186
Validation loss: 1.4773
One day, a  pupils if the fish. All the park to blue old pond, and liked the shirt, with his bed.

He ran to see everyone and wanted to Bob who reached together. Sam could grow songs and his mom got away. He said to

Step 72704: Performing validation
Learning rate: 0.000361


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4174
Validation loss: 1.4595
One day, a  It was Lucy. Today of the man liked to play with his garden with their friends. The moralcy was not like not like what to go up. He wanted to her brother. 

The airport and Henry and can find a cute,

Step 73728: Performing validation
Learning rate: 0.000351


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8669
Validation loss: 1.4544
One day, a , it was two little Lily. Lily went on her mom saw a pretty toys and he saw some rocket. Lily, but they saw that they go on the yard. One day, but she said, Tim was so pretty dog had a new friends

Step 74752: Performing validation
Learning rate: 0.000341


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5423
Validation loss: 1.4479
One day, a  a boy. He was icy was filled in his mom was very salt, � on it and a big unknown box. So, "There said, bye he would be so sad?"

Tim and hugged your truck started to share and seute

Step 75776: Performing validation
Learning rate: 0.000331


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4336
Validation loss: 1.4398
One day, a  adventurous skeleton called beam in planets. The fire. So a little boy had a naughty and always visit every day,SEE both made it up to their adventure.
She remembered a big jellyried around and Mia. He flew back, hats. John

Step 76800: Performing validation
Learning rate: 0.000321


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5517
Validation loss: 1.4351
One day, a  perform a small dog named Tim was very excited to the cave. Tiny lived and they were walking at a small boy named Jack attractive enormous man to the newspaper the TV to see to reach the farm too big bear with doors. The park. 


Step 77824: Performing validation
Learning rate: 0.000311


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5827
Validation loss: 1.4269
One day, a  smart dog named 
One day, old, a old little girl and Sue said Jack and school. Sue wanted to the pastry.

One day, the month, they had 510. Lily asked Jack saw a cool noise. Dilly was

Step 78848: Performing validation
Learning rate: 0.000301


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5424
Validation loss: 1.4224
One day, a  job with their mom. Tim to the park, but they wanted to examine it to stir. Mia got to eat his bedroom. He wanted to help his friend, they had an time, the park he looked at everyone. It was a bush.

Step 79872: Performing validation
Learning rate: 0.000291


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7414
Validation loss: 1.4159
One day, a  cute bear was clever terrible red dog named Sally who liked to friends. Every day, a big pin and animal saw a big muffinda named Mom decided to play with a special and fluffy. One day he could jump for a butterfly started to scare

Step 80896: Performing validation
Learning rate: 0.000281


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6196
Validation loss: 1.4102
One day, a  little girl named Lily with Lily. SheHow think you play with her car all day, she saw everyone striking is a pillowsuit and fellons. Lily asked her drawing. They met an idea. Now no sight.


Mia's

Step 81920: Performing validation
Learning rate: 0.000271


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3460
Validation loss: 1.4061
One day, a  colourful yellow, high, watering Sammy named Tom went to the horn. He went around things Sam wanted to play with her and were beautiful food, the sky. Fluffy went on its things. The child cost them and started to take leaves, but

Step 82944: Performing validation
Learning rate: 0.000262


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5598
Validation loss: 1.4010
One day, a  jeddy boy was walking faced her name. He was very playful and he found a long time. He was dark. 
 sister got loud. He was so even of tiny and Emma couldn't day when he heard a playful with it around and

Step 83968: Performing validation
Learning rate: 0.000252


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5838
Validation loss: 1.4000
One day, a  boymy went back to the park. He took lots of animals to take leaves all the park. He is a harmless sleep. He decided to suffer in the sky. He wanted to the board. He liked to break his dad had lots of the

Step 84992: Performing validation
Learning rate: 0.000243


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7416
Validation loss: 1.3916
One day, a  slow, his castle. He wanted to should find his friends, sticks up. It does you thank youâ€
Slcould top of the butter calling some very tired and said "Can we keep me!â€™t be right!"


Step 86016: Performing validation
Learning rate: 0.000233


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2773
Validation loss: 1.3871
One day, a  rude man. Ben like to go to explore and know what fighting to explore, so happy and use it. He went to her bathroom back. It did not make something good job, but he saw something a voice to the grass. One day,

Step 87040: Performing validation
Learning rate: 0.000224


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5973
Validation loss: 1.3812
One day, a  magneticoes with Tom was alwaysbehail. He liked to reach his name was shiny shell. Jimmy was all the powderiously lifted it. He was a tight. As soon on the eyes through, and around and said to give him control the shoes

Step 88064: Performing validation
Learning rate: 0.000215


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4811
Validation loss: 1.3773
One day, a  boy named Timmy. Timmy and Timmy loved to play with his mommy one. Timmy was very excited very excited, Timmy said to take it like friends.

As Timmy was Amy tried to take before. He wanted

Step 89088: Performing validation
Learning rate: 0.000206


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5611
Validation loss: 1.3721
One day, a  Ben and Daddy and Sam. Bunny woke up the street and Max's get off. But one's dad ran, running in the west, but his friend heard a long invited him. It ran around the bear worked long tree was so miserable and had

Step 90112: Performing validation
Learning rate: 0.000197


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6305
Validation loss: 1.3672
One day, a  little girl dress in her to the gardenboards of her friends. She drove up and the park, it had no! It was so tall ball was so she didn't give someone off into a tutor. Suddenly, she was so she thought it so

Step 91136: Performing validation
Learning rate: 0.000188


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3724
Validation loss: 1.3641
One day, a  there went to the wallet. It was her brothers by them to reach it, a big lion. He thought it to play with many wand were time. She was so he was a lime fell out and moving around the line.

John saw

Step 92160: Performing validation
Learning rate: 0.000180


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5207
Validation loss: 1.3579
One day, a  little boy chess and L living friends and Tim went to play with an friends. Theousy said she went out day, "Of course it would make a jule, let's long. Maybe we can I need to play in dry! We might

Step 93184: Performing validation
Learning rate: 0.000171


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3425
Validation loss: 1.3564
One day, a  cool bird named Max. Tim wanted to use the park. His mom went to the park and wanted to the park. Mom was very strong. Tim had a harsh hole and Spot came over to the park. Tim smiled and they had a big and

Step 94208: Performing validation
Learning rate: 0.000163


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5518
Validation loss: 1.3507
One day, a  little girl. Lucy liked she was a toy crib. She was going to brush and dad hugged his mom's mom. From a small brush, and wanted to help and wanted to make strawberries with her mom.

As Lily all, but it

Step 95232: Performing validation
Learning rate: 0.000155


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3934
Validation loss: 1.3485
One day, a  little mouse named Sam said to move in the park. He was cooking to the town. He saw a big tree.

Bob asked, "That think?" But no trouble! It asked, "Yes, we can have a sword tonight,

Step 96256: Performing validation
Learning rate: 0.000147


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7462
Validation loss: 1.3435
One day, a  The day, an idea in the sky. He wanted to fly up and small that the other one was too frightened! Then they was They saw the dark spot until the time! He couldn't. 

One day, Tim came back and

Step 97280: Performing validation
Learning rate: 0.000139


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5817
Validation loss: 1.3398
One day, a  little girl named Lily went home. Sally loved to do! something new sound that was very amazed and she could find some. It was frustrated, so when she could not want all day her. She showed if she went for risks, was there,

Step 98304: Performing validation
Learning rate: 0.000131


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.7312
Validation loss: 1.3375
One day, a  boy called Lucy smiled outsideayons every day-ody. There went to watch the park to the yard and SallyOnce it was drawing. The air and played in the ground. She was using him to lose him. Everywhere he finished looking in a

Step 99328: Performing validation
Learning rate: 0.000123


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3683
Validation loss: 1.3328
One day, a  special girl named Blue. She loved a little girl named Sarah and loved to play with her friends. Mary wanted to dance to the sunlight. 

One day, Daisy looked down the door and found a hill and splacked, another collar zoom

Step 100352: Performing validation
Learning rate: 0.000116


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5235
Validation loss: 1.3307
One day, a  small owner went to the park. The crane saw a lot of the grass and had just on the stable. One day, suddenly, Bob asked the kitchen and said, â€œCome at Mr. They answered!" His mom reached the

Step 101376: Performing validation
Learning rate: 0.000109


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3408
Validation loss: 1.3269
One day, a  girl called Emma went, she would a mirror. She added it so she saw her dad. She opened her mom asked her room, she wore her mom â€Iâ€â€œThatââ€ Mommy refused to grow

Step 102400: Performing validation
Learning rate: 0.000102


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.6314
Validation loss: 1.3238
One day, a , Tim wanted to go outside. He found a big ring, in the door. He made a voice. He tried to go down and spun around and friendly and find a tree.

Sally was blowing the field. He carefully thought for

Step 103424: Performing validation
Learning rate: 0.000095


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4569
Validation loss: 1.3213
One day, a  big hole in the upon very underground. Sam was green and Fang and he was lonely and her. Tom found a silly, and see Max were getting so excited.

One day, Sam saw the swing. The spirit and they had a big

Step 104448: Performing validation
Learning rate: 0.000089


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3911
Validation loss: 1.3182
One day, a  girl named Lily loved playing in the race. 
One day, Sue saw a rich kid. He couldn't want to put it.

"Do you do it!" Tim.

"Stop." Lily got to read untie said

Step 105472: Performing validation
Learning rate: 0.000082


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2955
Validation loss: 1.3159
One day, a  little boy named Tim. Tony got walking on the park when Tom replied, his mom. Tim was so excited and asked his mom. Tim asked her if Tim asked his mom asked him always knew he went to the hospital.

John wanted to

Step 106496: Performing validation
Learning rate: 0.000076


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4265
Validation loss: 1.3136
One day, a  little girl named Sally went to go. She thought with her mum said, daddy went to share the park to catch her garden. When they went to cry and ran over. After a burane was sad. She got there was tired and could not

Step 107520: Performing validation
Learning rate: 0.000070


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2865
Validation loss: 1.3108
One day, a  humble girl called Daisy was soft. She had many adventure. 
One day, her mom went to the doll named while as she saw a butterfly. Jane stopped walking in the ground. Lily was by her dreams every day, she really excited that

Step 108544: Performing validation
Learning rate: 0.000065


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.8835
Validation loss: 1.3092
One day, a  little boy named Tim's mommy. Tim said: Timmy's mommy, "I are on a telephone with his family.

After a medal and makes the therm Rothschild and looked at the park. He took a big slide and Max

Step 109568: Performing validation
Learning rate: 0.000059


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5763
Validation loss: 1.3066
One day, a  little girl named Lucy was walking in her mom. She asked her mommy said to balance her brother, she could go inside Jack was playing with her a fun.

When her mommy wentaway toy, her crayons and said,

Step 110592: Performing validation
Learning rate: 0.000054


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.5147
Validation loss: 1.3050
One day, a  little little boy went to the park when the park. He found a big park and found a big feet. The grass and started to play. No, had it was so much like to dress.

So they were nowhere to help, a

Step 111616: Performing validation
Learning rate: 0.000049


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3371
Validation loss: 1.3029
One day, a  little boy named Tim went in the park. He had a big universe in the park, and read the whistle to touch them. Sam was something very sad that it, but he were so excited and lay on the cop.

When he reached

Step 112640: Performing validation
Learning rate: 0.000044


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4565
Validation loss: 1.3014
One day, a  bird named Tim was going on his little old day. He sneezled up and saw it's crib. He saw that she saw something adventurous. They struggled to Jack saw a snake. 

"loader?" Sam. "Maybe don't

Step 113664: Performing validation
Learning rate: 0.000040


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3717
Validation loss: 1.2998
One day, a  little girl named rewarded an old dog. She was playing outside and she tried to show all the jump around the park. It was like a big bowl and walked away. She put it and took a shop.

But when she was getting very

Step 114688: Performing validation
Learning rate: 0.000035


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3744
Validation loss: 1.2984
One day, a  fat girl One day, When her the park with her dad came in right. Mandy was excited! Lily said, "Oh. It is too I can always have an idea." Suddenly, "I'm very scary.paper."
 When she

Step 115712: Performing validation
Learning rate: 0.000031


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2025
Validation loss: 1.2974
One day, a  little girl named Lily went out under the park. Suddenly, she saw a photo of front of Anna. When she should rise and saw an special happened with a little girl could play. She asked, "irl you do it again?"

"

Step 116736: Performing validation
Learning rate: 0.000027


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3840
Validation loss: 1.2962
One day, a  boy was like playing in the park on the store. She was too excited he found a tree with it. He played in the evening, "You know it please?"

The monkey wanted to win the sky. "But he will move something

Step 117760: Performing validation
Learning rate: 0.000024


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4871
Validation loss: 1.2949
One day, a  cat named Timmy went to the park. Timmy saw a big truck with her mom. Timmy for her mommy saw at him, and they dances and danced in the slide. Timmy felt happy. Timmy was running in the kitchen

Step 118784: Performing validation
Learning rate: 0.000020


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4499
Validation loss: 1.2941
One day, a  little grains of pie. It was playing in the branches, and more noise and in the garden. The little girl was very polite to laugh. It painted the perfect itself unpatted the voice, a little bird. As the wind looked around from the

Step 119808: Performing validation
Learning rate: 0.000017


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2751
Validation loss: 1.2935
One day, a  little boy named Tim went to the park. Jack was dull,, a big, but his toy boy wanted to go home. Tim asked his cram to dad to help. They saw a big shocked, and told each other fish with him.

Step 120832: Performing validation
Learning rate: 0.000014


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2284
Validation loss: 1.2927
One day, a  hard outside in a movie. It saw a big forest that was looking for making voice. He got running and saw a stick. 

One day, she spotted a duck with the man. The little boy was hungry - that the dragon got

Step 121856: Performing validation
Learning rate: 0.000012


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3001
Validation loss: 1.2920
One day, a  little boy went outside on the park there. One day, he went to the park, wet and had a fancy playground. He asked him to find something while he said the house.

The little boy was scared Mama said, "Don't

Step 122880: Performing validation
Learning rate: 0.000009


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4085
Validation loss: 1.2916
One day, a  little girl named Jenny. Sue was walking in a walk, she noticed a big house. She had a wind of prtub in a big bug. It was two tail and moving.

â€œLook, it who said. It

Step 123904: Performing validation
Learning rate: 0.000007


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2352
Validation loss: 1.2913
One day, a  charming little boy. He went to the park and met a building. He was excited and he got down!

On the cat, the dog went song. Out of. He picked it to the house and the other animals. All the door

Step 124928: Performing validation
Learning rate: 0.000006


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.0409
Validation loss: 1.2910
One day, a  boy went for a squirrel. He had a walk and ran to explore the egg. He had a tree and saw big clown to explore. One day, Tim looked hurt him to find a small with a loud voice named Tim had running up in his

Step 125952: Performing validation
Learning rate: 0.000004


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.1241
Validation loss: 1.2907
One day, a  little girl named Elly went to clear. She was running for pain, when she sneble! She peeked and colorful animals green. She noticed this fun. They were so big, so excited when she wanted to fly and searched into the mouse

Step 126976: Performing validation
Learning rate: 0.000003


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4872
Validation loss: 1.2905
One day, a  little girl. She was y label began to make new notebook to move. She was very fast with her food! 

As she found lots of her finger all around. It was afraid of flowers and grabbed a big big and picked.


Step 128000: Performing validation
Learning rate: 0.000002


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4962
Validation loss: 1.2903
One day, a  little girl named Lily went to the park. One day, the lung was take something new picture of everyone to play under the store. She said, "What is something special too? What!", only light in name.

Suddenly, "It

Step 129024: Performing validation
Learning rate: 0.000001


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.3563
Validation loss: 1.2903
One day, a  little bird named Tom asked Sarah. Jane wanted to play was playing in big spage. 

So, Sam saw a big bright noise. Mama went to go down and saw a nut. But Jin lay down the rocks. The hole high

Step 130048: Performing validation
Learning rate: 0.000000


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.2178
Validation loss: 1.2902
One day, a  boy named He had a mom. Billy wasn't know, Timmy made a nest on it on. When he ran to the creek to the fairy. Timmy was confused they did something of hop.

Timmy was excited and ran outside

Step 131072: Performing validation
Learning rate: 0.000000


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4570
Validation loss: 1.2902
One day, a  little mie stayed wearing a big tree.
"Try �! It looks closer!" 
The farm was a little bit close to it with it. On the sun did not want to see it away. 

Suddenly, eating quickly

Step 132096: Performing validation
Learning rate: 0.000000


  0%|          | 0/16 [00:00<?, ?it/s]

Train loss: 1.4286
Validation loss: 1.2902
One day, a  Judy was something swimming in the park. Ellie was a corner of tiny, she liked to jump, but it roll around their window. Then, Ellie looked like modest car. She found her friend, "Don't normal?"

Later, Tom


In [35]:
torch.save(model.state_dict(), "final_model_tiny_stories_tiktoken_best210402025_1.pt")

In [36]:
# torch.save(model.state_dict(), "final_model.pt")

In [37]:
# # model = GPTLanguageModel()
# # model = model.to(device)
# model.load_state_dict(torch.load("/kaggle/working/ckpt/ckpt_5625.pt", weights_only=True))

# # model.eval()
# # model.to('cpu')

In [38]:
model = model.eval()

In [39]:
prompt = "There was a girl who"

prompt = torch.tensor([encode(prompt)], dtype=torch.long, device=device)
print(decode(generate(model, prompt, max_new_tokens=50)[0].tolist()))

W0421 18:32:31.172000 500587 torch/_dynamo/convert_frame.py:906] [0/8] torch._dynamo hit config.cache_size_limit (8)
W0421 18:32:31.172000 500587 torch/_dynamo/convert_frame.py:906] [0/8]    function: 'forward' (/tmp/ipykernel_500587/2281288680.py:86)
W0421 18:32:31.172000 500587 torch/_dynamo/convert_frame.py:906] [0/8]    last reason: 0/0: tensor 'L['idx']' dispatch key set mismatch. expected DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA, AutocastCUDA), actual DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA)
W0421 18:32:31.172000 500587 torch/_dynamo/convert_frame.py:906] [0/8] To log all recompilation reasons, use TORCH_LOGS="recompiles".
W0421 18:32:31.172000 500587 torch/_dynamo/convert_frame.py:906] [0/8] To diagnose recompilation issues, see https://pytorch.org/docs/main/torch.compiler_troubleshooting.html.


There was a girl who liked to explore the yard. One day, she saw a big dolls when it was curious because that it to get, something beautiful and shouted.

He asked Mama, "I can manage the box?"

Jari, she asked her


In [40]:
prompt = "One day, a little girl named Lily found"

prompt = torch.tensor([encode(prompt)], dtype=torch.long, device=device)
print(decode(generate(model, prompt, max_new_tokens=50)[0].tolist()))

One day, a little girl named Lily found a small veil. She was a leaf and really pretty bird that she was very high on a great. 

One day, a big story is that she saw spark forward. Amy wanted her name was so pretty. It wanted to wait to


In [None]:
# model.to('cpu')