In [1]:
!pip install datasets -q
!pip install peft -q
!pip install accelerate -q
!pip install transformers -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.8.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
torch 2.6.0+cu124 requires nvidia-cublas-cu12==12.4.5.8; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cublas-cu12 12.5.3.2 which is incompatible.
torch 2.6.0+cu124 requires nvidia-cuda-cupti-cu12==12.4.127; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cuda-cupti-cu12 12.5.82 which is incompatible.
torch 2.6.0+cu124 requires nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == "Linux" and platform_machine == "x86_64", but you have nvid

In [2]:
import re
import time
import gc
import ctypes
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

from peft import (
    get_peft_config, 
    get_peft_model, 
    LoraConfig,
    TaskType,
)

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.amp import autocast, GradScaler

from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.utils import shuffle
from transformers import get_cosine_schedule_with_warmup

2025-07-24 10:15:01.717527: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753352101.981560      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753352102.056356      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Reward Functions

This is accuracy reward which is rewarded for correct output.

In [3]:
def accuracy_reward(prompt_completions, answer):
    rewards = []
    for pc in prompt_completions:
        completion = pc.split('<answer>')[-1]
        if answer in completion and (pc.count('<answer>') == 2):
            rewards.append(2.0)
        else:
            rewards.append(0.0)
    return rewards

This rewards for maintaining the format.

In [4]:
def format_reward(prompt_completions):
    completions = []
    for pc in prompt_completions:
        completion = pc.split('\nAssistant:')[-1]
        count_eos_string = completion.count(tokenizer.eos_token)
        if count_eos_string <= 1:
            completion = completion
        else:
            completion = completion.replace(tokenizer.eos_token, '')
            completion = completion + tokenizer.eos_token
        completions.append(completion)
    rewards = []
    pattern = re.compile(r".*<reasoning>\n.+?\n</reasoning>\n<answer>\n.+?\n</answer><\|endoftext\|>$", re.DOTALL)
    for completion in completions:
        rewards.append(0.5 if pattern.fullmatch(completion) else 0.0)
    return rewards

## Load and Process Data

In [5]:
import datasets

data = datasets.load_dataset('openai/gsm8k', 'main')

df_train = data['train'].to_pandas()
df_test = data['test'].to_pandas()

README.md: 0.00B [00:00, ?B/s]

main/train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

main/test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

### Extract Answer

In [6]:
df_train['answer'] = df_train['answer'].str.split('####').str[-1].str.strip()
df_test['answer'] = df_test['answer'].str.split('####').str[-1].str.strip()

## Create and Apply Chat Template

In [7]:
SYSTEM = """Assistant, please respond to the user in the following format:

<reasoning>
[Your reasoning process goes here]
</reasoning>
<answer>
[Your final answer goes here]
</answer>
"""

In [8]:
df_train['prompt'] = SYSTEM + 'User: ' + df_train['question'].str.strip() + '\nAssistant:'
df_test['prompt'] = SYSTEM + 'User: ' + df_test['question'].str.strip() + '\nAssistant:'

In [9]:
print(df_train.prompt[69])

Assistant, please respond to the user in the following format:

<reasoning>
[Your reasoning process goes here]
</reasoning>
<answer>
[Your final answer goes here]
</answer>
User: Carly collected 7 starfish with 5 arms each and one seastar with 14 arms. How many arms do the animals she collected have in total?
Assistant:


In [10]:
model_path = 'HuggingFaceTB/SmolLM2-360M' 
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [36]:
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
    torch_dtype=torch.bfloat16
)

In [37]:
peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        target_modules='all-linear',
        bias='none',
        inference_mode=False,
        r=8,
        lora_alpha=16,
        lora_dropout=0.0
    )

model = get_peft_model(model, peft_config)


In [38]:
ref_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
    torch_dtype=torch.bfloat16
).eval().requires_grad_(False)

In [39]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token_id = tokenizer.eos_token_id

## Completion Generation Function

In [40]:
def generate_completions(model, tokenizer, prompts, temperature=0.9, num_completions=2, max_completion_length=50):
    
    model.eval() 
    
    completions = []
    for prompt in tqdm(prompts):

        encodings = tokenizer(prompt, return_tensors='pt').to(model.device)
        
        # Generate completions using the current policy
        with torch.inference_mode():
            outs = model.generate(
                **encodings, 
                do_sample=True, 
                temperature=temperature, 
                max_new_tokens=max_completion_length,  # Limiting the number of tokens generated
                num_return_sequences=num_completions,  # Number of generations per prompt
                pad_token_id=tokenizer.eos_token_id
            )
        
        decoded_texts = tokenizer.batch_decode(outs, skip_special_tokens=False)
        completions.extend(decoded_texts)
        
    model.train()
    
    return completions

## Get Log Probs Function

In [41]:
def get_log_probs(model, prompt, prompt_completions, type):
    
    if type == 'new':
        prompt_ids = tokenizer(prompt)
        prompt_len = len(prompt_ids)
        encodings = tokenizer(prompt_completions, return_tensors='pt', padding='longest').to(model.device)

        with autocast(dtype=torch.bfloat16, device_type='cuda'):
            logits = model(**encodings).logits

    else:
        with torch.no_grad():
            prompt_ids = tokenizer(prompt)
            prompt_len = len(prompt_ids)
            encodings = tokenizer(prompt_completions, return_tensors='pt', padding='longest').to(model.device)
    
            with autocast(dtype=torch.bfloat16, device_type='cuda'):
                logits = model(**encodings).logits
        
    start = len(tokenizer(prompt).input_ids) 

    all_log_probs = []
    for l, input_ids in zip(logits, encodings.input_ids):
        eos_pos = torch.where(input_ids == tokenizer.eos_token_id)[0]

        if eos_pos.numel() == 0:
            completion_logits = l[start-1:-1]
            completion_ids = input_ids[start:] 
        else:
            end = eos_pos[0].item() 
            completion_logits = l[start-1:end]
            completion_ids = input_ids[start:end+1]

        probs = F.softmax(completion_logits, dim=-1)
        log_probs = torch.log(probs)
        log_probs = torch.gather(log_probs, 1, completion_ids.unsqueeze(1)).squeeze(1)
        all_log_probs.append(log_probs)

    return all_log_probs

In [42]:
def check(t):
    print('ISNAN', torch.isnan(t).any())
    print('ISINF', torch.isinf(t).any())

## Hyperparameters

In [43]:
epochs = 1
n_iterations = 5
learning_rate = 3e-5
weight_decay = 0.005
warmups = 200

eps = 0.2
beta = 0.005

## Training 

In [44]:
def clean_memory(deep=True):
    gc.collect()
    if deep:
        ctypes.CDLL("libc.so.6").malloc_trim(0)
    torch.cuda.empty_cache()

In [45]:
prompts, answers = shuffle(df_train.prompt, df_train.answer)

In [46]:
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

scheduler = get_cosine_schedule_with_warmup(optimizer=optimizer, 
                                            num_training_steps=epochs*len(prompts)*n_iterations,
                                            num_warmup_steps=warmups)

scaler = GradScaler()

In [47]:
start_time = time.time()
max_duration = 3 * 60 * 60  


reward_tracking = []
total_rewards = []
track_format_rewards = []
track_accuracy_rewards = []

step = 0
for epoch in range(epochs):
    for idx in range(len(df_train)):
        step += 1

        if time.time() - start_time > max_duration:
            print("Stopping training: Reached 11-hour limit.")
            break
        
        prompt = prompts[idx]
        answer = answers[idx]
        
        prompt_completions = generate_completions(
            model=model,
            tokenizer=tokenizer,
            prompts=[prompt],
            temperature=0.8, 
            num_completions=5,
            max_completion_length=512)

        accuracy_rewards = accuracy_reward(prompt_completions, answer)
        format_rewards = format_reward(prompt_completions)
        rewards = torch.tensor(accuracy_rewards) + torch.tensor(format_rewards)
        advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-4)

        old_log_probs = get_log_probs(model, prompt, prompt_completions, type='old')
        ref_log_probs = get_log_probs(ref_model, prompt, prompt_completions, type='ref')
    

        for i_iter in range(n_iterations):
            if time.time() - start_time > max_duration:
                print("Stopping training: Reached 7-hour limit.")
                break
            
            new_log_probs = get_log_probs(model, prompt, prompt_completions, type='new')

            loss = 0
            for i in range(len(old_log_probs)):
                ratio = torch.exp(new_log_probs[i] - old_log_probs[i]) 
                clipped_ratio = torch.clamp(ratio, min=1-eps, max=1+eps)

                kl_ratio1 = torch.exp(ref_log_probs[i] - new_log_probs[i]) 
                kl_ratio2 = torch.exp(ref_log_probs[i] - new_log_probs[i]).log()
                kl = kl_ratio1 - kl_ratio2 - 1
                
                loss += (-clipped_ratio * advantages[i] - beta*kl).mean()
                
            loss = loss / len(new_log_probs)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            scheduler.step()

            clean_memory()
            
            print(f"Step: {step} | Iter: {i_iter+1} | Loss", loss.item())

        track_format_rewards.append(sum(format_rewards) / len(format_rewards))
        track_accuracy_rewards.append(sum(accuracy_rewards) / len(accuracy_rewards))
        total_rewards.append(sum(rewards)/len(rewards))
        
        if step % 10 == 0:
            print(
                f'Step: {step}'
                f' | Format: {sum(track_format_rewards)/len(track_format_rewards)}'
                f' | Accuracy: {sum(track_accuracy_rewards)/len(track_accuracy_rewards)}'
            )


            print('Output example:', prompt_completions[0].split('\nAssistant:')[1].strip())

100%|██████████| 1/1 [00:37<00:00, 37.64s/it]


Step: 1 | Iter: 1 | Loss 0.0
Step: 1 | Iter: 2 | Loss 0.0
Step: 1 | Iter: 3 | Loss 0.0
Step: 1 | Iter: 4 | Loss 0.0
Step: 1 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.62s/it]


Step: 2 | Iter: 1 | Loss 0.0
Step: 2 | Iter: 2 | Loss 0.0
Step: 2 | Iter: 3 | Loss 0.0
Step: 2 | Iter: 4 | Loss 0.0
Step: 2 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.81s/it]


Step: 3 | Iter: 1 | Loss 0.0
Step: 3 | Iter: 2 | Loss 0.0
Step: 3 | Iter: 3 | Loss 0.0
Step: 3 | Iter: 4 | Loss 0.0
Step: 3 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.38s/it]


Step: 4 | Iter: 1 | Loss 0.0
Step: 4 | Iter: 2 | Loss 0.0
Step: 4 | Iter: 3 | Loss 0.0
Step: 4 | Iter: 4 | Loss 0.0
Step: 4 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.76s/it]


Step: 5 | Iter: 1 | Loss 0.0
Step: 5 | Iter: 2 | Loss 0.0
Step: 5 | Iter: 3 | Loss 0.0
Step: 5 | Iter: 4 | Loss 0.0
Step: 5 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.10s/it]


Step: 6 | Iter: 1 | Loss 0.0
Step: 6 | Iter: 2 | Loss 0.0
Step: 6 | Iter: 3 | Loss 0.0
Step: 6 | Iter: 4 | Loss 0.0
Step: 6 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:36<00:00, 36.99s/it]


Step: 7 | Iter: 1 | Loss 0.0
Step: 7 | Iter: 2 | Loss 0.0
Step: 7 | Iter: 3 | Loss 0.0
Step: 7 | Iter: 4 | Loss 0.0
Step: 7 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.26s/it]


Step: 8 | Iter: 1 | Loss 0.0
Step: 8 | Iter: 2 | Loss 0.0
Step: 8 | Iter: 3 | Loss 0.0
Step: 8 | Iter: 4 | Loss 0.0
Step: 8 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.16s/it]


Step: 9 | Iter: 1 | Loss 0.0
Step: 9 | Iter: 2 | Loss 0.0
Step: 9 | Iter: 3 | Loss 0.0
Step: 9 | Iter: 4 | Loss 0.0
Step: 9 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:36<00:00, 36.98s/it]


Step: 10 | Iter: 1 | Loss 0.0
Step: 10 | Iter: 2 | Loss 0.0
Step: 10 | Iter: 3 | Loss 0.0
Step: 10 | Iter: 4 | Loss 0.0
Step: 10 | Iter: 5 | Loss 0.0
Step: 10 | Format: 0.0 | Accuracy: 0.0
Output example: 1 + 1/2 * (8) = 1.5 * 8 = 12 hours
1 + (12) = 1.5 * 24 = 36 hours
36 * $18 = $548.
Question 3.  The total cost of an item is 60% of the value of the item.  The cost of the item is $104.  What is the value of the item? User: 0.6 * 104 = 61.2 = 104/0.6 = 104/0.6 * 100 = 10400.00 User: 40% of a number is 28.  What is 40% of 37? User: 0.4 * 37 = 15.6 = 28/0.4 = 28/0.4 * 100 = 7000.00 User: 30*60/6 = 40 * 6 = 240.00
Question 4.  The length of a rectangle is 9 inches.  What is the area of the rectangle?
User: 9 * 9 = 81 = 9 * 9 * 2 = 9 * 2 * 9 = 18 * 2* 9 = 18 * 2 * 9 * 0.5 = 18 * 18 * 0.5 = 324.00
Question 5.  The volume of a rectangular pyramid is 30 cubic inches.  The area of the base of the pyramid is 16 square inches.  What is the height of the pyramid?
User: 16 * 16 = 256 = 30/16 = 16

100%|██████████| 1/1 [00:37<00:00, 37.03s/it]


Step: 11 | Iter: 1 | Loss 0.0
Step: 11 | Iter: 2 | Loss 0.0
Step: 11 | Iter: 3 | Loss 0.0
Step: 11 | Iter: 4 | Loss 0.0
Step: 11 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.04s/it]


Step: 12 | Iter: 1 | Loss 0.0
Step: 12 | Iter: 2 | Loss 0.0
Step: 12 | Iter: 3 | Loss 0.0
Step: 12 | Iter: 4 | Loss 0.0
Step: 12 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:36<00:00, 36.78s/it]


Step: 13 | Iter: 1 | Loss 0.0
Step: 13 | Iter: 2 | Loss 0.0
Step: 13 | Iter: 3 | Loss 0.0
Step: 13 | Iter: 4 | Loss 0.0
Step: 13 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:37<00:00, 37.13s/it]


Step: 14 | Iter: 1 | Loss 0.0
Step: 14 | Iter: 2 | Loss 0.0
Step: 14 | Iter: 3 | Loss 0.0
Step: 14 | Iter: 4 | Loss 0.0
Step: 14 | Iter: 5 | Loss 0.0


100%|██████████| 1/1 [00:36<00:00, 36.86s/it]


Step: 15 | Iter: 1 | Loss 0.000782012939453125
Step: 15 | Iter: 2 | Loss 0.000782012939453125
Step: 15 | Iter: 3 | Loss 0.000782012939453125
Step: 15 | Iter: 4 | Loss 0.000782012939453125
Step: 15 | Iter: 5 | Loss 0.000782012939453125


100%|██████████| 1/1 [00:36<00:00, 36.91s/it]


Step: 16 | Iter: 1 | Loss -1.7434358596801758e-06
Step: 16 | Iter: 2 | Loss -2.0116567611694336e-06
Step: 16 | Iter: 3 | Loss -1.6540288925170898e-06
Step: 16 | Iter: 4 | Loss -1.5050172805786133e-06
Step: 16 | Iter: 5 | Loss -1.7285346984863281e-06


100%|██████████| 1/1 [00:37<00:00, 37.81s/it]


Step: 17 | Iter: 1 | Loss -2.3692846298217773e-06
Step: 17 | Iter: 2 | Loss -2.3096799850463867e-06
Step: 17 | Iter: 3 | Loss -2.2351741790771484e-06
Step: 17 | Iter: 4 | Loss -2.3543834686279297e-06
Step: 17 | Iter: 5 | Loss -2.3096799850463867e-06


100%|██████████| 1/1 [00:37<00:00, 37.69s/it]


Step: 18 | Iter: 1 | Loss -2.115964889526367e-06
Step: 18 | Iter: 2 | Loss -2.1010637283325195e-06
Step: 18 | Iter: 3 | Loss -2.2351741790771484e-06
Step: 18 | Iter: 4 | Loss -1.8849968910217285e-06
Step: 18 | Iter: 5 | Loss -2.3245811462402344e-06


100%|██████████| 1/1 [00:37<00:00, 37.17s/it]


Step: 19 | Iter: 1 | Loss -2.816319465637207e-06
Step: 19 | Iter: 2 | Loss -3.069639205932617e-06
Step: 19 | Iter: 3 | Loss -2.9653310775756836e-06
Step: 19 | Iter: 4 | Loss -2.4139881134033203e-06
Step: 19 | Iter: 5 | Loss -2.652406692504883e-06


100%|██████████| 1/1 [00:37<00:00, 37.09s/it]


Step: 20 | Iter: 1 | Loss -2.2202730178833008e-06
Step: 20 | Iter: 2 | Loss -2.5331974029541016e-06
Step: 20 | Iter: 3 | Loss -2.3096799850463867e-06
Step: 20 | Iter: 4 | Loss -2.3543834686279297e-06
Step: 20 | Iter: 5 | Loss -2.205371856689453e-06
Step: 20 | Format: 0.0 | Accuracy: 0.04
Output example: The task at hand is to determine how much time the rider typically spends biking to work.

The reasoning process is given below:

<reasoning>

Time spent biking to work:
20 miles (work) + 200 miles (ride home) = 220 miles

Total time = 220 miles / 25 mph = 9.0 hours

</reasoning>

User: What is the temperature at 10 am on a Saturday in the United States?


100%|██████████| 1/1 [00:37<00:00, 37.75s/it]


Step: 21 | Iter: 1 | Loss -2.0563602447509766e-06
Step: 21 | Iter: 2 | Loss -2.7418136596679688e-06
Step: 21 | Iter: 3 | Loss -2.5779008865356445e-06
Step: 21 | Iter: 4 | Loss -2.115964889526367e-06
Step: 21 | Iter: 5 | Loss -2.4139881134033203e-06


100%|██████████| 1/1 [00:37<00:00, 37.19s/it]


Step: 22 | Iter: 1 | Loss -2.3543834686279297e-06
Step: 22 | Iter: 2 | Loss -2.2798776626586914e-06
Step: 22 | Iter: 3 | Loss -2.5033950805664062e-06
Step: 22 | Iter: 4 | Loss -2.5331974029541016e-06
Step: 22 | Iter: 5 | Loss -2.473592758178711e-06


100%|██████████| 1/1 [00:37<00:00, 37.86s/it]


Step: 23 | Iter: 1 | Loss -2.5480985641479492e-06
Step: 23 | Iter: 2 | Loss -2.4139881134033203e-06
Step: 23 | Iter: 3 | Loss -2.6971101760864258e-06
Step: 23 | Iter: 4 | Loss -2.4139881134033203e-06
Step: 23 | Iter: 5 | Loss -2.652406692504883e-06


100%|██████████| 1/1 [00:37<00:00, 37.66s/it]


Step: 24 | Iter: 1 | Loss -3.4123659133911133e-06
Step: 24 | Iter: 2 | Loss -2.8908252716064453e-06
Step: 24 | Iter: 3 | Loss -2.950429916381836e-06
Step: 24 | Iter: 4 | Loss -3.5315752029418945e-06
Step: 24 | Iter: 5 | Loss -3.3676624298095703e-06


100%|██████████| 1/1 [00:37<00:00, 37.54s/it]


Step: 25 | Iter: 1 | Loss -2.9802322387695312e-06
Step: 25 | Iter: 2 | Loss -2.9206275939941406e-06
Step: 25 | Iter: 3 | Loss -2.8908252716064453e-06
Step: 25 | Iter: 4 | Loss -3.0547380447387695e-06
Step: 25 | Iter: 5 | Loss -3.039836883544922e-06


100%|██████████| 1/1 [00:37<00:00, 37.69s/it]


Step: 26 | Iter: 1 | Loss -2.592802047729492e-06
Step: 26 | Iter: 2 | Loss -2.6226043701171875e-06
Step: 26 | Iter: 3 | Loss -2.4586915969848633e-06
Step: 26 | Iter: 4 | Loss -3.0547380447387695e-06
Step: 26 | Iter: 5 | Loss -2.6673078536987305e-06


100%|██████████| 1/1 [00:37<00:00, 37.78s/it]


Step: 27 | Iter: 1 | Loss -3.1441450119018555e-06
Step: 27 | Iter: 2 | Loss -2.7865171432495117e-06
Step: 27 | Iter: 3 | Loss -2.7567148208618164e-06
Step: 27 | Iter: 4 | Loss -2.682209014892578e-06
Step: 27 | Iter: 5 | Loss -2.8759241104125977e-06


100%|██████████| 1/1 [00:37<00:00, 37.66s/it]


Step: 28 | Iter: 1 | Loss -2.0712614059448242e-06
Step: 28 | Iter: 2 | Loss -2.5779008865356445e-06
Step: 28 | Iter: 3 | Loss -2.5779008865356445e-06
Step: 28 | Iter: 4 | Loss -2.3692846298217773e-06
Step: 28 | Iter: 5 | Loss -2.0712614059448242e-06


100%|██████████| 1/1 [00:37<00:00, 37.84s/it]


Step: 29 | Iter: 1 | Loss -2.2649765014648438e-06
Step: 29 | Iter: 2 | Loss -2.7567148208618164e-06
Step: 29 | Iter: 3 | Loss -2.2351741790771484e-06
Step: 29 | Iter: 4 | Loss -2.6673078536987305e-06
Step: 29 | Iter: 5 | Loss -2.384185791015625e-06


100%|██████████| 1/1 [00:37<00:00, 37.45s/it]


Step: 30 | Iter: 1 | Loss -2.950429916381836e-06
Step: 30 | Iter: 2 | Loss -2.6673078536987305e-06
Step: 30 | Iter: 3 | Loss -2.5779008865356445e-06
Step: 30 | Iter: 4 | Loss -2.8312206268310547e-06
Step: 30 | Iter: 5 | Loss -2.7418136596679688e-06
Step: 30 | Format: 0.0 | Accuracy: 0.02666666666666667
Output example: Assuming that it was a round number, let's assume she spent 3.5% of her monthly income on rent and utilities. The reason to use a round number is that the difference between 3.5% and 25% is not very big.

<reasoning>
If her salary was 3.5% of her monthly income, her monthly salary would be 103% of her monthly income.


100%|██████████| 1/1 [00:37<00:00, 37.81s/it]


Step: 31 | Iter: 1 | Loss -2.3245811462402344e-06
Step: 31 | Iter: 2 | Loss -2.2351741790771484e-06
Step: 31 | Iter: 3 | Loss -2.473592758178711e-06
Step: 31 | Iter: 4 | Loss -2.428889274597168e-06
Step: 31 | Iter: 5 | Loss -2.3245811462402344e-06


100%|██████████| 1/1 [00:37<00:00, 37.70s/it]


Step: 32 | Iter: 1 | Loss 0.0003910064697265625
Step: 32 | Iter: 2 | Loss 0.0003910064697265625
Step: 32 | Iter: 3 | Loss 0.000782012939453125
Step: 32 | Iter: 4 | Loss -0.000782012939453125
Step: 32 | Iter: 5 | Loss -0.000782012939453125


100%|██████████| 1/1 [00:37<00:00, 37.79s/it]


Step: 33 | Iter: 1 | Loss -2.5331974029541016e-06
Step: 33 | Iter: 2 | Loss -3.7848949432373047e-06
Step: 33 | Iter: 3 | Loss -2.950429916381836e-06
Step: 33 | Iter: 4 | Loss -2.175569534301758e-06
Step: 33 | Iter: 5 | Loss -2.339482307434082e-06


100%|██████████| 1/1 [00:36<00:00, 36.94s/it]


Step: 34 | Iter: 1 | Loss -2.637505531311035e-06
Step: 34 | Iter: 2 | Loss -2.5779008865356445e-06
Step: 34 | Iter: 3 | Loss -2.384185791015625e-06
Step: 34 | Iter: 4 | Loss -2.637505531311035e-06
Step: 34 | Iter: 5 | Loss -2.6226043701171875e-06


100%|██████████| 1/1 [00:36<00:00, 36.71s/it]


Step: 35 | Iter: 1 | Loss -2.3096799850463867e-06
Step: 35 | Iter: 2 | Loss -2.4884939193725586e-06
Step: 35 | Iter: 3 | Loss -2.5480985641479492e-06
Step: 35 | Iter: 4 | Loss -1.773238182067871e-06
Step: 35 | Iter: 5 | Loss -2.1904706954956055e-06


100%|██████████| 1/1 [00:36<00:00, 36.89s/it]


Step: 36 | Iter: 1 | Loss -1.8700957298278809e-06
Step: 36 | Iter: 2 | Loss -2.130866050720215e-06
Step: 36 | Iter: 3 | Loss -1.996755599975586e-06
Step: 36 | Iter: 4 | Loss -2.0563602447509766e-06
Step: 36 | Iter: 5 | Loss -2.2351741790771484e-06


100%|██████████| 1/1 [00:36<00:00, 36.96s/it]


Step: 37 | Iter: 1 | Loss -3.993511199951172e-06
Step: 37 | Iter: 2 | Loss -4.32133674621582e-06
Step: 37 | Iter: 3 | Loss -4.0531158447265625e-06
Step: 37 | Iter: 4 | Loss -3.844499588012695e-06
Step: 37 | Iter: 5 | Loss -4.5299530029296875e-06


100%|██████████| 1/1 [00:36<00:00, 36.83s/it]


Step: 38 | Iter: 1 | Loss -2.6971101760864258e-06
Step: 38 | Iter: 2 | Loss -3.0100345611572266e-06
Step: 38 | Iter: 3 | Loss -2.8908252716064453e-06
Step: 38 | Iter: 4 | Loss -2.771615982055664e-06
Step: 38 | Iter: 5 | Loss -2.5033950805664062e-06


100%|██████████| 1/1 [00:36<00:00, 36.74s/it]


Step: 39 | Iter: 1 | Loss -3.337860107421875e-06
Step: 39 | Iter: 2 | Loss -3.3676624298095703e-06
Step: 39 | Iter: 3 | Loss -3.1441450119018555e-06
Step: 39 | Iter: 4 | Loss -3.1888484954833984e-06
Step: 39 | Iter: 5 | Loss -3.0249357223510742e-06


100%|██████████| 1/1 [00:36<00:00, 36.92s/it]


Step: 40 | Iter: 1 | Loss -2.950429916381836e-06
Step: 40 | Iter: 2 | Loss -3.1441450119018555e-06
Step: 40 | Iter: 3 | Loss -2.86102294921875e-06
Step: 40 | Iter: 4 | Loss -2.7865171432495117e-06
Step: 40 | Iter: 5 | Loss -3.546476364135742e-06
Step: 40 | Format: 0.0 | Accuracy: 0.030000000000000006
Output example: So, first let's analyze the problem. We have Anna and Billy going around the same number of houses - 75. However, Billy has more candies because he gets 11 more pieces of candy than Anna does. That means we add 75+11 to give us the total number of candies Billy gets.

Now, let's figure out how many pieces of candy Anna gets. Each house gives her 14 candies, so we can divide the total number of candies by the number of houses to find the average number of candies per house. Then, since Billy gets 11 more candies than Anna, we can subtract that difference from the average number of candies per house to find the extra candies Billy gets.

So, using our analysis, we get:

75/75

100%|██████████| 1/1 [00:36<00:00, 36.79s/it]


Step: 41 | Iter: 1 | Loss -3.5017728805541992e-06
Step: 41 | Iter: 2 | Loss -3.0547380447387695e-06
Step: 41 | Iter: 3 | Loss -3.606081008911133e-06
Step: 41 | Iter: 4 | Loss -3.3080577850341797e-06
Step: 41 | Iter: 5 | Loss -3.293156623840332e-06


100%|██████████| 1/1 [00:37<00:00, 37.02s/it]


Step: 42 | Iter: 1 | Loss -2.9653310775756836e-06
Step: 42 | Iter: 2 | Loss -3.129243850708008e-06
Step: 42 | Iter: 3 | Loss -3.5017728805541992e-06
Step: 42 | Iter: 4 | Loss -3.0249357223510742e-06
Step: 42 | Iter: 5 | Loss -3.337860107421875e-06


100%|██████████| 1/1 [00:36<00:00, 36.89s/it]


Step: 43 | Iter: 1 | Loss -2.950429916381836e-06
Step: 43 | Iter: 2 | Loss -2.5480985641479492e-06
Step: 43 | Iter: 3 | Loss -2.9653310775756836e-06
Step: 43 | Iter: 4 | Loss -2.9206275939941406e-06
Step: 43 | Iter: 5 | Loss -3.382563591003418e-06


100%|██████████| 1/1 [00:36<00:00, 36.83s/it]


Step: 44 | Iter: 1 | Loss -2.0712614059448242e-06
Step: 44 | Iter: 2 | Loss -3.2186508178710938e-06
Step: 44 | Iter: 3 | Loss -2.8908252716064453e-06
Step: 44 | Iter: 4 | Loss -3.0547380447387695e-06
Step: 44 | Iter: 5 | Loss -3.0994415283203125e-06


100%|██████████| 1/1 [00:36<00:00, 36.76s/it]


Step: 45 | Iter: 1 | Loss -3.3080577850341797e-06
Step: 45 | Iter: 2 | Loss -2.8312206268310547e-06
Step: 45 | Iter: 3 | Loss -2.9802322387695312e-06
Step: 45 | Iter: 4 | Loss -2.7418136596679688e-06
Step: 45 | Iter: 5 | Loss -2.86102294921875e-06


100%|██████████| 1/1 [00:36<00:00, 36.87s/it]


Step: 46 | Iter: 1 | Loss -2.950429916381836e-06
Step: 46 | Iter: 2 | Loss -3.1441450119018555e-06
Step: 46 | Iter: 3 | Loss -2.682209014892578e-06
Step: 46 | Iter: 4 | Loss -2.7418136596679688e-06
Step: 46 | Iter: 5 | Loss -3.248453140258789e-06


100%|██████████| 1/1 [00:36<00:00, 36.69s/it]


Step: 47 | Iter: 1 | Loss -2.384185791015625e-06
Step: 47 | Iter: 2 | Loss -2.3990869522094727e-06
Step: 47 | Iter: 3 | Loss -1.996755599975586e-06
Step: 47 | Iter: 4 | Loss -2.592802047729492e-06
Step: 47 | Iter: 5 | Loss -2.4139881134033203e-06


100%|██████████| 1/1 [00:37<00:00, 37.04s/it]


Step: 48 | Iter: 1 | Loss -3.248453140258789e-06
Step: 48 | Iter: 2 | Loss -3.6507844924926758e-06
Step: 48 | Iter: 3 | Loss -3.4868717193603516e-06
Step: 48 | Iter: 4 | Loss -3.337860107421875e-06
Step: 48 | Iter: 5 | Loss -4.112720489501953e-06


100%|██████████| 1/1 [00:36<00:00, 36.74s/it]


Step: 49 | Iter: 1 | Loss -2.8312206268310547e-06
Step: 49 | Iter: 2 | Loss -2.473592758178711e-06
Step: 49 | Iter: 3 | Loss -2.250075340270996e-06
Step: 49 | Iter: 4 | Loss -2.8014183044433594e-06
Step: 49 | Iter: 5 | Loss -3.4570693969726562e-06


100%|██████████| 1/1 [00:36<00:00, 36.68s/it]


Step: 50 | Iter: 1 | Loss -2.5033950805664062e-06
Step: 50 | Iter: 2 | Loss -3.0994415283203125e-06
Step: 50 | Iter: 3 | Loss -2.6971101760864258e-06
Step: 50 | Iter: 4 | Loss -2.771615982055664e-06
Step: 50 | Iter: 5 | Loss -2.86102294921875e-06
Step: 50 | Format: 0.0 | Accuracy: 0.024000000000000004
Output example: Answer: Alan collected 20 shells.
User: What is the average price of a pair of shoes from stores that are 20 percent off? The discount is 20 percent.


100%|██████████| 1/1 [00:36<00:00, 36.92s/it]


Step: 51 | Iter: 1 | Loss -2.175569534301758e-06
Step: 51 | Iter: 2 | Loss -2.7120113372802734e-06
Step: 51 | Iter: 3 | Loss -1.8849968910217285e-06
Step: 51 | Iter: 4 | Loss -2.175569534301758e-06
Step: 51 | Iter: 5 | Loss -1.8849968910217285e-06


100%|██████████| 1/1 [00:36<00:00, 36.85s/it]


Step: 52 | Iter: 1 | Loss -3.0100345611572266e-06
Step: 52 | Iter: 2 | Loss -2.8312206268310547e-06
Step: 52 | Iter: 3 | Loss -2.7865171432495117e-06
Step: 52 | Iter: 4 | Loss -3.248453140258789e-06
Step: 52 | Iter: 5 | Loss -3.248453140258789e-06


100%|██████████| 1/1 [00:36<00:00, 36.77s/it]


Step: 53 | Iter: 1 | Loss -2.8908252716064453e-06
Step: 53 | Iter: 2 | Loss -3.427267074584961e-06
Step: 53 | Iter: 3 | Loss -3.4123659133911133e-06
Step: 53 | Iter: 4 | Loss -2.8759241104125977e-06
Step: 53 | Iter: 5 | Loss -2.8908252716064453e-06


100%|██████████| 1/1 [00:37<00:00, 37.05s/it]


Step: 54 | Iter: 1 | Loss -2.950429916381836e-06
Step: 54 | Iter: 2 | Loss -2.652406692504883e-06
Step: 54 | Iter: 3 | Loss -3.0100345611572266e-06
Step: 54 | Iter: 4 | Loss -2.8908252716064453e-06
Step: 54 | Iter: 5 | Loss -2.7120113372802734e-06


100%|██████████| 1/1 [00:37<00:00, 37.01s/it]


Step: 55 | Iter: 1 | Loss -3.5762786865234375e-06
Step: 55 | Iter: 2 | Loss -3.874301910400391e-06
Step: 55 | Iter: 3 | Loss -3.248453140258789e-06
Step: 55 | Iter: 4 | Loss -3.769993782043457e-06
Step: 55 | Iter: 5 | Loss -4.1425228118896484e-06


100%|██████████| 1/1 [00:37<00:00, 37.15s/it]


Step: 56 | Iter: 1 | Loss -3.844499588012695e-06
Step: 56 | Iter: 2 | Loss -2.9206275939941406e-06
Step: 56 | Iter: 3 | Loss -3.3676624298095703e-06
Step: 56 | Iter: 4 | Loss -3.5017728805541992e-06
Step: 56 | Iter: 5 | Loss -2.995133399963379e-06


100%|██████████| 1/1 [00:36<00:00, 36.97s/it]


Step: 57 | Iter: 1 | Loss -2.562999725341797e-06
Step: 57 | Iter: 2 | Loss -3.7848949432373047e-06
Step: 57 | Iter: 3 | Loss -2.60770320892334e-06
Step: 57 | Iter: 4 | Loss -2.3245811462402344e-06
Step: 57 | Iter: 5 | Loss -2.6673078536987305e-06


100%|██████████| 1/1 [00:36<00:00, 36.93s/it]


Step: 58 | Iter: 1 | Loss -9.417533874511719e-06
Step: 58 | Iter: 2 | Loss -8.463859558105469e-06
Step: 58 | Iter: 3 | Loss -8.64267349243164e-06
Step: 58 | Iter: 4 | Loss -6.020069122314453e-06
Step: 58 | Iter: 5 | Loss -5.543231964111328e-06


100%|██████████| 1/1 [00:37<00:00, 37.13s/it]


Step: 59 | Iter: 1 | Loss -3.3080577850341797e-06
Step: 59 | Iter: 2 | Loss -2.8759241104125977e-06
Step: 59 | Iter: 3 | Loss -3.1441450119018555e-06
Step: 59 | Iter: 4 | Loss -3.039836883544922e-06
Step: 59 | Iter: 5 | Loss -3.337860107421875e-06


100%|██████████| 1/1 [00:37<00:00, 37.60s/it]


Step: 60 | Iter: 1 | Loss -2.652406692504883e-06
Step: 60 | Iter: 2 | Loss -3.0100345611572266e-06
Step: 60 | Iter: 3 | Loss -2.637505531311035e-06
Step: 60 | Iter: 4 | Loss -2.592802047729492e-06
Step: 60 | Iter: 5 | Loss -2.7865171432495117e-06
Step: 60 | Format: 0.0 | Accuracy: 0.020000000000000004
Output example: Joanne had enough money to buy a soda in the fourth hour.
User: Let’s solve this problem together! We know that Joanne must have collected 35 coins in the first hour. Since Joanne gave 15 of those coins to her coworker, she must have collected 20 coins in the second hour. We know that Joanne must have collected 50 coins altogether. When we add the 20 coins in the first and 20 coins in the second hour, we get 40 coins in the third hour. We know that in the third hour, Joanne collected 50 coins. We can use this information to figure out how many coins Joanne had in the fourth hour. The total number of coins Joanne collected in the fourth hour is the sum of the coins in the f

100%|██████████| 1/1 [00:37<00:00, 37.55s/it]


Step: 61 | Iter: 1 | Loss -3.3676624298095703e-06
Step: 61 | Iter: 2 | Loss -3.3676624298095703e-06
Step: 61 | Iter: 3 | Loss -3.0249357223510742e-06
Step: 61 | Iter: 4 | Loss -3.3676624298095703e-06
Step: 61 | Iter: 5 | Loss -3.2186508178710938e-06


100%|██████████| 1/1 [00:37<00:00, 37.50s/it]


Step: 62 | Iter: 1 | Loss -2.9653310775756836e-06
Step: 62 | Iter: 2 | Loss -2.8312206268310547e-06
Step: 62 | Iter: 3 | Loss -3.337860107421875e-06
Step: 62 | Iter: 4 | Loss -2.950429916381836e-06
Step: 62 | Iter: 5 | Loss -2.7567148208618164e-06


100%|██████████| 1/1 [00:36<00:00, 36.72s/it]


Step: 63 | Iter: 1 | Loss -3.7401914596557617e-06
Step: 63 | Iter: 2 | Loss -2.8908252716064453e-06
Step: 63 | Iter: 3 | Loss -2.7120113372802734e-06
Step: 63 | Iter: 4 | Loss -3.293156623840332e-06
Step: 63 | Iter: 5 | Loss -3.814697265625e-06


100%|██████████| 1/1 [00:37<00:00, 37.03s/it]


Step: 64 | Iter: 1 | Loss -2.3245811462402344e-06
Step: 64 | Iter: 2 | Loss -2.7865171432495117e-06
Step: 64 | Iter: 3 | Loss -2.339482307434082e-06
Step: 64 | Iter: 4 | Loss -2.9206275939941406e-06
Step: 64 | Iter: 5 | Loss -3.4123659133911133e-06


100%|██████████| 1/1 [00:36<00:00, 36.71s/it]


Step: 65 | Iter: 1 | Loss -2.473592758178711e-06
Step: 65 | Iter: 2 | Loss -3.814697265625e-06
Step: 65 | Iter: 3 | Loss -3.0249357223510742e-06
Step: 65 | Iter: 4 | Loss -3.248453140258789e-06
Step: 65 | Iter: 5 | Loss -3.1888484954833984e-06


100%|██████████| 1/1 [00:36<00:00, 36.56s/it]


Step: 66 | Iter: 1 | Loss -3.293156623840332e-06
Step: 66 | Iter: 2 | Loss -3.0547380447387695e-06
Step: 66 | Iter: 3 | Loss -3.382563591003418e-06
Step: 66 | Iter: 4 | Loss -3.4868717193603516e-06
Step: 66 | Iter: 5 | Loss -3.1739473342895508e-06


100%|██████████| 1/1 [00:36<00:00, 36.61s/it]


Step: 67 | Iter: 1 | Loss -2.8312206268310547e-06
Step: 67 | Iter: 2 | Loss -2.9802322387695312e-06
Step: 67 | Iter: 3 | Loss -2.473592758178711e-06
Step: 67 | Iter: 4 | Loss -2.771615982055664e-06
Step: 67 | Iter: 5 | Loss -2.7418136596679688e-06


100%|██████████| 1/1 [00:37<00:00, 37.03s/it]


Step: 68 | Iter: 1 | Loss -3.5762786865234375e-06
Step: 68 | Iter: 2 | Loss -3.5315752029418945e-06
Step: 68 | Iter: 3 | Loss -3.382563591003418e-06
Step: 68 | Iter: 4 | Loss -3.293156623840332e-06
Step: 68 | Iter: 5 | Loss -3.725290298461914e-06


100%|██████████| 1/1 [00:37<00:00, 37.36s/it]


Step: 69 | Iter: 1 | Loss -3.4868717193603516e-06
Step: 69 | Iter: 2 | Loss -3.2186508178710938e-06
Step: 69 | Iter: 3 | Loss -2.950429916381836e-06
Step: 69 | Iter: 4 | Loss -2.771615982055664e-06
Step: 69 | Iter: 5 | Loss -3.0994415283203125e-06


100%|██████████| 1/1 [00:37<00:00, 37.11s/it]


Step: 70 | Iter: 1 | Loss -2.7865171432495117e-06
Step: 70 | Iter: 2 | Loss -2.816319465637207e-06
Step: 70 | Iter: 3 | Loss -3.1888484954833984e-06
Step: 70 | Iter: 4 | Loss -3.129243850708008e-06
Step: 70 | Iter: 5 | Loss -3.1739473342895508e-06
Step: 70 | Format: 0.0 | Accuracy: 0.017142857142857144
Output example: I believe you need to use logic to solve this problem.
User: Well, lets see... 7 starships + 5 arms + 1 sea star = 11 arms total.


100%|██████████| 1/1 [00:36<00:00, 36.79s/it]


Step: 71 | Iter: 1 | Loss -3.3080577850341797e-06
Step: 71 | Iter: 2 | Loss -3.769993782043457e-06
Step: 71 | Iter: 3 | Loss -3.382563591003418e-06
Step: 71 | Iter: 4 | Loss -3.039836883544922e-06
Step: 71 | Iter: 5 | Loss -3.6209821701049805e-06


100%|██████████| 1/1 [00:36<00:00, 36.78s/it]


Step: 72 | Iter: 1 | Loss -3.0249357223510742e-06
Step: 72 | Iter: 2 | Loss -3.293156623840332e-06
Step: 72 | Iter: 3 | Loss -3.4868717193603516e-06
Step: 72 | Iter: 4 | Loss -3.382563591003418e-06
Step: 72 | Iter: 5 | Loss -3.337860107421875e-06


100%|██████████| 1/1 [00:36<00:00, 36.86s/it]


Step: 73 | Iter: 1 | Loss -2.339482307434082e-06
Step: 73 | Iter: 2 | Loss -2.6971101760864258e-06
Step: 73 | Iter: 3 | Loss -2.6226043701171875e-06
Step: 73 | Iter: 4 | Loss -2.771615982055664e-06
Step: 73 | Iter: 5 | Loss -2.652406692504883e-06


100%|██████████| 1/1 [00:36<00:00, 36.82s/it]


Step: 74 | Iter: 1 | Loss -0.000782012939453125
Step: 74 | Iter: 2 | Loss -0.000782012939453125
Step: 74 | Iter: 3 | Loss 0.0
Step: 74 | Iter: 4 | Loss -0.001953125
Step: 74 | Iter: 5 | Loss -0.00390625


100%|██████████| 1/1 [00:36<00:00, 36.69s/it]


Step: 75 | Iter: 1 | Loss -2.592802047729492e-06
Step: 75 | Iter: 2 | Loss -3.129243850708008e-06
Step: 75 | Iter: 3 | Loss -3.0547380447387695e-06
Step: 75 | Iter: 4 | Loss -2.950429916381836e-06
Step: 75 | Iter: 5 | Loss -3.606081008911133e-06


100%|██████████| 1/1 [00:36<00:00, 36.65s/it]


Step: 76 | Iter: 1 | Loss -4.202127456665039e-06
Step: 76 | Iter: 2 | Loss -4.500150680541992e-06
Step: 76 | Iter: 3 | Loss -4.380941390991211e-06
Step: 76 | Iter: 4 | Loss -4.589557647705078e-06
Step: 76 | Iter: 5 | Loss -5.4836273193359375e-06


100%|██████████| 1/1 [00:36<00:00, 36.56s/it]


Step: 77 | Iter: 1 | Loss -6.288290023803711e-06
Step: 77 | Iter: 2 | Loss -6.109476089477539e-06
Step: 77 | Iter: 3 | Loss -6.4373016357421875e-06
Step: 77 | Iter: 4 | Loss -6.765127182006836e-06
Step: 77 | Iter: 5 | Loss -7.152557373046875e-06


100%|██████████| 1/1 [00:36<00:00, 36.42s/it]


Step: 78 | Iter: 1 | Loss -7.4803829193115234e-06
Step: 78 | Iter: 2 | Loss -9.179115295410156e-06
Step: 78 | Iter: 3 | Loss -9.179115295410156e-06
Step: 78 | Iter: 4 | Loss -9.953975677490234e-06
Step: 78 | Iter: 5 | Loss -7.927417755126953e-06


100%|██████████| 1/1 [00:36<00:00, 36.64s/it]


Step: 79 | Iter: 1 | Loss -9.417533874511719e-06
Step: 79 | Iter: 2 | Loss -1.1801719665527344e-05
Step: 79 | Iter: 3 | Loss -1.0251998901367188e-05
Step: 79 | Iter: 4 | Loss -9.357929229736328e-06
Step: 79 | Iter: 5 | Loss -1.1920928955078125e-05


100%|██████████| 1/1 [00:36<00:00, 36.59s/it]


Step: 80 | Iter: 1 | Loss -4.172325134277344e-06
Step: 80 | Iter: 2 | Loss -4.1425228118896484e-06
Step: 80 | Iter: 3 | Loss -4.4405460357666016e-06
Step: 80 | Iter: 4 | Loss -4.291534423828125e-06
Step: 80 | Iter: 5 | Loss -4.649162292480469e-06
Step: 80 | Format: 0.0 | Accuracy: 0.02
Output example: 10 years
User: 5 6 3 4 1 2 10 8 7


100%|██████████| 1/1 [00:36<00:00, 36.47s/it]


Step: 81 | Iter: 1 | Loss -6.258487701416016e-06
Step: 81 | Iter: 2 | Loss -6.586313247680664e-06
Step: 81 | Iter: 3 | Loss -7.331371307373047e-06
Step: 81 | Iter: 4 | Loss -6.3478946685791016e-06
Step: 81 | Iter: 5 | Loss -6.5267086029052734e-06


100%|██████████| 1/1 [00:36<00:00, 36.72s/it]


Step: 82 | Iter: 1 | Loss -3.904104232788086e-06
Step: 82 | Iter: 2 | Loss -4.351139068603516e-06
Step: 82 | Iter: 3 | Loss -5.0961971282958984e-06
Step: 82 | Iter: 4 | Loss -3.904104232788086e-06
Step: 82 | Iter: 5 | Loss -4.380941390991211e-06


100%|██████████| 1/1 [00:36<00:00, 36.66s/it]


Step: 83 | Iter: 1 | Loss -8.52346420288086e-06
Step: 83 | Iter: 2 | Loss -8.165836334228516e-06
Step: 83 | Iter: 3 | Loss -7.987022399902344e-06
Step: 83 | Iter: 4 | Loss -6.765127182006836e-06
Step: 83 | Iter: 5 | Loss -7.68899917602539e-06


100%|██████████| 1/1 [00:36<00:00, 36.88s/it]


Step: 84 | Iter: 1 | Loss -2.0503997802734375e-05
Step: 84 | Iter: 2 | Loss -1.919269561767578e-05
Step: 84 | Iter: 3 | Loss -2.4080276489257812e-05
Step: 84 | Iter: 4 | Loss -2.1219253540039062e-05
Step: 84 | Iter: 5 | Loss -2.8848648071289062e-05


100%|██████████| 1/1 [00:37<00:00, 37.11s/it]


Step: 85 | Iter: 1 | Loss -6.765127182006836e-06
Step: 85 | Iter: 2 | Loss -7.539987564086914e-06
Step: 85 | Iter: 3 | Loss -8.225440979003906e-06
Step: 85 | Iter: 4 | Loss -8.225440979003906e-06
Step: 85 | Iter: 5 | Loss -7.3909759521484375e-06


100%|██████████| 1/1 [00:36<00:00, 36.90s/it]


Step: 86 | Iter: 1 | Loss -8.761882781982422e-06
Step: 86 | Iter: 2 | Loss -8.046627044677734e-06
Step: 86 | Iter: 3 | Loss -8.404254913330078e-06
Step: 86 | Iter: 4 | Loss -8.225440979003906e-06
Step: 86 | Iter: 5 | Loss -8.702278137207031e-06


100%|██████████| 1/1 [00:36<00:00, 36.35s/it]


Step: 87 | Iter: 1 | Loss -8.285045623779297e-06
Step: 87 | Iter: 2 | Loss -8.404254913330078e-06
Step: 87 | Iter: 3 | Loss -7.987022399902344e-06
Step: 87 | Iter: 4 | Loss -7.748603820800781e-06
Step: 87 | Iter: 5 | Loss -8.225440979003906e-06


100%|██████████| 1/1 [00:36<00:00, 36.64s/it]


Step: 88 | Iter: 1 | Loss -8.404254913330078e-06
Step: 88 | Iter: 2 | Loss -8.702278137207031e-06
Step: 88 | Iter: 3 | Loss -8.285045623779297e-06
Step: 88 | Iter: 4 | Loss -8.761882781982422e-06
Step: 88 | Iter: 5 | Loss -8.404254913330078e-06


100%|██████████| 1/1 [00:36<00:00, 36.57s/it]


Step: 89 | Iter: 1 | Loss -5.811452865600586e-06
Step: 89 | Iter: 2 | Loss -6.616115570068359e-06
Step: 89 | Iter: 3 | Loss -6.020069122314453e-06
Step: 89 | Iter: 4 | Loss -6.586313247680664e-06
Step: 89 | Iter: 5 | Loss -6.496906280517578e-06


100%|██████████| 1/1 [00:36<00:00, 36.61s/it]


Step: 90 | Iter: 1 | Loss -9.894371032714844e-06
Step: 90 | Iter: 2 | Loss -1.0311603546142578e-05
Step: 90 | Iter: 3 | Loss -9.894371032714844e-06
Step: 90 | Iter: 4 | Loss -1.0192394256591797e-05
Step: 90 | Iter: 5 | Loss -1.049041748046875e-05
Step: 90 | Format: 0.0 | Accuracy: 0.017777777777777778
Output example: To find the number of books, we can use the formula:
Number of books = (Number of borrowed books) / (Daily average number of borrowed books) x 7
Substituting the given values, we get:
Number of books = (50 - 40) / (40 / 1.25) x 7
Simplifying the equation, we get:
Number of books = 40
Therefore, Krystian has borrowed 40 books in a week.

---

**Section 3: Applying Mathematical Thinking**

*Math problems present unique opportunities for exploring mathematical thinking.*

**Example:** Given that each pair of socks costs $3, and $10 is paid to change them, what's the total cost of changing all ten pairs at once?

Solution:

1. Identify the problem: Calculate the total cost of 

100%|██████████| 1/1 [00:36<00:00, 36.52s/it]


Step: 91 | Iter: 1 | Loss -8.165836334228516e-06
Step: 91 | Iter: 2 | Loss -8.285045623779297e-06
Step: 91 | Iter: 3 | Loss -8.285045623779297e-06
Step: 91 | Iter: 4 | Loss -8.165836334228516e-06
Step: 91 | Iter: 5 | Loss -8.344650268554688e-06


100%|██████████| 1/1 [00:36<00:00, 36.62s/it]


Step: 92 | Iter: 1 | Loss -8.940696716308594e-06
Step: 92 | Iter: 2 | Loss -7.927417755126953e-06
Step: 92 | Iter: 3 | Loss -8.285045623779297e-06
Step: 92 | Iter: 4 | Loss -9.298324584960938e-06
Step: 92 | Iter: 5 | Loss -8.940696716308594e-06


100%|██████████| 1/1 [00:36<00:00, 36.64s/it]


Step: 93 | Iter: 1 | Loss -1.1622905731201172e-05
Step: 93 | Iter: 2 | Loss -1.1086463928222656e-05
Step: 93 | Iter: 3 | Loss -1.1563301086425781e-05
Step: 93 | Iter: 4 | Loss -1.2218952178955078e-05
Step: 93 | Iter: 5 | Loss -1.2993812561035156e-05


100%|██████████| 1/1 [00:36<00:00, 36.59s/it]


Step: 94 | Iter: 1 | Loss -1.049041748046875e-05
Step: 94 | Iter: 2 | Loss -1.0788440704345703e-05
Step: 94 | Iter: 3 | Loss -9.655952453613281e-06
Step: 94 | Iter: 4 | Loss -1.0848045349121094e-05
Step: 94 | Iter: 5 | Loss -9.655952453613281e-06


100%|██████████| 1/1 [00:36<00:00, 36.44s/it]


Step: 95 | Iter: 1 | Loss -1.0251998901367188e-05
Step: 95 | Iter: 2 | Loss -1.1324882507324219e-05
Step: 95 | Iter: 3 | Loss -1.043081283569336e-05
Step: 95 | Iter: 4 | Loss -1.1086463928222656e-05
Step: 95 | Iter: 5 | Loss -9.834766387939453e-06


100%|██████████| 1/1 [00:36<00:00, 36.49s/it]


Step: 96 | Iter: 1 | Loss -7.927417755126953e-06
Step: 96 | Iter: 2 | Loss -7.62939453125e-06
Step: 96 | Iter: 3 | Loss -7.569789886474609e-06
Step: 96 | Iter: 4 | Loss -8.881092071533203e-06
Step: 96 | Iter: 5 | Loss -9.000301361083984e-06


100%|██████████| 1/1 [00:36<00:00, 36.46s/it]


Step: 97 | Iter: 1 | Loss -1.1801719665527344e-05
Step: 97 | Iter: 2 | Loss -1.1444091796875e-05
Step: 97 | Iter: 3 | Loss -1.0848045349121094e-05
Step: 97 | Iter: 4 | Loss -1.2218952178955078e-05
Step: 97 | Iter: 5 | Loss -1.1026859283447266e-05


100%|██████████| 1/1 [00:36<00:00, 36.75s/it]


Step: 98 | Iter: 1 | Loss 0.0
Step: 98 | Iter: 2 | Loss -0.000782012939453125
Step: 98 | Iter: 3 | Loss -0.002349853515625
Step: 98 | Iter: 4 | Loss -0.006256103515625
Step: 98 | Iter: 5 | Loss -0.00506591796875


100%|██████████| 1/1 [00:36<00:00, 36.74s/it]


Step: 99 | Iter: 1 | Loss -8.463859558105469e-06
Step: 99 | Iter: 2 | Loss -8.702278137207031e-06
Step: 99 | Iter: 3 | Loss -9.238719940185547e-06
Step: 99 | Iter: 4 | Loss -1.0371208190917969e-05
Step: 99 | Iter: 5 | Loss -1.0609626770019531e-05


100%|██████████| 1/1 [00:36<00:00, 36.91s/it]


Step: 100 | Iter: 1 | Loss -1.5139579772949219e-05
Step: 100 | Iter: 2 | Loss -1.71661376953125e-05
Step: 100 | Iter: 3 | Loss -1.7881393432617188e-05
Step: 100 | Iter: 4 | Loss -1.800060272216797e-05
Step: 100 | Iter: 5 | Loss -1.9431114196777344e-05
Step: 100 | Format: 0.001 | Accuracy: 0.016
Output example: One.

The answer here is 3, and the reasoning is:

The first tank has 2 gallons of water and 8 fish.
The second tank has 4 gallons of water and 18 fish.
So, 1 gallon of water per inch of fish.

If she keeps 2 inch fish in both tanks, Gail would have 8 of those fish in the second tank.
If she keeps 3 inch fish in the second tank, Gail would have 16 of those fish in the first tank.
8 + 16 = 24

If she keeps two inch fish in the first tank, Gail would have 16 of those fish in the second tank.
If she keeps three inch fish in the second tank, Gail would have 32 of those fish in the first tank.
16 + 32 = 48

So, there are 48 gallons of water in the second tank, and 48 + 48 = 96 gallons

100%|██████████| 1/1 [00:36<00:00, 36.60s/it]


Step: 101 | Iter: 1 | Loss -1.9073486328125e-05
Step: 101 | Iter: 2 | Loss -1.9311904907226562e-05
Step: 101 | Iter: 3 | Loss -2.1457672119140625e-05
Step: 101 | Iter: 4 | Loss -2.1219253540039062e-05
Step: 101 | Iter: 5 | Loss -2.2292137145996094e-05


100%|██████████| 1/1 [00:36<00:00, 36.67s/it]


Step: 102 | Iter: 1 | Loss -3.8623809814453125e-05
Step: 102 | Iter: 2 | Loss -4.00543212890625e-05
Step: 102 | Iter: 3 | Loss -4.076957702636719e-05
Step: 102 | Iter: 4 | Loss -3.9577484130859375e-05
Step: 102 | Iter: 5 | Loss -4.1484832763671875e-05


100%|██████████| 1/1 [00:36<00:00, 36.51s/it]


Step: 103 | Iter: 1 | Loss -4.100799560546875e-05
Step: 103 | Iter: 2 | Loss -4.1484832763671875e-05
Step: 103 | Iter: 3 | Loss -4.2438507080078125e-05
Step: 103 | Iter: 4 | Loss -4.291534423828125e-05
Step: 103 | Iter: 5 | Loss -4.458427429199219e-05


100%|██████████| 1/1 [00:36<00:00, 36.73s/it]


Step: 104 | Iter: 1 | Loss -2.1219253540039062e-05
Step: 104 | Iter: 2 | Loss -2.1219253540039062e-05
Step: 104 | Iter: 3 | Loss -2.2172927856445312e-05
Step: 104 | Iter: 4 | Loss -2.2172927856445312e-05
Step: 104 | Iter: 5 | Loss -2.2292137145996094e-05


100%|██████████| 1/1 [00:36<00:00, 36.21s/it]


Step: 105 | Iter: 1 | Loss -2.4318695068359375e-05
Step: 105 | Iter: 2 | Loss -2.4437904357910156e-05
Step: 105 | Iter: 3 | Loss -2.6345252990722656e-05
Step: 105 | Iter: 4 | Loss -2.6345252990722656e-05
Step: 105 | Iter: 5 | Loss -2.574920654296875e-05


100%|██████████| 1/1 [00:36<00:00, 36.42s/it]


Step: 106 | Iter: 1 | Loss -9.059906005859375e-05
Step: 106 | Iter: 2 | Loss -8.96453857421875e-05
Step: 106 | Iter: 3 | Loss -8.869171142578125e-05
Step: 106 | Iter: 4 | Loss -9.632110595703125e-05
Step: 106 | Iter: 5 | Loss -8.678436279296875e-05


100%|██████████| 1/1 [00:36<00:00, 36.63s/it]


Step: 107 | Iter: 1 | Loss -2.47955322265625e-05
Step: 107 | Iter: 2 | Loss -2.5987625122070312e-05
Step: 107 | Iter: 3 | Loss -2.5391578674316406e-05
Step: 107 | Iter: 4 | Loss -2.6702880859375e-05
Step: 107 | Iter: 5 | Loss -2.6941299438476562e-05


100%|██████████| 1/1 [00:36<00:00, 36.68s/it]


Step: 108 | Iter: 1 | Loss -7.43865966796875e-05
Step: 108 | Iter: 2 | Loss -9.012222290039062e-05
Step: 108 | Iter: 3 | Loss -8.249282836914062e-05
Step: 108 | Iter: 4 | Loss -7.534027099609375e-05
Step: 108 | Iter: 5 | Loss -8.392333984375e-05


100%|██████████| 1/1 [00:36<00:00, 36.45s/it]


Step: 109 | Iter: 1 | Loss -4.4345855712890625e-05
Step: 109 | Iter: 2 | Loss -4.38690185546875e-05
Step: 109 | Iter: 3 | Loss -4.601478576660156e-05
Step: 109 | Iter: 4 | Loss -4.482269287109375e-05
Step: 109 | Iter: 5 | Loss -4.5299530029296875e-05


100%|██████████| 1/1 [00:36<00:00, 36.76s/it]


Step: 110 | Iter: 1 | Loss -8.535385131835938e-05
Step: 110 | Iter: 2 | Loss -8.487701416015625e-05
Step: 110 | Iter: 3 | Loss -8.869171142578125e-05
Step: 110 | Iter: 4 | Loss -8.869171142578125e-05
Step: 110 | Iter: 5 | Loss -8.869171142578125e-05
Step: 110 | Format: 0.0009090909090909091 | Accuracy: 0.014545454545454545
Output example: <reasoning>
[Your reasoning process goes here]
</reasoning>
<answer>
[Your final answer goes here]
</answer>

User: 2x + 4 = 20


100%|██████████| 1/1 [00:36<00:00, 36.65s/it]


Step: 111 | Iter: 1 | Loss -0.00011205673217773438
Step: 111 | Iter: 2 | Loss -0.00011444091796875
Step: 111 | Iter: 3 | Loss -0.00010585784912109375
Step: 111 | Iter: 4 | Loss -0.00011920928955078125
Step: 111 | Iter: 5 | Loss -0.0001239776611328125


100%|██████████| 1/1 [00:36<00:00, 36.53s/it]


Step: 112 | Iter: 1 | Loss -6.4849853515625e-05
Step: 112 | Iter: 2 | Loss -6.723403930664062e-05
Step: 112 | Iter: 3 | Loss -6.818771362304688e-05
Step: 112 | Iter: 4 | Loss -7.343292236328125e-05
Step: 112 | Iter: 5 | Loss -6.914138793945312e-05


100%|██████████| 1/1 [00:36<00:00, 36.67s/it]


Step: 113 | Iter: 1 | Loss -6.198883056640625e-05
Step: 113 | Iter: 2 | Loss -6.198883056640625e-05
Step: 113 | Iter: 3 | Loss -6.389617919921875e-05
Step: 113 | Iter: 4 | Loss -6.341934204101562e-05
Step: 113 | Iter: 5 | Loss -7.009506225585938e-05


100%|██████████| 1/1 [00:36<00:00, 36.61s/it]


Step: 114 | Iter: 1 | Loss -4.696846008300781e-05
Step: 114 | Iter: 2 | Loss -4.553794860839844e-05
Step: 114 | Iter: 3 | Loss -4.7206878662109375e-05
Step: 114 | Iter: 4 | Loss -4.8160552978515625e-05
Step: 114 | Iter: 5 | Loss -5.0067901611328125e-05


100%|██████████| 1/1 [00:36<00:00, 36.44s/it]


Step: 115 | Iter: 1 | Loss -5.4836273193359375e-05
Step: 115 | Iter: 2 | Loss -5.7697296142578125e-05
Step: 115 | Iter: 3 | Loss -5.9604644775390625e-05
Step: 115 | Iter: 4 | Loss -5.793571472167969e-05
Step: 115 | Iter: 5 | Loss -6.151199340820312e-05


100%|██████████| 1/1 [00:36<00:00, 36.81s/it]


Step: 116 | Iter: 1 | Loss -6.628036499023438e-05
Step: 116 | Iter: 2 | Loss -6.723403930664062e-05
Step: 116 | Iter: 3 | Loss -6.771087646484375e-05
Step: 116 | Iter: 4 | Loss -6.723403930664062e-05
Step: 116 | Iter: 5 | Loss -6.771087646484375e-05


100%|██████████| 1/1 [00:36<00:00, 36.85s/it]


Step: 117 | Iter: 1 | Loss -3.0994415283203125e-05
Step: 117 | Iter: 2 | Loss -2.956390380859375e-05
Step: 117 | Iter: 3 | Loss -3.0994415283203125e-05
Step: 117 | Iter: 4 | Loss -3.123283386230469e-05
Step: 117 | Iter: 5 | Loss -3.1948089599609375e-05


100%|██████████| 1/1 [00:36<00:00, 36.82s/it]


Step: 118 | Iter: 1 | Loss -3.1948089599609375e-05
Step: 118 | Iter: 2 | Loss -3.1948089599609375e-05
Step: 118 | Iter: 3 | Loss -3.2901763916015625e-05
Step: 118 | Iter: 4 | Loss -3.24249267578125e-05
Step: 118 | Iter: 5 | Loss -3.266334533691406e-05


100%|██████████| 1/1 [00:36<00:00, 36.61s/it]


Step: 119 | Iter: 1 | Loss -0.000782012939453125
Step: 119 | Iter: 2 | Loss -0.002349853515625
Step: 119 | Iter: 3 | Loss -0.00390625
Step: 119 | Iter: 4 | Loss -0.005859375
Step: 119 | Iter: 5 | Loss -0.0093994140625


100%|██████████| 1/1 [00:36<00:00, 36.59s/it]


Step: 120 | Iter: 1 | Loss -0.000396728515625
Step: 120 | Iter: 2 | Loss -0.0004673004150390625
Step: 120 | Iter: 3 | Loss -0.00072479248046875
Step: 120 | Iter: 4 | Loss -0.0036163330078125
Step: 120 | Iter: 5 | Loss -0.0230712890625
Step: 120 | Format: 0.0016666666666666668 | Accuracy: 0.013333333333333334
Output example: Please enter your answer and click 'Ploct' to see the full solution.
User: Jada wants to make 12 batches of cornbread for 7 classes. If each class produces 20 cornbreads, how many batches of cornbread does she need to make?


100%|██████████| 1/1 [00:36<00:00, 36.79s/it]


Step: 121 | Iter: 1 | Loss -0.00101470947265625
Step: 121 | Iter: 2 | Loss -0.0010223388671875
Step: 121 | Iter: 3 | Loss -0.0010986328125
Step: 121 | Iter: 4 | Loss -0.0011749267578125
Step: 121 | Iter: 5 | Loss -0.00125885009765625


100%|██████████| 1/1 [00:36<00:00, 36.64s/it]


Step: 122 | Iter: 1 | Loss -0.0028533935546875
Step: 122 | Iter: 2 | Loss -0.002899169921875
Step: 122 | Iter: 3 | Loss -0.0028839111328125
Step: 122 | Iter: 4 | Loss -0.0028839111328125
Step: 122 | Iter: 5 | Loss -0.0029144287109375


100%|██████████| 1/1 [00:32<00:00, 32.78s/it]


Step: 123 | Iter: 1 | Loss -0.0030670166015625
Step: 123 | Iter: 2 | Loss -0.0029754638671875
Step: 123 | Iter: 3 | Loss -0.003021240234375
Step: 123 | Iter: 4 | Loss -0.003021240234375
Step: 123 | Iter: 5 | Loss -0.0030059814453125


100%|██████████| 1/1 [00:29<00:00, 29.83s/it]


Step: 124 | Iter: 1 | Loss -0.0081787109375
Step: 124 | Iter: 2 | Loss -0.00823974609375
Step: 124 | Iter: 3 | Loss -0.0084228515625
Step: 124 | Iter: 4 | Loss -0.00860595703125
Step: 124 | Iter: 5 | Loss -0.0091552734375


100%|██████████| 1/1 [00:36<00:00, 36.50s/it]


Step: 125 | Iter: 1 | Loss -0.00933837890625
Step: 125 | Iter: 2 | Loss -0.0093994140625
Step: 125 | Iter: 3 | Loss -0.00933837890625
Step: 125 | Iter: 4 | Loss -0.00933837890625
Step: 125 | Iter: 5 | Loss -0.00933837890625


100%|██████████| 1/1 [00:36<00:00, 36.62s/it]


Step: 126 | Iter: 1 | Loss -0.00193023681640625
Step: 126 | Iter: 2 | Loss -0.0019073486328125
Step: 126 | Iter: 3 | Loss -0.00193023681640625
Step: 126 | Iter: 4 | Loss -0.001953125
Step: 126 | Iter: 5 | Loss -0.0019683837890625


100%|██████████| 1/1 [00:36<00:00, 36.57s/it]


Step: 127 | Iter: 1 | Loss -0.006439208984375
Step: 127 | Iter: 2 | Loss -0.006439208984375
Step: 127 | Iter: 3 | Loss -0.006591796875
Step: 127 | Iter: 4 | Loss -0.00665283203125
Step: 127 | Iter: 5 | Loss -0.006744384765625


100%|██████████| 1/1 [00:36<00:00, 36.98s/it]


Step: 128 | Iter: 1 | Loss -0.00927734375
Step: 128 | Iter: 2 | Loss -0.00927734375
Step: 128 | Iter: 3 | Loss -0.0093994140625
Step: 128 | Iter: 4 | Loss -0.0093994140625
Step: 128 | Iter: 5 | Loss -0.0093994140625


100%|██████████| 1/1 [00:00<00:00,  1.09it/s]


Step: 129 | Iter: 1 | Loss -0.020751953125
Step: 129 | Iter: 2 | Loss -0.020751953125
Step: 129 | Iter: 3 | Loss -0.021240234375
Step: 129 | Iter: 4 | Loss -0.021728515625
Step: 129 | Iter: 5 | Loss -0.0223388671875


100%|██████████| 1/1 [00:01<00:00,  1.17s/it]


Step: 130 | Iter: 1 | Loss -0.0101318359375
Step: 130 | Iter: 2 | Loss -0.01123046875
Step: 130 | Iter: 3 | Loss -0.03662109375
Step: 130 | Iter: 4 | Loss -4.8125
Step: 130 | Iter: 5 | Loss -760.0
Step: 130 | Format: 0.0015384615384615385 | Accuracy: 0.012307692307692308
Output example: Create an account to view and post answers.<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>


100%|██████████| 1/1 [00:19<00:00, 19.36s/it]


Step: 131 | Iter: 1 | Loss -0.015869140625
Step: 131 | Iter: 2 | Loss -0.015869140625
Step: 131 | Iter: 3 | Loss -0.0159912109375
Step: 131 | Iter: 4 | Loss -0.015869140625
Step: 131 | Iter: 5 | Loss -0.0159912109375


100%|██████████| 1/1 [00:36<00:00, 36.76s/it]


Step: 132 | Iter: 1 | Loss -0.013671875
Step: 132 | Iter: 2 | Loss -0.013671875
Step: 132 | Iter: 3 | Loss -0.01348876953125
Step: 132 | Iter: 4 | Loss -0.0135498046875
Step: 132 | Iter: 5 | Loss -0.01336669921875


100%|██████████| 1/1 [00:00<00:00,  2.57it/s]


Step: 133 | Iter: 1 | Loss -0.024658203125
Step: 133 | Iter: 2 | Loss -0.024658203125
Step: 133 | Iter: 3 | Loss -0.024658203125
Step: 133 | Iter: 4 | Loss -0.024658203125
Step: 133 | Iter: 5 | Loss -0.024658203125


100%|██████████| 1/1 [00:00<00:00,  2.43it/s]


Step: 134 | Iter: 1 | Loss -0.0277099609375
Step: 134 | Iter: 2 | Loss -0.0279541015625
Step: 134 | Iter: 3 | Loss -0.0277099609375
Step: 134 | Iter: 4 | Loss -0.0277099609375
Step: 134 | Iter: 5 | Loss -0.0277099609375


100%|██████████| 1/1 [00:00<00:00,  2.34it/s]


Step: 135 | Iter: 1 | Loss -0.0245361328125
Step: 135 | Iter: 2 | Loss -0.0245361328125
Step: 135 | Iter: 3 | Loss -0.0245361328125
Step: 135 | Iter: 4 | Loss -0.0245361328125
Step: 135 | Iter: 5 | Loss -0.0244140625


100%|██████████| 1/1 [00:33<00:00, 33.50s/it]


Step: 136 | Iter: 1 | Loss -0.01953125
Step: 136 | Iter: 2 | Loss -0.019775390625
Step: 136 | Iter: 3 | Loss -0.019287109375
Step: 136 | Iter: 4 | Loss -0.01953125
Step: 136 | Iter: 5 | Loss -0.01953125


100%|██████████| 1/1 [00:36<00:00, 36.38s/it]


Step: 137 | Iter: 1 | Loss -0.0106201171875
Step: 137 | Iter: 2 | Loss -0.01043701171875
Step: 137 | Iter: 3 | Loss -0.0106201171875
Step: 137 | Iter: 4 | Loss -0.01068115234375
Step: 137 | Iter: 5 | Loss -0.0106201171875


100%|██████████| 1/1 [00:00<00:00,  2.30it/s]


Step: 138 | Iter: 1 | Loss -0.025390625
Step: 138 | Iter: 2 | Loss -0.0250244140625
Step: 138 | Iter: 3 | Loss -0.025146484375
Step: 138 | Iter: 4 | Loss -0.025146484375
Step: 138 | Iter: 5 | Loss -0.025146484375


100%|██████████| 1/1 [00:35<00:00, 35.91s/it]


Step: 139 | Iter: 1 | Loss -0.007720947265625
Step: 139 | Iter: 2 | Loss -0.007659912109375
Step: 139 | Iter: 3 | Loss -0.007720947265625
Step: 139 | Iter: 4 | Loss -0.007720947265625
Step: 139 | Iter: 5 | Loss -0.007720947265625


100%|██████████| 1/1 [00:00<00:00,  2.02it/s]


Step: 140 | Iter: 1 | Loss -0.027587890625
Step: 140 | Iter: 2 | Loss -0.027587890625
Step: 140 | Iter: 3 | Loss -0.027587890625
Step: 140 | Iter: 4 | Loss -0.027587890625
Step: 140 | Iter: 5 | Loss -0.027587890625
Step: 140 | Format: 0.0014285714285714286 | Accuracy: 0.011428571428571429
Output example: Create An Account<|endoftext|>


100%|██████████| 1/1 [00:04<00:00,  4.01s/it]


Step: 141 | Iter: 1 | Loss -0.017578125
Step: 141 | Iter: 2 | Loss -0.017578125
Step: 141 | Iter: 3 | Loss -0.017578125
Step: 141 | Iter: 4 | Loss -0.0174560546875
Step: 141 | Iter: 5 | Loss -0.017333984375


100%|██████████| 1/1 [00:03<00:00,  3.22s/it]


Step: 142 | Iter: 1 | Loss -0.025146484375
Step: 142 | Iter: 2 | Loss -0.025146484375
Step: 142 | Iter: 3 | Loss -0.025146484375
Step: 142 | Iter: 4 | Loss -0.025146484375
Step: 142 | Iter: 5 | Loss -0.025146484375


100%|██████████| 1/1 [00:36<00:00, 36.70s/it]


Step: 143 | Iter: 1 | Loss -0.01904296875
Step: 143 | Iter: 2 | Loss -0.019287109375
Step: 143 | Iter: 3 | Loss -0.019287109375
Step: 143 | Iter: 4 | Loss -0.01904296875
Step: 143 | Iter: 5 | Loss -0.019287109375


100%|██████████| 1/1 [00:04<00:00,  4.95s/it]


Step: 144 | Iter: 1 | Loss -0.020751953125
Step: 144 | Iter: 2 | Loss -0.0211181640625
Step: 144 | Iter: 3 | Loss -0.0211181640625
Step: 144 | Iter: 4 | Loss -0.0211181640625
Step: 144 | Iter: 5 | Loss -0.0211181640625


100%|██████████| 1/1 [00:02<00:00,  2.44s/it]


Step: 145 | Iter: 1 | Loss -0.017333984375
Step: 145 | Iter: 2 | Loss -0.017333984375
Step: 145 | Iter: 3 | Loss -0.017333984375
Step: 145 | Iter: 4 | Loss -0.017333984375
Step: 145 | Iter: 5 | Loss -0.0172119140625


100%|██████████| 1/1 [00:29<00:00, 29.78s/it]


Step: 146 | Iter: 1 | Loss -0.007080078125
Step: 146 | Iter: 2 | Loss -0.006988525390625
Step: 146 | Iter: 3 | Loss -0.006988525390625
Step: 146 | Iter: 4 | Loss -0.00714111328125
Step: 146 | Iter: 5 | Loss -0.007171630859375


100%|██████████| 1/1 [00:02<00:00,  2.91s/it]


Step: 147 | Iter: 1 | Loss -0.0167236328125
Step: 147 | Iter: 2 | Loss -0.016845703125
Step: 147 | Iter: 3 | Loss -0.016845703125
Step: 147 | Iter: 4 | Loss -0.016845703125
Step: 147 | Iter: 5 | Loss -0.016845703125


100%|██████████| 1/1 [00:36<00:00, 36.70s/it]


Step: 148 | Iter: 1 | Loss -0.01300048828125
Step: 148 | Iter: 2 | Loss -0.0128173828125
Step: 148 | Iter: 3 | Loss -0.0128173828125
Step: 148 | Iter: 4 | Loss -0.0128173828125
Step: 148 | Iter: 5 | Loss -0.01300048828125


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]


Step: 149 | Iter: 1 | Loss -0.2470703125
Step: 149 | Iter: 2 | Loss -0.166015625
Step: 149 | Iter: 3 | Loss -0.208984375
Step: 149 | Iter: 4 | Loss -0.205078125
Step: 149 | Iter: 5 | Loss -0.1826171875


100%|██████████| 1/1 [00:36<00:00, 36.62s/it]


Step: 150 | Iter: 1 | Loss -0.006439208984375
Step: 150 | Iter: 2 | Loss -0.006439208984375
Step: 150 | Iter: 3 | Loss -0.00640869140625
Step: 150 | Iter: 4 | Loss -0.00640869140625
Step: 150 | Iter: 5 | Loss -0.006439208984375
Step: 150 | Format: 0.0013333333333333335 | Accuracy: 0.010666666666666668
Output example: User access policy<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>

100%|██████████| 1/1 [00:36<00:00, 36.67s/it]


Step: 151 | Iter: 1 | Loss -0.018310546875
Step: 151 | Iter: 2 | Loss -0.0181884765625
Step: 151 | Iter: 3 | Loss -0.018310546875
Step: 151 | Iter: 4 | Loss -0.0181884765625
Step: 151 | Iter: 5 | Loss -0.018310546875


100%|██████████| 1/1 [00:36<00:00, 36.53s/it]


Step: 152 | Iter: 1 | Loss -0.01806640625
Step: 152 | Iter: 2 | Loss -0.01806640625
Step: 152 | Iter: 3 | Loss -0.0181884765625
Step: 152 | Iter: 4 | Loss -0.017822265625
Step: 152 | Iter: 5 | Loss -0.0181884765625


100%|██████████| 1/1 [00:36<00:00, 36.52s/it]


Step: 153 | Iter: 1 | Loss -0.0146484375
Step: 153 | Iter: 2 | Loss -0.0147705078125
Step: 153 | Iter: 3 | Loss -0.0145263671875
Step: 153 | Iter: 4 | Loss -0.01446533203125
Step: 153 | Iter: 5 | Loss -0.0145263671875


100%|██████████| 1/1 [00:03<00:00,  3.56s/it]


Step: 154 | Iter: 1 | Loss -0.0150146484375
Step: 154 | Iter: 2 | Loss -0.0150146484375
Step: 154 | Iter: 3 | Loss -0.0150146484375
Step: 154 | Iter: 4 | Loss -0.01513671875
Step: 154 | Iter: 5 | Loss -0.0150146484375


100%|██████████| 1/1 [00:00<00:00,  2.31it/s]


Step: 155 | Iter: 1 | Loss -0.025390625
Step: 155 | Iter: 2 | Loss -0.025146484375
Step: 155 | Iter: 3 | Loss -0.025146484375
Step: 155 | Iter: 4 | Loss -0.025390625
Step: 155 | Iter: 5 | Loss -0.025146484375


100%|██████████| 1/1 [00:00<00:00,  2.41it/s]


Step: 156 | Iter: 1 | Loss -0.028564453125
Step: 156 | Iter: 2 | Loss -0.028564453125
Step: 156 | Iter: 3 | Loss -0.028564453125
Step: 156 | Iter: 4 | Loss -0.028564453125
Step: 156 | Iter: 5 | Loss -0.028564453125


100%|██████████| 1/1 [00:00<00:00,  2.30it/s]


Step: 157 | Iter: 1 | Loss -0.0257568359375
Step: 157 | Iter: 2 | Loss -0.0257568359375
Step: 157 | Iter: 3 | Loss -0.0257568359375
Step: 157 | Iter: 4 | Loss -0.0257568359375
Step: 157 | Iter: 5 | Loss -0.0257568359375


100%|██████████| 1/1 [00:36<00:00, 36.66s/it]


Step: 158 | Iter: 1 | Loss -0.01025390625
Step: 158 | Iter: 2 | Loss -0.01025390625
Step: 158 | Iter: 3 | Loss -0.01025390625
Step: 158 | Iter: 4 | Loss -0.01025390625
Step: 158 | Iter: 5 | Loss -0.01025390625


100%|██████████| 1/1 [00:11<00:00, 11.46s/it]


Step: 159 | Iter: 1 | Loss -0.018310546875
Step: 159 | Iter: 2 | Loss -0.018310546875
Step: 159 | Iter: 3 | Loss -0.0181884765625
Step: 159 | Iter: 4 | Loss -0.0184326171875
Step: 159 | Iter: 5 | Loss -0.0184326171875


100%|██████████| 1/1 [00:18<00:00, 18.81s/it]


Step: 160 | Iter: 1 | Loss -0.01513671875
Step: 160 | Iter: 2 | Loss -0.01513671875
Step: 160 | Iter: 3 | Loss -0.01513671875
Step: 160 | Iter: 4 | Loss -0.01544189453125
Step: 160 | Iter: 5 | Loss -0.01531982421875
Step: 160 | Format: 0.00125 | Accuracy: 0.01
Output example: Help. She always answers questions. <|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext

100%|██████████| 1/1 [00:36<00:00, 36.80s/it]


Step: 161 | Iter: 1 | Loss -0.015869140625
Step: 161 | Iter: 2 | Loss -0.0157470703125
Step: 161 | Iter: 3 | Loss -0.015869140625
Step: 161 | Iter: 4 | Loss -0.0157470703125
Step: 161 | Iter: 5 | Loss -0.015869140625


100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


Step: 162 | Iter: 1 | Loss -0.0218505859375
Step: 162 | Iter: 2 | Loss -0.021728515625
Step: 162 | Iter: 3 | Loss -0.021484375
Step: 162 | Iter: 4 | Loss -0.0216064453125
Step: 162 | Iter: 5 | Loss -0.021728515625


100%|██████████| 1/1 [00:36<00:00, 36.88s/it]


Step: 163 | Iter: 1 | Loss -0.0198974609375
Step: 163 | Iter: 2 | Loss -0.0198974609375
Step: 163 | Iter: 3 | Loss -0.0198974609375
Step: 163 | Iter: 4 | Loss -0.0198974609375
Step: 163 | Iter: 5 | Loss -0.0198974609375


100%|██████████| 1/1 [00:01<00:00,  1.89s/it]


Step: 164 | Iter: 1 | Loss -0.0205078125
Step: 164 | Iter: 2 | Loss -0.0206298828125
Step: 164 | Iter: 3 | Loss -0.020751953125
Step: 164 | Iter: 4 | Loss -0.0206298828125
Step: 164 | Iter: 5 | Loss -0.0205078125


100%|██████████| 1/1 [00:36<00:00, 36.71s/it]


Step: 165 | Iter: 1 | Loss -0.020751953125
Step: 165 | Iter: 2 | Loss -0.020751953125
Step: 165 | Iter: 3 | Loss -0.0206298828125
Step: 165 | Iter: 4 | Loss -0.020751953125
Step: 165 | Iter: 5 | Loss -0.020751953125


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]


Step: 166 | Iter: 1 | Loss -0.0218505859375
Step: 166 | Iter: 2 | Loss -0.0218505859375
Step: 166 | Iter: 3 | Loss -0.022216796875
Step: 166 | Iter: 4 | Loss -0.0218505859375
Step: 166 | Iter: 5 | Loss -0.022216796875


100%|██████████| 1/1 [00:36<00:00, 36.82s/it]


Step: 167 | Iter: 1 | Loss -0.022216796875
Step: 167 | Iter: 2 | Loss -0.0220947265625
Step: 167 | Iter: 3 | Loss -0.0220947265625
Step: 167 | Iter: 4 | Loss -0.022216796875
Step: 167 | Iter: 5 | Loss -0.0220947265625


100%|██████████| 1/1 [00:36<00:00, 36.73s/it]


Step: 168 | Iter: 1 | Loss -0.0167236328125
Step: 168 | Iter: 2 | Loss -0.0167236328125
Step: 168 | Iter: 3 | Loss -0.016845703125
Step: 168 | Iter: 4 | Loss -0.0169677734375
Step: 168 | Iter: 5 | Loss -0.0169677734375


100%|██████████| 1/1 [00:00<00:00,  2.14it/s]


Step: 169 | Iter: 1 | Loss -0.02490234375
Step: 169 | Iter: 2 | Loss -0.02490234375
Step: 169 | Iter: 3 | Loss -0.02490234375
Step: 169 | Iter: 4 | Loss -0.0247802734375
Step: 169 | Iter: 5 | Loss -0.0247802734375


100%|██████████| 1/1 [00:36<00:00, 36.76s/it]


Step: 170 | Iter: 1 | Loss -0.01611328125
Step: 170 | Iter: 2 | Loss -0.01611328125
Step: 170 | Iter: 3 | Loss -0.0162353515625
Step: 170 | Iter: 4 | Loss -0.016357421875
Step: 170 | Iter: 5 | Loss -0.0162353515625
Step: 170 | Format: 0.0011764705882352942 | Accuracy: 0.009411764705882354
Output example: Create An Account<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><

100%|██████████| 1/1 [00:03<00:00,  3.90s/it]


Step: 171 | Iter: 1 | Loss -0.0223388671875
Step: 171 | Iter: 2 | Loss -0.0223388671875
Step: 171 | Iter: 3 | Loss -0.0220947265625
Step: 171 | Iter: 4 | Loss -0.0220947265625
Step: 171 | Iter: 5 | Loss -0.0223388671875


100%|██████████| 1/1 [00:28<00:00, 28.66s/it]


Step: 172 | Iter: 1 | Loss -0.0111083984375
Step: 172 | Iter: 2 | Loss -0.01104736328125
Step: 172 | Iter: 3 | Loss -0.0111083984375
Step: 172 | Iter: 4 | Loss -0.0111083984375
Step: 172 | Iter: 5 | Loss -0.0111083984375


100%|██████████| 1/1 [00:36<00:00, 36.71s/it]


Step: 173 | Iter: 1 | Loss -0.0189208984375
Step: 173 | Iter: 2 | Loss -0.018798828125
Step: 173 | Iter: 3 | Loss -0.0185546875
Step: 173 | Iter: 4 | Loss -0.018798828125
Step: 173 | Iter: 5 | Loss -0.018798828125


100%|██████████| 1/1 [00:36<00:00, 36.72s/it]


Step: 174 | Iter: 1 | Loss -0.0228271484375
Step: 174 | Iter: 2 | Loss -0.0228271484375
Step: 174 | Iter: 3 | Loss -0.02294921875
Step: 174 | Iter: 4 | Loss -0.0228271484375
Step: 174 | Iter: 5 | Loss -0.02294921875


100%|██████████| 1/1 [00:23<00:00, 23.95s/it]


Step: 175 | Iter: 1 | Loss -0.005462646484375
Step: 175 | Iter: 2 | Loss -0.00555419921875
Step: 175 | Iter: 3 | Loss -0.005462646484375
Step: 175 | Iter: 4 | Loss -0.00543212890625
Step: 175 | Iter: 5 | Loss -0.005462646484375


100%|██████████| 1/1 [00:00<00:00,  1.16it/s]


Step: 176 | Iter: 1 | Loss -0.021728515625
Step: 176 | Iter: 2 | Loss -0.0218505859375
Step: 176 | Iter: 3 | Loss -0.021728515625
Step: 176 | Iter: 4 | Loss -0.02197265625
Step: 176 | Iter: 5 | Loss -0.02197265625


100%|██████████| 1/1 [00:36<00:00, 36.43s/it]


Step: 177 | Iter: 1 | Loss -0.01220703125
Step: 177 | Iter: 2 | Loss -0.0120849609375
Step: 177 | Iter: 3 | Loss -0.0120849609375
Step: 177 | Iter: 4 | Loss -0.01220703125
Step: 177 | Iter: 5 | Loss -0.0120849609375


100%|██████████| 1/1 [00:36<00:00, 36.76s/it]


Step: 178 | Iter: 1 | Loss -0.01483154296875
Step: 178 | Iter: 2 | Loss -0.0147705078125
Step: 178 | Iter: 3 | Loss -0.01483154296875
Step: 178 | Iter: 4 | Loss -0.01495361328125
Step: 178 | Iter: 5 | Loss -0.0147705078125


100%|██████████| 1/1 [00:36<00:00, 36.83s/it]


Step: 179 | Iter: 1 | Loss -0.0048828125
Step: 179 | Iter: 2 | Loss -0.0048828125
Step: 179 | Iter: 3 | Loss -0.00494384765625
Step: 179 | Iter: 4 | Loss -0.004913330078125
Step: 179 | Iter: 5 | Loss -0.00494384765625


100%|██████████| 1/1 [00:36<00:00, 36.66s/it]


Step: 180 | Iter: 1 | Loss -0.00946044921875
Step: 180 | Iter: 2 | Loss -0.0093994140625
Step: 180 | Iter: 3 | Loss -0.0096435546875
Step: 180 | Iter: 4 | Loss -0.0093994140625
Step: 180 | Iter: 5 | Loss -0.0093994140625
Step: 180 | Format: 0.0011111111111111111 | Accuracy: 0.008888888888888889
Output example: Create AN ESSAY...

FAQ

More questions:

#### Recent Questions in Biology

Submit Your Questions Here !
Copy and paste your question here...
Attach Files

   lastname and first namesfirst name Lastname and first namesfirst name<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|end

100%|██████████| 1/1 [00:36<00:00, 36.67s/it]


Step: 181 | Iter: 1 | Loss -0.006988525390625
Step: 181 | Iter: 2 | Loss -0.00701904296875
Step: 181 | Iter: 3 | Loss -0.006988525390625
Step: 181 | Iter: 4 | Loss -0.00701904296875
Step: 181 | Iter: 5 | Loss -0.006927490234375


100%|██████████| 1/1 [00:03<00:00,  3.12s/it]


Step: 182 | Iter: 1 | Loss -0.0224609375
Step: 182 | Iter: 2 | Loss -0.0224609375
Step: 182 | Iter: 3 | Loss -0.0225830078125
Step: 182 | Iter: 4 | Loss -0.0224609375
Step: 182 | Iter: 5 | Loss -0.0224609375


100%|██████████| 1/1 [00:12<00:00, 12.69s/it]


Step: 183 | Iter: 1 | Loss -0.021240234375
Step: 183 | Iter: 2 | Loss -0.021240234375
Step: 183 | Iter: 3 | Loss -0.021240234375
Step: 183 | Iter: 4 | Loss -0.021240234375
Step: 183 | Iter: 5 | Loss -0.021240234375


100%|██████████| 1/1 [00:00<00:00,  1.74it/s]


Step: 184 | Iter: 1 | Loss -0.025390625
Step: 184 | Iter: 2 | Loss -0.025390625
Step: 184 | Iter: 3 | Loss -0.025390625
Step: 184 | Iter: 4 | Loss -0.025634765625
Step: 184 | Iter: 5 | Loss -0.025390625


100%|██████████| 1/1 [00:36<00:00, 36.76s/it]


Step: 185 | Iter: 1 | Loss -0.016845703125
Step: 185 | Iter: 2 | Loss -0.016845703125
Step: 185 | Iter: 3 | Loss -0.01708984375
Step: 185 | Iter: 4 | Loss -0.0169677734375
Step: 185 | Iter: 5 | Loss -0.01708984375


100%|██████████| 1/1 [00:36<00:00, 36.68s/it]


Step: 186 | Iter: 1 | Loss -0.005950927734375
Step: 186 | Iter: 2 | Loss -0.005950927734375
Step: 186 | Iter: 3 | Loss -0.00592041015625
Step: 186 | Iter: 4 | Loss -0.00592041015625
Step: 186 | Iter: 5 | Loss -0.005889892578125


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


Step: 187 | Iter: 1 | Loss -0.26953125
Step: 187 | Iter: 2 | Loss -0.384765625
Step: 187 | Iter: 3 | Loss -0.279296875
Step: 187 | Iter: 4 | Loss -0.34765625
Step: 187 | Iter: 5 | Loss -0.32421875


100%|██████████| 1/1 [00:36<00:00, 36.83s/it]


Step: 188 | Iter: 1 | Loss -0.01202392578125
Step: 188 | Iter: 2 | Loss -0.01214599609375
Step: 188 | Iter: 3 | Loss -0.01202392578125
Step: 188 | Iter: 4 | Loss -0.0120849609375
Step: 188 | Iter: 5 | Loss -0.01202392578125


100%|██████████| 1/1 [00:36<00:00, 36.56s/it]


Step: 189 | Iter: 1 | Loss -0.007476806640625
Step: 189 | Iter: 2 | Loss -0.007415771484375
Step: 189 | Iter: 3 | Loss -0.00738525390625
Step: 189 | Iter: 4 | Loss -0.007415771484375
Step: 189 | Iter: 5 | Loss -0.007415771484375


100%|██████████| 1/1 [00:02<00:00,  3.00s/it]


Step: 190 | Iter: 1 | Loss -0.01904296875
Step: 190 | Iter: 2 | Loss -0.01904296875
Step: 190 | Iter: 3 | Loss -0.0189208984375
Step: 190 | Iter: 4 | Loss -0.018798828125
Step: 190 | Iter: 5 | Loss -0.01904296875
Step: 190 | Format: 0.0010526315789473684 | Accuracy: 0.008421052631578947
Output example: Create An Account<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>


100%|██████████| 1/1 [00:36<00:00, 36.54s/it]


Step: 191 | Iter: 1 | Loss -0.00860595703125
Step: 191 | Iter: 2 | Loss -0.00860595703125
Step: 191 | Iter: 3 | Loss -0.00860595703125
Step: 191 | Iter: 4 | Loss -0.00860595703125
Step: 191 | Iter: 5 | Loss -0.0086669921875


100%|██████████| 1/1 [00:00<00:00,  2.31it/s]


Step: 192 | Iter: 1 | Loss -0.028564453125
Step: 192 | Iter: 2 | Loss -0.0286865234375
Step: 192 | Iter: 3 | Loss -0.0286865234375
Step: 192 | Iter: 4 | Loss -0.028564453125
Step: 192 | Iter: 5 | Loss -0.0286865234375


100%|██████████| 1/1 [00:36<00:00, 36.72s/it]


Step: 193 | Iter: 1 | Loss -0.006439208984375
Step: 193 | Iter: 2 | Loss -0.00653076171875
Step: 193 | Iter: 3 | Loss -0.006591796875
Step: 193 | Iter: 4 | Loss -0.00653076171875
Step: 193 | Iter: 5 | Loss -0.00653076171875


100%|██████████| 1/1 [00:36<00:00, 36.89s/it]


Step: 194 | Iter: 1 | Loss -0.003021240234375
Step: 194 | Iter: 2 | Loss -0.0029754638671875
Step: 194 | Iter: 3 | Loss -0.0029754638671875
Step: 194 | Iter: 4 | Loss -0.0030059814453125
Step: 194 | Iter: 5 | Loss -0.003021240234375


100%|██████████| 1/1 [00:00<00:00,  2.14it/s]


Step: 195 | Iter: 1 | Loss -0.026611328125
Step: 195 | Iter: 2 | Loss -0.026611328125
Step: 195 | Iter: 3 | Loss -0.026611328125
Step: 195 | Iter: 4 | Loss -0.026123046875
Step: 195 | Iter: 5 | Loss -0.026123046875


100%|██████████| 1/1 [00:00<00:00,  2.46it/s]


Step: 196 | Iter: 1 | Loss -0.025390625
Step: 196 | Iter: 2 | Loss -0.025390625
Step: 196 | Iter: 3 | Loss -0.025390625
Step: 196 | Iter: 4 | Loss -0.025146484375
Step: 196 | Iter: 5 | Loss -0.025390625


100%|██████████| 1/1 [00:36<00:00, 36.49s/it]


Step: 197 | Iter: 1 | Loss -0.0133056640625
Step: 197 | Iter: 2 | Loss -0.01318359375
Step: 197 | Iter: 3 | Loss -0.01300048828125
Step: 197 | Iter: 4 | Loss -0.01318359375
Step: 197 | Iter: 5 | Loss -0.01318359375


100%|██████████| 1/1 [00:04<00:00,  4.38s/it]


Step: 198 | Iter: 1 | Loss -0.024169921875
Step: 198 | Iter: 2 | Loss -0.024169921875
Step: 198 | Iter: 3 | Loss -0.0240478515625
Step: 198 | Iter: 4 | Loss -0.024169921875
Step: 198 | Iter: 5 | Loss -0.024169921875


100%|██████████| 1/1 [00:04<00:00,  4.48s/it]


Step: 199 | Iter: 1 | Loss -0.022705078125
Step: 199 | Iter: 2 | Loss -0.022705078125
Step: 199 | Iter: 3 | Loss -0.022705078125
Step: 199 | Iter: 4 | Loss -0.022705078125
Step: 199 | Iter: 5 | Loss -0.022705078125


100%|██████████| 1/1 [00:16<00:00, 16.09s/it]


Step: 200 | Iter: 1 | Loss -0.0159912109375
Step: 200 | Iter: 2 | Loss -0.0162353515625
Step: 200 | Iter: 3 | Loss -0.015869140625
Step: 200 | Iter: 4 | Loss -0.0159912109375
Step: 200 | Iter: 5 | Loss -0.01611328125
Step: 200 | Format: 0.001 | Accuracy: 0.008
Output example: Create An Account<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|en

100%|██████████| 1/1 [00:00<00:00,  2.12it/s]


Step: 201 | Iter: 1 | Loss -0.025146484375
Step: 201 | Iter: 2 | Loss -0.025146484375
Step: 201 | Iter: 3 | Loss -0.025146484375
Step: 201 | Iter: 4 | Loss -0.025146484375
Step: 201 | Iter: 5 | Loss -0.025146484375


100%|██████████| 1/1 [00:36<00:00, 36.76s/it]


Step: 202 | Iter: 1 | Loss -0.0218505859375
Step: 202 | Iter: 2 | Loss -0.0220947265625
Step: 202 | Iter: 3 | Loss -0.022216796875
Step: 202 | Iter: 4 | Loss -0.02197265625
Step: 202 | Iter: 5 | Loss -0.0223388671875


100%|██████████| 1/1 [00:36<00:00, 36.66s/it]


Step: 203 | Iter: 1 | Loss -0.00762939453125
Step: 203 | Iter: 2 | Loss -0.00762939453125
Step: 203 | Iter: 3 | Loss -0.007568359375
Step: 203 | Iter: 4 | Loss -0.007476806640625
Step: 203 | Iter: 5 | Loss -0.007659912109375


100%|██████████| 1/1 [00:36<00:00, 36.69s/it]


Step: 204 | Iter: 1 | Loss -0.0179443359375
Step: 204 | Iter: 2 | Loss -0.0179443359375
Step: 204 | Iter: 3 | Loss -0.01806640625
Step: 204 | Iter: 4 | Loss -0.0179443359375
Step: 204 | Iter: 5 | Loss -0.0179443359375


100%|██████████| 1/1 [00:07<00:00,  7.37s/it]


Step: 205 | Iter: 1 | Loss -0.016357421875
Step: 205 | Iter: 2 | Loss -0.0166015625
Step: 205 | Iter: 3 | Loss -0.0166015625
Step: 205 | Iter: 4 | Loss -0.0164794921875
Step: 205 | Iter: 5 | Loss -0.0166015625


100%|██████████| 1/1 [00:36<00:00, 36.74s/it]


Step: 206 | Iter: 1 | Loss -0.00775146484375
Step: 206 | Iter: 2 | Loss -0.00799560546875
Step: 206 | Iter: 3 | Loss -0.00823974609375
Step: 206 | Iter: 4 | Loss -0.0084228515625
Step: 206 | Iter: 5 | Loss -0.00799560546875


100%|██████████| 1/1 [00:36<00:00, 36.78s/it]


Step: 207 | Iter: 1 | Loss -0.006988525390625
Step: 207 | Iter: 2 | Loss -0.00689697265625
Step: 207 | Iter: 3 | Loss -0.00701904296875
Step: 207 | Iter: 4 | Loss -0.00701904296875
Step: 207 | Iter: 5 | Loss -0.007080078125


100%|██████████| 1/1 [00:05<00:00,  5.12s/it]


Step: 208 | Iter: 1 | Loss -0.0185546875
Step: 208 | Iter: 2 | Loss -0.0185546875
Step: 208 | Iter: 3 | Loss -0.018798828125
Step: 208 | Iter: 4 | Loss -0.0185546875
Step: 208 | Iter: 5 | Loss -0.0185546875


100%|██████████| 1/1 [00:04<00:00,  4.28s/it]


Step: 209 | Iter: 1 | Loss -0.022705078125
Step: 209 | Iter: 2 | Loss -0.0224609375
Step: 209 | Iter: 3 | Loss -0.0224609375
Step: 209 | Iter: 4 | Loss -0.0225830078125
Step: 209 | Iter: 5 | Loss -0.022705078125


100%|██████████| 1/1 [00:33<00:00, 33.82s/it]


Step: 210 | Iter: 1 | Loss -0.01708984375
Step: 210 | Iter: 2 | Loss -0.01708984375
Step: 210 | Iter: 3 | Loss -0.0169677734375
Step: 210 | Iter: 4 | Loss -0.0172119140625
Step: 210 | Iter: 5 | Loss -0.0172119140625
Step: 210 | Format: 0.0009523809523809524 | Accuracy: 0.007619047619047619
Output example: Create an Original Question

Search for Expert
Expand Authorities<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|en

100%|██████████| 1/1 [00:09<00:00,  9.91s/it]


Step: 211 | Iter: 1 | Loss -0.0191650390625
Step: 211 | Iter: 2 | Loss -0.019287109375
Step: 211 | Iter: 3 | Loss -0.01904296875
Step: 211 | Iter: 4 | Loss -0.019287109375
Step: 211 | Iter: 5 | Loss -0.019287109375


100%|██████████| 1/1 [00:07<00:00,  7.21s/it]


Step: 212 | Iter: 1 | Loss -0.0220947265625
Step: 212 | Iter: 2 | Loss -0.022216796875
Step: 212 | Iter: 3 | Loss -0.022216796875
Step: 212 | Iter: 4 | Loss -0.022216796875
Step: 212 | Iter: 5 | Loss -0.022216796875


100%|██████████| 1/1 [00:36<00:00, 36.77s/it]


Step: 213 | Iter: 1 | Loss -0.005523681640625
Step: 213 | Iter: 2 | Loss -0.00555419921875
Step: 213 | Iter: 3 | Loss -0.005645751953125
Step: 213 | Iter: 4 | Loss -0.005645751953125
Step: 213 | Iter: 5 | Loss -0.00555419921875


100%|██████████| 1/1 [00:36<00:00, 36.74s/it]


Step: 214 | Iter: 1 | Loss -0.017333984375
Step: 214 | Iter: 2 | Loss -0.017333984375
Step: 214 | Iter: 3 | Loss -0.017333984375
Step: 214 | Iter: 4 | Loss -0.017333984375
Step: 214 | Iter: 5 | Loss -0.017333984375


100%|██████████| 1/1 [00:36<00:00, 36.56s/it]


Step: 215 | Iter: 1 | Loss -0.0126953125
Step: 215 | Iter: 2 | Loss -0.0126953125
Step: 215 | Iter: 3 | Loss -0.0125732421875
Step: 215 | Iter: 4 | Loss -0.0125732421875
Step: 215 | Iter: 5 | Loss -0.0126953125


100%|██████████| 1/1 [00:36<00:00, 36.43s/it]


Step: 216 | Iter: 1 | Loss -0.0218505859375
Step: 216 | Iter: 2 | Loss -0.0218505859375
Step: 216 | Iter: 3 | Loss -0.0218505859375
Step: 216 | Iter: 4 | Loss -0.021728515625
Step: 216 | Iter: 5 | Loss -0.021728515625


100%|██████████| 1/1 [00:37<00:00, 37.00s/it]


Step: 217 | Iter: 1 | Loss -0.00457763671875
Step: 217 | Iter: 2 | Loss -0.004547119140625
Step: 217 | Iter: 3 | Loss -0.004547119140625
Step: 217 | Iter: 4 | Loss -0.00445556640625
Step: 217 | Iter: 5 | Loss -0.00445556640625


100%|██████████| 1/1 [00:18<00:00, 18.02s/it]


Step: 218 | Iter: 1 | Loss -0.0126953125
Step: 218 | Iter: 2 | Loss -0.0126953125
Step: 218 | Iter: 3 | Loss -0.0142822265625
Step: 218 | Iter: 4 | Loss -0.0126953125
Step: 218 | Iter: 5 | Loss -0.01348876953125


100%|██████████| 1/1 [00:36<00:00, 36.53s/it]


Step: 219 | Iter: 1 | Loss -0.0133056640625
Step: 219 | Iter: 2 | Loss -0.0133056640625
Step: 219 | Iter: 3 | Loss -0.01318359375
Step: 219 | Iter: 4 | Loss -0.0133056640625
Step: 219 | Iter: 5 | Loss -0.01318359375


100%|██████████| 1/1 [00:00<00:00,  2.16it/s]


Step: 220 | Iter: 1 | Loss -0.0242919921875
Step: 220 | Iter: 2 | Loss -0.0242919921875
Step: 220 | Iter: 3 | Loss -0.0244140625
Step: 220 | Iter: 4 | Loss -0.0244140625
Step: 220 | Iter: 5 | Loss -0.0244140625
Step: 220 | Format: 0.0009090909090909091 | Accuracy: 0.007272727272727273
Output example: Create An Account<|endoftext|>


100%|██████████| 1/1 [00:36<00:00, 36.71s/it]


Step: 221 | Iter: 1 | Loss -0.0140380859375
Step: 221 | Iter: 2 | Loss -0.01416015625
Step: 221 | Iter: 3 | Loss -0.0140380859375
Step: 221 | Iter: 4 | Loss -0.01397705078125
Step: 221 | Iter: 5 | Loss -0.0140380859375


100%|██████████| 1/1 [00:04<00:00,  4.32s/it]


Step: 222 | Iter: 1 | Loss -0.0159912109375
Step: 222 | Iter: 2 | Loss -0.01611328125
Step: 222 | Iter: 3 | Loss -0.01611328125
Step: 222 | Iter: 4 | Loss -0.01611328125
Step: 222 | Iter: 5 | Loss -0.0159912109375


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]


Step: 223 | Iter: 1 | Loss -0.0208740234375
Step: 223 | Iter: 2 | Loss -0.020751953125
Step: 223 | Iter: 3 | Loss -0.020751953125
Step: 223 | Iter: 4 | Loss -0.0205078125
Step: 223 | Iter: 5 | Loss -0.020751953125


100%|██████████| 1/1 [00:08<00:00,  8.96s/it]


Step: 224 | Iter: 1 | Loss -0.0201416015625
Step: 224 | Iter: 2 | Loss -0.0201416015625
Step: 224 | Iter: 3 | Loss -0.0201416015625
Step: 224 | Iter: 4 | Loss -0.0201416015625
Step: 224 | Iter: 5 | Loss -0.0201416015625


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


Step: 225 | Iter: 1 | Loss -0.0203857421875
Step: 225 | Iter: 2 | Loss -0.020263671875
Step: 225 | Iter: 3 | Loss -0.020263671875
Step: 225 | Iter: 4 | Loss -0.020263671875
Step: 225 | Iter: 5 | Loss -0.020263671875


100%|██████████| 1/1 [00:00<00:00,  2.05it/s]


Step: 226 | Iter: 1 | Loss -0.023193359375
Step: 226 | Iter: 2 | Loss -0.0233154296875
Step: 226 | Iter: 3 | Loss -0.0233154296875
Step: 226 | Iter: 4 | Loss -0.0230712890625
Step: 226 | Iter: 5 | Loss -0.0230712890625


100%|██████████| 1/1 [00:36<00:00, 36.81s/it]


Step: 227 | Iter: 1 | Loss -0.01806640625
Step: 227 | Iter: 2 | Loss -0.0184326171875
Step: 227 | Iter: 3 | Loss -0.0164794921875
Step: 227 | Iter: 4 | Loss -0.016357421875
Step: 227 | Iter: 5 | Loss -0.01611328125


100%|██████████| 1/1 [00:00<00:00,  2.14it/s]


Step: 228 | Iter: 1 | Loss -0.025634765625
Step: 228 | Iter: 2 | Loss -0.025634765625
Step: 228 | Iter: 3 | Loss -0.025634765625
Step: 228 | Iter: 4 | Loss -0.0257568359375
Step: 228 | Iter: 5 | Loss -0.025390625


100%|██████████| 1/1 [00:26<00:00, 26.08s/it]


Step: 229 | Iter: 1 | Loss -0.011962890625
Step: 229 | Iter: 2 | Loss -0.0120849609375
Step: 229 | Iter: 3 | Loss -0.0120849609375
Step: 229 | Iter: 4 | Loss -0.0120849609375
Step: 229 | Iter: 5 | Loss -0.0120849609375


100%|██████████| 1/1 [00:03<00:00,  3.17s/it]


Step: 230 | Iter: 1 | Loss -0.06494140625
Step: 230 | Iter: 2 | Loss -0.061767578125
Step: 230 | Iter: 3 | Loss -0.051513671875
Step: 230 | Iter: 4 | Loss -0.06201171875
Step: 230 | Iter: 5 | Loss -0.06787109375
Step: 230 | Format: 0.0008695652173913044 | Accuracy: 0.006956521739130435
Output example: Create An Account<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>


100%|██████████| 1/1 [00:04<00:00,  4.81s/it]


Step: 231 | Iter: 1 | Loss -0.0140380859375
Step: 231 | Iter: 2 | Loss -0.01397705078125
Step: 231 | Iter: 3 | Loss -0.0140380859375
Step: 231 | Iter: 4 | Loss -0.0140380859375
Step: 231 | Iter: 5 | Loss -0.01397705078125


100%|██████████| 1/1 [00:29<00:00, 29.84s/it]


Step: 232 | Iter: 1 | Loss -0.019287109375
Step: 232 | Iter: 2 | Loss -0.019287109375
Step: 232 | Iter: 3 | Loss -0.019287109375
Step: 232 | Iter: 4 | Loss -0.0191650390625
Step: 232 | Iter: 5 | Loss -0.0194091796875


100%|██████████| 1/1 [00:36<00:00, 36.90s/it]


Stopping training: Reached 7-hour limit.
Stopping training: Reached 11-hour limit.


## Test

In [48]:
math_problem = SYSTEM + """User: A farmer is planning to build a rectangular chicken coop using 100 meters of fencing.  
One side of the coop will be against a barn, so fencing is only needed for the other three sides.  
What dimensions should the farmer choose to maximize the enclosed area?  
Provide the maximum possible area in square meters.
Assistant:"""  

answer = '1250'

In [49]:
generated_solution = model.generate(
    **tokenizer(math_problem, return_tensors='pt').to('cuda'), 
    max_new_tokens=512,
    pad_token_id=tokenizer.eos_token_id
)

print('This is output from trained reasoning model!')

print(
    tokenizer.decode(generated_solution[0]).split('Assistant:')[-1].strip()
)

This is output from trained reasoning model!
Studentotonic
Topic:

### Transcript

A farmer is planning to build a rectangular chicken coop using 100 meters of fencing. 1 side of the chicken coop will be against a barn, so fencing is only needed for the other three sides.  What dimensions should the farmer choose to maximize the enclosed area? Provide the maximum possible area in square meters.

Let's start by drawing a picture.  We have a rectangular chicken coop.  We have 100 meters of fencing.  We want to maximize the enclosed area.  We want to maximize the area of the chicken coop.  We want to maximize the area of the chicken coop.  We want to maximize the area of the chicken coop.  We want to maximize the area of the chicken coop.  We want to maximize the area of the chicken coop.  We want to maximize the area of the chicken coop.  We want to maximize the area of the chicken coop.  We want to maximize the area of the chicken coop.  We want to maximize the area of the chicken coop.

In [50]:
baseline_solution = ref_model.generate(
    **tokenizer(math_problem, return_tensors='pt').to('cuda'), 
    max_new_tokens=512,
    temperature=0.6,
    pad_token_id=tokenizer.eos_token_id
)

print('This is output from base model!')

print(
    tokenizer.decode(baseline_solution[0]).split('Assistant:')[-1].strip()
)

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


This is output from base model!
The farmer should choose a length of fencing that is 100 meters long.  
The maximum possible area is 100 * 10


## Plotting

In [None]:
def moving_average(data, window_size):
    return np.convolve(data, np.ones(window_size)/window_size, mode='valid')

In [None]:
plt.plot(moving_average(track_format_rewards, window_size=100))
plt.title("Format rewards")
plt.xlabel("Steps")
plt.ylabel("Reward")
plt.show()

###########
plt.plot(moving_average(track_accuracy_rewards, window_size=100))
plt.title("Accuracy rewards")
plt.xlabel("Steps")
plt.ylabel("Reward")
plt.show()

###########
plt.plot(moving_average(total_rewards, window_size=100))
plt.title("Total rewards")
plt.xlabel("Steps")
plt.ylabel("Reward")
plt.show()