In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

In [2]:
import torch
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig
import pandas as pd
import numpy as np
from IPython.display import display, Markdown
from copy import deepcopy
import seaborn as sns
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


### Load our standard model: `unsloth/gemma-2-9b-it-bnb-4bit`

In [3]:
# Checkpoint to load (the name indicates a pre-quantized 4-bit bitsandbytes build).
MODEL_ID = "unsloth/gemma-2-9b-it-bnb-4bit"
# Passed as `torch_dtype` when the model is loaded below.
DTYPE = torch.float16
# NOTE(review): not referenced anywhere else in the visible notebook.
LOAD_IN_4BIT = True
# Fall back to CPU when no CUDA device is visible (CUDA_VISIBLE_DEVICES is set in the first cell).
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load variables from a local .env file; HF_TOKEN is read from os.environ when loading the model.
load_dotenv()

True

In [4]:
# Tokenizer and model are both fetched with the Hugging Face token from the environment;
# a missing HF_TOKEN raises KeyError here.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=os.environ["HF_TOKEN"])
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # The empty-string key maps the root module, i.e. the whole model, to `device`.
    device_map={"": device},
    torch_dtype=DTYPE,
    token=os.environ["HF_TOKEN"],
    use_cache=True
)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


In [5]:
# Inverse vocabulary (token id -> token string), used to label logits when printing.
reversed_tokenizer = {token_id: token for token, token_id in tokenizer.get_vocab().items()}

In [6]:
@torch.no_grad()
def forward_pass(text, model, tokenizer, device):
    """Tokenize `text` and run one gradient-free forward pass of `model`.

    The model is asked to return the hidden states of every layer so they can
    be inspected afterwards (`outputs['hidden_states']`).

    Note: a later cell in this notebook defines another function with the same
    name and a different signature; after that cell runs, this version is
    shadowed.

    Args:
        text: Prompt (or batch of prompts) accepted by `tokenizer`.
        model: Callable model; invoked with the tokenizer output as kwargs.
        tokenizer: Callable tokenizer whose result supports `.to(device)`.
        device: Device the encoded inputs are moved to.

    Returns:
        Whatever `model(...)` returns for the encoded inputs.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    encoded = encoded.to(device)
    return model(**encoded, output_hidden_states=True)


In [7]:
output = forward_pass("Hello, my name is", model, tokenizer, device)

output['hidden_states']

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


(tensor([[[-0.6724,  0.1553,  0.4824,  ..., -0.0379,  0.2595,  0.1982],
          [ 1.3672,  1.5127,  0.4058,  ...,  1.3301, -2.4414, -1.9883],
          [-0.7710,  1.8643, -0.4275,  ..., -0.3965, -0.9429, -0.8662],
          [ 1.4619,  0.8149,  0.1028,  ...,  1.9004,  0.1041, -1.5205],
          [-0.0482, -1.0449, -2.5137,  ...,  1.4326,  0.6250,  0.7710],
          [-0.7124, -1.0889, -0.5918,  ...,  0.3984,  1.2354, -0.3381]]],
        device='cuda:0', dtype=torch.float16),
 tensor([[[-0.2334, -1.9648, -0.0709,  ..., -0.1409,  0.0360,  0.2175],
          [ 2.2305,  5.3086,  0.6084,  ...,  1.1104, -0.8760, -1.8389],
          [-0.1729,  1.9502, -0.1769,  ..., -0.0920, -0.2434, -0.2148],
          [ 0.9985,  3.2012, -0.4641,  ...,  1.0625,  0.8628, -1.0742],
          [ 0.1587,  2.9277, -1.7773,  ...,  0.6489,  1.4004,  0.1431],
          [-0.6655,  2.4609, -0.4707,  ..., -0.0349,  1.4053, -0.5127]]],
        device='cuda:0', dtype=torch.float16),
 tensor([[[-0.1033, -3.3652, -0.0719, 

In [8]:
len(output['hidden_states'])

43

In [9]:
# Logit lens: project the hidden state of every layer through the LM head.
# Run under no_grad so the vocabulary-sized logit tensors do not each keep an
# autograd graph alive (without it, downstream results carry a grad_fn).
with torch.no_grad():
    logits_dict = {
        layer_idx: model.lm_head(hidden_state)
        for layer_idx, hidden_state in enumerate(output['hidden_states'])
    }

logits_dict.keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42])

In [10]:
premature_layer = 20

logits_dict[premature_layer].shape

torch.Size([1, 6, 256000])

In [11]:
base_logits = logits_dict[premature_layer][:, -1, :]

base_logits.shape

torch.Size([1, 256000])

In [12]:
def _relative_top_filter(
    scores: torch.FloatTensor,
    baseline_scores: torch.FloatTensor,
    relative_top: float = 0.1,
    filter_value: float = -float("Inf"),
    base_filter_value=-1e-3,
    min_tokens_to_keep: int = 1,
) -> torch.FloatTensor:
    """
    Reference: https://github.com/XiangLi1999/ContrastiveDecoding/blob/170e9142e92159c1237d731e240f5eb14aabf428/transformers/src/transformers/generation_logits_process.py#L235
    Apply filtering to only keep tokens with a probability above a certain threshold. The threshold is defined as `relative_top` * max probability in the distribution.
    """
    scores_normalized = scores.log_softmax(dim=-1)
    baseline_scores_normalized = baseline_scores.log_softmax(dim=-1)
    sorted_logits, sorted_indices = torch.sort(scores_normalized, descending=True)
    min_thresh = sorted_logits[..., min_tokens_to_keep - 1]
    probs_max = torch.max(scores_normalized, dim=-1).values
    probs_thresh = probs_max + np.log(relative_top)
    probs_thresh = torch.min(min_thresh, probs_thresh)
    probs_thresh = probs_thresh.unsqueeze(-1)
    baseline_scores_normalized[scores_normalized < probs_thresh] = base_filter_value
    scores_normalized[scores_normalized < probs_thresh] = filter_value
    return scores_normalized, baseline_scores_normalized

In [13]:
len(logits_dict) - 1

42

In [14]:
final_logits = logits_dict[len(logits_dict) - 1][:, -1, :]
final_logits.shape

torch.Size([1, 256000])

In [15]:
relative_top = 0.1

final_logits, base_logits = _relative_top_filter(final_logits, base_logits, relative_top=relative_top)

In [16]:
next_token_logits = final_logits - base_logits
next_token_logits

tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]], device='cuda:0',
       dtype=torch.float16, grad_fn=<SubBackward0>)

In [17]:
# Detach, move to CPU, and convert to numpy array
logits = next_token_logits.detach().to(torch.float32).cpu().numpy()

# Get the top 5 tokens
top_5_tokens = logits.argsort()[0][::-1][:5]

for token in top_5_tokens:
    print(f"{reversed_tokenizer[token]}: {logits[0][token]}")

▁**: 50.96875
▁Samantha: 41.8125
▁Ashley: 39.84375
▁Sarah: 36.90625
▁Emily: 35.40625


In [18]:
def print_top_k_logits(logits, top_k=5):
    """Print the `top_k` highest-scoring tokens and their scores.

    Args:
        logits: Either a score tensor, or a tuple/list whose first element is
            the score tensor (e.g. the `(next_token_logits, past_key_values)`
            pair returned by `forward_pass`). When the tensor has more than one
            dimension, only the first row is reported.
        top_k: Number of tokens to print, highest score first.

    Token strings are looked up in the notebook-level `reversed_tokenizer` map.
    """
    # Accept the (logits, cache) pair directly, as the notebook cells pass it.
    if isinstance(logits, (tuple, list)):
        logits = logits[0]

    scores = logits.detach().to(torch.float32).cpu().numpy()
    if scores.ndim > 1:
        # Collapse leading dimensions and keep the first row only.
        scores = scores.reshape(-1, scores.shape[-1])[0]

    top_k_tokens = scores.argsort()[::-1][:top_k]
    for token in top_k_tokens:
        print(f"{reversed_tokenizer[token]}: {scores[token]}")


In [19]:
from torch.nn import functional as F

def kl_div(logits_dict, candidate_premature_layers, final_logits):
    """Score each candidate layer by a Jensen-Shannon-style divergence from the final layer.

    Despite the name, this is not a plain KL divergence: for every candidate
    layer the mixture M = 0.5 * (mature + premature) is formed and the two
    `F.kl_div(..., M)` terms are averaged. Each term is averaged (not summed)
    over the vocabulary dimension, and the result is averaged over the batch.

    Args:
        logits_dict: Mapping layer index -> logits of shape (batch, vocab).
        candidate_premature_layers: Layer indices to score, in output order.
        final_logits: Mature-layer logits of shape (batch, vocab).

    Returns:
        Tensor of shape (num_candidate_layers,), one score per candidate.
    """
    premature_logits = torch.stack(
        [logits_dict[layer] for layer in candidate_premature_layers], dim=0
    )  # (num_layers, batch, vocab)

    # Probabilities and the per-layer mixture distribution M.
    mature_probs = F.softmax(final_logits, dim=-1)
    premature_probs = F.softmax(premature_logits, dim=-1)
    mixture = 0.5 * (mature_probs[None, :, :] + premature_probs)

    # Log-probabilities are the `input` argument of F.kl_div.
    mature_log_probs = F.log_softmax(final_logits, dim=-1)[None, :, :]
    premature_log_probs = F.log_softmax(premature_logits, dim=-1)

    def _mean_kl_against_mixture(log_probs):
        # (num_layers, batch): element-wise KL terms averaged over the vocabulary.
        return F.kl_div(log_probs, mixture, reduction='none').mean(-1)

    per_batch = 0.5 * (
        _mean_kl_against_mixture(mature_log_probs)
        + _mean_kl_against_mixture(premature_log_probs)
    )
    return per_batch.mean(-1)

In [35]:
from torch.nn import functional as F
from transformers import HybridCache

def forward_pass(inputs, model, device, distance_fn, candidate_premature_layers=None):
    """Run one DoLa-style contrastive step and return next-token scores.

    The final-layer logits for the last position are contrasted against the
    logits of one "premature" layer, chosen as the candidate whose distance
    from the final layer (according to `distance_fn`) is largest.

    Args:
        inputs: Mapping of model inputs (e.g. `input_ids`, `attention_mask`),
            forwarded to `model.prepare_inputs_for_generation`.
        model: Causal LM exposing `prepare_inputs_for_generation` and `lm_head`.
        device: Unused; kept so existing call sites keep working.
        distance_fn: Callable `(logits_dict, candidate_premature_layers,
            final_logits)` returning one score per candidate layer.
        candidate_premature_layers: Non-empty sequence of hidden-state indices
            to consider as the premature layer.

    Returns:
        Tuple `(next_token_logits, past_key_values)`.

    Raises:
        ValueError: If `candidate_premature_layers` is None or empty.

    Note:
        `_relative_top_filter` is called with its default `relative_top`; the
        notebook-level `relative_top` variable is not consulted here.
    """
    # Fail fast, before paying for a model forward pass.
    if candidate_premature_layers is None or len(candidate_premature_layers) == 0:
        raise ValueError(
            "candidate_premature_layers must contain at least one layer index"
        )

    with torch.no_grad():
        model_inputs = model.prepare_inputs_for_generation(**inputs)

        outputs = model(
            **model_inputs,
            return_dict=True,
            output_hidden_states=True,
        )

        # Only the last position matters for next-token prediction.
        final_logits = outputs.logits[:, -1, :].float()

        # Early-exit logits: apply the LM head to each candidate layer's last hidden state.
        logits_dict = {
            exit_layer: model.lm_head(outputs['hidden_states'][exit_layer][:, -1, :]).to(final_logits.device)
            for exit_layer in candidate_premature_layers
        }

        distance = distance_fn(
            logits_dict,
            candidate_premature_layers,
            final_logits
        )

        # Contrast against the candidate that differs most from the final layer.
        premature_layer = candidate_premature_layers[int(distance.argmax().cpu().item())]

        base_logits = logits_dict[premature_layer]

        final_logits, base_logits = _relative_top_filter(final_logits, base_logits)

        next_token_logits = final_logits - base_logits

    return next_token_logits, outputs.past_key_values

In [21]:
inputs = tokenizer("Hello, my name is", return_tensors="pt", padding=True, truncation=True).to(device)

output = forward_pass(inputs, model, device, kl_div, candidate_premature_layers=[20])

print_top_k_logits(output)

▁**: 50.76189422607422
▁Samantha: 41.72283172607422
▁Jennifer: 41.18376922607422
▁Ashley: 39.66033172607422
▁Emma: 36.87126922607422


In [22]:
def generate_text_simple(text, model, tokenizer, device, max_length=50):
    """Greedy-decoding baseline: repeatedly pick the arg-max token of the final logits.

    No KV cache is used; every step feeds the whole sequence generated so far
    back through the model. Only the first sequence of the batch is read.

    Args:
        text: Prompt to continue.
        model: Causal LM returning an object with `.logits`.
        tokenizer: Tokenizer used for encoding, decoding and `eos_token_id`.
        device: Device for the inputs and the attention-mask extension.
        max_length: Maximum number of new tokens.

    Returns:
        The decoded continuation (prompt excluded, EOS excluded).
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    eos_id = tokenizer.eos_token_id
    new_token_ids = []

    for _step in range(max_length):
        with torch.no_grad():
            last_position_logits = model(**inputs).logits[0, -1, :]

        chosen = last_position_logits.argmax()
        chosen_id = chosen.item()
        if chosen_id == eos_id:
            break
        new_token_ids.append(chosen_id)

        # Grow the sequence and its attention mask by the token just chosen.
        extra_mask = torch.tensor([[1]]).to(device)
        inputs = {
            "input_ids": torch.cat([inputs["input_ids"], chosen.reshape((1, 1))], dim=1),
            "attention_mask": torch.cat([inputs["attention_mask"], extra_mask], dim=1),
        }

    return tokenizer.decode(new_token_ids, skip_special_tokens=True)

generate_text_simple("Hello, my name is", model, tokenizer, device, max_length=5)

" Michael and I'm"

In [23]:
def generate_text(text, model, tokenizer, device, distance_fn, candidate_premature_layers=None, max_length=50):
    """Greedy generation where every step uses the contrastive `forward_pass`.

    Mirrors `generate_text_simple`: generated ids are stored as plain ints and
    generation stops before the EOS token is recorded. No KV cache is used;
    each step re-feeds the full sequence.

    Args:
        text: Prompt to continue.
        model: Causal LM handed to `forward_pass`.
        tokenizer: Tokenizer used for encoding, decoding and `eos_token_id`.
        device: Device for the inputs and the attention-mask extension.
        distance_fn: Layer-distance function handed to `forward_pass`.
        candidate_premature_layers: Candidate layer indices handed to `forward_pass`.
        max_length: Maximum number of new tokens.

    Returns:
        The decoded continuation (prompt excluded).
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)

    generated_tokens = []
    for _ in range(max_length):
        # The returned cache is currently unused: the next step re-encodes everything.
        logits, _past_key_values = forward_pass(inputs, model, device, distance_fn, candidate_premature_layers)

        # arg-max over the flattened scores; assumes a single sequence in the batch.
        next_token_id = logits.argmax()
        next_token = next_token_id.item()

        if next_token == tokenizer.eos_token_id:
            break

        generated_tokens.append(next_token)

        inputs = {
            "input_ids": torch.cat(
                [inputs["input_ids"], next_token_id.reshape((1, 1))], dim=1
            ),
            "attention_mask": torch.cat(
                [inputs["attention_mask"], torch.tensor([[1]]).to(device)], dim=1
            ),
        }

    return tokenizer.decode(generated_tokens, skip_special_tokens=True)


relative_top = 0.1
generate_text("Hello, my name is", model, tokenizer, device, kl_div, candidate_premature_layers=[20], max_length=5)

' **Gemma**.\n\nI'

In [24]:
from scipy.stats import wasserstein_distance

def wasserstein_distance_fn(logits_dict, candidate_premature_layers, final_logits):
    """Score each candidate layer with SciPy's 1-D Wasserstein distance.

    The softmax probabilities of the final layer and of each candidate layer
    are flattened (batch included) and handed to
    `scipy.stats.wasserstein_distance` as its two value arrays.

    NOTE(review): SciPy interprets those arrays as observed samples, so this
    compares the empirical distributions of probability *values*, not the two
    token distributions themselves. Confirm this is the intended metric.

    Args:
        logits_dict: Mapping layer index -> logits for that layer.
        candidate_premature_layers: Layer indices to score, in output order.
        final_logits: Mature-layer logits.

    Returns:
        CPU tensor with one distance per candidate layer.
    """
    premature_logits = torch.stack([logits_dict[layer] for layer in candidate_premature_layers])

    mature_probs = final_logits.softmax(dim=-1)
    premature_probs = premature_logits.softmax(dim=-1)

    # The mature distribution is the same for every candidate; convert it once.
    mature_flat = mature_probs.cpu().numpy().flatten()

    per_layer = []
    for layer_pos in range(len(candidate_premature_layers)):
        premature_flat = premature_probs[layer_pos].cpu().numpy().flatten()
        per_layer.append(wasserstein_distance(mature_flat, premature_flat))

    return torch.tensor(per_layer)


inputs = tokenizer("Hello, my name is", return_tensors="pt", padding=True, truncation=True).to(device)

output = forward_pass(inputs, model, device, wasserstein_distance_fn, candidate_premature_layers=[20])

print_top_k_logits(output)

▁**: 50.76189422607422
▁Samantha: 41.72283172607422
▁Jennifer: 41.18376922607422
▁Ashley: 39.66033172607422
▁Emma: 36.87126922607422


In [25]:
# Bhattacharyya distance

def bhattacharyya_distance_fn(logits_dict, candidate_premature_layers, final_logits):
    """Score each candidate layer by its Bhattacharyya distance from the final layer.

    For every candidate the coefficient sum(sqrt(p * q)) is taken over all
    elements of the softmax outputs (batch included) and the distance is its
    negative log.

    Args:
        logits_dict: Mapping layer index -> logits for that layer.
        candidate_premature_layers: Layer indices to score, in output order.
        final_logits: Mature-layer logits.

    Returns:
        Tensor (built with `torch.tensor`, so on CPU) with one distance per candidate.
    """
    premature_logits = torch.stack([logits_dict[layer] for layer in candidate_premature_layers])

    mature_probs = final_logits.softmax(dim=-1)
    premature_probs = premature_logits.softmax(dim=-1)

    per_layer = []
    # Iterating the stacked tensor walks the candidate layers in order.
    for layer_probs in premature_probs:
        coefficient = torch.sqrt(mature_probs * layer_probs).sum()
        per_layer.append(-torch.log(coefficient))

    return torch.tensor(per_layer)

inputs = tokenizer("Hello, my name is", return_tensors="pt", padding=True, truncation=True).to(device)

output = forward_pass(inputs, model, device,bhattacharyya_distance_fn, candidate_premature_layers=[20])

print_top_k_logits(output)


▁**: 50.76189422607422
▁Samantha: 41.72283172607422
▁Jennifer: 41.18376922607422
▁Ashley: 39.66033172607422
▁Emma: 36.87126922607422


In [30]:
LAYERS = [x for x in range(20, 40)]
relative_top = 0.1

prompt = "Given the context `CONTEXT` and the query `QUERY` below, please provide an answer `ANSWER` to the question. \n    `CONTEXT`: `Dokument [1]:` Wacław Stanisław Sitkowski (ur. 12 lutego 1924 w Warszawie, zm. 1 kwietnia 2010 tamże) – polski lekarz, kardiochirurg, profesor nauk medycznych, powstaniec warszawski. Jeden z pionierów polskiej kardiochirurgii, nauczyciel polskich kardiochirurgów, w tym Zbigniewa Religi. \n\n    `QUERY`: Jak miał na imię polski kardiochirurg, profesor Religa?\n\n    `ANSWER`:\n"

prompt_len = len(prompt)

print(prompt)

distances_fns = {
    "KL Divergence": kl_div,
    "Wasserstein Distance": wasserstein_distance_fn,
    "Bhattacharyya Distance": bhattacharyya_distance_fn,
}

Given the context `CONTEXT` and the query `QUERY` below, please provide an answer `ANSWER` to the question. 
    `CONTEXT`: `Dokument [1]:` Wacław Stanisław Sitkowski (ur. 12 lutego 1924 w Warszawie, zm. 1 kwietnia 2010 tamże) – polski lekarz, kardiochirurg, profesor nauk medycznych, powstaniec warszawski. Jeden z pionierów polskiej kardiochirurgii, nauczyciel polskich kardiochirurgów, w tym Zbigniewa Religi. 

    `QUERY`: Jak miał na imię polski kardiochirurg, profesor Religa?

    `ANSWER`:



In [27]:
all_text = generate_text_simple(prompt, model, tokenizer, device, max_length=100)

all_text

'    Wacław Stanisław Sitkowski \n\n\n'

In [28]:
for name, fn in distances_fns.items():
    all_text = generate_text(prompt, model, tokenizer, device, fn, candidate_premature_layers=LAYERS, max_length=10)
    print(f"{name}: {all_text}")

KL Divergence: ```
Wacław Stanisław Sitkowski
```
Wasserstein Distance: ```
Wacław Stanisław Sitkowski 

Bhattacharyya Distance: ```
Wacław Stanisław Sitkowski
```


In [29]:
LAYERS = [x for x in range(0, 20)]

for name, fn in distances_fns.items():
    all_text = generate_text(prompt, model, tokenizer, device, fn, candidate_premature_layers=LAYERS, max_length=10)
    print(f"{name}: {all_text}")

KL Divergence: ```
Wacław
``` 





Wasserstein Distance: ```
Wacław
``` 


Bhattacharyya Distance: ```
Wacław
``` 







In [30]:
LAYERS = [x for x in range(20, 40)]

prompt = "Given the context `CONTEXT` and the query `QUERY` below, please provide an answer `ANSWER` to the question. \n    `CONTEXT`: `Dokument [1]:` Ludwik Tadeusz Waryński (ur. 24 września 1856 w Martynówce koło Kaniowa, zm. 2 marca 1889 w Szlisselburgu) – polski działacz i ideolog polskiego ruchu socjalistycznego. Brat Stanisława, ojciec Tadeusza Waryńskiego. \n\n    `QUERY`: W którym wieku żył ludwik waryński?"

prompt_len = len(prompt)

print(prompt)

Given the context `CONTEXT` and the query `QUERY` below, please provide an answer `ANSWER` to the question. 
    `CONTEXT`: `Dokument [1]:` Ludwik Tadeusz Waryński (ur. 24 września 1856 w Martynówce koło Kaniowa, zm. 2 marca 1889 w Szlisselburgu) – polski działacz i ideolog polskiego ruchu socjalistycznego. Brat Stanisława, ojciec Tadeusza Waryńskiego. 

    `QUERY`: W którym wieku żył ludwik waryński?


In [32]:
all_text = generate_text_simple(prompt, model, tokenizer, device, max_length=100)

print(all_text)




`ANSWER`: Ludwik Waryński żył w wieku od 24 do 32 lat. 



In [33]:
prompt = "Given the context `CONTEXT` and the query `QUERY` below, please provide an answer `ANSWER` to the question. \n    `CONTEXT`: `Dokument [1]:` Ludwik Tadeusz Waryński (ur. 24 września 1856 w Martynówce koło Kaniowa, zm. 2 marca 1889 w Szlisselburgu) – polski działacz i ideolog polskiego ruchu socjalistycznego. Brat Stanisława, ojciec Tadeusza Waryńskiego. \n\n    `QUERY`: W którym wieku żył ludwik waryński?"
prompt_len = len(prompt)
output = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)

# generate text

LAYERS = [x for x in range(20, 40)]

response = model.generate(
    **output,
    do_sample=False,
    max_new_tokens=25,
    dola_layers=LAYERS,
    use_cache=True,
)

text_all = tokenizer.decode(response[0], skip_special_tokens=True)

print(text_all)

Given the context `CONTEXT` and the query `QUERY` below, please provide an answer `ANSWER` to the question. 
    `CONTEXT`: `Dokument [1]:` Ludwik Tadeusz Waryński (ur. 24 września 1856 w Martynówce koło Kaniowa, zm. 2 marca 1889 w Szlisselburgu) – polski działacz i ideolog polskiego ruchu socjalistycznego. Brat Stanisława, ojciec Tadeusza Waryńskiego. 

    `QUERY`: W którym wieku żył ludwik waryński?



```python
ANSWER = str(max(int(CONTEXT.split('zm. ')[0].split('ur.


In [34]:
all_text = generate_text(prompt, model, tokenizer, device, kl_div, candidate_premature_layers=LAYERS, max_length=25)

print(all_text)






```python
ANSWER = str(max(int(CONTEXT.split('zm. ')[0].split('ur.


In [38]:
for name, fn in distances_fns.items():
    all_text = generate_text(prompt, model, tokenizer, device, fn, candidate_premature_layers=LAYERS, max_length=100)
    print(f"{name}: {all_text}")

KL Divergence: 



Answer: Ludwik Waryński żył do wieku 32 lat.  

Wasserstein Distance: 



Answer: Ludwik Waryński żył do wieku 32. 

Bhattacharyya Distance: 



```
ANSWER
Ludwik Waryński żył w wieku 32 lat.  
```



In [39]:
LAYERS = [x for x in range(0, 20)]

for name, fn in distances_fns.items():
    all_text = generate_text(prompt, model, tokenizer, device, fn, candidate_premature_layers=LAYERS, max_length=100)
    print(f"{name}: {all_text}")


KL Divergence: 



Answer: Ludwik Waryński żył do wieku 32 lat.  

Wasserstein Distance: 



Answer: Ludwik Waryński żył do wieku 32. 

Bhattacharyya Distance: 



```
ANSWER
Ludwik Waryński żył w wieku 32 lat.  
```



# Check contrastive decoding against each premature layer in turn

In [54]:
prompt = "Given the context `CONTEXT` and the query `QUERY` below, please provide an answer `ANSWER` to the question. \n    `CONTEXT`: `Dokument [1]:` Wacław Stanisław Sitkowski (ur. 12 lutego 1924 w Warszawie, zm. 1 kwietnia 2010 tamże) – polski lekarz, kardiochirurg, profesor nauk medycznych, powstaniec warszawski. Jeden z pionierów polskiej kardiochirurgii, nauczyciel polskich kardiochirurgów, w tym Zbigniewa Religi. \n\n    `QUERY`: Jak miał na imię polski kardiochirurg, profesor Religa?\n\n    `ANSWER`:\n"

In [55]:
from tqdm import tqdm

# Continuation produced when contrasting against one fixed premature layer: layer index -> text.
data = {}

# One full generation (up to 100 new tokens) per layer index 0..39, each with a
# single-element candidate list, so that layer is always the one selected.
for layer_id in tqdm(range(40)):
    all_text = generate_text(prompt, model, tokenizer, device, kl_div, candidate_premature_layers=[layer_id], max_length=100)
    data[layer_id] = all_text

100%|██████████| 40/40 [05:49<00:00,  8.73s/it]


In [56]:
# prompt_len = len(prompt)

# data_2 = {
#     K: V
#     for K, V in data.items()
# }

In [57]:
print(prompt)

Given the context `CONTEXT` and the query `QUERY` below, please provide an answer `ANSWER` to the question. 
    `CONTEXT`: `Dokument [1]:` Wacław Stanisław Sitkowski (ur. 12 lutego 1924 w Warszawie, zm. 1 kwietnia 2010 tamże) – polski lekarz, kardiochirurg, profesor nauk medycznych, powstaniec warszawski. Jeden z pionierów polskiej kardiochirurgii, nauczyciel polskich kardiochirurgów, w tym Zbigniewa Religi. 

    `QUERY`: Jak miał na imię polski kardiochirurg, profesor Religa?

    `ANSWER`:



In [58]:
data

{0: '```\nWacław\n``` \n\n\n\n\n',
 1: '```\nWacław\n```\n\n\n',
 2: 'Wacław\n',
 3: '```\nWacław\n```\n\n\n',
 4: '```\nWacław\n```\n\n\n',
 5: '```\nWacław\n```\n\n',
 6: '```\nWacław\n```\n',
 7: '```\nWacław\n```\n',
 8: '```\nWacław Stanisław Sitkowski\n```',
 9: '```\nWacław Stanisław Sitkowski\n```',
 10: '```\nWacław Stanisław Sitkowski\n```',
 11: '```\nWacław Stanisław Sitkowski\n```',
 12: '```\nWacław Stanisław Sitkowski\n```',
 13: '```\nWacław Stanisław Sitkowski\n```',
 14: '```\nWacław Stanisław Sitkowski\n```',
 15: '```\nWacław Stanisław Sitkowski\n```',
 16: "```\nWacław Stanisław Sitkowski\n```\n\n\n\nLet me break down why the answer is `Wacław Stanisław Sitkowski`:\n\n* **The Query:** Asks for the name of the Polish cardiothoracic surgeon who was a professor and Religa's teacher. \n* **The Context:** Provides biographical information about Wacław Stanisław Sitkowski, stating he was:\n    * A Polish cardiothoracic surgeon.\n    * A professor.\n    * One of",
 17: "`

In [None]:
raise ValueError("Stop here")

In [53]:
# NOTE(review): `data_2` is only assigned in a commented-out cell above, so this
# cell depends on leftover kernel state and fails on a fresh Restart & Run All.
data_2

{0: '\n\n\n\nAnswer: Ludwik Waryński żył do wieku 32 lat.  \n',
 1: '\n\n\n\nAnswer: Ludwik Waryński żył do wieku 32 lat.  \n',
 2: "\n\n    `ANSWER`:  Ludwik Waryński żył do 32 roku życia. \n\n\nLet me know if you'd like me to try another example.\n",
 3: '\n\n    `ANSWER`:  Ludwik Waryński żył do wieku 32.\n',
 4: '\n\n    `ANSWER`:  Ludwik Waryński żył w wieku 32.\n\n\n\n',
 5: '\n\n    `ANSWER`: Ludwik Waryński żył w wieku od 24 do 32 lat.\n\n\n\n\n',
 6: '\n\n\n```\nANSWER\nLudwik Waryński żył do wieku 32.\n```\n',
 7: '\n\n\n```\nANSWER\nLudwik Waryński żył w wieku od 24 do 32 lat.\n```\n',
 8: "\n\n\n\nLet me break down why finding the answer requires understanding the context:\n\n* **Identifying Key Information:** We need to pinpoint Ludwik Tadeusz Waryński's birth and death dates from the provided text.\n\n* **Calculating Age:** Once we have those dates, we can subtract his birth year from his death year to determine his lifespan.\n\n\n\n\nLet me know if you'd like me to calcu