In [1]:
import torch
from transformer_lens import HookedTransformer
from transformers import AutoTokenizer

# Pick the compute device. The notebook targets the second GPU ("cuda:1");
# fall back to the first GPU on single-GPU hosts and to the CPU when CUDA is
# unavailable, instead of requesting a device index that does not exist.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Hugging Face model ID used for both the tokenizer and the weights.
model_id = "meta-llama/Meta-Llama-3-8B"

# Tokenizer loaded directly from the Hugging Face hub.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights in float16 to reduce memory usage.
# `device=` tells TransformerLens where to place the HookedTransformer itself;
# `device_map=` is forwarded to the Hugging Face loader for the intermediate
# checkpoint load. Passing only `device_map` leaves the HookedTransformer on
# TransformerLens' default device rather than the one selected above.
model = HookedTransformer.from_pretrained_no_processing(
    model_id,
    torch_dtype=torch.float16,
    device=device,
    device_map=device,
)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model meta-llama/Meta-Llama-3-8B into HookedTransformer


In [5]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from collections import defaultdict
from tqdm.notebook import tqdm

# Experiment: for every layer index, run two prompts separately through the
# blocks before that layer, blend the two residual streams, and let the
# remaining blocks turn the blend into next-token predictions.
result_1 = defaultdict(list)  # layer -> tokens appended to prompt 1 (0-d tensors)
result_2 = defaultdict(list)  # layer -> tokens appended to prompt 2 (0-d tensors)
max_tokens = 10  # decoding steps per merge layer
lamb = 0.49      # weight of prompt 1's residual; prompt 2 gets (1 - lamb)
top_k = 5        # candidates kept per position (also the number of printed rows)


def _left_pad_with_bos(ids, length):
    """Left-pad the 1-D token tensor `ids` with BOS tokens up to `length`."""
    padding = torch.tensor([tokenizer.bos_token_id] * (length - ids.shape[0]), dtype=torch.long)
    return torch.cat([padding, ids])


# The prompts do not depend on the layer, so tokenize and pad them once.
prompt_1_ids = tokenizer('Translate "Mary had a little lamb" into Spanish: María', return_tensors='pt').input_ids[0]
prompt_2_ids = tokenizer('Translate "Mary tenía un corderito." into English: Mary', return_tensors='pt').input_ids[0]
seq_len = max(prompt_1_ids.shape[0], prompt_2_ids.shape[0])
# Equal lengths are required so the two residual streams can be added position by position.
prompt_1_ids = _left_pad_with_bos(prompt_1_ids, seq_len)
prompt_2_ids = _left_pad_with_bos(prompt_2_ids, seq_len)

with torch.no_grad():
    # One progress bar over layers instead of one short bar per layer.
    for layer in tqdm(range(model.cfg.n_layers), desc='merge layer'):
        # Each layer starts again from the bare prompts; torch.cat below builds
        # new tensors, so the shared prompt tensors are never modified.
        test_str_1 = prompt_1_ids
        test_str_2 = prompt_2_ids

        for tok_i in range(max_tokens):
            # Extend each sequence with the token chosen for it in the previous step.
            if result_1[layer]:
                test_str_1 = torch.cat([test_str_1, result_1[layer][-1].reshape(1)])
            if result_2[layer]:
                test_str_2 = torch.cat([test_str_2, result_2[layer][-1].reshape(1)])

            # First element of input_to_embed's return value is the embedded residual stream.
            embed_1 = model.input_to_embed(test_str_1)[0]
            embed_2 = model.input_to_embed(test_str_2)[0]

            # With stop_at_layer set, forward returns the residual stream
            # entering `layer` rather than logits.
            resid_1 = model.forward(embed_1, start_at_layer=0, stop_at_layer=layer)
            resid_2 = model.forward(embed_2, start_at_layer=0, stop_at_layer=layer)

            # Convex combination of the two residual streams.
            resid_comb = lamb * resid_1 + (1 - lamb) * resid_2

            # Finish the forward pass from `layer` on the blended residual.
            logits = model.forward(resid_comb, return_type='logits', start_at_layer=layer).cpu()

            # Softmax in float32, then keep the top_k candidates per position:
            # val and idx both have shape (positions, top_k).
            val, idx = logits[0].float().softmax(-1).topk(top_k)

            # Only the final decoding step of each layer is printed.
            if tok_i == max_tokens - 1:
                print('-' * 40)
                print('Merge at layer', layer)
                print(f'top {top_k} predictions:')
                for j in range(top_k):
                    # One row of rank-j tokens across positions, then their probabilities.
                    print(('{:>8}' * idx.shape[0]).format(*[repr(tokenizer.decode([i]))[1:-1] for i in idx[:, j].tolist()]))
                    print(('{:8.2f}' * val.shape[0]).format(*val[:, j].tolist()))

            # Prompt 1 continues with the top-ranked token at the last position.
            result_1[layer].append(idx[-1, 0])
            # NOTE(review): prompt 2 continues with the SECOND-ranked token of the
            # same blended distribution — confirm this is the intended design.
            result_2[layer].append(idx[-1, 1])


  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 0
top 5 predictions:
QuestionQuestion       "     had       "       a      un       a  little       " Spanish       .       "     had       a  little       .Translate     had       a  little     .\n    Mary
    0.31    0.17    0.22    0.11    0.36    0.49    0.18    0.08    0.12    0.14    0.18    0.23    0.33    0.80    0.91    0.95    0.41    0.09    0.72    0.76    0.75    0.42    0.54
     def     def Translate    land    Mary     two      un  sister       a      in  French     .\n    Mary     had      al    lamb     .\n    Mary     had      al    lamb       .     Mar
    0.10    0.07    0.08    0.08    0.06    0.07    0.17    0.03    0.08    0.06    0.13    0.08    0.12    0.03    0.02    0.01    0.13    0.06    0.10    0.07    0.19    0.23    0.03
       #       #      \n      's     had  little       a     had       ,       ( English       ,            Mary       a     lit       ,       "    Mary      al    litt       ,    

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 1
top 5 predictions:
QuestionQuestion     The       P       "      un     son    lamb       "      is Spanish      \n       "   tenía   tenía      un       c      ve      ja     .\n       "  little    lamb
    0.31    0.24    0.03    0.12    0.45    0.27    0.02    0.94    0.24    0.16    0.18    0.28    0.31    0.47    0.59    0.91    0.91    0.97    0.78    0.34    0.17    0.12    0.32
     def     def       I       "     "\n     una    baby    Lamb       ,      in English     .\n   María   María      tu     una       o   order       o       .     The  Little    Lamb
    0.10    0.09    0.01    0.05    0.06    0.13    0.02    0.01    0.11    0.08    0.12    0.15    0.06    0.10    0.15    0.03    0.02    0.02    0.08    0.22    0.05    0.04    0.17
       #       #     the     Had       ,       a       c     lam       .       (  French       .    Mary      tu      un       a     car     vis      jo      \n    Mary    lamb     bo

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 2
top 5 predictions:
Question     def      to       "       "      un   order    lamb       "    into English       .    Mary   tenía      un       c       ñ       o     .\n    Mary<|begin_of_text|>     ://      \n
    0.31    0.29    0.01    0.12    0.17    0.59    0.09    0.30    0.36    0.28    0.18    0.39    0.25    0.58    0.61    0.86    0.10    0.15    0.47    0.11    0.98    0.22    0.15
     defQuestion      to    land     "\n     una     her       c      ."      in Spanish      \n       "     had     una   peque     cho    lamb       .<|end_of_text|>  \u2009  .swing    Mary
    0.10    0.16    0.01    0.06    0.06    0.37    0.08    0.08    0.11    0.10    0.16    0.36    0.24    0.12    0.33    0.05    0.09    0.09    0.24    0.10    0.00    0.02    0.09
       #       #      in       P    Jane     dos     per       "       .      to  French     .\n   María      tu       a       o      ño       c      \n       "       

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 3
top 5 predictions:
Question     def       "       "       "      un   order    lamb       "      in English      \n       "     had      un       c   order     .\nTranslate    Mary     had      un       c
    0.31    0.29    0.05    0.18    0.19    0.37    0.35    0.63    0.44    0.26    0.55    0.46    0.33    0.44    0.78    0.74    0.14    0.59    0.11    0.28    0.39    0.76    0.34
     defQuestion      to    land     "\n     una      eb    baby      ."    into Spanish     :\n    Mary   tenía     una   peque     ano       .       "       "   tenía     una     cab
    0.10    0.16    0.01    0.13    0.07    0.27    0.08    0.03    0.05    0.17    0.08    0.14    0.26    0.39    0.18    0.10    0.06    0.12    0.10    0.19    0.05    0.09    0.09
       #       #    from       (    Jane    tres     uch      ch     "\n      to  French     .\n   María      tu       a       o       c      \n    Mary    Mary    Mary       c   peq

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 4
top 5 predictions:
Question     def       :    land       "      un   order    lamb       "    into English      \n       "     had      un       c   order       o     .\nTranslate    Mary     had       a
    0.31    0.29    0.09    0.24    0.06    0.55    0.69    0.54    0.32    0.20    0.20    0.28    0.34    0.63    0.54    0.75    1.00    0.80    0.45    0.16    0.29    0.79    0.92
     defQuestion       "       P      un     una      eb     lam      ."      to Spanish     .\n    Mary   tenía     una       o   order     ito       .       "       "     Had       A
    0.10    0.16    0.02    0.05    0.04    0.25    0.04    0.02    0.07    0.18    0.12    0.15    0.29    0.07    0.41    0.05    0.00    0.09    0.19    0.11    0.15    0.02    0.05
       #       #      to      am       ,     her    anto   llama     "\n      in  French       .   María      tu       a   peque   encer       a      \n    Mary    Mary    Mary  Litt

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 5
top 5 predictions:
Question     def       "    land       "     her   order    lamb       "    into English      \n       "   tenía      un       c   order       o     .\n    Mary     had     had       a
    0.31    0.29    0.02    0.33    0.11    0.18    0.21    0.37    0.26    0.23    0.26    0.38    0.25    0.51    0.54    0.85    0.99    0.56    0.45    0.21    0.66    0.97    0.85
     defQuestion       :       "     "\n     per      ip       l     "\n      to Spanish     .\n    Mary     had     una     car     ach    illo      \n       "    Mary       a  little
    0.10    0.16    0.01    0.05    0.07    0.10    0.13    0.03    0.09    0.15    0.17    0.14    0.20    0.26    0.44    0.05    0.00    0.27    0.20    0.08    0.06    0.01    0.07
       #       #      to     "\n     una      un     erd      ch      ."      in  French       .   María      tu       a   peque     uch     ito       . English   María     Had    lam

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 6
top 5 predictions:
Question     def       :    land       "      un   order       o       "    into English      \n    Mary   tenía      un       c   order     uit     .\n    Mary       :    Mary     had
    0.31    0.29    0.02    0.42    0.15    0.13    0.26    0.35    0.34    0.25    0.38    0.51    0.26    0.78    0.61    0.73    0.77    0.30    0.40    0.19    0.29    0.70    0.57
     defQuestion       "       "     "\n     her    anto    lamb      ."      to Spanish     :\n   María     had     una   peque      ag     ito      \n English Translation English       :
    0.10    0.16    0.01    0.02    0.04    0.08    0.09    0.18    0.07    0.18    0.18    0.10    0.15    0.08    0.37    0.06    0.02    0.27    0.23    0.14    0.08    0.05    0.11
       #       #     the      am     una     una      ip       o       ,      in  French     .\n       "      tu       a     cab     ano    illo       .Translate translation   Mar

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 7
top 5 predictions:
Question     def     the    land     "\n      un    anto    lamb       "      in Spanish      \n    Mary   tenía      un       c      ri      at     ito     .\n    Mary     had       a
    0.31    0.29    0.01    0.30    0.20    0.15    0.08    0.78    0.36    0.45    0.28    0.77    0.19    0.86    0.82    0.68    0.33    0.31    0.39    0.39    0.11    0.68    0.85
     defQuestion     and      am       "     una   uchar     lam     "\n    into English       :   María      tu     una     cab     rit      ad     ita      \n     How     did     the
    0.10    0.16    0.01    0.02    0.12    0.09    0.04    0.02    0.15    0.16    0.21    0.05    0.17    0.05    0.16    0.05    0.15    0.08    0.21    0.26    0.09    0.02    0.01
       #       #      to      's      es     her     azo   llama       ,      to  French     .\n       "     had       a   peque    rito     ito     ina       . English    Mary      a

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 8
top 5 predictions:
Question     def      in    land       "      un   order    lamb       "      in Spanish      \n   María   tenía      un       c      ri      at     ito     .\nTranslate    Mary     had
    0.31    0.29    0.01    0.43    0.13    0.17    0.23    0.52    0.55    0.52    0.23    0.81    0.30    0.92    0.86    0.72    0.39    0.47    0.25    0.56    0.17    0.56    0.54
     defQuestion       a      am     "\n     una    anto   llama       ,      to English       :    Mary      tu     una     cab     rit      ad     ina       .    Mary       "     had
    0.10    0.16    0.01    0.02    0.05    0.09    0.08    0.03    0.09    0.21    0.14    0.06    0.15    0.02    0.13    0.05    0.11    0.05    0.17    0.12    0.11    0.10    0.07
       #       #     the      's       ,     her      eb       l      ."    into  French     .\n       "     ten       a   peque    init     ito     ita      \n       "    into    Ma

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 9
top 5 predictions:
Question     def       -    land       "      un  little    lamb       "      in Spanish      \n    Mary   tenía      un       c   order     ito     .\n      Do      to     the    that
    0.31    0.29    0.01    0.35    0.13    0.17    0.07    0.86    0.52    0.31    0.23    0.74    0.37    0.92    0.79    0.69    1.00    0.68    0.58    0.17    0.75    0.19    0.26
     defQuestion     the      's       ,     una    anto     lam       ,      to English       :   María     ten     una       o   encer       o      \n     How      is      it possible
    0.10    0.16    0.00    0.02    0.06    0.09    0.05    0.01    0.10    0.25    0.10    0.06    0.23    0.02    0.18    0.06    0.00    0.17    0.12    0.15    0.04    0.19    0.08
       #       #       a       ,     Mag     her      uy   llama      ."    into  French     .\n       "      tu       a     cab   order    illo       . English      do       "    do

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 10
top 5 predictions:
Question     def      in    land       "      un    anto    lamb       "      in Spanish      \n    Mary   tenía      un       c   order       j     ito     .\n    Mary     had     had
    0.31    0.29    0.01    0.40    0.08    0.17    0.06    0.88    0.57    0.29    0.20    0.34    0.35    0.93    0.82    0.66    0.95    0.79    0.37    0.55    0.09    0.63    0.86
     defQuestion       a       ,       ,     una  little     boy       ,    into English     :\n   María     ten     una       o      ve      ja     ita       .       "    Mary       a
    0.10    0.16    0.00    0.02    0.07    0.07    0.06    0.01    0.07    0.21    0.15    0.14    0.20    0.02    0.15    0.08    0.02    0.16    0.27    0.15    0.08    0.06    0.07
       #       #     the      's     Mag     her     ero     lam      ."      to  French       :       "     had       a     cab    read       c    illo      \nTranslate   tenía     

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 11
top 5 predictions:
Question     def     the    land       "      un   order    lamb       "      to Spanish      \n    Mary   tenía      un  little    lamb     .\n     How Translation Spanish Dictionary       (
    0.31    0.29    0.00    0.38    0.23    0.15    0.10    0.59    0.59    0.24    0.21    0.45    0.49    0.86    0.52    0.81    0.68    0.46    0.13    0.35    0.77    0.54    0.45
     defQuestion       a borough       ,     una      eb       o       ,      in English       :       "     had       a       c   sheep      \n English      to English       . Meaning
    0.10    0.16    0.00    0.02    0.05    0.07    0.07    0.04    0.06    0.22    0.13    0.18    0.14    0.06    0.34    0.04    0.04    0.15    0.10    0.08    0.10    0.13    0.12
       #       #     the      's     Mag     her     ero     lam     que    into  French     :\n   María      tu     una   small    baby       .    Mary   Title   Greek Meanin

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 12
top 5 predictions:
Question     def     the    land       "       a  little    lamb       "    into Spanish      \n    Mary   tenía       a       c   order     .\n     How Translation      of     had       a
    0.31    0.29    0.01    0.33    0.10    0.13    0.27    0.87    0.45    0.32    0.19    0.35    0.47    0.92    0.54    0.41    0.93    0.46    0.13    0.70    0.28    0.88    0.78
     defQuestion       a      's       ,      un    baby     boy       ,      to English       :   María     had      un  little    anto      \n English       :    Mary     Had       A
    0.10    0.16    0.01    0.02    0.06    0.09    0.10    0.01    0.08    0.28    0.12    0.31    0.17    0.02    0.37    0.14    0.01    0.17    0.12    0.08    0.21    0.02    0.18
       #       #    from borough     "\n     una       d     lam      ."      in  French     :\n       "     ten     una   peque      uy      de    Mary  Target       :    Mary  

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 13
top 5 predictions:
Question     def      in    land       "      un      eb    lamb       "    into Spanish      \n    Mary   tenía      un       c   order     ito     .\n    Mary     had    Mary    Mary
    0.31    0.29    0.01    0.30    0.17    0.11    0.12    0.51    0.46    0.34    0.21    0.34    0.52    0.95    0.87    0.74    0.79    0.58    0.52    0.15    0.34    0.30    0.26
     defQuestion      to      am     "\n     her   order       l       ,      in English       :       "     ten     una   peque     ord       o       .Translate       :       "     had
    0.10    0.16    0.01    0.03    0.08    0.11    0.08    0.03    0.06    0.19    0.13    0.24    0.13    0.02    0.12    0.05    0.01    0.13    0.16    0.13    0.13    0.20    0.12
       #       #      to       "       ,    hijo      uy   llama      ."      to  French     :\n   María      tu      ía     cab      ag      el      \n       "    Mary       I     

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 14
top 5 predictions:
Question     def     the    land       "       a  little    lamb       "    into English      \n    Mary   tenía       a       c   order     .\n    Mary     had     had       a  little
    0.30    0.29    0.01    0.28    0.14    0.13    0.36    0.92    0.50    0.35    0.15    0.32    0.56    0.89    0.88    0.45    0.41    0.52    0.20    0.53    0.80    0.60    0.77
     defQuestion      in       "     "\n      un    baby     boy      ."      in Spanish       :       "     had      un  little    lamb      \n       "    Mary       a      un    lamb
    0.11    0.16    0.01    0.04    0.04    0.07    0.10    0.01    0.08    0.20    0.15    0.25    0.16    0.06    0.07    0.16    0.13    0.17    0.13    0.09    0.13    0.32    0.04
       #       #      to      am       ,     una   dream     lam       ,      to  French     :\n   María     ten     una   peque      uy       . English   María      un     una      

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 15
top 5 predictions:
Question     def     the    land       "      un      eb    lamb       "    into English      \n    Mary     had      un       c       c     ino       c       o   order       o    Mary
    0.31    0.29    0.01    0.11    0.15    0.10    0.04    0.81    0.50    0.28    0.24    0.35    0.75    0.92    0.63    0.49    0.70    0.21    0.54    0.21    1.00    0.70    0.23
     defQuestion       f       "     una     una     ito     boy       ,      in Spanish       :       "   tenía       a      un   peque     ito      de       c      uy     .\n       "
    0.10    0.16    0.01    0.05    0.03    0.08    0.03    0.02    0.07    0.21    0.12    0.21    0.15    0.02    0.25    0.25    0.06    0.14    0.16    0.14    0.00    0.07    0.16
       #       #       c      am      en       a    baby     ram      ."      to  French     :\n       (      's     una   peque     car      ño     .\n      la   order       . Engli

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 16
top 5 predictions:
Question     def       f       "       "       a    baby    lamb       "    into English       :    Mary     had      un      un   order    lamb     .\n       "     had     had    lamb
    0.31    0.29    0.01    0.09    0.14    0.17    0.12    0.83    0.54    0.49    0.26    0.30    0.64    0.78    0.75    0.37    1.00    0.15    0.58    0.18    0.51    0.21    0.25
     defQuestion       d    land       a      un  little     boy       ,      in Spanish     :\n       "   tenía       a       c   order       o       .    Mary    Mary       a     had
    0.10    0.16    0.01    0.04    0.04    0.03    0.04    0.03    0.07    0.13    0.13    0.26    0.21    0.16    0.12    0.35    0.00    0.10    0.16    0.12    0.06    0.21    0.13
       #       #       m       a       ,     una     ito     one      ."      to  French      \n   María   había     una   peque      uy  little      \n<|end_of_text|>   tenía    Mar

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 17
top 5 predictions:
Question     def      in       "       "       a    baby    lamb       "    into English      \n    Mary     had      un       c   order     ito     .\n    Mary    Mary     had       a
    0.31    0.29    0.01    0.10    0.18    0.13    0.06    0.63    0.54    0.50    0.25    0.38    0.62    0.60    0.79    0.75    0.88    0.51    0.51    0.20    0.54    0.17    0.36
     defQuestion       f    land     una      un      eb     boy       ,      in Spanish       :       "   tenía     una   peque     inc       o       .       "     had       "     had
    0.10    0.16    0.01    0.08    0.04    0.08    0.04    0.03    0.08    0.15    0.16    0.23    0.22    0.32    0.11    0.05    0.01    0.30    0.14    0.14    0.14    0.16    0.19
       #       #       d      am       a     una     ero     one      ."      to  French     :\n   María   había       a       o     azo     ita      \n<|end_of_text|>     had       

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 18
top 5 predictions:
Question     def       d       "       "       a    baby    lamb       "    into English      \n    Mary     had      un       c   order       j    illo     .\n       "     had      un
    0.31    0.30    0.01    0.16    0.18    0.11    0.08    0.69    0.58    0.51    0.27    0.46    0.61    0.64    0.77    0.67    0.99    0.63    0.42    0.66    0.18    0.37    0.44
     defQuestion      in    land       ,      un     ero     one       ,      to Spanish       :       "   tenía     una       o      ve      ja     ito       .    Mary   tenía       a
    0.10    0.15    0.01    0.07    0.04    0.07    0.03    0.03    0.07    0.10    0.14    0.18    0.23    0.29    0.11    0.09    0.00    0.32    0.25    0.14    0.11    0.15    0.26
       #       #       m      am     una     una   order     boy      ."      in  French       .   María   había       a   peque   order       c     ita      \n<|end_of_text|>   habí

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 19
top 5 predictions:
Question     def       c       "       "      un      un    lamb       "    into English      \n    Mary     had      un       c      ve     ito     .\n<|end_of_text|>     had      un       c
    0.31    0.30    0.01    0.17    0.18    0.11    0.09    0.71    0.59    0.48    0.30    0.42    0.64    0.77    0.79    0.68    0.59    0.28    0.64    0.12    0.44    0.41    0.33
     defQuestion       d    land       a       a    baby     one       ,      to Spanish       :       "   tenía     una       o   order     ita       .    Mary   tiene       a       o
    0.10    0.15    0.01    0.06    0.04    0.10    0.05    0.02    0.06    0.11    0.12    0.23    0.19    0.17    0.11    0.08    0.39    0.24    0.13    0.12    0.23    0.21    0.28
       #       #       B      am     una     una      uy       l      ."    from  French       .   María   había       a   peque     ove       o      \n       "   tenía     un

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 20
top 5 predictions:
Question     def       c       "       "      un      un    lamb       "    into English      \n    Mary     had      un       c   order       j    illo     .\n<|end_of_text|>    Mary     had
    0.31    0.29    0.01    0.16    0.19    0.12    0.08    0.76    0.63    0.45    0.34    0.44    0.67    0.79    0.79    0.70    0.57    0.78    0.38    0.65    0.15    0.67    0.76
     defQuestion       d    land       a     una   order       o       ,      to Spanish       :       "   tenía     una       o      ve      ja     ito       .       "    Mary   tiene
    0.10    0.15    0.01    0.03    0.04    0.08    0.04    0.02    0.06    0.13    0.13    0.28    0.18    0.15    0.10    0.06    0.41    0.18    0.34    0.15    0.13    0.02    0.03
       #       #       m      am     una       a      uy       "      ."    from  French       .   María   había       a   peque     ove      jo     ita      \n    Mary   Mari

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 21
top 5 predictions:
Question     def       c       "       "      un     uch    lamb       "    into English      \n    Mary     had      un       c   order      ja     ito     .\n<|end_of_text|>    Mary     had
    0.31    0.28    0.01    0.15    0.15    0.16    0.06    0.78    0.62    0.49    0.30    0.52    0.64    0.72    0.82    0.71    0.50    0.47    0.47    0.65    0.16    0.58    0.69
     defQuestion       m      am     una     una   uchar     boy       ,      to Spanish       :       "   tenía     una       o      ve       j     ita       .       "   Maria   tenía
    0.10    0.16    0.01    0.03    0.06    0.07    0.06    0.01    0.06    0.14    0.14    0.27    0.22    0.22    0.10    0.06    0.46    0.46    0.38    0.13    0.15    0.02    0.03
       #       #       f    land      un    hijo   order       "      ."    from       "       .   María   había       a   peque     ove       c    illo      \n    Mary     Ma

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 22
top 5 predictions:
Question     def       m       "       "      un     uch    lamb       "    into English      \n    Mary     had      un       c   order      ja     .\n       "    Mary     had       a
    0.31    0.26    0.01    0.10    0.11    0.16    0.06    0.79    0.60    0.46    0.35    0.54    0.61    0.88    0.80    0.68    0.53    0.34    0.60    0.18    0.30    0.69    0.62
     defQuestion       c    land     una     una      uy       o       ,      to Spanish       :       "   tenía     una       o      ve    illo       .<|end_of_text|>   Maria       "     had
    0.10    0.16    0.01    0.02    0.08    0.08    0.05    0.02    0.06    0.15    0.12    0.22    0.24    0.07    0.12    0.09    0.41    0.16    0.16    0.16    0.02    0.06    0.14
       #       #       f      am      un    hijo      un     lam      ."    from  French       .   María     Had       a     cab     ove       j   peque    Mary     Mar   tien

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 23
top 5 predictions:
Question     def       c       "       "      un     uch    lamb       "    into English      \n    Mary     had      un       c   order     ito     .\n<|end_of_text|>    Mary     had       a
    0.31    0.24    0.01    0.09    0.08    0.08    0.08    0.86    0.64    0.49    0.36    0.56    0.63    0.92    0.85    0.61    0.83    0.34    0.61    0.15    0.47    0.63    0.56
     defQuestion       m    land     una     una   actus     lam       ,      to Spanish       :       "   tenía     una       o      ve       o       .       "   Maria   tenía      un
    0.10    0.16    0.01    0.01    0.07    0.05    0.04    0.01    0.06    0.15    0.12    0.20    0.23    0.03    0.08    0.12    0.16    0.16    0.14    0.13    0.03    0.03    0.22
       #       #       f       s      un    hijo    anto     lap      ."    from       "       .   María     Had       a   peque   order    illo      \n    Mary     Mar       

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 24
top 5 predictions:
Question     def       f       "     una    hijo     uch    lamb       "    into English      \n    Mary     had      un       c      ve     ita     .\n<|end_of_text|>    Mary     had       a
    0.31    0.23    0.01    0.04    0.07    0.05    0.08    0.71    0.63    0.52    0.36    0.54    0.60    0.88    0.84    0.52    0.69    0.42    0.62    0.18    0.61    0.71    0.44
     defQuestion       c       s      un      un     ero       o       ,      to Spanish       :       "   tenía     una       o   order     ito       .       "    Mary   tenía      un
    0.10    0.16    0.01    0.01    0.07    0.04    0.06    0.10    0.06    0.17    0.11    0.19    0.24    0.07    0.09    0.15    0.30    0.23    0.16    0.10    0.03    0.03    0.44
       #       #       m       k       "     her   actus       "      ."    from       "       .   María   había       a   peque   order       o      \n    Mary   Maria   tien

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 25
top 5 predictions:
Question     def       f       "      un    hijo     uch    lamb       "    into English      \n    Mary     had      un       c      ve     ita     .\n<|end_of_text|>    Mary     had      un
    0.31    0.21    0.01    0.05    0.09    0.06    0.10    0.75    0.61    0.56    0.36    0.50    0.56    0.87    0.84    0.54    0.68    0.43    0.65    0.17    0.59    0.67    0.44
     defQuestion       c       s       "     her     ero       o       ,      to Spanish       :       "   tenía     una       o   order     ito       .       "    Mary   tenía       a
    0.10    0.16    0.01    0.01    0.07    0.05    0.05    0.10    0.07    0.14    0.11    0.24    0.27    0.07    0.10    0.16    0.31    0.18    0.14    0.11    0.05    0.05    0.43
       #       #       m       k     una      un     uel   llama     que    from       "     :\n   María   había       a   peque     ove       o      \n    Mary   Maria   tien

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 26
top 5 predictions:
Question     def       f       "       "    hijo     uch       o       "    into English      \n    Mary     had      un       c      ve     ita     .\n<|end_of_text|>    Mary     had       a
    0.31    0.19    0.01    0.06    0.08    0.05    0.16    0.60    0.61    0.54    0.36    0.54    0.58    0.88    0.81    0.55    0.76    0.39    0.64    0.22    0.59    0.65    0.50
     defQuestion       m       s      un     her     uel    lamb       ,      to Spanish       :       "   tenía     una       o   order     ito       .       "    Mary   tenía      un
    0.10    0.15    0.01    0.02    0.08    0.04    0.06    0.31    0.06    0.18    0.11    0.25    0.26    0.06    0.11    0.16    0.23    0.19    0.13    0.09    0.05    0.06    0.38
       #       #       c       k     una     per      uy   llama     que    from       "     :\n   María   había       a   peque     ove       o      \n English<|end_of_text|>

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 27
top 5 predictions:
Question     def       f       "      un     her     uch       o       "    into English      \n    Mary     had      un       c      ve     ita     .\n<|end_of_text|>    Mary     had      un
    0.31    0.17    0.01    0.05    0.08    0.05    0.12    0.92    0.62    0.50    0.40    0.58    0.53    0.64    0.83    0.53    0.84    0.42    0.64    0.28    0.59    0.61    0.57
     defQuestion       c       k       "     per     uel    lamb       ,      to Spanish       :       "   tenía     una       o   order     ito       .       "    Mary   tenía       a
    0.10    0.14    0.01    0.01    0.08    0.05    0.07    0.04    0.07    0.19    0.10    0.17    0.31    0.28    0.11    0.12    0.16    0.12    0.13    0.06    0.04    0.08    0.32
       #       #       m       s            hijo      uy     ito     que    from       "       (   María   tiene       a   peque     vey       o      de English<|end_of_text|>

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 28
top 5 predictions:
QuestionQuestion       f       "       "     her     uch       o       "    into English      \n    Mary     had      un       c      ve     ita     .\n<|end_of_text|><|begin_of_text|>     ://      \n
    0.31    0.12    0.01    0.02    0.08    0.06    0.12    0.94    0.65    0.39    0.41    0.63    0.56    0.78    0.84    0.53    0.94    0.49    0.62    0.23    0.24    0.49    0.03
     def     def       m       k      un     per     uel    lamb       ,      to Spanish       :       "   tenía     una       o   order     ito       . English       :  .swing       .
    0.10    0.11    0.01    0.02    0.07    0.05    0.06    0.03    0.06    0.26    0.10    0.13    0.30    0.15    0.08    0.15    0.05    0.16    0.15    0.07    0.17    0.04    0.03
       #       #       b       s            hijo  little     ito     que    from  French       .   María     Had       a   peque     vin       o      \n       " trans

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 29
top 5 predictions:
QuestionQuestion       m       "               a     uch       o       "    into English      \n    Mary     had      un       c      ve     ita     .\n<|end_of_text|>    Mary     had      un
    0.31    0.07    0.01    0.02    0.08    0.07    0.07    0.76    0.67    0.41    0.36    0.59    0.54    0.82    0.82    0.46    0.92    0.52    0.62    0.11    0.41    0.65    0.51
     def     def       c       s      un     her  little    lamb       ,      to Spanish       :       "   tenía     una       o   order     ito       .       "    Mary   tenía       a
    0.10    0.05    0.01    0.01    0.08    0.07    0.06    0.17    0.06    0.25    0.11    0.11    0.32    0.11    0.08    0.17    0.08    0.10    0.14    0.07    0.06    0.07    0.39
       #       #       b       k       "     per     uel     ito     que    from       "       .   Maria     Had       a   peque     aca       o      \n    Mary<|end_of_text|>

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 30
top 5 predictions:
Question     ://       c       s       "       a  little       o       "    into English      \n    Mary     had      un       c      ña     ita       "   tenía      un       c      ña
    0.31    0.12    0.01    0.02    0.08    0.19    0.25    0.75    0.64    0.43    0.38    0.58    0.62    0.77    0.85    0.41    0.85    0.38    0.09    0.63    0.62    0.25    0.76
     def     php       B       k             her    orte    lamb       ,      to Spanish       :       "   tenía     una   peque       ñ     .\n    Mary   tiene     una   peque       ñ
    0.10    0.07    0.01    0.02    0.08    0.05    0.03    0.14    0.10    0.23    0.11    0.13    0.25    0.15    0.08    0.16    0.09    0.03    0.07    0.09    0.21    0.22    0.22
       #Question       m    land      un     per  Little     ito     que    from  French       (   Maria   tiene       a       o      ño      ín      de     had       a       o      

  0%|          | 0/10 [00:00<?, ?it/s]

----------------------------------------
Merge at layer 31
top 5 predictions:
Question     php       S       s               a  little       o       "    into English      \n    Mary     had      un       c      ña     ita       "    into English     had       a
    0.31    0.02    0.01    0.02    0.13    0.35    0.23    0.56    0.62    0.40    0.38    0.60    0.58    0.84    0.77    0.37    0.87    0.10    0.11    0.12    0.08    0.17    0.12
     def     The       B       k       "     her    baby    lamb       ,      to Spanish       :       "   tenía     una   peque       ñ     .\n       .       "    Mary       :      un
    0.10    0.01    0.01    0.01    0.09    0.07    0.11    0.14    0.10    0.23    0.11    0.10    0.28    0.08    0.10    0.20    0.05    0.05    0.03    0.06    0.05    0.13    0.05
       #Question       M       a      un     per     uch       "       .    from  French       (   Maria     Had       a     cab      ño      de    Mary Translate       "   tenía    