In [None]:
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
import torch
from transformers import set_seed
import warnings
warnings.filterwarnings("ignore")

In [None]:
prompt = "It was a dark and stormy"
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B")
input_ids = tokenizer(prompt).input_ids
input_ids

[2132, 572, 264, 6319, 323, 13458, 88]

In [None]:
for t in input_ids:
    print(t, "\t:", tokenizer.decode(t))

2132 	: It
572 	:  was
264 	:  a
6319 	:  dark
323 	:  and
13458 	:  storm
88 	: y


# Notes:


*  The SmolLM tokenizer does not split any of the words in this particular sentence.
*   Each model is usually paired with its own tokenizer, so always use the proper tokenizer when using a model.




In [None]:
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B")

In [None]:
prompt

'It was a dark and stormy'

In [None]:
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

outputs = model(input_ids)
outputs.logits.shape  # An output for each input token

torch.Size([1, 7, 151936])

In [None]:
input_ids

tensor([[ 2132,   572,   264,  6319,   323, 13458,    88]])

In [None]:
outputs

CausalLMOutputWithPast(loss=None, logits=tensor([[[ 9.0283,  6.4773,  3.8585,  ..., -4.3873, -4.3869, -4.3874],
         [ 5.0396,  5.2072,  1.8779,  ..., -5.4419, -5.4425, -5.4426],
         [ 2.7506,  4.8974, -0.5766,  ..., -5.5188, -5.5188, -5.5189],
         ...,
         [ 4.2731,  5.7324, -0.9108,  ..., -5.5750, -5.5747, -5.5756],
         [ 4.7102,  3.7237, -0.0393,  ..., -5.8334, -5.8342, -5.8342],
         [ 3.5512,  4.9842, -1.4630,  ..., -7.0129, -7.0131, -7.0135]]],
       grad_fn=<UnsafeViewBackward0>), past_key_values=DynamicCache(), hidden_states=None, attentions=None)

### Test codes

In [None]:
outputs[0][0][0].tolist()

[9.028345108032227,
 6.477273941040039,
 3.858492136001587,
 2.2972984313964844,
 3.940443754196167,
 5.550770282745361,
 10.488805770874023,
 6.156424522399902,
 4.274324893951416,
 5.192872524261475,
 3.478309154510498,
 12.458261489868164,
 6.421591758728027,
 10.958616256713867,
 7.097062110900879,
 2.565098762512207,
 2.992243528366089,
 3.786592960357666,
 3.780489683151245,
 3.5182485580444336,
 2.771289825439453,
 2.238226890563965,
 3.1506526470184326,
 2.589693546295166,
 2.0048229694366455,
 10.963839530944824,
 5.233855247497559,
 3.200895309448242,
 5.203847885131836,
 1.3059693574905396,
 7.53809118270874,
 3.2441041469573975,
 3.265144109725952,
 3.3818717002868652,
 3.724749803543091,
 2.4965572357177734,
 2.7180697917938232,
 4.411056041717529,
 4.604232311248779,
 4.566775321960449,
 2.7237627506256104,
 4.985958099365234,
 3.8903589248657227,
 2.610323667526245,
 3.685128688812256,
 3.4073660373687744,
 1.1064457893371582,
 4.70724630355835,
 2.8509202003479004,
 4.9

In [None]:
outputs[0][0][6]

tensor([ 3.5512,  4.9842, -1.4630,  ..., -7.0129, -7.0131, -7.0135],
       grad_fn=<SelectBackward0>)

# Scores

In [None]:
final_logits = model(input_ids).logits[0, -1]  # The last set of logits
final_logits.argmax()  # The position of the maximum

tensor(3729)

In [None]:
tokenizer.decode(final_logits.argmax())

' night'

In [None]:
top10_logits = torch.topk(final_logits, 10)
for index in top10_logits.indices:
    print(tokenizer.decode(index))

 night
 evening
 day
 morning
 winter
 afternoon
 Saturday
 Sunday
 Friday
 October


In [None]:
top10 = torch.topk(final_logits.softmax(dim=0), 10)
for value, index in zip(top10.values, top10.indices):
    print(f"{tokenizer.decode(index):<10} {value.item():.2%}")

 night     88.71%
 evening   4.30%
 day       2.19%
 morning   0.49%
 winter    0.45%
 afternoon 0.27%
 Saturday  0.25%
 Sunday    0.19%
 Friday    0.17%
 October   0.16%


In [None]:
output_ids = model.generate(input_ids, max_new_tokens=20)
decoded_text = tokenizer.decode(output_ids[0])

print("Input IDs", input_ids[0])
print("Output IDs", output_ids)
print(f"Generated text: {decoded_text}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Input IDs tensor([ 2132,   572,   264,  6319,   323, 13458,    88])
Output IDs tensor([[ 2132,   572,   264,  6319,   323, 13458,    88,  3729,    13,   576,
         12884,   572,  6319,   323,   279,  9956,   572,  1246,  2718,    13,
           576, 11174,   572, 50413,  1495,   323,   279]])
Generated text: It was a dark and stormy night. The sky was dark and the wind was howling. The rain was pouring down and the


In [None]:
beam_output = model.generate(
    input_ids,
    num_beams=5,
    max_new_tokens=30,
)

print(tokenizer.decode(beam_output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


It was a dark and stormy night. The wind was howling, and the rain was pouring down. The sky was dark and gloomy, and the air was filled with the


## repetition_penalty

*   How much to penalize already generated tokens, avoiding repetition.
*   A good default value is 1.2.

## bad_words_ids

*  A list of tokens that should not be generated (e.g., to avoid generating offensive words).




In [None]:
beam_output = model.generate(
    input_ids,
    num_beams=5,
    repetition_penalty=2.0,
    max_new_tokens=38,
)

print(tokenizer.decode(beam_output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


It was a dark and stormy night. The sky was filled with thunder and lightning, and the wind howled in the distance. It was raining cats and dogs, and the streets were covered in puddles of water.


#Sampling

In [None]:
# Setting the seed ensures we get the same results every time we run this code
set_seed(70)

sampling_output = model.generate(
    input_ids,
    do_sample=True,
    max_new_tokens=34,
    top_k=0,  # We'll come back to this parameter
)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
tokenizer.decode(sampling_output[0])

'It was a dark and stormy night. Kevin said he was going to stay up all night, staring at the cloudless stars, wondering, what if I lost my dream.He’d been teasing her about'

In [None]:
sampling_output = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.4,
    max_new_tokens=40,
    top_k=0,
)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
tokenizer.decode(sampling_output[0])

'It was a dark and stormy night. The sky was dark and the wind howled. The rain wasURNING and the thunder was thundering. The thunder was so loud that the entire town could hear\\xbseveryone was'

In [None]:
sampling_output = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.001,
    max_new_tokens=40,
    top_k=0,
)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
tokenizer.decode(sampling_output[0])

'It was a dark and stormy night. The sky was dark and the wind was howling. The rain was pouring down and the lightning was flashing. The sky was dark and the wind was howling. The rain was pouring down'

In [None]:
sampling_output = model.generate(
    input_ids,
    do_sample=True,
    temperature=3.0,
    max_new_tokens=40,
    top_k=0,
)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
tokenizer.decode(sampling_output[0])

'It was a dark and stormy firearms window stat attack‡ ment…..maker muy Trades大户们都whereDEVICE suprem knobs Cove kinase menu Breitbart Estadopr�자."),InputStream\\Validator纼\\ValidatorAlert📞大赛/dockerLLLL Suk referring_tracks LOGIN knowtypenamehowever'

In [None]:
sampling_output = model.generate(
    input_ids,
    do_sample=True,
    max_new_tokens=40,
    top_k=5,
)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
tokenizer.decode(sampling_output[0])

'It was a dark and stormy night. The rain was pouring down and the lightning flashed across the sky, creating a dark, ominous atmosphere. It was the night of Halloween, and the streets were filled with children dressed in costumes,'

In [None]:
sampling_output = model.generate(
    input_ids,
    do_sample=True,
    max_new_tokens=40,
    top_p=0.94,
    top_k=0,
)

print(tokenizer.decode(sampling_output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


It was a dark and stormy night and on top of all the turbulence the storm had caused, the weathercock had struck the flyspeck, but as the storm subsided and the aircraft stabilized, the airplane is once again in


#Zero Shot

In [None]:
tokenizer.encode(" positive"), tokenizer.encode(" negative")

([6785], [8225])

In [None]:
def score(review):
    """Predict whether it is positive or negative

    This function predicts whether a review is positive or negative
    using a bit of clever prompting. It looks at the logits for the
    tokens ' positive' and ' negative', and returns the label
    with the highest score.
    """
    prompt = f"""Question: Is the following review positive or negative about the movie?
    Review: {review} Answer:"""

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    final_logits = model(input_ids).logits[0, -1]
    if final_logits[6785] > final_logits[8225]:
        print("Positive")
    else:
        print("Negative")

In [None]:
score("This movie was terrible!")

Negative


In [70]:
score("That movie was great!")

Negative


In [None]:
score("A complex yet wonderful film about the depravity of man")  # A mistake

Negative


In [None]:
prompt = """\
Translate English to Spanish:

English: I do not speak Spanish.
Spanish: No hablo español.

English: See you later!
Spanish: ¡Hasta luego!

English: Where is a good restaurant?
Spanish: ¿Dónde hay un buen restaurante?

English: What rooms do you have available?
Spanish: ¿Qué habitaciones tiene disponibles?

English: I like soccer
Spanish:"""
inputs = tokenizer(prompt, return_tensors="pt").input_ids
output = model.generate(
    inputs,
    max_new_tokens=10,
)

print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Translate English to Spanish:

English: I do not speak Spanish.
Spanish: No hablo español.

English: See you later!
Spanish: ¡Hasta luego!

English: Where is a good restaurant?
Spanish: ¿Dónde hay un buen restaurante?

English: What rooms do you have available?
Spanish: ¿Qué habitaciones tiene disponibles?

English: I like soccer
Spanish: Me gusta el fútbol

English:
