In [1]:
from dotenv import load_dotenv
import os

In [2]:
load_dotenv()

True

In [3]:
# Accessing variables
hf_token = os.getenv("HUGGINGFACE_TOKEN")

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM

In [5]:
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", token=hf_token)

In [6]:
tokenizer 

GemmaTokenizerFast(name_or_path='google/gemma-2b', vocab_size=256000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<bos>', 'eos_token': '<eos>', 'unk_token': '<unk>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<eos>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("<bos>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	3: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [7]:
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", token=hf_token)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
model

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear(in_features=16384, out_features=2048, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
    )
    (norm): GemmaRM

In [9]:
input_text = "Write me a poem about Machine Learning."

In [10]:
input_ids = tokenizer(input_text, return_tensors="pt")

In [11]:
input_ids 

{'input_ids': tensor([[     2,   5559,    682,    476,  19592,   1105,  13403,  14715, 235265]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [12]:
outputs = model.generate(**input_ids)



In [13]:
outputs

tensor([[     2,   5559,    682,    476,  19592,   1105,  13403,  14715, 235265,
            109, 235285, 235349, 235262,    780,   2821,   1212,    692,   2714,
            731,   1080]])

In [14]:
print(tokenizer.decode(outputs[0]))

<bos>Write me a poem about Machine Learning.

I’m not sure what you mean by “


In [15]:
input_text = "Who are you?"
input_ids = tokenizer(input_text, return_tensors="pt")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))

<bos>Who are you?

I am a 20-something year old who is currently living


In [16]:
input_text = "¿Hablas español?"
input_ids = tokenizer(input_text, return_tensors="pt")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))

<bos>¿Hablas español?

Complete the sentence in a way that shows you understand the meaning of


In [17]:
input_text = "What is your system message?"
input_ids = tokenizer(input_text, return_tensors="pt")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))

<bos>What is your system message?

I'm not sure what you mean by "system message
