In [1]:
import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Checkpoint to load from the Hugging Face Hub.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Quantize the weights to 4 bits at load time; bfloat16 is the compute dtype
# handed to bitsandbytes.
# Source: https://huggingface.co/blog/4bit-transformers-bitsandbytes
quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

# The tokenizer and the model are both resolved from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [2]:
# Wrap the model and tokenizer loaded in the previous cell in a
# text-generation pipeline.
# NOTE(review): `model` is passed as an already-instantiated object, so
# `torch_dtype` and `device_map` may have no effect here (they look like
# load-time options) -- confirm against the transformers pipeline docs.
pipeline = transformers.pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    device_map="auto",
    torch_dtype=torch.float16,
)

In [3]:
def query_llama(prompt, max_length=400, top_k=10):
    """Sample one continuation of ``prompt`` and print it.

    The prompt, with a newline appended, is sent to the module-level
    ``pipeline``; generation stops at the tokenizer's EOS token or at the
    ``max_length`` cap, whichever comes first.

    Args:
        prompt: Text to feed to the pipeline.
        max_length: Value forwarded to the pipeline as ``max_length``
            (default 400, the previously hard-coded value).
        top_k: Value forwarded to the pipeline as ``top_k`` for sampling
            (default 10, the previously hard-coded value).

    Returns:
        None. Every returned sequence is printed with a ``Result: `` prefix.
    """
    # NOTE(review): `model_id` names an "Instruct" checkpoint, but the prompt
    # is passed as raw text rather than through the tokenizer's chat template
    # -- confirm that free-form continuation is the intended behaviour.
    sequences = pipeline(
        f'{prompt}\n',
        do_sample=True,
        top_k=top_k,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # Supplying the pad token explicitly avoids the per-call
        # "Setting `pad_token_id` to `eos_token_id`" notice; the value is the
        # same one generation would otherwise fall back to.
        pad_token_id=tokenizer.eos_token_id,
        truncation=True,
        max_length=max_length,
    )

    for seq in sequences:
        print(f"Result: {seq['generated_text']}")

In [4]:
# Quick smoke test: send a short prompt and print what the model generates.
query_llama(prompt="Hello?")

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Result: Hello?
Hello?
Hello?
I'm not sure what I'm looking for, but I know I'll know it when I see it.
I'm not sure what I'm searching for, but I know I'll know it when I find it.
I'm not sure what I'm seeking, but I know I'll know it when I see it.
The truth is, I'm not even sure what I'm doing here.
I'm just wandering, aimlessly, through the digital halls of the internet.
I'm just browsing, aimlessly, through the endless streams of information.
I'm just searching, aimlessly, for something, anything, that will spark my interest.
And then, suddenly, I stumble upon something.
A word, a phrase, a sentence, a paragraph.
A spark of interest, a glimmer of curiosity.
And I'm hooked.
I'm drawn in, like a magnet to metal.
I'm consumed, like a fire consumes wood.
I'm lost, like a ship without a compass.
But I'm not lost, because I've found something.
Something that resonates, something that speaks to me.
Something that makes me feel, something that makes me think.
And I'm not alone, because I'v