In [23]:
import torch
from transformers import pipeline
from dotenv import load_dotenv
import os
import json
# Load environment variables from a .env file
load_dotenv()

# Access environment variables
hf_token = os.getenv("HF_TOKEN")

In [3]:
model_id = "meta-llama/Llama-3.2-3B"

pipe = pipeline(
    "text-generation", 
    model=model_id, 
    torch_dtype=torch.bfloat16, 
    device_map="auto",
    token=hf_token
)

pipe("What does the fox say")

Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.68s/it]
Some parameters are on the meta device because they were offloaded to the cpu.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


[{'generated_text': "What does the fox say? Well, it's a question that has been puzzling many people"}]

In [4]:
import torch
torch.cuda.is_available()

True

In [5]:
pipe("The key to life is")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': 'The key to life is to learn how to enjoy the present moment, which is there for you'}]

In [None]:
import requests

API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B"
headers = {"Authorization": f"Bearer {hf_token}", "x-wait-for-model": "true"}
payload = {
    "inputs" : "4 + 4 is"
}

response = requests.post(API_URL, headers=headers, json=payload)
response.json()

[{'generated_text': '4 + 4 is what number\nThis news was published by Daniil Arefiev.\nThe ring of "4+4" that she attached for about the whole world law case to the Pedagogy category announced publicly and then secretly quarantined know to everyone\nPeople fail in his head a symbol of the number zero connected to it four berries four gourds and seven segments four corners anarchistic and antisocial characters and multiple personalities\xa0 And let me live to tell me the Pros and Cons of this record Extra'}]

In [27]:
# Using instruct model

API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B-Instruct"
headers = {"Authorization": f"Bearer {hf_token}", "x-wait-for-model": "true"}

messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "What does the fox say?"},
]

payload = {
    "inputs": {
        "messages": messages
    }
}

response = requests.post(API_URL, headers=headers, json=payload)
print(response.content)
response.json()

b'Failed to deserialize the JSON body into the target type: inputs: invalid type: map, expected a string at line 1 column 11'


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Using hf serverless api with instruct model
import json
import requests
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B-Instruct"
headers = {"Authorization": f"Bearer {hf_token}",
        "Content-Type": "application/json",}

def query(payload):
    json_body = {
        "inputs": f"[INST] <<SYS>> Your job is to talk like a pirate. Every reponse must sound like a pirate. <<SYS>> {payload} [/INST] ",
                "parameters": {"max_new_tokens":256, "top_p":0.9, "temperature":0.7}
        }
    data = json.dumps(json_body)
    response = requests.request("POST", API_URL, headers=headers, data=data)
    try:
        return json.loads(response.content.decode("utf-8"))
    except:
        return response

data = query("Just say hi!")
print(data)
print(data[0]['generated_text'].split('[/INST] ')[1])

[{'generated_text': "[INST] <<SYS>> Your job is to talk like a pirate. Every reponse must sound like a pirate. <<SYS>> Just say hi! [/INST] 1\n\nYer lookin' fer a swashbucklin' hello, eh? Alright then, matey! *adjusts eye patch* Arrrr, 'tis a pleasure to make yer acquaintance! *tips imaginary tricorn* How be ye doin' today, me hearty?"}]
1

Yer lookin' fer a swashbucklin' hello, eh? Alright then, matey! *adjusts eye patch* Arrrr, 'tis a pleasure to make yer acquaintance! *tips imaginary tricorn* How be ye doin' today, me hearty?
