In [None]:
from vllm import LLM, SamplingParams

Hugging Face permissions

In [None]:
from huggingface_hub import notebook_login, HfApi

# Authenticate this notebook session with the Hugging Face Hub.
# NOTE(review): presumably required for the gated Llama checkpoint used in the chat section — confirm.
notebook_login()

In [None]:
# Sanity-check the login by asking the Hub which account is currently authenticated.
try:
    api = HfApi()
    user_info = api.whoami()
    print(f"Token validated successfully! Logged in as: {user_info['name']}")
except Exception as e:
    # Best-effort check: report the failure and let the notebook keep running.
    print(f"Token validation failed. Error: {e}")

Cleanup utility

In [None]:
import gc
import ray
import torch
# Drop the notebook's references to the loaded model so its memory can be reclaimed
# before another section creates a new engine.
#
# Each name is removed independently and a missing name is tolerated: previously
# `del tokenizer` (a name no visible cell defines) always raised NameError and
# printed a misleading "Failed to unload model" message, and a missing `llm`
# aborted the block before `tokenizer` was even attempted.
try:
    globals().pop("llm", None)
    globals().pop("tokenizer", None)
except Exception as e:
    print(f"Failed to unload model: {e}")
finally:
    # Always run the collectors/shutdown, even if dropping the references failed.
    gc.collect()
    torch.cuda.empty_cache()
    ray.shutdown()

#### generate

In [None]:
# Prompts to complete; one result per prompt is printed below.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Sampling settings handed to generate().
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Small model used for the plain text-completion demo.
llm = LLM(model="facebook/opt-125m")

outputs = llm.generate(prompts, sampling_params)

# Show each prompt next to its first completion.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    # Fixed typo in the label ("Geneareted" -> "Generated").
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")

#### chat

In [None]:
# Rebind `llm` to the Llama 3.1 8B Instruct checkpoint used by the chat cells below.
llm = LLM(model="meta-llama/Meta-Llama-3.1-8B-Instruct")

# Rebind `sampling_params` to the defaults reported by this engine instance.
# NOTE(review): presumably derived from the model's own generation config — confirm in the vLLM docs.
sampling_params = llm.get_default_sampling_params()

In [None]:
def print_outputs(outputs):
    """Print the prompt and first completion of every result, then a divider.

    Args:
        outputs: Iterable of result objects exposing ``.prompt`` and
            ``.outputs[0].text``.

    Returns:
        None. Everything is written to standard output; an 80-character
        dashed line is printed once after all results.
    """
    for item in outputs:
        prompt, generated_text = item.prompt, item.outputs[0].text
        print(f"Prompt: {prompt!r}",
              f"Generated text: {generated_text!r}",
              sep="\n")
    print("-" * 80)

# Multi-turn history that ends on a user request for the model to answer.
conversation = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hello! How can I assist you today?"},
    {
        "role": "user",
        "content": "Write an essay about the importance of higher education.",
    },
]

# Single conversation, progress bar disabled.
outputs = llm.chat(conversation, sampling_params, use_tqdm=False)
print_outputs(outputs)

# Batch: the same conversation object submitted ten times in one call.
conversations = [conversation] * 10

outputs = llm.chat(conversations, sampling_params, use_tqdm=True)
print_outputs(outputs)

#### classify

In [None]:
# Rebind `llm` to a model loaded for the "classify" task.
# NOTE(review): enforce_eager=True presumably skips CUDA-graph capture — confirm in the vLLM docs.
llm = LLM(
    model="jason9693/Qwen2.5-1.5B-apeach",
    task="classify",
    enforce_eager=True,
)

In [None]:
# Inputs to classify; they are paired positionally with the results below.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

outputs = llm.classify(prompts)

# Print the class probabilities per prompt, abbreviating vectors longer than 16 entries.
for prompt, output in zip(prompts, outputs):
    probs = output.outputs.probs
    if len(probs) > 16:
        # Render the first 16 items, drop the closing bracket, and append an ellipsis.
        probs_trimmed = str(probs[:16])[:-1] + ", ...]"
    else:
        probs_trimmed = probs
    print(f"Prompt: {prompt!r} | "
          f"Class Probabilities: {probs_trimmed} (size={len(probs)})")

#### score

In [None]:
# Rebind `llm` to the BGE reranker, loaded for the "score" task.
# NOTE(review): enforce_eager=True presumably skips CUDA-graph capture — confirm in the vLLM docs.
llm = LLM(
    model="BAAI/bge-reranker-v2-m3",
    task="score",
    enforce_eager=True,
)

In [None]:
# Query and the candidate passages to score against it.
text_1 = "What is the capital of France?"
texts_2 = [
    "The capital of Brazil is Brasilia.", "The capital of France is Paris."
]

# Score via `llm`, the engine created in the preceding cell. The original called
# `model.score(...)`, but no `model` name exists in this notebook (NameError).
outputs = llm.score(text_1, texts_2)

# Report one score per (query, candidate) pair.
for text_2, output in zip(texts_2, outputs):
    score = output.outputs.score
    print(f"Pair: {[text_1, text_2]!r} | Score: {score}")