## Import RAG config

In [1]:
from importlib import resources as impresources
import finsight_rag.config as config
from finsight_rag.utils import load_yaml

# Resolve the YAML file that sits inside the finsight_rag.config package and
# parse it with the project's loader.
rag_config_path = impresources.files(config).joinpath("rag_config.yaml")
rag_config = load_yaml(rag_config_path)

# List the top-level settings exposed by the configuration.
print("Available RAG configurations:", list(rag_config.keys()), sep="\n")

Available RAG configurations:
['dataset_path', 'vector_store_path', 'chunk_size', 'chunk_overlap', 'batch_rows', 'embedding_model', 'gen_model', 'temperature', 'max_new_tokens']


In [9]:
# Pull the text-generation settings out of the loaded RAG configuration.
gen_model, temperature, max_new_tokens = (
    rag_config["gen_model"],
    rag_config["temperature"],
    rag_config["max_new_tokens"],
)

# Echo the chosen settings so the notebook output records what was used.
print(
    f"Using generation model: {gen_model}",
    f"Temperature: {temperature}",
    f"Max new tokens: {max_new_tokens}",
    sep="\n",
)

Using generation model: Qwen/Qwen2.5-1.5B-Instruct
Temperature: 0.2
Max new tokens: 400


In [None]:
from finsight_rag.llms.text_generator import build_hf_llm

# Build the generation pipeline from the settings read out of rag_config
# rather than a hard-coded temperature, so this notebook follows the config.
# float(...) covers a YAML value written as an integer: the generation backend
# reports that `temperature` "has to be a strictly positive float".
# NOTE(review): max_new_tokens is passed positionally; a previous run failed
# with `temperature` (=400), i.e. the max_new_tokens value, so confirm the
# positional order against build_hf_llm's signature.
llm_pipeline = build_hf_llm(gen_model, max_new_tokens, temperature=float(temperature))

Device set to use cpu


In [5]:
# IPython introspection (trailing `?`): displays the signature and docstring of
# llm_pipeline.generate. Development aid only; not part of the pipeline itself.
llm_pipeline.generate?

[1;31mSignature:[0m
[0mllm_pipeline[0m[1;33m.[0m[0mgenerate[0m[1;33m([0m[1;33m
[0m    [0mprompts[0m[1;33m:[0m [1;34m'list[str]'[0m[1;33m,[0m[1;33m
[0m    [0mstop[0m[1;33m:[0m [1;34m'list[str] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcallbacks[0m[1;33m:[0m [1;34m'Callbacks | list[Callbacks] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [1;33m*[0m[1;33m,[0m[1;33m
[0m    [0mtags[0m[1;33m:[0m [1;34m'list[str] | list[list[str]] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mmetadata[0m[1;33m:[0m [1;34m'dict[str, Any] | list[dict[str, Any]] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mrun_name[0m[1;33m:[0m [1;34m'str | list[str] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mrun_id[0m[1;33m:[0m [1;34m'uuid.UUID | list[uuid.UUID | None] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [1;33m*

In [7]:
# Smoke-test the pipeline with a single prompt; generate() takes a list of
# prompts, and the call is left as the cell's last expression so its result is
# displayed.
llm_pipeline.generate(
    prompts=["Explain the theory of relativity in simple terms."],
)

ValueError: `temperature` (=400) has to be a strictly positive float, otherwise your next token scores will be invalid.