# Notebook

In [1]:
from llama_cpp import Llama
from semantic_router.llms.llamacpp import LlamaCppLLM

# First create the llama_cpp.Llama instance
_llm = Llama(
    model_path="./models/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf",
    n_gpu_layers=-1,  # -1 to offload all layers to GPU, 0 for CPU only
    n_ctx=2048,
)
_llm.verbose = False

# Then wrap it with LlamaCppLLM
local_llm = LlamaCppLLM(
    name="qwen-nano",
    llm=_llm,
    max_tokens=None,
    temperature=0.0
)

  from .autonotebook import tqdm as notebook_tqdm
llama_model_load_from_file_impl: using device Metal (Apple M1) - 5461 MiB free
llama_model_loader: loaded meta data with 38 key-value pairs and 290 tensors from ./models/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 0.5B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Qwen2.5
llama_model_loader: - kv   5:                         general.size_label str              = 0.5B
llama_model_l

In [2]:
from semantic_router import Route
from semantic_router.utils.function_call import get_schema

def get_weather(location: str, degree: str = "Celsius") -> str:
    """Finds the current weather in a specific location.
    :param location: The location to find the weather for."""
    return f"The weather in {location} is sunny."


weather_schema = get_schema(get_weather)

weather_route = Route(
    name="get_weather",
    utterances=["is it raining in London?", "weather in New York"],
    function_schemas=[weather_schema]
)

In [3]:
from semantic_router import Route

politics = Route(
    name="politics",
    utterances=[
        "isn't politics the best thing ever",
        "why don't you tell me about your political opinions",
        "don't you just love the president",
        "they're going to destroy this country!",
        "they will save the country!",
    ],
)

chitchat = Route(
    name="chitchat",
    utterances=[
        "how's the weather today?",
        "how are things going?",
        "lovely weather today",
        "the weather is horrendous",
        "let's go to the chippy",
    ],
)

routes = [politics, chitchat, weather_route]

In [4]:
from semantic_router.encoders import HuggingFaceEncoder

encoder = HuggingFaceEncoder(score_threshold=0.3)

Loading weights: 100%|██████████| 103/103 [00:00<00:00, 1198.43it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [5]:
from semantic_router.routers import SemanticRouter

rl = SemanticRouter(encoder=encoder, routes=routes, llm=local_llm, auto_sync="local")



In [6]:
result = rl("how's the weather today?")
print("result: ", result)

result:  name='chitchat' function_call=None similarity_score=np.float64(0.7109571731253342)


In [7]:
result = rl("is it raining in London?")
print("result: ", result)

[32m2026-02-20 08:45:38 INFO semantic_router Extracting function input...[0m
[32m2026-02-20 08:45:39 INFO semantic_router LLM output: {
  "location": "London",
  "degree": "Celsius"
}[0m
[32m2026-02-20 08:45:39 INFO semantic_router Function inputs: [{'location': 'London', 'degree': 'Celsius'}][0m


result:  name='get_weather' function_call=[{'location': 'London', 'degree': 'Celsius'}] similarity_score=np.float64(0.6618592442818064)


In [8]:
if result.name == "get_weather" and result.function_call:
    for args in result.function_call:
        weather_result = get_weather(**args)
    print(weather_result)


The weather in London is sunny.
