In [None]:
!pip install llama-cpp-python huggingface_hub

In [None]:
# !pip install llama-cpp-python

from llama_cpp import Llama

# Load the 4-bit (Q4_K_M) GGUF build of the letter-counting Gemma-3 4B model
# from the named repository; `llm` is the shared model handle used by the
# cells below.
# NOTE(review): no context size is passed here, so the library default applies
# -- confirm it is large enough for the response lengths requested later.
llm = Llama.from_pretrained(
    repo_id="Gholamreza/gemma3-4b-letter-count-gguf",
    filename="gemma-3-4b-it.Q4_K_M.gguf",
)


In [None]:
def inference(
    word: str,
    letter: str,
    *,
    max_tokens: int = 512,
    temperature: float = 1.0,
    top_p: float = 0.95,
    top_k: int = 64,
    model=None,
) -> str:
    """Ask the model how many times `letter` appears in `word`.

    Args:
        word: The word to inspect; inserted verbatim into the prompt.
        letter: The letter to count; inserted verbatim into the prompt.
        max_tokens: Upper bound on generated tokens passed to the model.
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling threshold passed to the model.
        top_k: Top-k sampling cutoff passed to the model.
        model: Object exposing ``create_chat_completion``. When ``None``
            (the default) the notebook-level ``llm`` is used.

    Returns:
        The content of the assistant message from the first returned choice.
    """
    prompt = f'How many times does the letter "{letter}" appear in "{word}"?'

    # Single user turn for the chat-completion API (Gemma-3 chat format).
    messages = [{"role": "user", "content": prompt}]

    # Resolve the global lazily, at call time, so re-running the model-loading
    # cell is picked up and callers can substitute another model.
    chat_model = llm if model is None else model

    response = chat_model.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )

    return response["choices"][0]["message"]["content"]

# (word, letter) pairs to run through the model, one query per pair.
test_cases = [
    ("strawberry", "r"),
    ("banana", "a"),
    ("Mississippi", "s"),
    ("independently", "e"),
    ("Gholamzaradar", "a"),
]

for word, letter in test_cases:
    # Print a 60-character rule and the question before the model call, so
    # the header is visible while the response is still being generated.
    print("=" * 60)
    print(f"Q: How many '{letter}' in '{word}'?")
    answer = inference(word, letter)
    print(f"\n{answer}\n")

Q: How many 'r' in 'strawberry'?


llama_perf_context_print:        load time =    5298.44 ms
llama_perf_context_print: prompt eval time =    5298.00 ms /    23 tokens (  230.35 ms per token,     4.34 tokens per second)
llama_perf_context_print:        eval time =   88670.36 ms /   210 runs   (  422.24 ms per token,     2.37 tokens per second)
llama_perf_context_print:       total time =   94622.68 ms /   233 tokens
llama_perf_context_print:    graphs reused =        202
Llama.generate: 11 prefix-match hit, remaining 12 prompt tokens to eval



Step 1 — Spell it out: s-t-r-a-w-b-e-r-r-y

Step 2 — Check each letter:
- Position 1: s → not r (count = 0)
- Position 2: t → not r (count = 0)
- Position 3: r → MATCH (count = 1)
- Position 4: a → not r (count = 1)
- Position 5: w → not r (count = 1)
- Position 6: b → not r (count = 1)
- Position 7: e → not r (count = 1)
- Position 8: r → MATCH (count = 2)
- Position 9: r → MATCH (count = 3)
- Position 10: y → not r (count = 3)

**Answer — "r" appears 3 times in "strawberry".**

Q: How many 'a' in 'banana'?


KeyboardInterrupt: 