# Llama.cpp python inference
https://llama-cpp-python.readthedocs.io/en/latest/

Inference over our trained ascii adapters

In [1]:
from llama_cpp import Llama

In [None]:
# local paths to the gguf base model and the lora adapter for generating ascii art
# get ascii art lora gguf from
# get llama 3.2 base gguf from 
# store them locally and point to them here
lora_path = "update the path"
base_model_path = "update the path"


llm = Llama(model_path=base_model_path, lora_path=lora_path, verbose=False, n_ctx=400)

llama_init_from_model: n_ctx_per_seq (416) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4                (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_bf16                  (not supported)
ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_mul_mm_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mm_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h80           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_b

In [3]:
def generate_ascii_art(max_tokens: int, generation_config) -> str:
    prompt = ""
    for chunk in llm.create_completion(
        prompt, 
        max_tokens=max_tokens, 
        stream=True, 
        temperature=generation_config["temperature"], 
        top_p=generation_config["top_p"], 
        min_p=generation_config["min_p"], 
        frequency_penalty=generation_config["frequency_penalty"], 
        presence_penalty=generation_config["presence_penalty"], 
        repeat_penalty=generation_config["repeat_penalty"], 
        top_k=generation_config["top_k"]
    ):
        chunk_text = chunk["choices"][0]["text"]
        print(chunk_text, end="", flush=True)        
        

In [15]:
# nucleus sampling https://arxiv.org/pdf/1904.09751
default_generation_config = {
    # Higher values are more random. OpenAI recommmends either using temperature or top_p, but not both No effect if temperature is set to 1
    "temperature" : 0.5,
    # Model only considers the smallest set of most probable tokens whose cumulative probability exceeds top_p. No effect if top_p is set to 1
    "top_p" : 1,
    # Minimum probability required to sample a token
    "min_p" : 0,
    # Positive values penalize new tokens based on their existing frequency in the text so far
    "frequency_penalty" : 0.0,
    # Positive values penalize new tokens based on whether they appear in the text so far.
    "presence_penalty" : 0.0,
    # The penalty to apply to repeated tokens
    "repeat_penalty" : 1,
    # Only consider top_k highest probability tokens for each step
    "top_k" : 50,
}

In [16]:
print("Generation config: ", default_generation_config)

for i in range(100):
    print(f"Generating ascii art {i+1} of 50\n")
    generate_ascii_art(max_tokens=200, generation_config=default_generation_config)
    print("\n\n")

Generation config:  {'temperature': 0.2, 'top_p': 1, 'min_p': 0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0, 'repeat_penalty': 1, 'top_k': 50}
Generating ascii art 1 of 50


  |\__/,|   (`\
  |_ _  |.-'  ) )
  ( T   )  _  /
 (((^_(((/(((_>



Generating ascii art 2 of 50


  |\__/,|   (`\
  |_ _  |.-'  ) )
  ( T   )  _  /
 (((^_(((/(((_>



Generating ascii art 3 of 50


  /\_/\  (
 ( ^.^ ) _)
   \"/  (
 ( | | )
(__d b__)



Generating ascii art 4 of 50


  |\__/,|   (`\
  |_ _  |.-'  .-'
  ( T   )  >#<  \
  `.  ;.'  /   \  \
  | | | | /     \  \
  \_`_`_/  `-----'  /
  ( `--'   (       )
   )_-)    \__)    \
   '.\    /  `--'  /
   )_).  /       \  \
  (___/  (_______) /



Generating ascii art 5 of 50


  /\_/\  (
 ( ^.^ ) _)
   \"/  (
 ( | | )
(__d b__)



Generating ascii art 6 of 50


  /\_/\  (
 ( ^.^ ) _)
   \"/  (
 ( | | )
(__d b__)



Generating ascii art 7 of 50


  |\__/,|   (`\
  |_ _  |.-'  ) )
  ( T   )  _  /
 (((^_(((/(((_>



Generating ascii art 8 of 50


  /\_/

KeyboardInterrupt: 