In [39]:
from llama_cpp import Llama
import pandas as pd
import json
import random
from sklearn.metrics import classification_report

In [57]:
# Load the pun-detection records into a DataFrame and preview the first rows.
df = pd.read_json(
  'data/llm_task1_hetero.json',
  orient="records",
)
df.head()

Unnamed: 0,id,text,label
0,het_857,Those whose child is going through a'phase'may...,1
1,het_368,Bureaucrats cut red tape-- lengthwise.,0
2,het_1204,If they say you look good in glasses that woul...,1
3,het_1043,"Nearing the end of his sentence, the convicted...",1
4,het_1098,A logger went to his doctor because of a probl...,1


In [59]:
# Evaluate on every loaded row. `test` is bound to the same DataFrame object as
# `df` (an alias, not a copy).
test = df
# Bare last expression so the notebook displays the number of rows.
len(test)

445

In [85]:
# Location of the quantised GGUF model file, relative to the notebook.
PATH = 'models/mistral-7b-openorca.Q6_K.gguf'

# Single shared model handle used by generate().
# NOTE(review): n_gpu_layers=75 presumably exceeds the model's layer count so
# that every layer is offloaded to the GPU -- confirm against the model.
llm = Llama(
  model_path=PATH,
  n_ctx=2048,
  n_gpu_layers=75,
)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from models/mistral-7b-openorca.Q6_K.gguf (version GGUF V2)
llama_model_loader: - tensor    0:                token_embd.weight q6_K     [  4096, 32002,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q6_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q6_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q6_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q6_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q6_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_down.weight q6_K     [ 14336,  4096,     1,     1 ]

In [44]:
def is_json(strJson):
  """Report whether ``strJson`` can be parsed as a JSON document.

  Args:
    strJson: Candidate JSON text. Anything ``json.loads`` cannot accept
      (malformed text, or a non-text value such as ``None``) counts as
      "not JSON" instead of raising.

  Returns:
    bool: True if ``json.loads`` accepts the input, False otherwise.
  """
  try:
    json.loads(strJson)
  except (ValueError, TypeError):
    # ValueError covers json.JSONDecodeError (malformed text); TypeError is
    # what json.loads raises for inputs that are not str/bytes/bytearray.
    return False
  return True

In [45]:
def generate(prompt, attempt):
  """Request one completion from the module-level ``llm`` and return its text.

  Args:
    prompt: Full prompt string passed to the model.
    attempt: Attempt number for this prompt. An attempt equal to 1 samples
      at temperature 0.2; any other value draws a temperature uniformly
      from [0.3, 0.6], rounded to one decimal place.

  Returns:
    The ``'text'`` field of the first entry in the completion's ``'choices'``.
  """
  # The random draw only happens on retries, so a first attempt never
  # consumes random state.
  temperature = 0.2 if attempt == 1 else round(random.uniform(0.3, 0.6), 1)
  completion = llm(prompt, max_tokens=-1, temperature=temperature)
  first_choice = completion['choices'][0]
  return first_choice['text']

def parseResult(jsonStr, row):
  """Parse a model completion into a dict, or return None if it is unusable.

  The text is stripped of surrounding whitespace and parsed exactly once.
  Only a JSON *object* is accepted: callers attach extra keys to the result,
  which would fail on any other JSON value (``true``, a list, a string...).

  Args:
    jsonStr: Raw completion text produced by the model.
    row: Dataset row the completion belongs to. Unused here; kept so
      existing call sites keep working.

  Returns:
    dict | None: The decoded object, or None when the text is not valid JSON
    or decodes to something other than a dict.
  """
  try:
    parsedJson = json.loads(jsonStr.strip())
  except ValueError:
    # json.JSONDecodeError is a ValueError subclass: malformed completion.
    return None
  if not isinstance(parsedJson, dict):
    return None
  return parsedJson

### Task 1: Pun Detection

In [43]:
# Few-shot instruction prefix for pun detection. The sentence to classify and
# the "### OUTPUT:" marker are appended by the caller. This is a plain string
# (nothing is interpolated), so the JSON examples use ordinary single braces.
prompt = ''' ### INSTRUCTION:
You are to classify whether a given sentence is a pun based on the following criteria:
1. Ambiguity -  there exists a word in the sentence that has two similarly likely interpretations.
2. Distinctiveness - the two interpretations are very different from each other i.e. how distinct are the words semantically related to the two interpretations from each other. There needs to be at least one different word in the set of words supporting each interpretation.\n
For a given sentence to be a pun, it should satisfy BOTH criteria - Ambiguity and Distinctiveness.\n
The final output is either true or false where true means that the sentence is a pun.\n
It is possible that a sentence does not have a word that is ambiguous. The result in this case is false.\n
It is possible that a sentence has Ambiguity but no Distinctiveness. The result in this case is false.\n
Here are three examples of the cases you will encounter:
1. An example with both ambiguity and distinctiveness - 
The magician got so mad that he pulled his hare out.
{
  "output": true,
  "ambiguity": "The pun word 'hare' supports two plausible interpretations of 'hare' meaning a rabbit and 'hair' meaning human hair.",
  "distinctiveness": "In the given sentence, the words 'magician' relates to 'hare' while 'pulled' refers to the second interpretation of 'hair'. Both of these are distinct where one refers to a magician's animal while the other refers to an action done in anger which is pulling your hair."
}
2. An example with only ambiguity -
I went to the bank.
{
  "output": false,
  "ambiguity": "The word 'bank' does have ambiguity here where it supports two plausible interpretations of bank as in a financial institution and bank as in the banks of a river.",
  "distinctiveness": "There are no other words in the sentence that provide distinctiveness to the two interpretations, the sentence is not a pun."
}
3. An example with neither ambiguity nor distinctiveness -
Let us go home.
{
  "output": false,
  "ambiguity": "There is no ambiguous word in the sentence.",
  "distinctiveness": "Not applicable"
}
Identify whether the input sentence is a pun and explain the result based on ambiguity and distinctiveness in valid JSON format. Generate a response in the form of a valid JSON object with three keys: output, ambiguity and distinctiveness.\n
### INPUT:\n
'''

In [60]:
# Upper bound on generations per sentence. Without it, a sentence whose
# completions never parse as a JSON object would retry forever and stall the
# whole run.
MAX_ATTEMPTS = 10

results = []     # one parsed dict per successfully classified sentence
failed_ids = []  # ids of sentences that exhausted MAX_ATTEMPTS
i = 0            # number of sentences classified so far (== len(results))
for _, row in test.iterrows():
  prepared_prompt = prompt + row['text'] + "\n### OUTPUT:\n"
  # attempt is 1-based because generate() treats attempt == 1 specially.
  for attempt in range(1, MAX_ATTEMPTS + 1):
    json_out = generate(prepared_prompt, attempt)
    parsedJson = parseResult(json_out, row)
    print(f"{i} -- {attempt}")
    # Only a dict can take the extra keys below; anything else is retried.
    if isinstance(parsedJson, dict):
      parsedJson['id'] = row['id']
      parsedJson['text'] = row['text']
      results.append(parsedJson)
      i += 1
      break
  else:
    # No usable completion: record the id so later steps can join on 'id'
    # instead of assuming results and test are row-aligned.
    failed_ids.append(row['id'])
    print(f"giving up on {row['id']} after {MAX_ATTEMPTS} attempts")


Llama.generate: prefix-match hit


0 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      15.16 ms /   116 runs   (    0.13 ms per token,  7653.23 tokens per second)
llama_print_timings: prompt eval time =    2160.70 ms /    25 tokens (   86.43 ms per token,    11.57 tokens per second)
llama_print_timings:        eval time =    6151.46 ms /   115 runs   (   53.49 ms per token,    18.69 tokens per second)
llama_print_timings:       total time =    8529.95 ms
Llama.generate: prefix-match hit


1 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      15.18 ms /   128 runs   (    0.12 ms per token,  8431.59 tokens per second)
llama_print_timings: prompt eval time =     192.01 ms /    17 tokens (   11.29 ms per token,    88.54 tokens per second)
llama_print_timings:        eval time =    6748.20 ms /   127 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    7147.94 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      17.39 ms /   119 runs   (    0.15 ms per token,  6844.98 tokens per second)
llama_print_timings: prompt eval time =     199.17 ms /    22 tokens (    9.05 ms per token,   110.46 tokens per second)
llama_print_timings:        eval time =    6368.90 ms /   118 runs   (   53.97 ms per token,    18.53 tokens per second)
llama_print_timings:       total time =    6805.53 ms
Llama.generate: prefix-

2 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       5.51 ms /    39 runs   (    0.14 ms per token,  7072.91 tokens per second)
llama_print_timings: prompt eval time =     520.14 ms /    67 tokens (    7.76 ms per token,   128.81 tokens per second)
llama_print_timings:        eval time =    2050.41 ms /    38 runs   (   53.96 ms per token,    18.53 tokens per second)
llama_print_timings:       total time =    2657.83 ms
Llama.generate: prefix-match hit


3 -- 1
4 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.90 ms /   126 runs   (    0.12 ms per token,  8455.24 tokens per second)
llama_print_timings: prompt eval time =     201.11 ms /    21 tokens (    9.58 ms per token,   104.42 tokens per second)
llama_print_timings:        eval time =    6643.62 ms /   125 runs   (   53.15 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    7046.05 ms
Llama.generate: prefix-match hit


5 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.39 ms /   131 runs   (    0.10 ms per token,  9783.42 tokens per second)
llama_print_timings: prompt eval time =     210.88 ms /    31 tokens (    6.80 ms per token,   147.01 tokens per second)
llama_print_timings:        eval time =    6958.07 ms /   130 runs   (   53.52 ms per token,    18.68 tokens per second)
llama_print_timings:       total time =    7363.33 ms
Llama.generate: prefix-match hit


6 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.17 ms /   139 runs   (    0.10 ms per token,  9812.92 tokens per second)
llama_print_timings: prompt eval time =     191.01 ms /    17 tokens (   11.24 ms per token,    89.00 tokens per second)
llama_print_timings:        eval time =    7313.75 ms /   138 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    7699.11 ms
Llama.generate: prefix-match hit


7 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.71 ms /    70 runs   (    0.10 ms per token, 10435.30 tokens per second)
llama_print_timings: prompt eval time =     202.53 ms /    25 tokens (    8.10 ms per token,   123.44 tokens per second)
llama_print_timings:        eval time =    3652.66 ms /    69 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3951.07 ms
Llama.generate: prefix-match hit


8 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.57 ms /    45 runs   (    0.10 ms per token,  9855.45 tokens per second)
llama_print_timings: prompt eval time =     198.27 ms /    22 tokens (    9.01 ms per token,   110.96 tokens per second)
llama_print_timings:        eval time =    2325.97 ms /    44 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2588.10 ms
Llama.generate: prefix-match hit


9 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.97 ms /   145 runs   (    0.10 ms per token,  9688.63 tokens per second)
llama_print_timings: prompt eval time =     183.54 ms /    20 tokens (    9.18 ms per token,   108.97 tokens per second)
llama_print_timings:        eval time =    7642.76 ms /   144 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    8030.29 ms
Llama.generate: prefix-match hit


10 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.62 ms /   112 runs   (    0.10 ms per token,  9640.21 tokens per second)
llama_print_timings: prompt eval time =     187.59 ms /    15 tokens (   12.51 ms per token,    79.96 tokens per second)
llama_print_timings:        eval time =    5880.59 ms /   111 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    6225.93 ms
Llama.generate: prefix-match hit


11 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.94 ms /   111 runs   (    0.10 ms per token, 10150.89 tokens per second)
llama_print_timings: prompt eval time =     205.64 ms /    28 tokens (    7.34 ms per token,   136.16 tokens per second)
llama_print_timings:        eval time =    5834.86 ms /   110 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6198.79 ms
Llama.generate: prefix-match hit


12 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.86 ms /   135 runs   (    0.10 ms per token,  9738.85 tokens per second)
llama_print_timings: prompt eval time =     193.10 ms /    18 tokens (   10.73 ms per token,    93.22 tokens per second)
llama_print_timings:        eval time =    7108.31 ms /   134 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7494.24 ms
Llama.generate: prefix-match hit


13 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.74 ms /   127 runs   (    0.10 ms per token,  9966.26 tokens per second)
llama_print_timings: prompt eval time =     344.37 ms /    33 tokens (   10.44 ms per token,    95.83 tokens per second)
llama_print_timings:        eval time =    6690.65 ms /   126 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7212.60 ms
Llama.generate: prefix-match hit


14 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.69 ms /    45 runs   (    0.10 ms per token,  9590.79 tokens per second)
llama_print_timings: prompt eval time =     188.38 ms /    15 tokens (   12.56 ms per token,    79.63 tokens per second)
llama_print_timings:        eval time =    2321.01 ms /    44 runs   (   52.75 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2571.13 ms
Llama.generate: prefix-match hit


15 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.26 ms /    40 runs   (    0.11 ms per token,  9378.66 tokens per second)
llama_print_timings: prompt eval time =     199.56 ms /    23 tokens (    8.68 ms per token,   115.25 tokens per second)
llama_print_timings:        eval time =    2060.27 ms /    39 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2316.89 ms
Llama.generate: prefix-match hit


16 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.74 ms /   119 runs   (    0.10 ms per token, 10132.83 tokens per second)
llama_print_timings: prompt eval time =     198.10 ms /    22 tokens (    9.00 ms per token,   111.05 tokens per second)
llama_print_timings:        eval time =    6260.33 ms /   118 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6626.96 ms
Llama.generate: prefix-match hit


17 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.69 ms /   139 runs   (    0.11 ms per token,  9464.15 tokens per second)
llama_print_timings: prompt eval time =     183.66 ms /    20 tokens (    9.18 ms per token,   108.90 tokens per second)
llama_print_timings:        eval time =    7329.82 ms /   138 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7712.96 ms
Llama.generate: prefix-match hit


18 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.54 ms /   131 runs   (    0.09 ms per token, 11354.77 tokens per second)
llama_print_timings: prompt eval time =     204.01 ms /    32 tokens (    6.38 ms per token,   156.85 tokens per second)
llama_print_timings:        eval time =    6911.70 ms /   130 runs   (   53.17 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    7298.69 ms
Llama.generate: prefix-match hit


19 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.91 ms /   120 runs   (    0.10 ms per token, 10078.95 tokens per second)
llama_print_timings: prompt eval time =     192.48 ms /    18 tokens (   10.69 ms per token,    93.52 tokens per second)
llama_print_timings:        eval time =    6316.63 ms /   119 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6679.58 ms
Llama.generate: prefix-match hit


20 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.62 ms /   115 runs   (    0.10 ms per token,  9900.14 tokens per second)
llama_print_timings: prompt eval time =     199.16 ms /    23 tokens (    8.66 ms per token,   115.48 tokens per second)
llama_print_timings:        eval time =    6048.18 ms /   114 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6407.81 ms
Llama.generate: prefix-match hit


21 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.33 ms /    40 runs   (    0.11 ms per token,  9246.42 tokens per second)
llama_print_timings: prompt eval time =     192.92 ms /    18 tokens (   10.72 ms per token,    93.30 tokens per second)
llama_print_timings:        eval time =    2059.73 ms /    39 runs   (   52.81 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2311.80 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.53 ms /    39 runs   (    0.12 ms per token,  8607.37 tokens per second)
llama_print_timings: prompt eval time =     182.88 ms /    19 tokens (    9.63 ms per token,   103.89 tokens per second)
llama_print_timings:        eval time =    2007.61 ms /    38 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2249.23 ms
Llama.generate: prefix-

22 -- 1
23 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.60 ms /    45 runs   (    0.10 ms per token,  9782.61 tokens per second)
llama_print_timings: prompt eval time =     188.85 ms /    16 tokens (   11.80 ms per token,    84.72 tokens per second)
llama_print_timings:        eval time =    2321.27 ms /    44 runs   (   52.76 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2572.14 ms
Llama.generate: prefix-match hit


24 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.68 ms /   132 runs   (    0.10 ms per token, 10408.45 tokens per second)
llama_print_timings: prompt eval time =     192.87 ms /    18 tokens (   10.72 ms per token,    93.33 tokens per second)
llama_print_timings:        eval time =    6946.13 ms /   131 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7322.35 ms
Llama.generate: prefix-match hit


25 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.88 ms /   126 runs   (    0.10 ms per token,  9784.13 tokens per second)
llama_print_timings: prompt eval time =     345.25 ms /    33 tokens (   10.46 ms per token,    95.58 tokens per second)
llama_print_timings:        eval time =    6633.70 ms /   125 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7155.78 ms
Llama.generate: prefix-match hit


26 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.26 ms /    40 runs   (    0.11 ms per token,  9398.50 tokens per second)
llama_print_timings: prompt eval time =     194.06 ms /    19 tokens (   10.21 ms per token,    97.91 tokens per second)
llama_print_timings:        eval time =    2060.08 ms /    39 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2312.57 ms
Llama.generate: prefix-match hit


27 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       7.60 ms /    73 runs   (    0.10 ms per token,  9600.21 tokens per second)
llama_print_timings: prompt eval time =     188.22 ms /    16 tokens (   11.76 ms per token,    85.01 tokens per second)
llama_print_timings:        eval time =    3808.65 ms /    72 runs   (   52.90 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    4100.04 ms
Llama.generate: prefix-match hit


28 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.48 ms /   113 runs   (    0.10 ms per token,  9846.64 tokens per second)
llama_print_timings: prompt eval time =     192.88 ms /    18 tokens (   10.72 ms per token,    93.32 tokens per second)
llama_print_timings:        eval time =    5937.87 ms /   112 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6292.86 ms
Llama.generate: prefix-match hit


29 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.46 ms /   148 runs   (    0.10 ms per token, 10233.72 tokens per second)
llama_print_timings: prompt eval time =     373.08 ms /    53 tokens (    7.04 ms per token,   142.06 tokens per second)
llama_print_timings:        eval time =    7827.94 ms /   147 runs   (   53.25 ms per token,    18.78 tokens per second)
llama_print_timings:       total time =    8411.61 ms
Llama.generate: prefix-match hit


30 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.43 ms /    40 runs   (    0.11 ms per token,  9023.23 tokens per second)
llama_print_timings: prompt eval time =     187.66 ms /    14 tokens (   13.40 ms per token,    74.60 tokens per second)
llama_print_timings:        eval time =    2062.29 ms /    39 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    2310.39 ms
Llama.generate: prefix-match hit


31 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.89 ms /   125 runs   (    0.10 ms per token,  9698.94 tokens per second)
llama_print_timings: prompt eval time =     191.09 ms /    17 tokens (   11.24 ms per token,    88.96 tokens per second)
llama_print_timings:        eval time =    6572.82 ms /   124 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6940.04 ms
Llama.generate: prefix-match hit


32 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.19 ms /    40 runs   (    0.10 ms per token,  9557.95 tokens per second)
llama_print_timings: prompt eval time =     202.54 ms /    25 tokens (    8.10 ms per token,   123.43 tokens per second)
llama_print_timings:        eval time =    2060.57 ms /    39 runs   (   52.84 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2319.76 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.92 ms /   108 runs   (    0.10 ms per token,  9891.92 tokens per second)
llama_print_timings: prompt eval time =     183.16 ms /    20 tokens (    9.16 ms per token,   109.19 tokens per second)
llama_print_timings:        eval time =    5672.48 ms /   107 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6008.18 ms
Llama.generate: prefix-

33 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.22 ms /    40 runs   (    0.11 ms per token,  9476.43 tokens per second)
llama_print_timings: prompt eval time =     183.05 ms /    12 tokens (   15.25 ms per token,    65.56 tokens per second)
llama_print_timings:        eval time =    2056.34 ms /    39 runs   (   52.73 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2296.64 ms
Llama.generate: prefix-match hit


34 -- 1
35 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.82 ms /   125 runs   (    0.10 ms per token,  9748.87 tokens per second)
llama_print_timings: prompt eval time =     188.23 ms /    16 tokens (   11.76 ms per token,    85.00 tokens per second)
llama_print_timings:        eval time =    6571.21 ms /   124 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6938.10 ms
Llama.generate: prefix-match hit


36 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.92 ms /   116 runs   (    0.10 ms per token,  9734.81 tokens per second)
llama_print_timings: prompt eval time =     195.90 ms /    21 tokens (    9.33 ms per token,   107.20 tokens per second)
llama_print_timings:        eval time =    6092.97 ms /   115 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6450.42 ms
Llama.generate: prefix-match hit


37 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.59 ms /   126 runs   (    0.10 ms per token, 10005.56 tokens per second)
llama_print_timings: prompt eval time =     195.95 ms /    21 tokens (    9.33 ms per token,   107.17 tokens per second)
llama_print_timings:        eval time =    6633.37 ms /   125 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7006.98 ms
Llama.generate: prefix-match hit


38 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.89 ms /   121 runs   (    0.11 ms per token,  9388.58 tokens per second)
llama_print_timings: prompt eval time =     206.23 ms /    27 tokens (    7.64 ms per token,   130.92 tokens per second)
llama_print_timings:        eval time =    6369.36 ms /   120 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6748.34 ms
Llama.generate: prefix-match hit


39 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.74 ms /   140 runs   (    0.10 ms per token, 10189.97 tokens per second)
llama_print_timings: prompt eval time =     183.37 ms /    20 tokens (    9.17 ms per token,   109.07 tokens per second)
llama_print_timings:        eval time =    7373.54 ms /   139 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7753.18 ms
Llama.generate: prefix-match hit


40 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.63 ms /    46 runs   (    0.10 ms per token,  9939.50 tokens per second)
llama_print_timings: prompt eval time =     196.22 ms /    21 tokens (    9.34 ms per token,   107.02 tokens per second)
llama_print_timings:        eval time =    2378.56 ms /    45 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2639.62 ms
Llama.generate: prefix-match hit


41 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.98 ms /   122 runs   (    0.11 ms per token,  9397.63 tokens per second)
llama_print_timings: prompt eval time =     188.36 ms /    15 tokens (   12.56 ms per token,    79.64 tokens per second)
llama_print_timings:        eval time =    6417.21 ms /   121 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6781.49 ms
Llama.generate: prefix-match hit


42 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.68 ms /   125 runs   (    0.10 ms per token,  9859.60 tokens per second)
llama_print_timings: prompt eval time =     190.70 ms /    17 tokens (   11.22 ms per token,    89.15 tokens per second)
llama_print_timings:        eval time =    6572.50 ms /   124 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6940.40 ms
Llama.generate: prefix-match hit


43 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.82 ms /    40 runs   (    0.10 ms per token, 10473.95 tokens per second)
llama_print_timings: prompt eval time =     211.38 ms /    31 tokens (    6.82 ms per token,   146.66 tokens per second)
llama_print_timings:        eval time =    2062.20 ms /    39 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    2330.65 ms
Llama.generate: prefix-match hit


44 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.53 ms /    40 runs   (    0.11 ms per token,  8837.83 tokens per second)
llama_print_timings: prompt eval time =     192.69 ms /    18 tokens (   10.71 ms per token,    93.41 tokens per second)
llama_print_timings:        eval time =    2061.53 ms /    39 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2314.42 ms
Llama.generate: prefix-match hit


45 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.64 ms /    64 runs   (    0.10 ms per token,  9644.36 tokens per second)
llama_print_timings: prompt eval time =     194.05 ms /    19 tokens (   10.21 ms per token,    97.91 tokens per second)
llama_print_timings:        eval time =    3328.92 ms /    63 runs   (   52.84 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    3614.08 ms
Llama.generate: prefix-match hit


46 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.13 ms /    39 runs   (    0.11 ms per token,  9452.25 tokens per second)
llama_print_timings: prompt eval time =     203.97 ms /    26 tokens (    7.85 ms per token,   127.47 tokens per second)
llama_print_timings:        eval time =    2009.49 ms /    38 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    2269.59 ms
Llama.generate: prefix-match hit


47 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.77 ms /   120 runs   (    0.11 ms per token,  9400.71 tokens per second)
llama_print_timings: prompt eval time =     194.63 ms /    19 tokens (   10.24 ms per token,    97.62 tokens per second)
llama_print_timings:        eval time =    6309.66 ms /   119 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6677.62 ms
Llama.generate: prefix-match hit


48 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.88 ms /    66 runs   (    0.10 ms per token,  9598.60 tokens per second)
llama_print_timings: prompt eval time =     188.24 ms /    16 tokens (   11.76 ms per token,    85.00 tokens per second)
llama_print_timings:        eval time =    3437.43 ms /    65 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3719.24 ms
Llama.generate: prefix-match hit


49 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.00 ms /   125 runs   (    0.10 ms per token,  9613.17 tokens per second)
llama_print_timings: prompt eval time =     210.46 ms /    30 tokens (    7.02 ms per token,   142.55 tokens per second)
llama_print_timings:        eval time =    6579.30 ms /   124 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6967.11 ms
Llama.generate: prefix-match hit


50 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.19 ms /   119 runs   (    0.10 ms per token,  9762.90 tokens per second)
llama_print_timings: prompt eval time =     192.84 ms /    18 tokens (   10.71 ms per token,    93.34 tokens per second)
llama_print_timings:        eval time =    6254.19 ms /   118 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6614.31 ms
Llama.generate: prefix-match hit


51 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.59 ms /   120 runs   (    0.10 ms per token,  9530.62 tokens per second)
llama_print_timings: prompt eval time =     198.19 ms /    22 tokens (    9.01 ms per token,   111.00 tokens per second)
llama_print_timings:        eval time =    6313.69 ms /   119 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6686.32 ms
Llama.generate: prefix-match hit


52 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.35 ms /    67 runs   (    0.09 ms per token, 10556.17 tokens per second)
llama_print_timings: prompt eval time =     207.09 ms /    29 tokens (    7.14 ms per token,   140.03 tokens per second)
llama_print_timings:        eval time =    3494.99 ms /    66 runs   (   52.95 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3795.74 ms
Llama.generate: prefix-match hit


53 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.83 ms /   125 runs   (    0.10 ms per token,  9745.83 tokens per second)
llama_print_timings: prompt eval time =     204.41 ms /    32 tokens (    6.39 ms per token,   156.55 tokens per second)
llama_print_timings:        eval time =    6581.42 ms /   124 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6961.49 ms
Llama.generate: prefix-match hit


54 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.92 ms /    45 runs   (    0.11 ms per token,  9144.48 tokens per second)
llama_print_timings: prompt eval time =     192.50 ms /    18 tokens (   10.69 ms per token,    93.51 tokens per second)
llama_print_timings:        eval time =    2325.78 ms /    44 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2584.80 ms
Llama.generate: prefix-match hit


55 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.17 ms /    40 runs   (    0.10 ms per token,  9587.73 tokens per second)
llama_print_timings: prompt eval time =     188.60 ms /    16 tokens (   11.79 ms per token,    84.83 tokens per second)
llama_print_timings:        eval time =    2058.87 ms /    39 runs   (   52.79 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2304.24 ms
Llama.generate: prefix-match hit


56 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.15 ms /    40 runs   (    0.10 ms per token,  9650.18 tokens per second)
llama_print_timings: prompt eval time =     183.80 ms /    20 tokens (    9.19 ms per token,   108.81 tokens per second)
llama_print_timings:        eval time =    2057.19 ms /    39 runs   (   52.75 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2296.39 ms
Llama.generate: prefix-match hit


57 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.72 ms /   135 runs   (    0.10 ms per token,  9838.93 tokens per second)
llama_print_timings: prompt eval time =     199.68 ms /    23 tokens (    8.68 ms per token,   115.19 tokens per second)
llama_print_timings:        eval time =    7111.72 ms /   134 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7502.68 ms
Llama.generate: prefix-match hit


58 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.60 ms /   114 runs   (    0.09 ms per token, 10756.75 tokens per second)
llama_print_timings: prompt eval time =     198.97 ms /    23 tokens (    8.65 ms per token,   115.59 tokens per second)
llama_print_timings:        eval time =    5991.37 ms /   113 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6352.87 ms
Llama.generate: prefix-match hit


59 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.05 ms /    39 runs   (    0.10 ms per token,  9617.76 tokens per second)
llama_print_timings: prompt eval time =     183.18 ms /    20 tokens (    9.16 ms per token,   109.18 tokens per second)
llama_print_timings:        eval time =    2004.55 ms /    38 runs   (   52.75 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2242.49 ms
Llama.generate: prefix-match hit


60 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.07 ms /    40 runs   (    0.10 ms per token,  9825.60 tokens per second)
llama_print_timings: prompt eval time =     192.16 ms /    18 tokens (   10.68 ms per token,    93.67 tokens per second)
llama_print_timings:        eval time =    2055.17 ms /    39 runs   (   52.70 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2302.44 ms
Llama.generate: prefix-match hit


61 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.30 ms /   128 runs   (    0.10 ms per token,  9621.89 tokens per second)
llama_print_timings: prompt eval time =     191.21 ms /    17 tokens (   11.25 ms per token,    88.91 tokens per second)
llama_print_timings:        eval time =    6736.68 ms /   127 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7112.00 ms
Llama.generate: prefix-match hit


62 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       5.26 ms /    47 runs   (    0.11 ms per token,  8926.88 tokens per second)
llama_print_timings: prompt eval time =     192.55 ms /    18 tokens (   10.70 ms per token,    93.48 tokens per second)
llama_print_timings:        eval time =    2432.39 ms /    46 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    2694.41 ms
Llama.generate: prefix-match hit


63 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.98 ms /    46 runs   (    0.11 ms per token,  9233.24 tokens per second)
llama_print_timings: prompt eval time =     190.64 ms /    17 tokens (   11.21 ms per token,    89.17 tokens per second)
llama_print_timings:        eval time =    2376.72 ms /    45 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2633.60 ms
Llama.generate: prefix-match hit


64 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.92 ms /    47 runs   (    0.10 ms per token,  9560.62 tokens per second)
llama_print_timings: prompt eval time =     192.51 ms /    18 tokens (   10.70 ms per token,    93.50 tokens per second)
llama_print_timings:        eval time =    2429.05 ms /    46 runs   (   52.81 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2687.49 ms
Llama.generate: prefix-match hit


65 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.75 ms /   131 runs   (    0.10 ms per token, 10274.51 tokens per second)
llama_print_timings: prompt eval time =     200.67 ms /    24 tokens (    8.36 ms per token,   119.60 tokens per second)
llama_print_timings:        eval time =    6897.96 ms /   130 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7282.00 ms
Llama.generate: prefix-match hit


66 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.97 ms /   120 runs   (    0.10 ms per token, 10028.41 tokens per second)
llama_print_timings: prompt eval time =     203.55 ms /    25 tokens (    8.14 ms per token,   122.82 tokens per second)
llama_print_timings:        eval time =    6313.61 ms /   119 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6686.84 ms
Llama.generate: prefix-match hit


67 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.60 ms /   127 runs   (    0.10 ms per token, 10079.37 tokens per second)
llama_print_timings: prompt eval time =     192.30 ms /    18 tokens (   10.68 ms per token,    93.60 tokens per second)
llama_print_timings:        eval time =    6683.70 ms /   126 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7054.58 ms
Llama.generate: prefix-match hit


68 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.13 ms /    40 runs   (    0.10 ms per token,  9685.23 tokens per second)
llama_print_timings: prompt eval time =     183.29 ms /    20 tokens (    9.16 ms per token,   109.11 tokens per second)
llama_print_timings:        eval time =    2055.75 ms /    39 runs   (   52.71 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2294.84 ms
Llama.generate: prefix-match hit


69 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.05 ms /    40 runs   (    0.10 ms per token,  9874.11 tokens per second)
llama_print_timings: prompt eval time =     188.58 ms /    16 tokens (   11.79 ms per token,    84.85 tokens per second)
llama_print_timings:        eval time =    2057.44 ms /    39 runs   (   52.75 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2302.11 ms
Llama.generate: prefix-match hit


70 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.60 ms /   146 runs   (    0.10 ms per token, 10002.06 tokens per second)
llama_print_timings: prompt eval time =     202.75 ms /    25 tokens (    8.11 ms per token,   123.30 tokens per second)
llama_print_timings:        eval time =    7701.39 ms /   145 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    8112.37 ms
Llama.generate: prefix-match hit


71 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.46 ms /    40 runs   (    0.11 ms per token,  8970.62 tokens per second)
llama_print_timings: prompt eval time =     188.34 ms /    15 tokens (   12.56 ms per token,    79.64 tokens per second)
llama_print_timings:        eval time =    2059.93 ms /    39 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2307.34 ms
Llama.generate: prefix-match hit


72 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.13 ms /    40 runs   (    0.10 ms per token,  9680.54 tokens per second)
llama_print_timings: prompt eval time =     191.30 ms /    17 tokens (   11.25 ms per token,    88.86 tokens per second)
llama_print_timings:        eval time =    2057.72 ms /    39 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2304.44 ms
Llama.generate: prefix-match hit


73 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.19 ms /   121 runs   (    0.09 ms per token, 10809.36 tokens per second)
llama_print_timings: prompt eval time =     346.51 ms /    34 tokens (   10.19 ms per token,    98.12 tokens per second)
llama_print_timings:        eval time =    6378.53 ms /   120 runs   (   53.15 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    6895.78 ms
Llama.generate: prefix-match hit


74 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.21 ms /    39 runs   (    0.11 ms per token,  9272.47 tokens per second)
llama_print_timings: prompt eval time =     196.36 ms /    21 tokens (    9.35 ms per token,   106.95 tokens per second)
llama_print_timings:        eval time =    2015.01 ms /    38 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    2268.86 ms
Llama.generate: prefix-match hit


75 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.35 ms /    40 runs   (    0.11 ms per token,  9203.87 tokens per second)
llama_print_timings: prompt eval time =     203.29 ms /    25 tokens (    8.13 ms per token,   122.98 tokens per second)
llama_print_timings:        eval time =    2070.28 ms /    39 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    2332.12 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.95 ms /   125 runs   (    0.10 ms per token, 10458.50 tokens per second)
llama_print_timings: prompt eval time =     193.55 ms /    18 tokens (   10.75 ms per token,    93.00 tokens per second)
llama_print_timings:        eval time =    6582.15 ms /   124 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6949.88 ms
Llama.generate: prefix-

76 -- 1
77 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.43 ms /   122 runs   (    0.09 ms per token, 10669.93 tokens per second)
llama_print_timings: prompt eval time =     211.02 ms /    31 tokens (    6.81 ms per token,   146.91 tokens per second)
llama_print_timings:        eval time =    6430.89 ms /   121 runs   (   53.15 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    6810.67 ms
Llama.generate: prefix-match hit


78 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.02 ms /   139 runs   (    0.10 ms per token,  9912.99 tokens per second)
llama_print_timings: prompt eval time =     183.42 ms /    20 tokens (    9.17 ms per token,   109.04 tokens per second)
llama_print_timings:        eval time =    7328.76 ms /   138 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7705.75 ms
Llama.generate: prefix-match hit


79 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.84 ms /    67 runs   (    0.10 ms per token,  9798.19 tokens per second)
llama_print_timings: prompt eval time =     194.55 ms /    19 tokens (   10.24 ms per token,    97.66 tokens per second)
llama_print_timings:        eval time =    3492.50 ms /    66 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3780.58 ms
Llama.generate: prefix-match hit


80 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.10 ms /   118 runs   (    0.10 ms per token,  9754.48 tokens per second)
llama_print_timings: prompt eval time =     203.91 ms /    26 tokens (    7.84 ms per token,   127.51 tokens per second)
llama_print_timings:        eval time =    6214.02 ms /   117 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    6586.73 ms
Llama.generate: prefix-match hit


81 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.12 ms /    40 runs   (    0.10 ms per token,  9704.03 tokens per second)
llama_print_timings: prompt eval time =     188.80 ms /    16 tokens (   11.80 ms per token,    84.75 tokens per second)
llama_print_timings:        eval time =    2058.08 ms /    39 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2302.47 ms
Llama.generate: prefix-match hit


82 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.78 ms /    68 runs   (    0.10 ms per token, 10025.06 tokens per second)
llama_print_timings: prompt eval time =     192.96 ms /    18 tokens (   10.72 ms per token,    93.29 tokens per second)
llama_print_timings:        eval time =    3544.49 ms /    67 runs   (   52.90 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3830.73 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.38 ms /   120 runs   (    0.09 ms per token, 10549.45 tokens per second)
llama_print_timings: prompt eval time =     359.45 ms /    43 tokens (    8.36 ms per token,   119.63 tokens per second)
llama_print_timings:        eval time =    6326.45 ms /   119 runs   (   53.16 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    6852.55 ms
Llama.generate: prefix-

83 -- 1
84 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.66 ms /    45 runs   (    0.10 ms per token,  9652.51 tokens per second)
llama_print_timings: prompt eval time =     199.79 ms /    23 tokens (    8.69 ms per token,   115.12 tokens per second)
llama_print_timings:        eval time =    2327.68 ms /    44 runs   (   52.90 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    2590.42 ms
Llama.generate: prefix-match hit


85 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.66 ms /   117 runs   (    0.10 ms per token, 10036.89 tokens per second)
llama_print_timings: prompt eval time =     203.33 ms /    25 tokens (    8.13 ms per token,   122.96 tokens per second)
llama_print_timings:        eval time =    6131.04 ms /   116 runs   (   52.85 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    6497.80 ms
Llama.generate: prefix-match hit


86 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.88 ms /   138 runs   (    0.09 ms per token, 10716.78 tokens per second)
llama_print_timings: prompt eval time =     206.04 ms /    27 tokens (    7.63 ms per token,   131.04 tokens per second)
llama_print_timings:        eval time =    7277.52 ms /   137 runs   (   53.12 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7674.91 ms
Llama.generate: prefix-match hit


87 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.10 ms /    40 runs   (    0.10 ms per token,  9751.34 tokens per second)
llama_print_timings: prompt eval time =     188.82 ms /    15 tokens (   12.59 ms per token,    79.44 tokens per second)
llama_print_timings:        eval time =    2055.67 ms /    39 runs   (   52.71 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2300.03 ms
Llama.generate: prefix-match hit


88 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.50 ms /   130 runs   (    0.10 ms per token, 10396.67 tokens per second)
llama_print_timings: prompt eval time =     199.53 ms /    23 tokens (    8.68 ms per token,   115.27 tokens per second)
llama_print_timings:        eval time =    6850.83 ms /   129 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7235.75 ms
Llama.generate: prefix-match hit


89 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.59 ms /   117 runs   (    0.10 ms per token, 10092.30 tokens per second)
llama_print_timings: prompt eval time =     204.11 ms /    26 tokens (    7.85 ms per token,   127.38 tokens per second)
llama_print_timings:        eval time =    6153.06 ms /   116 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6519.63 ms
Llama.generate: prefix-match hit


90 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.56 ms /    40 runs   (    0.11 ms per token,  8773.85 tokens per second)
llama_print_timings: prompt eval time =     198.11 ms /    22 tokens (    9.00 ms per token,   111.05 tokens per second)
llama_print_timings:        eval time =    2061.48 ms /    39 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2319.68 ms
Llama.generate: prefix-match hit


91 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.46 ms /   122 runs   (    0.10 ms per token,  9789.76 tokens per second)
llama_print_timings: prompt eval time =     183.30 ms /    20 tokens (    9.16 ms per token,   109.11 tokens per second)
llama_print_timings:        eval time =    6420.22 ms /   121 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6777.86 ms
Llama.generate: prefix-match hit


92 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.72 ms /   135 runs   (    0.10 ms per token,  9841.80 tokens per second)
llama_print_timings: prompt eval time =     188.93 ms /    15 tokens (   12.60 ms per token,    79.40 tokens per second)
llama_print_timings:        eval time =    7105.17 ms /   134 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7484.78 ms
Llama.generate: prefix-match hit


93 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.22 ms /   130 runs   (    0.10 ms per token,  9835.07 tokens per second)
llama_print_timings: prompt eval time =     183.09 ms /    20 tokens (    9.15 ms per token,   109.24 tokens per second)
llama_print_timings:        eval time =    6841.39 ms /   129 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7206.87 ms
Llama.generate: prefix-match hit


94 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       5.07 ms /    44 runs   (    0.12 ms per token,  8673.37 tokens per second)
llama_print_timings: prompt eval time =     196.34 ms /    21 tokens (    9.35 ms per token,   106.96 tokens per second)
llama_print_timings:        eval time =    2272.63 ms /    43 runs   (   52.85 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2536.83 ms
Llama.generate: prefix-match hit


95 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.09 ms /    39 runs   (    0.10 ms per token,  9523.81 tokens per second)
llama_print_timings: prompt eval time =     197.77 ms /    22 tokens (    8.99 ms per token,   111.24 tokens per second)
llama_print_timings:        eval time =    2006.23 ms /    38 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2260.15 ms
Llama.generate: prefix-match hit


96 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.69 ms /   119 runs   (    0.11 ms per token,  9379.68 tokens per second)
llama_print_timings: prompt eval time =     183.19 ms /    20 tokens (    9.16 ms per token,   109.17 tokens per second)
llama_print_timings:        eval time =    6255.59 ms /   118 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6610.07 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.55 ms /   125 runs   (    0.09 ms per token, 10820.64 tokens per second)
llama_print_timings: prompt eval time =     194.10 ms /    19 tokens (   10.22 ms per token,    97.89 tokens per second)
llama_print_timings:        eval time =    6577.97 ms /   124 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6947.08 ms
Llama.generate: prefix-

97 -- 1
98 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.36 ms /    40 runs   (    0.11 ms per token,  9176.42 tokens per second)
llama_print_timings: prompt eval time =     191.19 ms /    17 tokens (   11.25 ms per token,    88.92 tokens per second)
llama_print_timings:        eval time =    2057.19 ms /    39 runs   (   52.75 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2307.03 ms
Llama.generate: prefix-match hit


99 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.07 ms /   122 runs   (    0.10 ms per token, 10104.36 tokens per second)
llama_print_timings: prompt eval time =     199.22 ms /    23 tokens (    8.66 ms per token,   115.45 tokens per second)
llama_print_timings:        eval time =    6416.47 ms /   121 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6785.92 ms
Llama.generate: prefix-match hit


100 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.25 ms /    39 runs   (    0.11 ms per token,  9170.00 tokens per second)
llama_print_timings: prompt eval time =     187.46 ms /    14 tokens (   13.39 ms per token,    74.68 tokens per second)
llama_print_timings:        eval time =    2008.57 ms /    38 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2254.11 ms
Llama.generate: prefix-match hit


101 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.29 ms /    39 runs   (    0.11 ms per token,  9086.67 tokens per second)
llama_print_timings: prompt eval time =     194.21 ms /    19 tokens (   10.22 ms per token,    97.83 tokens per second)
llama_print_timings:        eval time =    2007.92 ms /    38 runs   (   52.84 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2258.28 ms
Llama.generate: prefix-match hit


102 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.79 ms /   134 runs   (    0.10 ms per token, 10473.66 tokens per second)
llama_print_timings: prompt eval time =     190.58 ms /    17 tokens (   11.21 ms per token,    89.20 tokens per second)
llama_print_timings:        eval time =    7049.44 ms /   133 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    7427.47 ms
Llama.generate: prefix-match hit


103 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.25 ms /    40 runs   (    0.11 ms per token,  9400.71 tokens per second)
llama_print_timings: prompt eval time =     200.49 ms /    24 tokens (    8.35 ms per token,   119.70 tokens per second)
llama_print_timings:        eval time =    2059.37 ms /    39 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2316.34 ms
Llama.generate: prefix-match hit


104 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.49 ms /    44 runs   (    0.10 ms per token,  9806.11 tokens per second)
llama_print_timings: prompt eval time =     200.73 ms /    24 tokens (    8.36 ms per token,   119.56 tokens per second)
llama_print_timings:        eval time =    2272.91 ms /    43 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2534.50 ms
Llama.generate: prefix-match hit


105 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.67 ms /   110 runs   (    0.11 ms per token,  9427.49 tokens per second)
llama_print_timings: prompt eval time =     194.20 ms /    19 tokens (   10.22 ms per token,    97.84 tokens per second)
llama_print_timings:        eval time =    5775.40 ms /   109 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6125.87 ms
Llama.generate: prefix-match hit


106 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.45 ms /   121 runs   (    0.10 ms per token,  9718.88 tokens per second)
llama_print_timings: prompt eval time =     183.40 ms /    20 tokens (    9.17 ms per token,   109.05 tokens per second)
llama_print_timings:        eval time =    6360.52 ms /   120 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6715.72 ms
Llama.generate: prefix-match hit


107 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.83 ms /   134 runs   (    0.09 ms per token, 11330.01 tokens per second)
llama_print_timings: prompt eval time =     337.74 ms /    51 tokens (    6.62 ms per token,   151.00 tokens per second)
llama_print_timings:        eval time =    7071.30 ms /   133 runs   (   53.17 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    7595.38 ms
Llama.generate: prefix-match hit


108 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.11 ms /    40 runs   (    0.10 ms per token,  9729.99 tokens per second)
llama_print_timings: prompt eval time =     198.22 ms /    22 tokens (    9.01 ms per token,   110.99 tokens per second)
llama_print_timings:        eval time =    2058.73 ms /    39 runs   (   52.79 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2312.71 ms
Llama.generate: prefix-match hit


109 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       7.21 ms /    67 runs   (    0.11 ms per token,  9291.36 tokens per second)
llama_print_timings: prompt eval time =     192.94 ms /    18 tokens (   10.72 ms per token,    93.29 tokens per second)
llama_print_timings:        eval time =    3491.06 ms /    66 runs   (   52.89 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3780.90 ms
Llama.generate: prefix-match hit


110 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.43 ms /    40 runs   (    0.11 ms per token,  9023.23 tokens per second)
llama_print_timings: prompt eval time =     192.72 ms /    18 tokens (   10.71 ms per token,    93.40 tokens per second)
llama_print_timings:        eval time =    2059.39 ms /    39 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2311.66 ms
Llama.generate: prefix-match hit


111 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      15.14 ms /   147 runs   (    0.10 ms per token,  9711.30 tokens per second)
llama_print_timings: prompt eval time =     202.31 ms /    25 tokens (    8.09 ms per token,   123.57 tokens per second)
llama_print_timings:        eval time =    7760.94 ms /   146 runs   (   53.16 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    8170.28 ms
Llama.generate: prefix-match hit


112 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.13 ms /    39 runs   (    0.11 ms per token,  9436.24 tokens per second)
llama_print_timings: prompt eval time =     187.53 ms /    14 tokens (   13.39 ms per token,    74.66 tokens per second)
llama_print_timings:        eval time =    2007.95 ms /    38 runs   (   52.84 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2251.94 ms
Llama.generate: prefix-match hit


113 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.90 ms /    45 runs   (    0.11 ms per token,  9181.80 tokens per second)
llama_print_timings: prompt eval time =     183.34 ms /    20 tokens (    9.17 ms per token,   109.09 tokens per second)
llama_print_timings:        eval time =    2325.95 ms /    44 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2575.49 ms
Llama.generate: prefix-match hit


114 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.98 ms /   129 runs   (    0.09 ms per token, 10766.15 tokens per second)
llama_print_timings: prompt eval time =     198.31 ms /    22 tokens (    9.01 ms per token,   110.94 tokens per second)
llama_print_timings:        eval time =    6790.46 ms /   128 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7169.88 ms
Llama.generate: prefix-match hit


115 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.42 ms /   118 runs   (    0.11 ms per token,  9500.81 tokens per second)
llama_print_timings: prompt eval time =     192.53 ms /    18 tokens (   10.70 ms per token,    93.49 tokens per second)
llama_print_timings:        eval time =    6206.06 ms /   117 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6569.99 ms
Llama.generate: prefix-match hit


116 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.37 ms /   143 runs   (    0.10 ms per token,  9949.90 tokens per second)
llama_print_timings: prompt eval time =     196.50 ms /    21 tokens (    9.36 ms per token,   106.87 tokens per second)
llama_print_timings:        eval time =    7539.26 ms /   142 runs   (   53.09 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7939.06 ms
Llama.generate: prefix-match hit


117 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.15 ms /   134 runs   (    0.11 ms per token,  9470.63 tokens per second)
llama_print_timings: prompt eval time =     201.94 ms /    25 tokens (    8.08 ms per token,   123.80 tokens per second)
llama_print_timings:        eval time =    7061.98 ms /   133 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7458.38 ms
Llama.generate: prefix-match hit


118 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.95 ms /    40 runs   (    0.10 ms per token, 10124.02 tokens per second)
llama_print_timings: prompt eval time =     183.52 ms /    20 tokens (    9.18 ms per token,   108.98 tokens per second)
llama_print_timings:        eval time =    2055.32 ms /    39 runs   (   52.70 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2294.04 ms
Llama.generate: prefix-match hit


119 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.91 ms /   124 runs   (    0.10 ms per token,  9606.45 tokens per second)
llama_print_timings: prompt eval time =     198.36 ms /    22 tokens (    9.02 ms per token,   110.91 tokens per second)
llama_print_timings:        eval time =    6528.45 ms /   123 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6905.00 ms
Llama.generate: prefix-match hit


120 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.16 ms /    40 runs   (    0.10 ms per token,  9613.07 tokens per second)
llama_print_timings: prompt eval time =     187.65 ms /    14 tokens (   13.40 ms per token,    74.61 tokens per second)
llama_print_timings:        eval time =    2058.98 ms /    39 runs   (   52.79 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2302.86 ms
Llama.generate: prefix-match hit


121 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.53 ms /    64 runs   (    0.10 ms per token,  9803.92 tokens per second)
llama_print_timings: prompt eval time =     191.36 ms /    17 tokens (   11.26 ms per token,    88.84 tokens per second)
llama_print_timings:        eval time =    3326.28 ms /    63 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    3606.78 ms
Llama.generate: prefix-match hit


122 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.59 ms /    64 runs   (    0.10 ms per token,  9707.27 tokens per second)
llama_print_timings: prompt eval time =     187.80 ms /    14 tokens (   13.41 ms per token,    74.55 tokens per second)
llama_print_timings:        eval time =    3328.68 ms /    63 runs   (   52.84 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    3607.21 ms
Llama.generate: prefix-match hit


123 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.99 ms /    40 runs   (    0.10 ms per token, 10035.12 tokens per second)
llama_print_timings: prompt eval time =     182.47 ms /    10 tokens (   18.25 ms per token,    54.80 tokens per second)
llama_print_timings:        eval time =    2054.47 ms /    39 runs   (   52.68 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2292.43 ms
Llama.generate: prefix-match hit


124 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.35 ms /    40 runs   (    0.11 ms per token,  9193.29 tokens per second)
llama_print_timings: prompt eval time =     188.71 ms /    15 tokens (   12.58 ms per token,    79.49 tokens per second)
llama_print_timings:        eval time =    2060.51 ms /    39 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2308.73 ms
Llama.generate: prefix-match hit


125 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.78 ms /   133 runs   (    0.10 ms per token,  9652.37 tokens per second)
llama_print_timings: prompt eval time =     199.45 ms /    23 tokens (    8.67 ms per token,   115.32 tokens per second)
llama_print_timings:        eval time =    7003.74 ms /   132 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7392.94 ms
Llama.generate: prefix-match hit


126 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.71 ms /   123 runs   (    0.10 ms per token,  9678.94 tokens per second)
llama_print_timings: prompt eval time =     193.20 ms /    18 tokens (   10.73 ms per token,    93.17 tokens per second)
llama_print_timings:        eval time =    6463.39 ms /   122 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    6832.76 ms
Llama.generate: prefix-match hit


127 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.36 ms /    39 runs   (    0.11 ms per token,  8936.76 tokens per second)
llama_print_timings: prompt eval time =     197.81 ms /    22 tokens (    8.99 ms per token,   111.22 tokens per second)
llama_print_timings:        eval time =    2006.79 ms /    38 runs   (   52.81 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2265.46 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.88 ms /   117 runs   (    0.09 ms per token, 10758.62 tokens per second)
llama_print_timings: prompt eval time =     367.16 ms /    49 tokens (    7.49 ms per token,   133.46 tokens per second)
llama_print_timings:        eval time =    6167.56 ms /   116 runs   (   53.17 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    6700.10 ms
Llama.generate: prefix-

128 -- 1
129 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.93 ms /   107 runs   (    0.10 ms per token,  9786.88 tokens per second)
llama_print_timings: prompt eval time =     191.08 ms /    17 tokens (   11.24 ms per token,    88.97 tokens per second)
llama_print_timings:        eval time =    5612.72 ms /   106 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    5952.43 ms
Llama.generate: prefix-match hit


130 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.91 ms /    39 runs   (    0.10 ms per token,  9966.78 tokens per second)
llama_print_timings: prompt eval time =     188.46 ms /    15 tokens (   12.56 ms per token,    79.59 tokens per second)
llama_print_timings:        eval time =    2005.00 ms /    38 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2247.74 ms
Llama.generate: prefix-match hit


131 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.45 ms /   127 runs   (    0.11 ms per token,  9443.08 tokens per second)
llama_print_timings: prompt eval time =     198.93 ms /    23 tokens (    8.65 ms per token,   115.62 tokens per second)
llama_print_timings:        eval time =    6688.07 ms /   126 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7070.34 ms
Llama.generate: prefix-match hit


132 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.32 ms /   133 runs   (    0.11 ms per token,  9289.01 tokens per second)
llama_print_timings: prompt eval time =     198.41 ms /    22 tokens (    9.02 ms per token,   110.88 tokens per second)
llama_print_timings:        eval time =    7002.14 ms /   132 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7395.19 ms
Llama.generate: prefix-match hit


133 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.36 ms /   121 runs   (    0.10 ms per token,  9786.48 tokens per second)
llama_print_timings: prompt eval time =     200.58 ms /    24 tokens (    8.36 ms per token,   119.65 tokens per second)
llama_print_timings:        eval time =    6358.99 ms /   120 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6730.72 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.29 ms /   142 runs   (    0.09 ms per token, 10687.14 tokens per second)
llama_print_timings: prompt eval time =     202.53 ms /    25 tokens (    8.10 ms per token,   123.44 tokens per second)
llama_print_timings:        eval time =    7480.86 ms /   141 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7884.88 ms
Llama.generate: prefix-

134 -- 1
135 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.17 ms /    40 runs   (    0.10 ms per token,  9585.43 tokens per second)
llama_print_timings: prompt eval time =     192.75 ms /    18 tokens (   10.71 ms per token,    93.39 tokens per second)
llama_print_timings:        eval time =    2058.65 ms /    39 runs   (   52.79 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2308.29 ms
Llama.generate: prefix-match hit


136 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.13 ms /    40 runs   (    0.10 ms per token,  9692.27 tokens per second)
llama_print_timings: prompt eval time =     183.61 ms /    12 tokens (   15.30 ms per token,    65.36 tokens per second)
llama_print_timings:        eval time =    2055.68 ms /    39 runs   (   52.71 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2294.89 ms
Llama.generate: prefix-match hit


137 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.13 ms /    40 runs   (    0.10 ms per token,  9687.58 tokens per second)
llama_print_timings: prompt eval time =     194.50 ms /    19 tokens (   10.24 ms per token,    97.68 tokens per second)
llama_print_timings:        eval time =    2057.64 ms /    39 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2307.34 ms
Llama.generate: prefix-match hit


138 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       9.27 ms /    94 runs   (    0.10 ms per token, 10136.96 tokens per second)
llama_print_timings: prompt eval time =     352.12 ms /    39 tokens (    9.03 ms per token,   110.76 tokens per second)
llama_print_timings:        eval time =    4933.59 ms /    93 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    5420.71 ms
Llama.generate: prefix-match hit


139 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       9.65 ms /    99 runs   (    0.10 ms per token, 10260.13 tokens per second)
llama_print_timings: prompt eval time =     191.19 ms /    17 tokens (   11.25 ms per token,    88.92 tokens per second)
llama_print_timings:        eval time =    5192.11 ms /    98 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    5525.77 ms
Llama.generate: prefix-match hit


140 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.02 ms /    39 runs   (    0.10 ms per token,  9711.16 tokens per second)
llama_print_timings: prompt eval time =     185.50 ms /    13 tokens (   14.27 ms per token,    70.08 tokens per second)
llama_print_timings:        eval time =    2007.78 ms /    38 runs   (   52.84 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2248.04 ms
Llama.generate: prefix-match hit


141 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.39 ms /    40 runs   (    0.11 ms per token,  9109.54 tokens per second)
llama_print_timings: prompt eval time =     194.10 ms /    19 tokens (   10.22 ms per token,    97.89 tokens per second)
llama_print_timings:        eval time =    2058.02 ms /    39 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2312.13 ms
Llama.generate: prefix-match hit


142 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.46 ms /   134 runs   (    0.10 ms per token,  9959.12 tokens per second)
llama_print_timings: prompt eval time =     198.14 ms /    22 tokens (    9.01 ms per token,   111.03 tokens per second)
llama_print_timings:        eval time =    7058.96 ms /   133 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7446.85 ms
Llama.generate: prefix-match hit


143 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.62 ms /    46 runs   (    0.10 ms per token,  9954.56 tokens per second)
llama_print_timings: prompt eval time =     198.28 ms /    22 tokens (    9.01 ms per token,   110.96 tokens per second)
llama_print_timings:        eval time =    2377.50 ms /    45 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2640.92 ms
Llama.generate: prefix-match hit


144 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.82 ms /   121 runs   (    0.10 ms per token, 10237.75 tokens per second)
llama_print_timings: prompt eval time =     206.37 ms /    27 tokens (    7.64 ms per token,   130.84 tokens per second)
llama_print_timings:        eval time =    6369.31 ms /   120 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6745.12 ms
Llama.generate: prefix-match hit


145 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.04 ms /    40 runs   (    0.10 ms per token,  9910.80 tokens per second)
llama_print_timings: prompt eval time =     187.80 ms /    16 tokens (   11.74 ms per token,    85.20 tokens per second)
llama_print_timings:        eval time =    2059.15 ms /    39 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2303.88 ms
Llama.generate: prefix-match hit


146 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.82 ms /    45 runs   (    0.11 ms per token,  9345.79 tokens per second)
llama_print_timings: prompt eval time =     196.40 ms /    21 tokens (    9.35 ms per token,   106.92 tokens per second)
llama_print_timings:        eval time =    2325.82 ms /    44 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2586.50 ms
Llama.generate: prefix-match hit


147 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.80 ms /   122 runs   (    0.10 ms per token, 10339.86 tokens per second)
llama_print_timings: prompt eval time =     196.62 ms /    21 tokens (    9.36 ms per token,   106.81 tokens per second)
llama_print_timings:        eval time =    6417.58 ms /   121 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6784.88 ms
Llama.generate: prefix-match hit


148 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.73 ms /    45 runs   (    0.11 ms per token,  9515.75 tokens per second)
llama_print_timings: prompt eval time =     191.03 ms /    17 tokens (   11.24 ms per token,    88.99 tokens per second)
llama_print_timings:        eval time =    2321.39 ms /    44 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2576.60 ms
Llama.generate: prefix-match hit


149 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.56 ms /    39 runs   (    0.12 ms per token,  8556.38 tokens per second)
llama_print_timings: prompt eval time =     199.92 ms /    23 tokens (    8.69 ms per token,   115.05 tokens per second)
llama_print_timings:        eval time =    2009.02 ms /    38 runs   (   52.87 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    2270.35 ms
Llama.generate: prefix-match hit


150 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.27 ms /    40 runs   (    0.11 ms per token,  9374.27 tokens per second)
llama_print_timings: prompt eval time =     189.08 ms /    15 tokens (   12.61 ms per token,    79.33 tokens per second)
llama_print_timings:        eval time =    2058.56 ms /    39 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2305.31 ms
Llama.generate: prefix-match hit


151 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      15.25 ms /   158 runs   (    0.10 ms per token, 10357.94 tokens per second)
llama_print_timings: prompt eval time =     196.45 ms /    21 tokens (    9.35 ms per token,   106.90 tokens per second)
llama_print_timings:        eval time =    8336.29 ms /   157 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    8755.74 ms
Llama.generate: prefix-match hit


152 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.93 ms /   145 runs   (    0.10 ms per token, 10410.68 tokens per second)
llama_print_timings: prompt eval time =     196.40 ms /    21 tokens (    9.35 ms per token,   106.93 tokens per second)
llama_print_timings:        eval time =    7644.87 ms /   144 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    8046.10 ms
Llama.generate: prefix-match hit


153 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.71 ms /    40 runs   (    0.12 ms per token,  8488.96 tokens per second)
llama_print_timings: prompt eval time =     183.47 ms /    20 tokens (    9.17 ms per token,   109.01 tokens per second)
llama_print_timings:        eval time =    2066.00 ms /    39 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    2313.28 ms
Llama.generate: prefix-match hit


154 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.22 ms /    39 runs   (    0.11 ms per token,  9243.90 tokens per second)
llama_print_timings: prompt eval time =     204.27 ms /    32 tokens (    6.38 ms per token,   156.66 tokens per second)
llama_print_timings:        eval time =    2012.57 ms /    38 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    2274.62 ms
Llama.generate: prefix-match hit


155 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.05 ms /    40 runs   (    0.10 ms per token,  9874.11 tokens per second)
llama_print_timings: prompt eval time =     200.59 ms /    24 tokens (    8.36 ms per token,   119.65 tokens per second)
llama_print_timings:        eval time =    2058.37 ms /    39 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2315.14 ms
Llama.generate: prefix-match hit


156 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.34 ms /    40 runs   (    0.11 ms per token,  9212.34 tokens per second)
llama_print_timings: prompt eval time =     191.01 ms /    17 tokens (   11.24 ms per token,    89.00 tokens per second)
llama_print_timings:        eval time =    2058.26 ms /    39 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2307.30 ms
Llama.generate: prefix-match hit


157 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.95 ms /   138 runs   (    0.10 ms per token,  9893.18 tokens per second)
llama_print_timings: prompt eval time =     194.46 ms /    19 tokens (   10.23 ms per token,    97.71 tokens per second)
llama_print_timings:        eval time =    7271.88 ms /   137 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7665.00 ms
Llama.generate: prefix-match hit


158 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.71 ms /   133 runs   (    0.10 ms per token,  9700.24 tokens per second)
llama_print_timings: prompt eval time =     200.44 ms /    24 tokens (    8.35 ms per token,   119.74 tokens per second)
llama_print_timings:        eval time =    7005.40 ms /   132 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7396.95 ms
Llama.generate: prefix-match hit


159 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.93 ms /    39 runs   (    0.10 ms per token,  9931.25 tokens per second)
llama_print_timings: prompt eval time =     350.88 ms /    37 tokens (    9.48 ms per token,   105.45 tokens per second)
llama_print_timings:        eval time =    2011.07 ms /    38 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    2419.75 ms
Llama.generate: prefix-match hit


160 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.25 ms /    40 runs   (    0.11 ms per token,  9418.41 tokens per second)
llama_print_timings: prompt eval time =     188.93 ms /    15 tokens (   12.60 ms per token,    79.40 tokens per second)
llama_print_timings:        eval time =    2057.86 ms /    39 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2304.37 ms
Llama.generate: prefix-match hit


161 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.27 ms /   125 runs   (    0.10 ms per token, 10191.60 tokens per second)
llama_print_timings: prompt eval time =     205.96 ms /    28 tokens (    7.36 ms per token,   135.95 tokens per second)
llama_print_timings:        eval time =    6583.91 ms /   124 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    6970.26 ms
Llama.generate: prefix-match hit


162 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.32 ms /    40 runs   (    0.11 ms per token,  9250.69 tokens per second)
llama_print_timings: prompt eval time =     188.70 ms /    15 tokens (   12.58 ms per token,    79.49 tokens per second)
llama_print_timings:        eval time =    2057.67 ms /    39 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2304.01 ms
Llama.generate: prefix-match hit


163 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.08 ms /    39 runs   (    0.10 ms per token,  9547.12 tokens per second)
llama_print_timings: prompt eval time =     188.90 ms /    15 tokens (   12.59 ms per token,    79.41 tokens per second)
llama_print_timings:        eval time =    2003.32 ms /    38 runs   (   52.72 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2248.37 ms
Llama.generate: prefix-match hit


164 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.75 ms /   126 runs   (    0.10 ms per token,  9884.68 tokens per second)
llama_print_timings: prompt eval time =     203.00 ms /    25 tokens (    8.12 ms per token,   123.15 tokens per second)
llama_print_timings:        eval time =    6628.58 ms /   125 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7008.38 ms
Llama.generate: prefix-match hit


165 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.99 ms /    46 runs   (    0.11 ms per token,  9227.68 tokens per second)
llama_print_timings: prompt eval time =     194.04 ms /    19 tokens (   10.21 ms per token,    97.92 tokens per second)
llama_print_timings:        eval time =    2377.67 ms /    45 runs   (   52.84 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2637.50 ms
Llama.generate: prefix-match hit


166 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.97 ms /   132 runs   (    0.10 ms per token, 10177.33 tokens per second)
llama_print_timings: prompt eval time =     199.46 ms /    23 tokens (    8.67 ms per token,   115.31 tokens per second)
llama_print_timings:        eval time =    6949.22 ms /   131 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7338.31 ms
Llama.generate: prefix-match hit


167 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.12 ms /    40 runs   (    0.10 ms per token,  9715.81 tokens per second)
llama_print_timings: prompt eval time =     185.64 ms /    13 tokens (   14.28 ms per token,    70.03 tokens per second)
llama_print_timings:        eval time =    2057.91 ms /    39 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2299.84 ms
Llama.generate: prefix-match hit


168 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.31 ms /    40 runs   (    0.11 ms per token,  9278.59 tokens per second)
llama_print_timings: prompt eval time =     194.30 ms /    19 tokens (   10.23 ms per token,    97.79 tokens per second)
llama_print_timings:        eval time =    2059.84 ms /    39 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2312.04 ms
Llama.generate: prefix-match hit


169 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.26 ms /    40 runs   (    0.11 ms per token,  9385.27 tokens per second)
llama_print_timings: prompt eval time =     190.81 ms /    17 tokens (   11.22 ms per token,    89.09 tokens per second)
llama_print_timings:        eval time =    2058.28 ms /    39 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2308.92 ms
Llama.generate: prefix-match hit


170 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.60 ms /   136 runs   (    0.10 ms per token, 10000.00 tokens per second)
llama_print_timings: prompt eval time =     183.71 ms /    20 tokens (    9.19 ms per token,   108.87 tokens per second)
llama_print_timings:        eval time =    7161.07 ms /   135 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7536.86 ms
Llama.generate: prefix-match hit


171 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.33 ms /   126 runs   (    0.11 ms per token,  9452.36 tokens per second)
llama_print_timings: prompt eval time =     183.42 ms /    20 tokens (    9.17 ms per token,   109.04 tokens per second)
llama_print_timings:        eval time =    6625.21 ms /   125 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6991.41 ms
Llama.generate: prefix-match hit


172 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.27 ms /    40 runs   (    0.11 ms per token,  9376.47 tokens per second)
llama_print_timings: prompt eval time =     196.69 ms /    21 tokens (    9.37 ms per token,   106.76 tokens per second)
llama_print_timings:        eval time =    2058.59 ms /    39 runs   (   52.78 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2312.28 ms
Llama.generate: prefix-match hit


173 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.57 ms /   129 runs   (    0.10 ms per token, 10260.08 tokens per second)
llama_print_timings: prompt eval time =     205.43 ms /    28 tokens (    7.34 ms per token,   136.30 tokens per second)
llama_print_timings:        eval time =    6796.52 ms /   128 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7189.47 ms
Llama.generate: prefix-match hit


174 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.20 ms /    39 runs   (    0.11 ms per token,  9283.50 tokens per second)
llama_print_timings: prompt eval time =     194.47 ms /    19 tokens (   10.24 ms per token,    97.70 tokens per second)
llama_print_timings:        eval time =    2003.61 ms /    38 runs   (   52.73 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2255.37 ms
Llama.generate: prefix-match hit


175 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.30 ms /   138 runs   (    0.10 ms per token, 10372.82 tokens per second)
llama_print_timings: prompt eval time =     208.76 ms /    31 tokens (    6.73 ms per token,   148.50 tokens per second)
llama_print_timings:        eval time =    7280.40 ms /   137 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    7688.17 ms
Llama.generate: prefix-match hit


176 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.27 ms /   152 runs   (    0.09 ms per token, 10648.73 tokens per second)
llama_print_timings: prompt eval time =     183.97 ms /    20 tokens (    9.20 ms per token,   108.71 tokens per second)
llama_print_timings:        eval time =    8013.85 ms /   151 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    8414.80 ms
Llama.generate: prefix-match hit


177 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       5.07 ms /    49 runs   (    0.10 ms per token,  9674.23 tokens per second)
llama_print_timings: prompt eval time =     191.37 ms /    17 tokens (   11.26 ms per token,    88.83 tokens per second)
llama_print_timings:        eval time =    2536.86 ms /    48 runs   (   52.85 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2797.73 ms
Llama.generate: prefix-match hit


178 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.51 ms /   129 runs   (    0.10 ms per token,  9547.78 tokens per second)
llama_print_timings: prompt eval time =     202.51 ms /    25 tokens (    8.10 ms per token,   123.45 tokens per second)
llama_print_timings:        eval time =    6794.75 ms /   128 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7185.11 ms
Llama.generate: prefix-match hit


179 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.17 ms /    40 runs   (    0.10 ms per token,  9583.13 tokens per second)
llama_print_timings: prompt eval time =     188.72 ms /    16 tokens (   11.80 ms per token,    84.78 tokens per second)
llama_print_timings:        eval time =    2059.20 ms /    39 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2304.38 ms
Llama.generate: prefix-match hit


180 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.07 ms /    40 runs   (    0.10 ms per token,  9832.84 tokens per second)
llama_print_timings: prompt eval time =     183.69 ms /    20 tokens (    9.18 ms per token,   108.88 tokens per second)
llama_print_timings:        eval time =    2055.49 ms /    39 runs   (   52.70 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2295.89 ms
Llama.generate: prefix-match hit


181 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.87 ms /   117 runs   (    0.10 ms per token,  9853.46 tokens per second)
llama_print_timings: prompt eval time =     188.84 ms /    15 tokens (   12.59 ms per token,    79.43 tokens per second)
llama_print_timings:        eval time =    6146.10 ms /   116 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6500.27 ms
Llama.generate: prefix-match hit


182 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       5.04 ms /    45 runs   (    0.11 ms per token,  8935.66 tokens per second)
llama_print_timings: prompt eval time =     183.55 ms /    20 tokens (    9.18 ms per token,   108.96 tokens per second)
llama_print_timings:        eval time =    2324.63 ms /    44 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2574.98 ms
Llama.generate: prefix-match hit


183 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.18 ms /    40 runs   (    0.10 ms per token,  9562.51 tokens per second)
llama_print_timings: prompt eval time =     192.61 ms /    18 tokens (   10.70 ms per token,    93.45 tokens per second)
llama_print_timings:        eval time =    2059.38 ms /    39 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2307.61 ms
Llama.generate: prefix-match hit


184 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.86 ms /   123 runs   (    0.10 ms per token, 10370.99 tokens per second)
llama_print_timings: prompt eval time =     363.23 ms /    47 tokens (    7.73 ms per token,   129.39 tokens per second)
llama_print_timings:        eval time =    6483.68 ms /   122 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    7019.85 ms
Llama.generate: prefix-match hit


185 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.56 ms /   122 runs   (    0.09 ms per token, 10549.98 tokens per second)
llama_print_timings: prompt eval time =     205.34 ms /    28 tokens (    7.33 ms per token,   136.36 tokens per second)
llama_print_timings:        eval time =    6422.15 ms /   121 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6798.11 ms
Llama.generate: prefix-match hit


186 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.68 ms /   116 runs   (    0.09 ms per token, 10858.37 tokens per second)
llama_print_timings: prompt eval time =     200.14 ms /    24 tokens (    8.34 ms per token,   119.92 tokens per second)
llama_print_timings:        eval time =    6097.14 ms /   115 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6462.54 ms
Llama.generate: prefix-match hit


187 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.13 ms /    40 runs   (    0.10 ms per token,  9685.23 tokens per second)
llama_print_timings: prompt eval time =     188.17 ms /    14 tokens (   13.44 ms per token,    74.40 tokens per second)
llama_print_timings:        eval time =    2058.23 ms /    39 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2302.72 ms
Llama.generate: prefix-match hit


188 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.72 ms /    62 runs   (    0.11 ms per token,  9220.70 tokens per second)
llama_print_timings: prompt eval time =     193.43 ms /    18 tokens (   10.75 ms per token,    93.06 tokens per second)
llama_print_timings:        eval time =    3224.13 ms /    61 runs   (   52.85 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    3506.75 ms
Llama.generate: prefix-match hit


189 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.40 ms /    61 runs   (    0.10 ms per token,  9528.27 tokens per second)
llama_print_timings: prompt eval time =     191.05 ms /    17 tokens (   11.24 ms per token,    88.98 tokens per second)
llama_print_timings:        eval time =    3169.38 ms /    60 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    3447.87 ms
Llama.generate: prefix-match hit


190 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.95 ms /   127 runs   (    0.10 ms per token,  9807.71 tokens per second)
llama_print_timings: prompt eval time =     183.49 ms /    20 tokens (    9.17 ms per token,   109.00 tokens per second)
llama_print_timings:        eval time =    6682.36 ms /   126 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7048.10 ms
Llama.generate: prefix-match hit


191 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.94 ms /   134 runs   (    0.10 ms per token,  9616.07 tokens per second)
llama_print_timings: prompt eval time =     186.56 ms /    15 tokens (   12.44 ms per token,    80.40 tokens per second)
llama_print_timings:        eval time =    7052.80 ms /   133 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7430.51 ms
Llama.generate: prefix-match hit


192 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.94 ms /   137 runs   (    0.10 ms per token,  9827.83 tokens per second)
llama_print_timings: prompt eval time =     199.24 ms /    23 tokens (    8.66 ms per token,   115.44 tokens per second)
llama_print_timings:        eval time =    7217.40 ms /   136 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7614.08 ms
Llama.generate: prefix-match hit


193 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.24 ms /   131 runs   (    0.09 ms per token, 10699.99 tokens per second)
llama_print_timings: prompt eval time =     204.09 ms /    32 tokens (    6.38 ms per token,   156.80 tokens per second)
llama_print_timings:        eval time =    6907.99 ms /   130 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    7296.18 ms
Llama.generate: prefix-match hit


194 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.39 ms /   119 runs   (    0.10 ms per token,  9601.42 tokens per second)
llama_print_timings: prompt eval time =     189.24 ms /    16 tokens (   11.83 ms per token,    84.55 tokens per second)
llama_print_timings:        eval time =    6254.67 ms /   118 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6613.41 ms
Llama.generate: prefix-match hit


195 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.21 ms /    40 runs   (    0.11 ms per token,  9510.22 tokens per second)
llama_print_timings: prompt eval time =     192.73 ms /    18 tokens (   10.71 ms per token,    93.40 tokens per second)
llama_print_timings:        eval time =    2056.85 ms /    39 runs   (   52.74 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2306.59 ms
Llama.generate: prefix-match hit


196 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.72 ms /   146 runs   (    0.10 ms per token,  9918.48 tokens per second)
llama_print_timings: prompt eval time =     207.44 ms /    29 tokens (    7.15 ms per token,   139.80 tokens per second)
llama_print_timings:        eval time =    7705.18 ms /   145 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    8123.49 ms
Llama.generate: prefix-match hit


197 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.26 ms /    40 runs   (    0.11 ms per token,  9398.50 tokens per second)
llama_print_timings: prompt eval time =     188.41 ms /    15 tokens (   12.56 ms per token,    79.61 tokens per second)
llama_print_timings:        eval time =    2059.44 ms /    39 runs   (   52.81 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2303.74 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.43 ms /    39 runs   (    0.11 ms per token,  8801.62 tokens per second)
llama_print_timings: prompt eval time =     194.24 ms /    19 tokens (   10.22 ms per token,    97.82 tokens per second)
llama_print_timings:        eval time =    2004.49 ms /    38 runs   (   52.75 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2256.54 ms
Llama.generate: prefix-

198 -- 1
199 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.88 ms /   129 runs   (    0.10 ms per token, 10014.75 tokens per second)
llama_print_timings: prompt eval time =     209.62 ms /    30 tokens (    6.99 ms per token,   143.11 tokens per second)
llama_print_timings:        eval time =    6798.28 ms /   128 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7189.73 ms
Llama.generate: prefix-match hit


200 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.07 ms /   141 runs   (    0.09 ms per token, 10789.72 tokens per second)
llama_print_timings: prompt eval time =     345.01 ms /    34 tokens (   10.15 ms per token,    98.55 tokens per second)
llama_print_timings:        eval time =    7432.95 ms /   140 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7974.86 ms
Llama.generate: prefix-match hit


201 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.05 ms /   126 runs   (    0.10 ms per token,  9651.47 tokens per second)
llama_print_timings: prompt eval time =     205.89 ms /    27 tokens (    7.63 ms per token,   131.14 tokens per second)
llama_print_timings:        eval time =    6637.40 ms /   125 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7024.69 ms
Llama.generate: prefix-match hit


202 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.14 ms /    40 runs   (    0.10 ms per token,  9659.50 tokens per second)
llama_print_timings: prompt eval time =     188.36 ms /    16 tokens (   11.77 ms per token,    84.94 tokens per second)
llama_print_timings:        eval time =    2056.17 ms /    39 runs   (   52.72 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2301.44 ms
Llama.generate: prefix-match hit


203 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.37 ms /    40 runs   (    0.11 ms per token,  9149.13 tokens per second)
llama_print_timings: prompt eval time =     183.77 ms /    12 tokens (   15.31 ms per token,    65.30 tokens per second)
llama_print_timings:        eval time =    2060.66 ms /    39 runs   (   52.84 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2303.84 ms
Llama.generate: prefix-match hit


204 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.80 ms /   142 runs   (    0.10 ms per token,  9593.95 tokens per second)
llama_print_timings: prompt eval time =     199.50 ms /    23 tokens (    8.67 ms per token,   115.29 tokens per second)
llama_print_timings:        eval time =    7487.57 ms /   141 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7890.86 ms
Llama.generate: prefix-match hit


205 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.16 ms /    39 runs   (    0.11 ms per token,  9381.77 tokens per second)
llama_print_timings: prompt eval time =     195.13 ms /    20 tokens (    9.76 ms per token,   102.50 tokens per second)
llama_print_timings:        eval time =    2005.40 ms /    38 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2255.31 ms
Llama.generate: prefix-match hit


206 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.14 ms /   123 runs   (    0.10 ms per token, 10130.13 tokens per second)
llama_print_timings: prompt eval time =     194.15 ms /    19 tokens (   10.22 ms per token,    97.86 tokens per second)
llama_print_timings:        eval time =    6468.60 ms /   122 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6835.94 ms
Llama.generate: prefix-match hit


207 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.16 ms /    40 runs   (    0.10 ms per token,  9613.07 tokens per second)
llama_print_timings: prompt eval time =     188.47 ms /    15 tokens (   12.56 ms per token,    79.59 tokens per second)
llama_print_timings:        eval time =    2060.30 ms /    39 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2305.70 ms
Llama.generate: prefix-match hit


208 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.07 ms /    39 runs   (    0.10 ms per token,  9582.31 tokens per second)
llama_print_timings: prompt eval time =     199.51 ms /    23 tokens (    8.67 ms per token,   115.28 tokens per second)
llama_print_timings:        eval time =    2006.62 ms /    38 runs   (   52.81 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2261.55 ms
Llama.generate: prefix-match hit


209 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.07 ms /   130 runs   (    0.10 ms per token,  9944.92 tokens per second)
llama_print_timings: prompt eval time =     199.13 ms /    23 tokens (    8.66 ms per token,   115.50 tokens per second)
llama_print_timings:        eval time =    6840.38 ms /   129 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7222.34 ms
Llama.generate: prefix-match hit


210 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.75 ms /   113 runs   (    0.10 ms per token,  9614.57 tokens per second)
llama_print_timings: prompt eval time =     198.38 ms /    22 tokens (    9.02 ms per token,   110.90 tokens per second)
llama_print_timings:        eval time =    5940.02 ms /   112 runs   (   53.04 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6297.94 ms
Llama.generate: prefix-match hit


211 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.53 ms /    40 runs   (    0.11 ms per token,  8831.97 tokens per second)
llama_print_timings: prompt eval time =     199.52 ms /    23 tokens (    8.67 ms per token,   115.27 tokens per second)
llama_print_timings:        eval time =    2059.94 ms /    39 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2321.77 ms
Llama.generate: prefix-match hit


212 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.59 ms /   133 runs   (    0.10 ms per token,  9786.61 tokens per second)
llama_print_timings: prompt eval time =     194.26 ms /    19 tokens (   10.22 ms per token,    97.81 tokens per second)
llama_print_timings:        eval time =    6999.29 ms /   132 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7382.09 ms
Llama.generate: prefix-match hit


213 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.65 ms /    40 runs   (    0.12 ms per token,  8602.15 tokens per second)
llama_print_timings: prompt eval time =     191.36 ms /    17 tokens (   11.26 ms per token,    88.84 tokens per second)
llama_print_timings:        eval time =    2061.24 ms /    39 runs   (   52.85 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    2314.38 ms
Llama.generate: prefix-match hit


214 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.52 ms /    40 runs   (    0.11 ms per token,  8847.60 tokens per second)
llama_print_timings: prompt eval time =     188.79 ms /    15 tokens (   12.59 ms per token,    79.45 tokens per second)
llama_print_timings:        eval time =    2059.90 ms /    39 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2309.15 ms
Llama.generate: prefix-match hit


215 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.30 ms /   141 runs   (    0.09 ms per token, 10603.90 tokens per second)
llama_print_timings: prompt eval time =     353.15 ms /    39 tokens (    9.06 ms per token,   110.43 tokens per second)
llama_print_timings:        eval time =    7439.99 ms /   140 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    7991.13 ms
Llama.generate: prefix-match hit


216 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.05 ms /    39 runs   (    0.10 ms per token,  9629.63 tokens per second)
llama_print_timings: prompt eval time =     200.38 ms /    24 tokens (    8.35 ms per token,   119.77 tokens per second)
llama_print_timings:        eval time =    2005.47 ms /    38 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2261.32 ms
Llama.generate: prefix-match hit


217 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.08 ms /    40 runs   (    0.10 ms per token,  9803.92 tokens per second)
llama_print_timings: prompt eval time =     188.46 ms /    15 tokens (   12.56 ms per token,    79.59 tokens per second)
llama_print_timings:        eval time =    2057.54 ms /    39 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2302.71 ms
Llama.generate: prefix-match hit


218 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.27 ms /    39 runs   (    0.11 ms per token,  9137.77 tokens per second)
llama_print_timings: prompt eval time =     185.43 ms /    13 tokens (   14.26 ms per token,    70.11 tokens per second)
llama_print_timings:        eval time =    2005.49 ms /    38 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2247.87 ms
Llama.generate: prefix-match hit


219 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.89 ms /    67 runs   (    0.10 ms per token,  9724.24 tokens per second)
llama_print_timings: prompt eval time =     188.79 ms /    15 tokens (   12.59 ms per token,    79.45 tokens per second)
llama_print_timings:        eval time =    3488.89 ms /    66 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    3770.88 ms
Llama.generate: prefix-match hit


220 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.28 ms /    40 runs   (    0.11 ms per token,  9352.35 tokens per second)
llama_print_timings: prompt eval time =     183.41 ms /    20 tokens (    9.17 ms per token,   109.04 tokens per second)
llama_print_timings:        eval time =    2057.66 ms /    39 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2298.02 ms
Llama.generate: prefix-match hit


221 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.92 ms /   132 runs   (    0.11 ms per token,  9484.12 tokens per second)
llama_print_timings: prompt eval time =     199.70 ms /    23 tokens (    8.68 ms per token,   115.17 tokens per second)
llama_print_timings:        eval time =    6948.65 ms /   131 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7336.97 ms
Llama.generate: prefix-match hit


222 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.13 ms /    40 runs   (    0.10 ms per token,  9675.86 tokens per second)
llama_print_timings: prompt eval time =     204.10 ms /    26 tokens (    7.85 ms per token,   127.39 tokens per second)
llama_print_timings:        eval time =    2062.18 ms /    39 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    2323.44 ms
Llama.generate: prefix-match hit


223 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.95 ms /   125 runs   (    0.10 ms per token,  9655.49 tokens per second)
llama_print_timings: prompt eval time =     192.94 ms /    18 tokens (   10.72 ms per token,    93.30 tokens per second)
llama_print_timings:        eval time =    6572.89 ms /   124 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6942.82 ms
Llama.generate: prefix-match hit


224 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.46 ms /   146 runs   (    0.10 ms per token, 10098.22 tokens per second)
llama_print_timings: prompt eval time =     345.03 ms /    33 tokens (   10.46 ms per token,    95.64 tokens per second)
llama_print_timings:        eval time =    7708.91 ms /   145 runs   (   53.16 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    8261.71 ms
Llama.generate: prefix-match hit


225 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.72 ms /   131 runs   (    0.10 ms per token,  9546.71 tokens per second)
llama_print_timings: prompt eval time =     187.44 ms /    14 tokens (   13.39 ms per token,    74.69 tokens per second)
llama_print_timings:        eval time =    6896.94 ms /   130 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7272.42 ms
Llama.generate: prefix-match hit


226 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       7.84 ms /    71 runs   (    0.11 ms per token,  9052.66 tokens per second)
llama_print_timings: prompt eval time =     192.91 ms /    18 tokens (   10.72 ms per token,    93.31 tokens per second)
llama_print_timings:        eval time =    3700.93 ms /    70 runs   (   52.87 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3997.92 ms
Llama.generate: prefix-match hit


227 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.61 ms /   103 runs   (    0.10 ms per token,  9705.08 tokens per second)
llama_print_timings: prompt eval time =     182.91 ms /    20 tokens (    9.15 ms per token,   109.34 tokens per second)
llama_print_timings:        eval time =    5399.27 ms /   102 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    5728.93 ms
Llama.generate: prefix-match hit


228 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.30 ms /    40 runs   (    0.11 ms per token,  9304.49 tokens per second)
llama_print_timings: prompt eval time =     183.52 ms /    12 tokens (   15.29 ms per token,    65.39 tokens per second)
llama_print_timings:        eval time =    2058.45 ms /    39 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2299.16 ms
Llama.generate: prefix-match hit


229 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.29 ms /    40 runs   (    0.11 ms per token,  9324.01 tokens per second)
llama_print_timings: prompt eval time =     185.52 ms /    13 tokens (   14.27 ms per token,    70.07 tokens per second)
llama_print_timings:        eval time =    2058.58 ms /    39 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2302.54 ms
Llama.generate: prefix-match hit


230 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.26 ms /    40 runs   (    0.11 ms per token,  9387.47 tokens per second)
llama_print_timings: prompt eval time =     191.44 ms /    17 tokens (   11.26 ms per token,    88.80 tokens per second)
llama_print_timings:        eval time =    2059.88 ms /    39 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2309.31 ms
Llama.generate: prefix-match hit


231 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.41 ms /    39 runs   (    0.11 ms per token,  8837.53 tokens per second)
llama_print_timings: prompt eval time =     183.49 ms /    20 tokens (    9.17 ms per token,   109.00 tokens per second)
llama_print_timings:        eval time =    2005.66 ms /    38 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2247.62 ms
Llama.generate: prefix-match hit


232 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.68 ms /   125 runs   (    0.10 ms per token,  9855.71 tokens per second)
llama_print_timings: prompt eval time =     200.02 ms /    24 tokens (    8.33 ms per token,   119.99 tokens per second)
llama_print_timings:        eval time =    6578.69 ms /   124 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6955.29 ms
Llama.generate: prefix-match hit


233 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.28 ms /    64 runs   (    0.10 ms per token, 10192.71 tokens per second)
llama_print_timings: prompt eval time =     198.60 ms /    23 tokens (    8.63 ms per token,   115.81 tokens per second)
llama_print_timings:        eval time =    3336.41 ms /    63 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3623.99 ms
Llama.generate: prefix-match hit


234 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.28 ms /    40 runs   (    0.11 ms per token,  9341.43 tokens per second)
llama_print_timings: prompt eval time =     188.46 ms /    15 tokens (   12.56 ms per token,    79.59 tokens per second)
llama_print_timings:        eval time =    2059.32 ms /    39 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2305.60 ms
Llama.generate: prefix-match hit


235 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.73 ms /   138 runs   (    0.10 ms per token, 10053.18 tokens per second)
llama_print_timings: prompt eval time =     211.36 ms /    31 tokens (    6.82 ms per token,   146.67 tokens per second)
llama_print_timings:        eval time =    7273.19 ms /   137 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7678.97 ms
Llama.generate: prefix-match hit


236 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.46 ms /    39 runs   (    0.11 ms per token,  8744.39 tokens per second)
llama_print_timings: prompt eval time =     188.32 ms /    16 tokens (   11.77 ms per token,    84.96 tokens per second)
llama_print_timings:        eval time =    2006.52 ms /    38 runs   (   52.80 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2253.32 ms
Llama.generate: prefix-match hit


237 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.80 ms /   125 runs   (    0.10 ms per token,  9765.62 tokens per second)
llama_print_timings: prompt eval time =     199.24 ms /    23 tokens (    8.66 ms per token,   115.44 tokens per second)
llama_print_timings:        eval time =    6578.15 ms /   124 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6961.63 ms
Llama.generate: prefix-match hit


238 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.30 ms /    40 runs   (    0.11 ms per token,  9291.52 tokens per second)
llama_print_timings: prompt eval time =     196.09 ms /    21 tokens (    9.34 ms per token,   107.09 tokens per second)
llama_print_timings:        eval time =    2060.52 ms /    39 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2313.58 ms
Llama.generate: prefix-match hit


239 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.89 ms /   102 runs   (    0.11 ms per token,  9369.83 tokens per second)
llama_print_timings: prompt eval time =     188.48 ms /    15 tokens (   12.57 ms per token,    79.58 tokens per second)
llama_print_timings:        eval time =    5348.79 ms /   101 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    5683.59 ms
Llama.generate: prefix-match hit


240 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.11 ms /   108 runs   (    0.10 ms per token,  9717.47 tokens per second)
llama_print_timings: prompt eval time =     196.13 ms /    21 tokens (    9.34 ms per token,   107.07 tokens per second)
llama_print_timings:        eval time =    5675.94 ms /   107 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6027.87 ms
Llama.generate: prefix-match hit


241 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.95 ms /   113 runs   (    0.11 ms per token,  9458.44 tokens per second)
llama_print_timings: prompt eval time =     200.41 ms /    24 tokens (    8.35 ms per token,   119.76 tokens per second)
llama_print_timings:        eval time =    5935.83 ms /   112 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6299.97 ms
Llama.generate: prefix-match hit


242 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.83 ms /   133 runs   (    0.10 ms per token,  9620.25 tokens per second)
llama_print_timings: prompt eval time =     183.53 ms /    20 tokens (    9.18 ms per token,   108.98 tokens per second)
llama_print_timings:        eval time =    6997.78 ms /   132 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7369.44 ms
Llama.generate: prefix-match hit


243 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      16.48 ms /   163 runs   (    0.10 ms per token,  9891.98 tokens per second)
llama_print_timings: prompt eval time =     183.38 ms /    20 tokens (    9.17 ms per token,   109.06 tokens per second)
llama_print_timings:        eval time =    8601.30 ms /   162 runs   (   53.09 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    9016.24 ms
Llama.generate: prefix-match hit


244 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.60 ms /    40 runs   (    0.11 ms per token,  8703.22 tokens per second)
llama_print_timings: prompt eval time =     188.69 ms /    16 tokens (   11.79 ms per token,    84.79 tokens per second)
llama_print_timings:        eval time =    2060.16 ms /    39 runs   (   52.82 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2310.83 ms
Llama.generate: prefix-match hit


245 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.30 ms /    40 runs   (    0.11 ms per token,  9304.49 tokens per second)
llama_print_timings: prompt eval time =     189.11 ms /    15 tokens (   12.61 ms per token,    79.32 tokens per second)
llama_print_timings:        eval time =    2059.00 ms /    39 runs   (   52.79 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2305.77 ms
Llama.generate: prefix-match hit


246 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      15.93 ms /   156 runs   (    0.10 ms per token,  9792.84 tokens per second)
llama_print_timings: prompt eval time =     188.69 ms /    16 tokens (   11.79 ms per token,    84.79 tokens per second)
llama_print_timings:        eval time =    8223.74 ms /   155 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    8634.16 ms
Llama.generate: prefix-match hit


247 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.94 ms /   134 runs   (    0.10 ms per token,  9613.32 tokens per second)
llama_print_timings: prompt eval time =     183.50 ms /    20 tokens (    9.18 ms per token,   108.99 tokens per second)
llama_print_timings:        eval time =    7053.21 ms /   133 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7430.53 ms
Llama.generate: prefix-match hit


248 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.61 ms /    64 runs   (    0.10 ms per token,  9688.16 tokens per second)
llama_print_timings: prompt eval time =     192.51 ms /    18 tokens (   10.70 ms per token,    93.50 tokens per second)
llama_print_timings:        eval time =    3324.44 ms /    63 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    3606.60 ms
Llama.generate: prefix-match hit


249 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.04 ms /   122 runs   (    0.10 ms per token, 10132.89 tokens per second)
llama_print_timings: prompt eval time =     200.42 ms /    23 tokens (    8.71 ms per token,   114.76 tokens per second)
llama_print_timings:        eval time =    6418.26 ms /   121 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    6793.35 ms
Llama.generate: prefix-match hit


250 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.19 ms /   125 runs   (    0.10 ms per token, 10252.62 tokens per second)
llama_print_timings: prompt eval time =     207.02 ms /    29 tokens (    7.14 ms per token,   140.08 tokens per second)
llama_print_timings:        eval time =    6581.30 ms /   124 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6967.56 ms
Llama.generate: prefix-match hit


251 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.83 ms /   114 runs   (    0.10 ms per token,  9635.70 tokens per second)
llama_print_timings: prompt eval time =     192.78 ms /    18 tokens (   10.71 ms per token,    93.37 tokens per second)
llama_print_timings:        eval time =    5989.46 ms /   113 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6342.81 ms
Llama.generate: prefix-match hit


252 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.16 ms /   122 runs   (    0.11 ms per token,  9269.81 tokens per second)
llama_print_timings: prompt eval time =     188.66 ms /    15 tokens (   12.58 ms per token,    79.51 tokens per second)
llama_print_timings:        eval time =    6413.71 ms /   121 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6780.07 ms
Llama.generate: prefix-match hit


253 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.70 ms /   121 runs   (    0.10 ms per token,  9524.56 tokens per second)
llama_print_timings: prompt eval time =     188.19 ms /    16 tokens (   11.76 ms per token,    85.02 tokens per second)
llama_print_timings:        eval time =    6362.21 ms /   120 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6723.84 ms
Llama.generate: prefix-match hit


254 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.64 ms /   114 runs   (    0.11 ms per token,  9021.13 tokens per second)
llama_print_timings: prompt eval time =     194.37 ms /    19 tokens (   10.23 ms per token,    97.75 tokens per second)
llama_print_timings:        eval time =    5992.61 ms /   113 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6356.57 ms
Llama.generate: prefix-match hit


255 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.24 ms /    40 runs   (    0.11 ms per token,  9425.07 tokens per second)
llama_print_timings: prompt eval time =     193.96 ms /    19 tokens (   10.21 ms per token,    97.96 tokens per second)
llama_print_timings:        eval time =    2060.19 ms /    39 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2311.85 ms
Llama.generate: prefix-match hit


256 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.76 ms /   103 runs   (    0.10 ms per token,  9568.93 tokens per second)
llama_print_timings: prompt eval time =     183.92 ms /    20 tokens (    9.20 ms per token,   108.74 tokens per second)
llama_print_timings:        eval time =    5405.32 ms /   102 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    5732.82 ms
Llama.generate: prefix-match hit


257 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.34 ms /   132 runs   (    0.10 ms per token,  9898.76 tokens per second)
llama_print_timings: prompt eval time =     195.94 ms /    21 tokens (    9.33 ms per token,   107.17 tokens per second)
llama_print_timings:        eval time =    6951.78 ms /   131 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7336.63 ms
Llama.generate: prefix-match hit


258 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.28 ms /   130 runs   (    0.10 ms per token,  9789.16 tokens per second)
llama_print_timings: prompt eval time =     199.02 ms /    23 tokens (    8.65 ms per token,   115.57 tokens per second)
llama_print_timings:        eval time =    6846.08 ms /   129 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7233.03 ms
Llama.generate: prefix-match hit


259 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.60 ms /   140 runs   (    0.10 ms per token,  9588.38 tokens per second)
llama_print_timings: prompt eval time =     195.72 ms /    21 tokens (    9.32 ms per token,   107.29 tokens per second)
llama_print_timings:        eval time =    7375.38 ms /   139 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7771.50 ms
Llama.generate: prefix-match hit


260 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.74 ms /   143 runs   (    0.10 ms per token,  9700.83 tokens per second)
llama_print_timings: prompt eval time =     195.08 ms /    19 tokens (   10.27 ms per token,    97.40 tokens per second)
llama_print_timings:        eval time =    7526.75 ms /   142 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    7921.88 ms
Llama.generate: prefix-match hit


261 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.00 ms /    39 runs   (    0.10 ms per token,  9757.32 tokens per second)
llama_print_timings: prompt eval time =     197.95 ms /    22 tokens (    9.00 ms per token,   111.14 tokens per second)
llama_print_timings:        eval time =    2001.25 ms /    38 runs   (   52.66 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2252.50 ms
Llama.generate: prefix-match hit


262 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.63 ms /    67 runs   (    0.10 ms per token, 10107.11 tokens per second)
llama_print_timings: prompt eval time =     183.89 ms /    11 tokens (   16.72 ms per token,    59.82 tokens per second)
llama_print_timings:        eval time =    3479.53 ms /    66 runs   (   52.72 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    3754.93 ms
Llama.generate: prefix-match hit


263 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.70 ms /   116 runs   (    0.10 ms per token,  9914.53 tokens per second)
llama_print_timings: prompt eval time =     188.83 ms /    15 tokens (   12.59 ms per token,    79.44 tokens per second)
llama_print_timings:        eval time =    6087.32 ms /   115 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6438.90 ms
Llama.generate: prefix-match hit


264 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.10 ms /    40 runs   (    0.10 ms per token,  9753.72 tokens per second)
llama_print_timings: prompt eval time =     190.79 ms /    17 tokens (   11.22 ms per token,    89.11 tokens per second)
llama_print_timings:        eval time =    2053.99 ms /    39 runs   (   52.67 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2299.05 ms
Llama.generate: prefix-match hit


265 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.97 ms /    40 runs   (    0.10 ms per token, 10085.73 tokens per second)
llama_print_timings: prompt eval time =     198.39 ms /    22 tokens (    9.02 ms per token,   110.89 tokens per second)
llama_print_timings:        eval time =    2055.19 ms /    39 runs   (   52.70 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2307.97 ms
Llama.generate: prefix-match hit


266 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.99 ms /    40 runs   (    0.10 ms per token, 10027.58 tokens per second)
llama_print_timings: prompt eval time =     187.30 ms /    14 tokens (   13.38 ms per token,    74.74 tokens per second)
llama_print_timings:        eval time =    2052.74 ms /    39 runs   (   52.63 ms per token,    19.00 tokens per second)
llama_print_timings:       total time =    2295.09 ms
Llama.generate: prefix-match hit


267 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       7.00 ms /    68 runs   (    0.10 ms per token,  9712.90 tokens per second)
llama_print_timings: prompt eval time =     191.02 ms /    17 tokens (   11.24 ms per token,    89.00 tokens per second)
llama_print_timings:        eval time =    3540.83 ms /    67 runs   (   52.85 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    3826.13 ms
Llama.generate: prefix-match hit


268 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.35 ms /   108 runs   (    0.11 ms per token,  9512.07 tokens per second)
llama_print_timings: prompt eval time =     187.62 ms /    14 tokens (   13.40 ms per token,    74.62 tokens per second)
llama_print_timings:        eval time =    5659.77 ms /   107 runs   (   52.90 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    5997.52 ms
Llama.generate: prefix-match hit


269 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.06 ms /    40 runs   (    0.10 ms per token,  9859.50 tokens per second)
llama_print_timings: prompt eval time =     188.86 ms /    16 tokens (   11.80 ms per token,    84.72 tokens per second)
llama_print_timings:        eval time =    2053.03 ms /    39 runs   (   52.64 ms per token,    19.00 tokens per second)
llama_print_timings:       total time =    2296.08 ms
Llama.generate: prefix-match hit


270 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.10 ms /    40 runs   (    0.10 ms per token,  9765.62 tokens per second)
llama_print_timings: prompt eval time =     200.22 ms /    24 tokens (    8.34 ms per token,   119.87 tokens per second)
llama_print_timings:        eval time =    2058.07 ms /    39 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2312.76 ms
Llama.generate: prefix-match hit


271 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.74 ms /    45 runs   (    0.11 ms per token,  9487.67 tokens per second)
llama_print_timings: prompt eval time =     194.23 ms /    19 tokens (   10.22 ms per token,    97.82 tokens per second)
llama_print_timings:        eval time =    2317.89 ms /    44 runs   (   52.68 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2575.83 ms
Llama.generate: prefix-match hit


272 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.26 ms /    40 runs   (    0.11 ms per token,  9396.29 tokens per second)
llama_print_timings: prompt eval time =     192.80 ms /    18 tokens (   10.71 ms per token,    93.36 tokens per second)
llama_print_timings:        eval time =    2054.81 ms /    39 runs   (   52.69 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2304.24 ms
Llama.generate: prefix-match hit


273 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.47 ms /   120 runs   (    0.10 ms per token,  9621.55 tokens per second)
llama_print_timings: prompt eval time =     193.78 ms /    19 tokens (   10.20 ms per token,    98.05 tokens per second)
llama_print_timings:        eval time =    6300.76 ms /   119 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6661.44 ms
Llama.generate: prefix-match hit


274 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.49 ms /   124 runs   (    0.09 ms per token, 10795.75 tokens per second)
llama_print_timings: prompt eval time =     205.78 ms /    27 tokens (    7.62 ms per token,   131.21 tokens per second)
llama_print_timings:        eval time =    6520.11 ms /   123 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6897.67 ms
Llama.generate: prefix-match hit


275 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.06 ms /    40 runs   (    0.10 ms per token,  9852.22 tokens per second)
llama_print_timings: prompt eval time =     190.35 ms /    17 tokens (   11.20 ms per token,    89.31 tokens per second)
llama_print_timings:        eval time =    2054.20 ms /    39 runs   (   52.67 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2299.32 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.67 ms /   140 runs   (    0.10 ms per token, 10244.40 tokens per second)
llama_print_timings: prompt eval time =     198.28 ms /    22 tokens (    9.01 ms per token,   110.95 tokens per second)
llama_print_timings:        eval time =    7363.78 ms /   139 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    7761.43 ms
Llama.generate: prefix-

276 -- 1
277 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.01 ms /    39 runs   (    0.10 ms per token,  9735.40 tokens per second)
llama_print_timings: prompt eval time =     184.06 ms /    20 tokens (    9.20 ms per token,   108.66 tokens per second)
llama_print_timings:        eval time =    2000.64 ms /    38 runs   (   52.65 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2238.39 ms
Llama.generate: prefix-match hit


278 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.05 ms /    40 runs   (    0.10 ms per token,  9883.86 tokens per second)
llama_print_timings: prompt eval time =     190.92 ms /    17 tokens (   11.23 ms per token,    89.04 tokens per second)
llama_print_timings:        eval time =    2053.59 ms /    39 runs   (   52.66 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2299.49 ms
Llama.generate: prefix-match hit


279 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.55 ms /   118 runs   (    0.10 ms per token, 10218.22 tokens per second)
llama_print_timings: prompt eval time =     187.37 ms /    14 tokens (   13.38 ms per token,    74.72 tokens per second)
llama_print_timings:        eval time =    6193.06 ms /   117 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6543.41 ms
Llama.generate: prefix-match hit


280 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.11 ms /   132 runs   (    0.10 ms per token, 10065.58 tokens per second)
llama_print_timings: prompt eval time =     196.25 ms /    21 tokens (    9.35 ms per token,   107.01 tokens per second)
llama_print_timings:        eval time =    6947.74 ms /   131 runs   (   53.04 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7328.03 ms
Llama.generate: prefix-match hit


281 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.41 ms /   122 runs   (    0.10 ms per token,  9831.57 tokens per second)
llama_print_timings: prompt eval time =     192.75 ms /    18 tokens (   10.71 ms per token,    93.38 tokens per second)
llama_print_timings:        eval time =    6402.49 ms /   121 runs   (   52.91 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    6766.50 ms
Llama.generate: prefix-match hit


282 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.64 ms /   136 runs   (    0.10 ms per token,  9972.14 tokens per second)
llama_print_timings: prompt eval time =     188.09 ms /    16 tokens (   11.76 ms per token,    85.07 tokens per second)
llama_print_timings:        eval time =    7149.06 ms /   135 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    7526.41 ms
Llama.generate: prefix-match hit


283 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.09 ms /    40 runs   (    0.10 ms per token,  9782.34 tokens per second)
llama_print_timings: prompt eval time =     187.43 ms /    14 tokens (   13.39 ms per token,    74.69 tokens per second)
llama_print_timings:        eval time =    2056.29 ms /    39 runs   (   52.73 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2298.19 ms
Llama.generate: prefix-match hit


284 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.29 ms /    39 runs   (    0.11 ms per token,  9097.27 tokens per second)
llama_print_timings: prompt eval time =     188.54 ms /    16 tokens (   11.78 ms per token,    84.86 tokens per second)
llama_print_timings:        eval time =    2000.51 ms /    38 runs   (   52.64 ms per token,    19.00 tokens per second)
llama_print_timings:       total time =    2247.59 ms
Llama.generate: prefix-match hit


285 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.29 ms /   102 runs   (    0.10 ms per token,  9916.39 tokens per second)
llama_print_timings: prompt eval time =     197.96 ms /    22 tokens (    9.00 ms per token,   111.13 tokens per second)
llama_print_timings:        eval time =    5342.62 ms /   101 runs   (   52.90 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    5682.09 ms
Llama.generate: prefix-match hit


286 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.16 ms /    40 runs   (    0.10 ms per token,  9624.64 tokens per second)
llama_print_timings: prompt eval time =     196.31 ms /    21 tokens (    9.35 ms per token,   106.97 tokens per second)
llama_print_timings:        eval time =    2050.79 ms /    39 runs   (   52.58 ms per token,    19.02 tokens per second)
llama_print_timings:       total time =    2301.98 ms
Llama.generate: prefix-match hit


287 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.51 ms /    44 runs   (    0.10 ms per token,  9749.61 tokens per second)
llama_print_timings: prompt eval time =     190.93 ms /    17 tokens (   11.23 ms per token,    89.04 tokens per second)
llama_print_timings:        eval time =    2264.86 ms /    43 runs   (   52.67 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2516.33 ms
Llama.generate: prefix-match hit


288 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.68 ms /    45 runs   (    0.10 ms per token,  9611.28 tokens per second)
llama_print_timings: prompt eval time =     199.87 ms /    23 tokens (    8.69 ms per token,   115.08 tokens per second)
llama_print_timings:        eval time =    2321.36 ms /    44 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2584.33 ms
Llama.generate: prefix-match hit


289 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.77 ms /    44 runs   (    0.11 ms per token,  9222.39 tokens per second)
llama_print_timings: prompt eval time =     183.16 ms /    20 tokens (    9.16 ms per token,   109.19 tokens per second)
llama_print_timings:        eval time =    2268.90 ms /    43 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2514.03 ms
Llama.generate: prefix-match hit


290 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.36 ms /   116 runs   (    0.10 ms per token, 10214.86 tokens per second)
llama_print_timings: prompt eval time =     199.05 ms /    23 tokens (    8.65 ms per token,   115.55 tokens per second)
llama_print_timings:        eval time =    6084.24 ms /   115 runs   (   52.91 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    6444.37 ms
Llama.generate: prefix-match hit


291 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.99 ms /    39 runs   (    0.10 ms per token,  9784.24 tokens per second)
llama_print_timings: prompt eval time =     185.49 ms /    13 tokens (   14.27 ms per token,    70.08 tokens per second)
llama_print_timings:        eval time =    2000.73 ms /    38 runs   (   52.65 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2240.03 ms
Llama.generate: prefix-match hit


292 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       7.65 ms /    78 runs   (    0.10 ms per token, 10201.41 tokens per second)
llama_print_timings: prompt eval time =     188.10 ms /    16 tokens (   11.76 ms per token,    85.06 tokens per second)
llama_print_timings:        eval time =    4067.57 ms /    77 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    4362.96 ms
Llama.generate: prefix-match hit


293 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.02 ms /   127 runs   (    0.09 ms per token, 11523.46 tokens per second)
llama_print_timings: prompt eval time =     211.34 ms /    31 tokens (    6.82 ms per token,   146.69 tokens per second)
llama_print_timings:        eval time =    6683.73 ms /   126 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7070.22 ms
Llama.generate: prefix-match hit


294 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.12 ms /    59 runs   (    0.10 ms per token,  9648.41 tokens per second)
llama_print_timings: prompt eval time =     188.77 ms /    15 tokens (   12.58 ms per token,    79.46 tokens per second)
llama_print_timings:        eval time =    3061.45 ms /    58 runs   (   52.78 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    3332.17 ms
Llama.generate: prefix-match hit


295 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.00 ms /    39 runs   (    0.10 ms per token,  9759.76 tokens per second)
llama_print_timings: prompt eval time =     200.04 ms /    24 tokens (    8.34 ms per token,   119.97 tokens per second)
llama_print_timings:        eval time =    2001.80 ms /    38 runs   (   52.68 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2255.75 ms
Llama.generate: prefix-match hit


296 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       5.02 ms /    49 runs   (    0.10 ms per token,  9759.01 tokens per second)
llama_print_timings: prompt eval time =     193.70 ms /    19 tokens (   10.19 ms per token,    98.09 tokens per second)
llama_print_timings:        eval time =    2530.33 ms /    48 runs   (   52.72 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2791.16 ms
Llama.generate: prefix-match hit


297 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.88 ms /   137 runs   (    0.10 ms per token,  9869.61 tokens per second)
llama_print_timings: prompt eval time =     193.73 ms /    19 tokens (   10.20 ms per token,    98.07 tokens per second)
llama_print_timings:        eval time =    7210.42 ms /   136 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7594.97 ms
Llama.generate: prefix-match hit


298 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.11 ms /    40 runs   (    0.10 ms per token,  9737.10 tokens per second)
llama_print_timings: prompt eval time =     182.08 ms /    10 tokens (   18.21 ms per token,    54.92 tokens per second)
llama_print_timings:        eval time =    2051.63 ms /    39 runs   (   52.61 ms per token,    19.01 tokens per second)
llama_print_timings:       total time =    2289.46 ms
Llama.generate: prefix-match hit


299 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.86 ms /   125 runs   (    0.10 ms per token,  9719.31 tokens per second)
llama_print_timings: prompt eval time =     196.06 ms /    21 tokens (    9.34 ms per token,   107.11 tokens per second)
llama_print_timings:        eval time =    6570.03 ms /   124 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6945.85 ms
Llama.generate: prefix-match hit


300 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.44 ms /   152 runs   (    0.10 ms per token, 10524.86 tokens per second)
llama_print_timings: prompt eval time =     344.81 ms /    34 tokens (   10.14 ms per token,    98.60 tokens per second)
llama_print_timings:        eval time =    8020.32 ms /   151 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    8577.82 ms
Llama.generate: prefix-match hit


301 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.89 ms /    39 runs   (    0.10 ms per token, 10017.98 tokens per second)
llama_print_timings: prompt eval time =     183.82 ms /    11 tokens (   16.71 ms per token,    59.84 tokens per second)
llama_print_timings:        eval time =    2000.31 ms /    38 runs   (   52.64 ms per token,    19.00 tokens per second)
llama_print_timings:       total time =    2237.02 ms
Llama.generate: prefix-match hit


302 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.88 ms /   130 runs   (    0.10 ms per token, 10097.09 tokens per second)
llama_print_timings: prompt eval time =     193.18 ms /    18 tokens (   10.73 ms per token,    93.18 tokens per second)
llama_print_timings:        eval time =    6828.25 ms /   129 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    7201.63 ms
Llama.generate: prefix-match hit


303 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.84 ms /    40 runs   (    0.10 ms per token, 10413.95 tokens per second)
llama_print_timings: prompt eval time =     194.05 ms /    19 tokens (   10.21 ms per token,    97.91 tokens per second)
llama_print_timings:        eval time =    2053.05 ms /    39 runs   (   52.64 ms per token,    19.00 tokens per second)
llama_print_timings:       total time =    2301.67 ms
Llama.generate: prefix-match hit


304 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.07 ms /    40 runs   (    0.10 ms per token,  9828.01 tokens per second)
llama_print_timings: prompt eval time =     188.58 ms /    15 tokens (   12.57 ms per token,    79.54 tokens per second)
llama_print_timings:        eval time =    2052.38 ms /    39 runs   (   52.62 ms per token,    19.00 tokens per second)
llama_print_timings:       total time =    2295.73 ms
Llama.generate: prefix-match hit


305 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.09 ms /   122 runs   (    0.10 ms per token, 10095.16 tokens per second)
llama_print_timings: prompt eval time =     192.57 ms /    18 tokens (   10.70 ms per token,    93.47 tokens per second)
llama_print_timings:        eval time =    6404.48 ms /   121 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6766.95 ms
Llama.generate: prefix-match hit


306 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.53 ms /   108 runs   (    0.10 ms per token, 10255.44 tokens per second)
llama_print_timings: prompt eval time =     203.98 ms /    32 tokens (    6.37 ms per token,   156.88 tokens per second)
llama_print_timings:        eval time =    5671.73 ms /   107 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6028.32 ms
Llama.generate: prefix-match hit


307 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.71 ms /   114 runs   (    0.09 ms per token, 10642.27 tokens per second)
llama_print_timings: prompt eval time =     204.15 ms /    26 tokens (    7.85 ms per token,   127.36 tokens per second)
llama_print_timings:        eval time =    5989.81 ms /   113 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6354.03 ms
Llama.generate: prefix-match hit


308 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.40 ms /   114 runs   (    0.10 ms per token, 10000.00 tokens per second)
llama_print_timings: prompt eval time =     184.38 ms /    20 tokens (    9.22 ms per token,   108.47 tokens per second)
llama_print_timings:        eval time =    5981.49 ms /   113 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6324.60 ms
Llama.generate: prefix-match hit


309 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.98 ms /    40 runs   (    0.10 ms per token, 10037.64 tokens per second)
llama_print_timings: prompt eval time =     187.72 ms /    14 tokens (   13.41 ms per token,    74.58 tokens per second)
llama_print_timings:        eval time =    2053.56 ms /    39 runs   (   52.66 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2295.10 ms
Llama.generate: prefix-match hit


310 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.50 ms /    71 runs   (    0.09 ms per token, 10928.12 tokens per second)
llama_print_timings: prompt eval time =     345.99 ms /    37 tokens (    9.35 ms per token,   106.94 tokens per second)
llama_print_timings:        eval time =    3704.11 ms /    70 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    4146.72 ms
Llama.generate: prefix-match hit


311 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.68 ms /    45 runs   (    0.10 ms per token,  9609.22 tokens per second)
llama_print_timings: prompt eval time =     194.17 ms /    19 tokens (   10.22 ms per token,    97.85 tokens per second)
llama_print_timings:        eval time =    2318.92 ms /    44 runs   (   52.70 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2575.83 ms
Llama.generate: prefix-match hit


312 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.73 ms /   124 runs   (    0.10 ms per token,  9736.95 tokens per second)
llama_print_timings: prompt eval time =     194.32 ms /    19 tokens (   10.23 ms per token,    97.78 tokens per second)
llama_print_timings:        eval time =    6516.21 ms /   123 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    6883.85 ms
Llama.generate: prefix-match hit


313 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.82 ms /   122 runs   (    0.11 ms per token,  9518.61 tokens per second)
llama_print_timings: prompt eval time =     183.28 ms /    20 tokens (    9.16 ms per token,   109.12 tokens per second)
llama_print_timings:        eval time =    6383.78 ms /   121 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    6739.15 ms
Llama.generate: prefix-match hit


314 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.60 ms /    45 runs   (    0.10 ms per token,  9784.74 tokens per second)
llama_print_timings: prompt eval time =     192.65 ms /    18 tokens (   10.70 ms per token,    93.43 tokens per second)
llama_print_timings:        eval time =    2316.64 ms /    44 runs   (   52.65 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2571.08 ms
Llama.generate: prefix-match hit


315 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.16 ms /    40 runs   (    0.10 ms per token,  9622.32 tokens per second)
llama_print_timings: prompt eval time =     185.38 ms /    13 tokens (   14.26 ms per token,    70.13 tokens per second)
llama_print_timings:        eval time =    2057.65 ms /    39 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2300.43 ms
Llama.generate: prefix-match hit


316 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.06 ms /    40 runs   (    0.10 ms per token,  9849.79 tokens per second)
llama_print_timings: prompt eval time =     182.20 ms /    10 tokens (   18.22 ms per token,    54.89 tokens per second)
llama_print_timings:        eval time =    2051.49 ms /    39 runs   (   52.60 ms per token,    19.01 tokens per second)
llama_print_timings:       total time =    2288.40 ms
Llama.generate: prefix-match hit


317 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.90 ms /    39 runs   (    0.10 ms per token, 10010.27 tokens per second)
llama_print_timings: prompt eval time =     200.42 ms /    24 tokens (    8.35 ms per token,   119.75 tokens per second)
llama_print_timings:        eval time =    2003.87 ms /    38 runs   (   52.73 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2258.11 ms
Llama.generate: prefix-match hit


318 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.64 ms /   155 runs   (    0.09 ms per token, 10588.88 tokens per second)
llama_print_timings: prompt eval time =     195.96 ms /    21 tokens (    9.33 ms per token,   107.16 tokens per second)
llama_print_timings:        eval time =    8167.74 ms /   154 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    8579.86 ms
Llama.generate: prefix-match hit


319 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.85 ms /    48 runs   (    0.10 ms per token,  9900.99 tokens per second)
llama_print_timings: prompt eval time =     196.22 ms /    21 tokens (    9.34 ms per token,   107.02 tokens per second)
llama_print_timings:        eval time =    2480.14 ms /    47 runs   (   52.77 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2742.03 ms
Llama.generate: prefix-match hit


320 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.27 ms /   109 runs   (    0.10 ms per token,  9668.26 tokens per second)
llama_print_timings: prompt eval time =     194.17 ms /    19 tokens (   10.22 ms per token,    97.85 tokens per second)
llama_print_timings:        eval time =    5717.67 ms /   108 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6064.19 ms
Llama.generate: prefix-match hit


321 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.91 ms /    40 runs   (    0.10 ms per token, 10222.34 tokens per second)
llama_print_timings: prompt eval time =     182.00 ms /    10 tokens (   18.20 ms per token,    54.95 tokens per second)
llama_print_timings:        eval time =    2052.57 ms /    39 runs   (   52.63 ms per token,    19.00 tokens per second)
llama_print_timings:       total time =    2289.31 ms
Llama.generate: prefix-match hit


322 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.04 ms /    40 runs   (    0.10 ms per token,  9908.35 tokens per second)
llama_print_timings: prompt eval time =     190.75 ms /    17 tokens (   11.22 ms per token,    89.12 tokens per second)
llama_print_timings:        eval time =    2051.71 ms /    39 runs   (   52.61 ms per token,    19.01 tokens per second)
llama_print_timings:       total time =    2297.14 ms
Llama.generate: prefix-match hit


323 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.31 ms /   111 runs   (    0.10 ms per token,  9815.19 tokens per second)
llama_print_timings: prompt eval time =     192.82 ms /    18 tokens (   10.71 ms per token,    93.35 tokens per second)
llama_print_timings:        eval time =    5822.74 ms /   110 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6169.61 ms
Llama.generate: prefix-match hit


324 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.10 ms /    39 runs   (    0.11 ms per token,  9502.92 tokens per second)
llama_print_timings: prompt eval time =     192.46 ms /    18 tokens (   10.69 ms per token,    93.53 tokens per second)
llama_print_timings:        eval time =    2003.07 ms /    38 runs   (   52.71 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2250.46 ms
Llama.generate: prefix-match hit


325 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.70 ms /   126 runs   (    0.10 ms per token,  9921.26 tokens per second)
llama_print_timings: prompt eval time =     188.51 ms /    15 tokens (   12.57 ms per token,    79.57 tokens per second)
llama_print_timings:        eval time =    6617.68 ms /   125 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6982.91 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.10 ms /    60 runs   (    0.10 ms per token,  9831.23 tokens per second)
llama_print_timings: prompt eval time =     188.51 ms /    15 tokens (   12.57 ms per token,    79.57 tokens per second)
llama_print_timings:        eval time =    3109.56 ms /    59 runs   (   52.70 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    3379.81 ms
Llama.generate: prefix-

326 -- 1
327 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.05 ms /    40 runs   (    0.10 ms per token,  9866.80 tokens per second)
llama_print_timings: prompt eval time =     190.91 ms /    17 tokens (   11.23 ms per token,    89.05 tokens per second)
llama_print_timings:        eval time =    2053.03 ms /    39 runs   (   52.64 ms per token,    19.00 tokens per second)
llama_print_timings:       total time =    2297.76 ms
Llama.generate: prefix-match hit


328 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.07 ms /    39 runs   (    0.10 ms per token,  9591.74 tokens per second)
llama_print_timings: prompt eval time =     198.61 ms /    22 tokens (    9.03 ms per token,   110.77 tokens per second)
llama_print_timings:        eval time =    2004.05 ms /    38 runs   (   52.74 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2257.33 ms
Llama.generate: prefix-match hit


329 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.14 ms /   129 runs   (    0.09 ms per token, 10629.53 tokens per second)
llama_print_timings: prompt eval time =     361.63 ms /    47 tokens (    7.69 ms per token,   129.97 tokens per second)
llama_print_timings:        eval time =    6800.24 ms /   128 runs   (   53.13 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    7341.43 ms
Llama.generate: prefix-match hit


330 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.03 ms /    40 runs   (    0.10 ms per token,  9932.95 tokens per second)
llama_print_timings: prompt eval time =     188.73 ms /    16 tokens (   11.80 ms per token,    84.78 tokens per second)
llama_print_timings:        eval time =    2054.45 ms /    39 runs   (   52.68 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2297.08 ms
Llama.generate: prefix-match hit


331 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.53 ms /    45 runs   (    0.10 ms per token,  9942.55 tokens per second)
llama_print_timings: prompt eval time =     193.90 ms /    19 tokens (   10.21 ms per token,    97.99 tokens per second)
llama_print_timings:        eval time =    2317.61 ms /    44 runs   (   52.67 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2572.84 ms
Llama.generate: prefix-match hit


332 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.71 ms /   128 runs   (    0.10 ms per token, 10069.23 tokens per second)
llama_print_timings: prompt eval time =     194.20 ms /    19 tokens (   10.22 ms per token,    97.84 tokens per second)
llama_print_timings:        eval time =    6714.64 ms /   127 runs   (   52.87 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    7087.29 ms
Llama.generate: prefix-match hit


333 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.15 ms /   134 runs   (    0.10 ms per token, 10188.56 tokens per second)
llama_print_timings: prompt eval time =     200.42 ms /    24 tokens (    8.35 ms per token,   119.75 tokens per second)
llama_print_timings:        eval time =    7048.70 ms /   133 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    7437.15 ms
Llama.generate: prefix-match hit


334 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.84 ms /   118 runs   (    0.10 ms per token,  9967.06 tokens per second)
llama_print_timings: prompt eval time =     198.43 ms /    22 tokens (    9.02 ms per token,   110.87 tokens per second)
llama_print_timings:        eval time =    6193.35 ms /   117 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6555.59 ms
Llama.generate: prefix-match hit


335 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.45 ms /    44 runs   (    0.10 ms per token,  9880.98 tokens per second)
llama_print_timings: prompt eval time =     199.62 ms /    23 tokens (    8.68 ms per token,   115.22 tokens per second)
llama_print_timings:        eval time =    2267.95 ms /    43 runs   (   52.74 ms per token,    18.96 tokens per second)
llama_print_timings:       total time =    2527.49 ms
Llama.generate: prefix-match hit


336 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.46 ms /   137 runs   (    0.10 ms per token, 10182.09 tokens per second)
llama_print_timings: prompt eval time =     205.30 ms /    28 tokens (    7.33 ms per token,   136.39 tokens per second)
llama_print_timings:        eval time =    7219.26 ms /   136 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7613.72 ms
Llama.generate: prefix-match hit


337 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.49 ms /   146 runs   (    0.10 ms per token, 10075.22 tokens per second)
llama_print_timings: prompt eval time =     209.96 ms /    30 tokens (    7.00 ms per token,   142.89 tokens per second)
llama_print_timings:        eval time =    7694.19 ms /   145 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    8110.54 ms
Llama.generate: prefix-match hit


338 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.11 ms /    63 runs   (    0.10 ms per token, 10309.28 tokens per second)
llama_print_timings: prompt eval time =     196.71 ms /    21 tokens (    9.37 ms per token,   106.76 tokens per second)
llama_print_timings:        eval time =    3277.40 ms /    62 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    3559.70 ms
Llama.generate: prefix-match hit


339 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.98 ms /   124 runs   (    0.10 ms per token, 10350.58 tokens per second)
llama_print_timings: prompt eval time =     183.73 ms /    20 tokens (    9.19 ms per token,   108.85 tokens per second)
llama_print_timings:        eval time =    6513.80 ms /   123 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    6869.16 ms
Llama.generate: prefix-match hit


340 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.98 ms /    39 runs   (    0.10 ms per token,  9791.61 tokens per second)
llama_print_timings: prompt eval time =     190.75 ms /    17 tokens (   11.22 ms per token,    89.12 tokens per second)
llama_print_timings:        eval time =    2003.61 ms /    38 runs   (   52.73 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2247.61 ms
Llama.generate: prefix-match hit


341 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.28 ms /    60 runs   (    0.10 ms per token,  9555.66 tokens per second)
llama_print_timings: prompt eval time =     190.59 ms /    17 tokens (   11.21 ms per token,    89.20 tokens per second)
llama_print_timings:        eval time =    3110.57 ms /    59 runs   (   52.72 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    3385.27 ms
Llama.generate: prefix-match hit


342 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.45 ms /   127 runs   (    0.10 ms per token, 10199.98 tokens per second)
llama_print_timings: prompt eval time =     193.97 ms /    19 tokens (   10.21 ms per token,    97.95 tokens per second)
llama_print_timings:        eval time =    6673.84 ms /   126 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    7045.19 ms
Llama.generate: prefix-match hit


343 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.00 ms /    44 runs   (    0.09 ms per token, 11013.77 tokens per second)
llama_print_timings: prompt eval time =     209.94 ms /    30 tokens (    7.00 ms per token,   142.90 tokens per second)
llama_print_timings:        eval time =    2271.62 ms /    43 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    2541.65 ms
Llama.generate: prefix-match hit


344 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.34 ms /   119 runs   (    0.10 ms per token,  9640.31 tokens per second)
llama_print_timings: prompt eval time =     183.48 ms /    20 tokens (    9.17 ms per token,   109.01 tokens per second)
llama_print_timings:        eval time =    6249.96 ms /   118 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    6599.20 ms
Llama.generate: prefix-match hit


345 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.94 ms /   124 runs   (    0.10 ms per token,  9580.47 tokens per second)
llama_print_timings: prompt eval time =     198.67 ms /    22 tokens (    9.03 ms per token,   110.73 tokens per second)
llama_print_timings:        eval time =    6519.49 ms /   123 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6894.58 ms
Llama.generate: prefix-match hit


346 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.61 ms /    45 runs   (    0.10 ms per token,  9752.93 tokens per second)
llama_print_timings: prompt eval time =     185.66 ms /    13 tokens (   14.28 ms per token,    70.02 tokens per second)
llama_print_timings:        eval time =    2319.26 ms /    44 runs   (   52.71 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2566.38 ms
Llama.generate: prefix-match hit


347 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.97 ms /    40 runs   (    0.10 ms per token, 10070.49 tokens per second)
llama_print_timings: prompt eval time =     188.63 ms /    15 tokens (   12.58 ms per token,    79.52 tokens per second)
llama_print_timings:        eval time =    2055.62 ms /    39 runs   (   52.71 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2298.76 ms
Llama.generate: prefix-match hit


348 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.35 ms /   110 runs   (    0.09 ms per token, 10624.94 tokens per second)
llama_print_timings: prompt eval time =     203.65 ms /    26 tokens (    7.83 ms per token,   127.67 tokens per second)
llama_print_timings:        eval time =    5774.58 ms /   109 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    6129.99 ms
Llama.generate: prefix-match hit


349 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.96 ms /   149 runs   (    0.10 ms per token,  9958.56 tokens per second)
llama_print_timings: prompt eval time =     200.40 ms /    24 tokens (    8.35 ms per token,   119.76 tokens per second)
llama_print_timings:        eval time =    7846.41 ms /   148 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    8256.26 ms
Llama.generate: prefix-match hit


350 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.04 ms /   141 runs   (    0.10 ms per token, 10045.60 tokens per second)
llama_print_timings: prompt eval time =     346.38 ms /    36 tokens (    9.62 ms per token,   103.93 tokens per second)
llama_print_timings:        eval time =    7433.28 ms /   140 runs   (   53.09 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    7979.53 ms
Llama.generate: prefix-match hit


351 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.00 ms /    39 runs   (    0.10 ms per token,  9752.44 tokens per second)
llama_print_timings: prompt eval time =     188.50 ms /    16 tokens (   11.78 ms per token,    84.88 tokens per second)
llama_print_timings:        eval time =    2003.60 ms /    38 runs   (   52.73 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2244.52 ms
Llama.generate: prefix-match hit


352 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.92 ms /   146 runs   (    0.10 ms per token,  9784.21 tokens per second)
llama_print_timings: prompt eval time =     199.72 ms /    23 tokens (    8.68 ms per token,   115.16 tokens per second)
llama_print_timings:        eval time =    7691.87 ms /   145 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    8096.41 ms
Llama.generate: prefix-match hit


353 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.40 ms /   135 runs   (    0.09 ms per token, 10888.85 tokens per second)
llama_print_timings: prompt eval time =     345.77 ms /    35 tokens (    9.88 ms per token,   101.22 tokens per second)
llama_print_timings:        eval time =    7108.40 ms /   134 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    7641.37 ms
Llama.generate: prefix-match hit


354 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.73 ms /   123 runs   (    0.10 ms per token, 10483.25 tokens per second)
llama_print_timings: prompt eval time =     196.30 ms /    21 tokens (    9.35 ms per token,   106.98 tokens per second)
llama_print_timings:        eval time =    6459.20 ms /   122 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6826.32 ms
Llama.generate: prefix-match hit


355 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.16 ms /   128 runs   (    0.10 ms per token,  9727.18 tokens per second)
llama_print_timings: prompt eval time =     195.68 ms /    21 tokens (    9.32 ms per token,   107.32 tokens per second)
llama_print_timings:        eval time =    6729.44 ms /   127 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    7105.73 ms
Llama.generate: prefix-match hit


356 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.94 ms /    39 runs   (    0.10 ms per token,  9898.48 tokens per second)
llama_print_timings: prompt eval time =     187.94 ms /    14 tokens (   13.42 ms per token,    74.49 tokens per second)
llama_print_timings:        eval time =    1998.34 ms /    38 runs   (   52.59 ms per token,    19.02 tokens per second)
llama_print_timings:       total time =    2239.45 ms
Llama.generate: prefix-match hit


357 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.41 ms /   150 runs   (    0.10 ms per token, 10410.16 tokens per second)
llama_print_timings: prompt eval time =     349.30 ms /    38 tokens (    9.19 ms per token,   108.79 tokens per second)
llama_print_timings:        eval time =    7913.38 ms /   149 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    8471.75 ms
Llama.generate: prefix-match hit


358 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.39 ms /   143 runs   (    0.10 ms per token,  9938.15 tokens per second)
llama_print_timings: prompt eval time =     197.93 ms /    22 tokens (    9.00 ms per token,   111.15 tokens per second)
llama_print_timings:        eval time =    7527.64 ms /   142 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7927.78 ms
Llama.generate: prefix-match hit


359 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.60 ms /   120 runs   (    0.10 ms per token, 10344.83 tokens per second)
llama_print_timings: prompt eval time =     207.66 ms /    29 tokens (    7.16 ms per token,   139.65 tokens per second)
llama_print_timings:        eval time =    6309.36 ms /   119 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    6683.96 ms
Llama.generate: prefix-match hit


360 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.75 ms /    46 runs   (    0.10 ms per token,  9682.17 tokens per second)
llama_print_timings: prompt eval time =     192.85 ms /    18 tokens (   10.71 ms per token,    93.34 tokens per second)
llama_print_timings:        eval time =    2375.46 ms /    45 runs   (   52.79 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2630.84 ms
Llama.generate: prefix-match hit


361 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.34 ms /   150 runs   (    0.10 ms per token, 10458.79 tokens per second)
llama_print_timings: prompt eval time =     200.13 ms /    24 tokens (    8.34 ms per token,   119.92 tokens per second)
llama_print_timings:        eval time =    7904.29 ms /   149 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    8315.56 ms
Llama.generate: prefix-match hit


362 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.83 ms /   128 runs   (    0.10 ms per token,  9978.95 tokens per second)
llama_print_timings: prompt eval time =     204.73 ms /    26 tokens (    7.87 ms per token,   127.00 tokens per second)
llama_print_timings:        eval time =    6740.08 ms /   127 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    7125.76 ms
Llama.generate: prefix-match hit


362 -- 2



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.53 ms /   122 runs   (    0.09 ms per token, 10582.93 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    6466.36 ms /   122 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    6636.28 ms
Llama.generate: prefix-match hit


363 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.52 ms /    45 runs   (    0.10 ms per token,  9949.15 tokens per second)
llama_print_timings: prompt eval time =     194.41 ms /    19 tokens (   10.23 ms per token,    97.73 tokens per second)
llama_print_timings:        eval time =    2318.21 ms /    44 runs   (   52.69 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2574.34 ms
Llama.generate: prefix-match hit


364 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.87 ms /    39 runs   (    0.10 ms per token, 10090.56 tokens per second)
llama_print_timings: prompt eval time =     183.78 ms /    12 tokens (   15.32 ms per token,    65.30 tokens per second)
llama_print_timings:        eval time =    2001.74 ms /    38 runs   (   52.68 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2239.24 ms
Llama.generate: prefix-match hit


365 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.57 ms /   122 runs   (    0.10 ms per token,  9704.88 tokens per second)
llama_print_timings: prompt eval time =     205.79 ms /    27 tokens (    7.62 ms per token,   131.20 tokens per second)
llama_print_timings:        eval time =    6422.85 ms /   121 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    6804.33 ms
Llama.generate: prefix-match hit


366 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.37 ms /   131 runs   (    0.10 ms per token,  9798.79 tokens per second)
llama_print_timings: prompt eval time =     200.49 ms /    24 tokens (    8.35 ms per token,   119.71 tokens per second)
llama_print_timings:        eval time =    6892.78 ms /   130 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7276.17 ms
Llama.generate: prefix-match hit


367 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.31 ms /   112 runs   (    0.10 ms per token,  9899.24 tokens per second)
llama_print_timings: prompt eval time =     194.27 ms /    19 tokens (   10.22 ms per token,    97.80 tokens per second)
llama_print_timings:        eval time =    5877.60 ms /   111 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6227.90 ms
Llama.generate: prefix-match hit


368 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.99 ms /    39 runs   (    0.10 ms per token,  9781.79 tokens per second)
llama_print_timings: prompt eval time =     194.06 ms /    19 tokens (   10.21 ms per token,    97.91 tokens per second)
llama_print_timings:        eval time =    2004.93 ms /    38 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2252.13 ms
Llama.generate: prefix-match hit


369 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.67 ms /    45 runs   (    0.10 ms per token,  9644.23 tokens per second)
llama_print_timings: prompt eval time =     182.98 ms /    20 tokens (    9.15 ms per token,   109.30 tokens per second)
llama_print_timings:        eval time =    2319.33 ms /    44 runs   (   52.71 ms per token,    18.97 tokens per second)
llama_print_timings:       total time =    2564.99 ms
Llama.generate: prefix-match hit


370 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.34 ms /   145 runs   (    0.10 ms per token, 10113.69 tokens per second)
llama_print_timings: prompt eval time =     195.91 ms /    21 tokens (    9.33 ms per token,   107.19 tokens per second)
llama_print_timings:        eval time =    7631.53 ms /   144 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    8031.30 ms
Llama.generate: prefix-match hit


371 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.90 ms /    39 runs   (    0.10 ms per token,  9994.87 tokens per second)
llama_print_timings: prompt eval time =     188.43 ms /    16 tokens (   11.78 ms per token,    84.91 tokens per second)
llama_print_timings:        eval time =    2001.37 ms /    38 runs   (   52.67 ms per token,    18.99 tokens per second)
llama_print_timings:       total time =    2243.04 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.56 ms /   144 runs   (    0.09 ms per token, 10617.12 tokens per second)
llama_print_timings: prompt eval time =     357.38 ms /    43 tokens (    8.31 ms per token,   120.32 tokens per second)
llama_print_timings:        eval time =    7596.70 ms /   143 runs   (   53.12 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    8154.20 ms
Llama.generate: prefix-

372 -- 1
373 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.30 ms /    40 runs   (    0.11 ms per token,  9304.49 tokens per second)
llama_print_timings: prompt eval time =     199.58 ms /    23 tokens (    8.68 ms per token,   115.24 tokens per second)
llama_print_timings:        eval time =    2055.00 ms /    39 runs   (   52.69 ms per token,    18.98 tokens per second)
llama_print_timings:       total time =    2311.39 ms
Llama.generate: prefix-match hit


374 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.84 ms /   125 runs   (    0.10 ms per token,  9735.20 tokens per second)
llama_print_timings: prompt eval time =     187.64 ms /    14 tokens (   13.40 ms per token,    74.61 tokens per second)
llama_print_timings:        eval time =    6565.44 ms /   124 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    6928.18 ms
Llama.generate: prefix-match hit


375 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.93 ms /    40 runs   (    0.10 ms per token, 10185.89 tokens per second)
llama_print_timings: prompt eval time =     205.86 ms /    27 tokens (    7.62 ms per token,   131.16 tokens per second)
llama_print_timings:        eval time =    2057.57 ms /    39 runs   (   52.76 ms per token,    18.95 tokens per second)
llama_print_timings:       total time =    2317.42 ms
Llama.generate: prefix-match hit


376 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.56 ms /   130 runs   (    0.10 ms per token,  9587.02 tokens per second)
llama_print_timings: prompt eval time =     357.50 ms /    45 tokens (    7.94 ms per token,   125.87 tokens per second)
llama_print_timings:        eval time =    6874.60 ms /   129 runs   (   53.29 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    7433.66 ms
Llama.generate: prefix-match hit


377 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.34 ms /   122 runs   (    0.11 ms per token,  9142.69 tokens per second)
llama_print_timings: prompt eval time =     194.25 ms /    19 tokens (   10.22 ms per token,    97.81 tokens per second)
llama_print_timings:        eval time =    6505.49 ms /   121 runs   (   53.76 ms per token,    18.60 tokens per second)
llama_print_timings:       total time =    6907.32 ms
Llama.generate: prefix-match hit


378 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.94 ms /    40 runs   (    0.12 ms per token,  8102.09 tokens per second)
llama_print_timings: prompt eval time =     199.50 ms /    23 tokens (    8.67 ms per token,   115.29 tokens per second)
llama_print_timings:        eval time =    2068.41 ms /    39 runs   (   53.04 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    2331.90 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       9.39 ms /    40 runs   (    0.23 ms per token,  4257.58 tokens per second)
llama_print_timings: prompt eval time =     193.02 ms /    14 tokens (   13.79 ms per token,    72.53 tokens per second)
llama_print_timings:        eval time =    2127.34 ms /    39 runs   (   54.55 ms per token,    18.33 tokens per second)
llama_print_timings:       total time =    2473.06 ms
Llama.generate: prefix-

379 -- 1
380 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      28.36 ms /   125 runs   (    0.23 ms per token,  4407.77 tokens per second)
llama_print_timings: prompt eval time =     201.64 ms /    23 tokens (    8.77 ms per token,   114.06 tokens per second)
llama_print_timings:        eval time =    6688.51 ms /   124 runs   (   53.94 ms per token,    18.54 tokens per second)
llama_print_timings:       total time =    7278.49 ms
Llama.generate: prefix-match hit


381 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.77 ms /   102 runs   (    0.11 ms per token,  9470.75 tokens per second)
llama_print_timings: prompt eval time =     199.24 ms /    23 tokens (    8.66 ms per token,   115.44 tokens per second)
llama_print_timings:        eval time =    5350.80 ms /   101 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    5695.95 ms
Llama.generate: prefix-match hit


382 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.29 ms /    40 runs   (    0.11 ms per token,  9328.36 tokens per second)
llama_print_timings: prompt eval time =     191.90 ms /    14 tokens (   13.71 ms per token,    72.95 tokens per second)
llama_print_timings:        eval time =    2097.93 ms /    39 runs   (   53.79 ms per token,    18.59 tokens per second)
llama_print_timings:       total time =    2360.24 ms
Llama.generate: prefix-match hit


383 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.80 ms /    46 runs   (    0.10 ms per token,  9591.33 tokens per second)
llama_print_timings: prompt eval time =     199.01 ms /    19 tokens (   10.47 ms per token,    95.47 tokens per second)
llama_print_timings:        eval time =    2420.28 ms /    45 runs   (   53.78 ms per token,    18.59 tokens per second)
llama_print_timings:       total time =    2687.68 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.29 ms /    40 runs   (    0.11 ms per token,  9313.15 tokens per second)
llama_print_timings: prompt eval time =     199.10 ms /    21 tokens (    9.48 ms per token,   105.48 tokens per second)
llama_print_timings:        eval time =    2128.71 ms /    39 runs   (   54.58 ms per token,    18.32 tokens per second)
llama_print_timings:       total time =    2385.86 ms
Llama.generate: prefix-

384 -- 1
385 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.55 ms /    40 runs   (    0.11 ms per token,  8783.49 tokens per second)
llama_print_timings: prompt eval time =     198.38 ms /    21 tokens (    9.45 ms per token,   105.85 tokens per second)
llama_print_timings:        eval time =    2188.91 ms /    39 runs   (   56.13 ms per token,    17.82 tokens per second)
llama_print_timings:       total time =    2453.50 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.24 ms /    40 runs   (    0.11 ms per token,  9425.07 tokens per second)
llama_print_timings: prompt eval time =     222.64 ms /    16 tokens (   13.91 ms per token,    71.87 tokens per second)
llama_print_timings:        eval time =    2136.80 ms /    39 runs   (   54.79 ms per token,    18.25 tokens per second)
llama_print_timings:       total time =    2422.36 ms
Llama.generate: prefix-

386 -- 1
387 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.62 ms /    44 runs   (    0.10 ms per token,  9530.00 tokens per second)
llama_print_timings: prompt eval time =     185.49 ms /    12 tokens (   15.46 ms per token,    64.69 tokens per second)
llama_print_timings:        eval time =    2334.82 ms /    43 runs   (   54.30 ms per token,    18.42 tokens per second)
llama_print_timings:       total time =    2583.14 ms
Llama.generate: prefix-match hit


388 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.27 ms /   116 runs   (    0.11 ms per token,  9456.26 tokens per second)
llama_print_timings: prompt eval time =     195.90 ms /    18 tokens (   10.88 ms per token,    91.89 tokens per second)
llama_print_timings:        eval time =    6288.47 ms /   115 runs   (   54.68 ms per token,    18.29 tokens per second)
llama_print_timings:       total time =    6658.92 ms
Llama.generate: prefix-match hit


389 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.53 ms /   128 runs   (    0.10 ms per token, 10217.11 tokens per second)
llama_print_timings: prompt eval time =     210.22 ms /    28 tokens (    7.51 ms per token,   133.19 tokens per second)
llama_print_timings:        eval time =    7042.62 ms /   127 runs   (   55.45 ms per token,    18.03 tokens per second)
llama_print_timings:       total time =    7442.82 ms
Llama.generate: prefix-match hit


390 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.93 ms /   140 runs   (    0.11 ms per token,  9377.72 tokens per second)
llama_print_timings: prompt eval time =     215.45 ms /    31 tokens (    6.95 ms per token,   143.88 tokens per second)
llama_print_timings:        eval time =    7633.43 ms /   139 runs   (   54.92 ms per token,    18.21 tokens per second)
llama_print_timings:       total time =    8059.60 ms
Llama.generate: prefix-match hit


391 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.49 ms /   100 runs   (    0.10 ms per token,  9530.16 tokens per second)
llama_print_timings: prompt eval time =     199.06 ms /    18 tokens (   11.06 ms per token,    90.42 tokens per second)
llama_print_timings:        eval time =    5509.39 ms /    99 runs   (   55.65 ms per token,    17.97 tokens per second)
llama_print_timings:       total time =    5857.69 ms
Llama.generate: prefix-match hit


392 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       9.62 ms /   103 runs   (    0.09 ms per token, 10712.43 tokens per second)
llama_print_timings: prompt eval time =     210.90 ms /    32 tokens (    6.59 ms per token,   151.73 tokens per second)
llama_print_timings:        eval time =    5612.43 ms /   102 runs   (   55.02 ms per token,    18.17 tokens per second)
llama_print_timings:       total time =    5965.31 ms
Llama.generate: prefix-match hit


393 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.37 ms /   128 runs   (    0.10 ms per token, 10349.29 tokens per second)
llama_print_timings: prompt eval time =     213.45 ms /    32 tokens (    6.67 ms per token,   149.92 tokens per second)
llama_print_timings:        eval time =    6832.24 ms /   127 runs   (   53.80 ms per token,    18.59 tokens per second)
llama_print_timings:       total time =    7215.20 ms
Llama.generate: prefix-match hit


394 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.36 ms /   139 runs   (    0.09 ms per token, 11250.51 tokens per second)
llama_print_timings: prompt eval time =     346.03 ms /    33 tokens (   10.49 ms per token,    95.37 tokens per second)
llama_print_timings:        eval time =    7416.04 ms /   138 runs   (   53.74 ms per token,    18.61 tokens per second)
llama_print_timings:       total time =    7941.25 ms
Llama.generate: prefix-match hit


395 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.56 ms /   125 runs   (    0.10 ms per token,  9953.81 tokens per second)
llama_print_timings: prompt eval time =     198.54 ms /    21 tokens (    9.45 ms per token,   105.77 tokens per second)
llama_print_timings:        eval time =    6655.60 ms /   124 runs   (   53.67 ms per token,    18.63 tokens per second)
llama_print_timings:       total time =    7018.96 ms
Llama.generate: prefix-match hit


396 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.06 ms /   112 runs   (    0.10 ms per token, 10125.67 tokens per second)
llama_print_timings: prompt eval time =     194.75 ms /    18 tokens (   10.82 ms per token,    92.43 tokens per second)
llama_print_timings:        eval time =    5947.79 ms /   111 runs   (   53.58 ms per token,    18.66 tokens per second)
llama_print_timings:       total time =    6287.76 ms
Llama.generate: prefix-match hit


397 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.93 ms /    40 runs   (    0.10 ms per token, 10170.35 tokens per second)
llama_print_timings: prompt eval time =     190.38 ms /    16 tokens (   11.90 ms per token,    84.04 tokens per second)
llama_print_timings:        eval time =    2079.08 ms /    39 runs   (   53.31 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    2320.10 ms
Llama.generate: prefix-match hit


398 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.93 ms /    40 runs   (    0.10 ms per token, 10172.94 tokens per second)
llama_print_timings: prompt eval time =     185.66 ms /    12 tokens (   15.47 ms per token,    64.63 tokens per second)
llama_print_timings:        eval time =    2079.29 ms /    39 runs   (   53.32 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    2316.59 ms
Llama.generate: prefix-match hit


399 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.45 ms /    44 runs   (    0.10 ms per token,  9887.64 tokens per second)
llama_print_timings: prompt eval time =     195.97 ms /    19 tokens (   10.31 ms per token,    96.95 tokens per second)
llama_print_timings:        eval time =    2294.65 ms /    43 runs   (   53.36 ms per token,    18.74 tokens per second)
llama_print_timings:       total time =    2546.89 ms
Llama.generate: prefix-match hit


400 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.17 ms /   121 runs   (    0.09 ms per token, 10828.71 tokens per second)
llama_print_timings: prompt eval time =     204.63 ms /    25 tokens (    8.19 ms per token,   122.17 tokens per second)
llama_print_timings:        eval time =    6438.90 ms /   120 runs   (   53.66 ms per token,    18.64 tokens per second)
llama_print_timings:       total time =    6799.19 ms
Llama.generate: prefix-match hit


401 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       7.55 ms /    88 runs   (    0.09 ms per token, 11658.72 tokens per second)
llama_print_timings: prompt eval time =     348.05 ms /    34 tokens (   10.24 ms per token,    97.69 tokens per second)
llama_print_timings:        eval time =    4665.13 ms /    87 runs   (   53.62 ms per token,    18.65 tokens per second)
llama_print_timings:       total time =    5124.94 ms
Llama.generate: prefix-match hit


402 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.40 ms /    45 runs   (    0.10 ms per token, 10222.63 tokens per second)
llama_print_timings: prompt eval time =     197.70 ms /    20 tokens (    9.88 ms per token,   101.16 tokens per second)
llama_print_timings:        eval time =    2348.55 ms /    44 runs   (   53.38 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    2603.33 ms
Llama.generate: prefix-match hit


403 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.38 ms /    44 runs   (    0.10 ms per token, 10036.50 tokens per second)
llama_print_timings: prompt eval time =     200.46 ms /    23 tokens (    8.72 ms per token,   114.74 tokens per second)
llama_print_timings:        eval time =    2297.12 ms /    43 runs   (   53.42 ms per token,    18.72 tokens per second)
llama_print_timings:       total time =    2553.05 ms
Llama.generate: prefix-match hit


404 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.00 ms /   136 runs   (    0.10 ms per token, 10461.54 tokens per second)
llama_print_timings: prompt eval time =     204.72 ms /    25 tokens (    8.19 ms per token,   122.12 tokens per second)
llama_print_timings:        eval time =    7249.51 ms /   135 runs   (   53.70 ms per token,    18.62 tokens per second)
llama_print_timings:       total time =    7633.24 ms
Llama.generate: prefix-match hit


405 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.75 ms /   116 runs   (    0.09 ms per token, 10787.69 tokens per second)
llama_print_timings: prompt eval time =     213.64 ms /    31 tokens (    6.89 ms per token,   145.10 tokens per second)
llama_print_timings:        eval time =    6174.68 ms /   115 runs   (   53.69 ms per token,    18.62 tokens per second)
llama_print_timings:       total time =    6536.95 ms
Llama.generate: prefix-match hit


406 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.03 ms /   110 runs   (    0.10 ms per token,  9974.61 tokens per second)
llama_print_timings: prompt eval time =     197.38 ms /    19 tokens (   10.39 ms per token,    96.26 tokens per second)
llama_print_timings:        eval time =    5839.85 ms /   109 runs   (   53.58 ms per token,    18.66 tokens per second)
llama_print_timings:       total time =    6179.88 ms
Llama.generate: prefix-match hit


407 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.89 ms /    40 runs   (    0.10 ms per token, 10293.36 tokens per second)
llama_print_timings: prompt eval time =     195.67 ms /    18 tokens (   10.87 ms per token,    91.99 tokens per second)
llama_print_timings:        eval time =    2079.12 ms /    39 runs   (   53.31 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    2325.55 ms
Llama.generate: prefix-match hit


408 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.87 ms /    39 runs   (    0.10 ms per token, 10074.92 tokens per second)
llama_print_timings: prompt eval time =     191.12 ms /    15 tokens (   12.74 ms per token,    78.49 tokens per second)
llama_print_timings:        eval time =    2027.45 ms /    38 runs   (   53.35 ms per token,    18.74 tokens per second)
llama_print_timings:       total time =    2267.54 ms
Llama.generate: prefix-match hit


409 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.53 ms /    46 runs   (    0.10 ms per token, 10161.25 tokens per second)
llama_print_timings: prompt eval time =     188.24 ms /    13 tokens (   14.48 ms per token,    69.06 tokens per second)
llama_print_timings:        eval time =    2401.19 ms /    45 runs   (   53.36 ms per token,    18.74 tokens per second)
llama_print_timings:       total time =    2647.52 ms
Llama.generate: prefix-match hit


410 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.93 ms /    40 runs   (    0.10 ms per token, 10170.35 tokens per second)
llama_print_timings: prompt eval time =     196.09 ms /    19 tokens (   10.32 ms per token,    96.90 tokens per second)
llama_print_timings:        eval time =    2078.42 ms /    39 runs   (   53.29 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    2325.75 ms
Llama.generate: prefix-match hit


411 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       6.24 ms /    64 runs   (    0.10 ms per token, 10249.84 tokens per second)
llama_print_timings: prompt eval time =     200.53 ms /    22 tokens (    9.12 ms per token,   109.71 tokens per second)
llama_print_timings:        eval time =    3372.99 ms /    63 runs   (   53.54 ms per token,    18.68 tokens per second)
llama_print_timings:       total time =    3656.12 ms
Llama.generate: prefix-match hit


412 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.58 ms /    45 runs   (    0.10 ms per token,  9825.33 tokens per second)
llama_print_timings: prompt eval time =     196.63 ms /    19 tokens (   10.35 ms per token,    96.63 tokens per second)
llama_print_timings:        eval time =    2348.86 ms /    44 runs   (   53.38 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    2604.24 ms
Llama.generate: prefix-match hit


413 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.57 ms /   126 runs   (    0.10 ms per token, 10023.07 tokens per second)
llama_print_timings: prompt eval time =     198.69 ms /    21 tokens (    9.46 ms per token,   105.69 tokens per second)
llama_print_timings:        eval time =    6707.47 ms /   125 runs   (   53.66 ms per token,    18.64 tokens per second)
llama_print_timings:       total time =    7069.74 ms
Llama.generate: prefix-match hit


414 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.85 ms /   126 runs   (    0.09 ms per token, 10635.60 tokens per second)
llama_print_timings: prompt eval time =     212.90 ms /    30 tokens (    7.10 ms per token,   140.91 tokens per second)
llama_print_timings:        eval time =    6714.17 ms /   125 runs   (   53.71 ms per token,    18.62 tokens per second)
llama_print_timings:       total time =    7089.07 ms
Llama.generate: prefix-match hit


415 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.88 ms /   139 runs   (    0.10 ms per token, 10017.30 tokens per second)
llama_print_timings: prompt eval time =     196.96 ms /    19 tokens (   10.37 ms per token,    96.47 tokens per second)
llama_print_timings:        eval time =    7408.24 ms /   138 runs   (   53.68 ms per token,    18.63 tokens per second)
llama_print_timings:       total time =    7785.52 ms
Llama.generate: prefix-match hit


416 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.36 ms /   131 runs   (    0.09 ms per token, 10600.42 tokens per second)
llama_print_timings: prompt eval time =     213.89 ms /    31 tokens (    6.90 ms per token,   144.93 tokens per second)
llama_print_timings:        eval time =    6984.37 ms /   130 runs   (   53.73 ms per token,    18.61 tokens per second)
llama_print_timings:       total time =    7369.12 ms
Llama.generate: prefix-match hit


417 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      14.42 ms /   148 runs   (    0.10 ms per token, 10262.10 tokens per second)
llama_print_timings: prompt eval time =     195.32 ms /    18 tokens (   10.85 ms per token,    92.16 tokens per second)
llama_print_timings:        eval time =    7887.28 ms /   147 runs   (   53.65 ms per token,    18.64 tokens per second)
llama_print_timings:       total time =    8276.25 ms
Llama.generate: prefix-match hit


418 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.00 ms /    40 runs   (    0.10 ms per token, 10002.50 tokens per second)
llama_print_timings: prompt eval time =     201.86 ms /    23 tokens (    8.78 ms per token,   113.94 tokens per second)
llama_print_timings:        eval time =    2083.09 ms /    39 runs   (   53.41 ms per token,    18.72 tokens per second)
llama_print_timings:       total time =    2337.95 ms
Llama.generate: prefix-match hit


419 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.35 ms /   121 runs   (    0.09 ms per token, 10664.55 tokens per second)
llama_print_timings: prompt eval time =     195.22 ms /    18 tokens (   10.85 ms per token,    92.20 tokens per second)
llama_print_timings:        eval time =    6432.27 ms /   120 runs   (   53.60 ms per token,    18.66 tokens per second)
llama_print_timings:       total time =    6784.05 ms
Llama.generate: prefix-match hit


420 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.05 ms /    40 runs   (    0.10 ms per token,  9876.54 tokens per second)
llama_print_timings: prompt eval time =     192.97 ms /    17 tokens (   11.35 ms per token,    88.09 tokens per second)
llama_print_timings:        eval time =    2079.87 ms /    39 runs   (   53.33 ms per token,    18.75 tokens per second)
llama_print_timings:       total time =    2324.00 ms
Llama.generate: prefix-match hit


421 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.85 ms /    40 runs   (    0.10 ms per token, 10386.91 tokens per second)
llama_print_timings: prompt eval time =     192.49 ms /    17 tokens (   11.32 ms per token,    88.31 tokens per second)
llama_print_timings:        eval time =    2077.44 ms /    39 runs   (   53.27 ms per token,    18.77 tokens per second)
llama_print_timings:       total time =    2319.44 ms
Llama.generate: prefix-match hit


422 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       5.83 ms /    59 runs   (    0.10 ms per token, 10127.02 tokens per second)
llama_print_timings: prompt eval time =     193.61 ms /    17 tokens (   11.39 ms per token,    87.81 tokens per second)
llama_print_timings:        eval time =    3097.65 ms /    58 runs   (   53.41 ms per token,    18.72 tokens per second)
llama_print_timings:       total time =    3369.56 ms
Llama.generate: prefix-match hit


423 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.59 ms /    46 runs   (    0.10 ms per token, 10030.53 tokens per second)
llama_print_timings: prompt eval time =     201.91 ms /    23 tokens (    8.78 ms per token,   113.91 tokens per second)
llama_print_timings:        eval time =    2405.19 ms /    45 runs   (   53.45 ms per token,    18.71 tokens per second)
llama_print_timings:       total time =    2666.09 ms
Llama.generate: prefix-match hit


424 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       8.55 ms /    87 runs   (    0.10 ms per token, 10175.44 tokens per second)
llama_print_timings: prompt eval time =     212.20 ms /    30 tokens (    7.07 ms per token,   141.37 tokens per second)
llama_print_timings:        eval time =    4611.14 ms /    86 runs   (   53.62 ms per token,    18.65 tokens per second)
llama_print_timings:       total time =    4934.28 ms
Llama.generate: prefix-match hit


425 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.88 ms /    39 runs   (    0.10 ms per token, 10046.37 tokens per second)
llama_print_timings: prompt eval time =     209.42 ms /    29 tokens (    7.22 ms per token,   138.48 tokens per second)
llama_print_timings:        eval time =    2034.06 ms /    38 runs   (   53.53 ms per token,    18.68 tokens per second)
llama_print_timings:       total time =    2292.74 ms
Llama.generate: prefix-match hit


426 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.23 ms /   114 runs   (    0.10 ms per token, 10149.57 tokens per second)
llama_print_timings: prompt eval time =     196.87 ms /    19 tokens (   10.36 ms per token,    96.51 tokens per second)
llama_print_timings:        eval time =    6058.06 ms /   113 runs   (   53.61 ms per token,    18.65 tokens per second)
llama_print_timings:       total time =    6401.03 ms
Llama.generate: prefix-match hit


427 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.52 ms /   112 runs   (    0.09 ms per token, 10650.44 tokens per second)
llama_print_timings: prompt eval time =     353.24 ms /    39 tokens (    9.06 ms per token,   110.41 tokens per second)
llama_print_timings:        eval time =    5960.19 ms /   111 runs   (   53.70 ms per token,    18.62 tokens per second)
llama_print_timings:       total time =    6457.22 ms
Llama.generate: prefix-match hit


428 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      13.30 ms /   151 runs   (    0.09 ms per token, 11356.80 tokens per second)
llama_print_timings: prompt eval time =     189.73 ms /    15 tokens (   12.65 ms per token,    79.06 tokens per second)
llama_print_timings:        eval time =    8057.30 ms /   150 runs   (   53.72 ms per token,    18.62 tokens per second)
llama_print_timings:       total time =    8443.41 ms
Llama.generate: prefix-match hit


429 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.35 ms /   112 runs   (    0.09 ms per token, 10822.30 tokens per second)
llama_print_timings: prompt eval time =     202.73 ms /    24 tokens (    8.45 ms per token,   118.38 tokens per second)
llama_print_timings:        eval time =    5955.50 ms /   111 runs   (   53.65 ms per token,    18.64 tokens per second)
llama_print_timings:       total time =    6301.12 ms
Llama.generate: prefix-match hit


430 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      10.53 ms /   106 runs   (    0.10 ms per token, 10062.65 tokens per second)
llama_print_timings: prompt eval time =     193.25 ms /    17 tokens (   11.37 ms per token,    87.97 tokens per second)
llama_print_timings:        eval time =    5627.15 ms /   105 runs   (   53.59 ms per token,    18.66 tokens per second)
llama_print_timings:       total time =    5956.52 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.85 ms /    40 runs   (    0.10 ms per token, 10403.12 tokens per second)
llama_print_timings: prompt eval time =     188.10 ms /    13 tokens (   14.47 ms per token,    69.11 tokens per second)
llama_print_timings:        eval time =    2079.16 ms /    39 runs   (   53.31 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    2318.03 ms
Llama.generate: prefix-

431 -- 1
432 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       4.56 ms /    46 runs   (    0.10 ms per token, 10092.15 tokens per second)
llama_print_timings: prompt eval time =     194.98 ms /    18 tokens (   10.83 ms per token,    92.32 tokens per second)
llama_print_timings:        eval time =    2399.63 ms /    45 runs   (   53.33 ms per token,    18.75 tokens per second)
llama_print_timings:       total time =    2652.29 ms
Llama.generate: prefix-match hit


433 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.10 ms /   123 runs   (    0.10 ms per token, 10161.93 tokens per second)
llama_print_timings: prompt eval time =     202.39 ms /    24 tokens (    8.43 ms per token,   118.58 tokens per second)
llama_print_timings:        eval time =    6543.48 ms /   122 runs   (   53.64 ms per token,    18.64 tokens per second)
llama_print_timings:       total time =    6904.28 ms
Llama.generate: prefix-match hit


434 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.82 ms /    40 runs   (    0.10 ms per token, 10468.46 tokens per second)
llama_print_timings: prompt eval time =     200.85 ms /    22 tokens (    9.13 ms per token,   109.53 tokens per second)
llama_print_timings:        eval time =    2080.84 ms /    39 runs   (   53.35 ms per token,    18.74 tokens per second)
llama_print_timings:       total time =    2333.06 ms
Llama.generate: prefix-match hit


435 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.89 ms /   131 runs   (    0.10 ms per token, 10165.28 tokens per second)
llama_print_timings: prompt eval time =     205.52 ms /    26 tokens (    7.90 ms per token,   126.51 tokens per second)
llama_print_timings:        eval time =    6984.85 ms /   130 runs   (   53.73 ms per token,    18.61 tokens per second)
llama_print_timings:       total time =    7359.24 ms
Llama.generate: prefix-match hit


436 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.27 ms /   112 runs   (    0.10 ms per token,  9935.24 tokens per second)
llama_print_timings: prompt eval time =     202.40 ms /    23 tokens (    8.80 ms per token,   113.64 tokens per second)
llama_print_timings:        eval time =    5956.31 ms /   111 runs   (   53.66 ms per token,    18.64 tokens per second)
llama_print_timings:       total time =    6302.94 ms
Llama.generate: prefix-match hit


437 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.80 ms /    39 runs   (    0.10 ms per token, 10260.46 tokens per second)
llama_print_timings: prompt eval time =     202.75 ms /    24 tokens (    8.45 ms per token,   118.37 tokens per second)
llama_print_timings:        eval time =    2031.04 ms /    38 runs   (   53.45 ms per token,    18.71 tokens per second)
llama_print_timings:       total time =    2284.10 ms
Llama.generate: prefix-match hit


438 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.95 ms /    40 runs   (    0.10 ms per token, 10121.46 tokens per second)
llama_print_timings: prompt eval time =     196.35 ms /    19 tokens (   10.33 ms per token,    96.77 tokens per second)
llama_print_timings:        eval time =    2082.29 ms /    39 runs   (   53.39 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    2330.25 ms
Llama.generate: prefix-match hit


439 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.20 ms /   125 runs   (    0.09 ms per token, 11159.72 tokens per second)
llama_print_timings: prompt eval time =     206.70 ms /    26 tokens (    7.95 ms per token,   125.79 tokens per second)
llama_print_timings:        eval time =    6660.41 ms /   124 runs   (   53.71 ms per token,    18.62 tokens per second)
llama_print_timings:       total time =    7027.89 ms
Llama.generate: prefix-match hit


440 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      11.64 ms /   133 runs   (    0.09 ms per token, 11423.17 tokens per second)
llama_print_timings: prompt eval time =     366.48 ms /    48 tokens (    7.63 ms per token,   130.98 tokens per second)
llama_print_timings:        eval time =    7104.00 ms /   132 runs   (   53.82 ms per token,    18.58 tokens per second)
llama_print_timings:       total time =    7639.48 ms
Llama.generate: prefix-match hit


441 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.87 ms /    40 runs   (    0.10 ms per token, 10349.29 tokens per second)
llama_print_timings: prompt eval time =     184.73 ms /    10 tokens (   18.47 ms per token,    54.13 tokens per second)
llama_print_timings:        eval time =    2079.65 ms /    39 runs   (   53.32 ms per token,    18.75 tokens per second)
llama_print_timings:       total time =    2316.05 ms
Llama.generate: prefix-match hit


442 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =      12.36 ms /   127 runs   (    0.10 ms per token, 10273.42 tokens per second)
llama_print_timings: prompt eval time =     198.77 ms /    21 tokens (    9.47 ms per token,   105.65 tokens per second)
llama_print_timings:        eval time =    6760.77 ms /   126 runs   (   53.66 ms per token,    18.64 tokens per second)
llama_print_timings:       total time =    7122.79 ms
Llama.generate: prefix-match hit


443 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.87 ms /    39 runs   (    0.10 ms per token, 10082.73 tokens per second)
llama_print_timings: prompt eval time =     207.12 ms /    28 tokens (    7.40 ms per token,   135.19 tokens per second)
llama_print_timings:        eval time =    2031.75 ms /    38 runs   (   53.47 ms per token,    18.70 tokens per second)
llama_print_timings:       total time =    2288.93 ms
Llama.generate: prefix-match hit


444 -- 1



llama_print_timings:        load time =    4274.02 ms
llama_print_timings:      sample time =       3.85 ms /    40 runs   (    0.10 ms per token, 10395.01 tokens per second)
llama_print_timings: prompt eval time =     190.95 ms /    16 tokens (   11.93 ms per token,    83.79 tokens per second)
llama_print_timings:        eval time =    2083.72 ms /    39 runs   (   53.43 ms per token,    18.72 tokens per second)
llama_print_timings:       total time =    2324.77 ms


In [61]:
# Gather the Task 1 records into a frame with a fixed column order, persist them
# as a JSON array of records, and display how many rows were produced.
# `results` is filled by an earlier cell outside this view.
results_df = pd.DataFrame(
  results,
  columns=['id', 'text', 'output', 'ambiguity', 'distinctiveness'],
)
results_df.to_json(path_or_buf='outputs-task1-hetero-openocra7b.json', orient="records")
len(results_df)

445

In [62]:
# Map boolean predictions in the 'output' column to 1/0 so they can be cast to
# int and compared with the gold labels in the next cell.
# This overwrites the column of results_df in place.
results_df['output'] = results_df['output'].replace({True: 1, False: 0})

In [63]:
# Gold labels come from `test`, predictions from `results_df`; both are coerced
# to plain Python ints.
# NOTE(review): the two lists are paired purely by position; nothing here checks
# that row k of `test` and row k of `results_df` refer to the same example.
y_true = [int(label) for label in test['label']]
y_pred = [int(prediction) for prediction in results_df['output']]

In [64]:
# Per-class precision / recall / F1 for Task 1, formatted with four decimals.
print(
  classification_report(y_true=y_true, y_pred=y_pred, digits=4)
)

              precision    recall  f1-score   support

           0     0.4358    0.7480    0.5507       127
           1     0.8590    0.6132    0.7156       318

    accuracy                         0.6517       445
   macro avg     0.6474    0.6806    0.6332       445
weighted avg     0.7382    0.6517    0.6685       445



### Task 2 and 3: Pun Location & Interpretation

In [82]:
def clean_sentence(text):
  """Undo tokenizer-style spacing around punctuation in a sentence.

  Applies a fixed sequence of literal substring replacements (spaced quotes,
  apostrophes, commas, full stops, exclamation/question marks and hyphens)
  and then strips leading/trailing whitespace.

  Args:
    text: the sentence to clean, as a string.

  Returns:
    The cleaned string.
  """
  # The order is significant: each "space-punct-space" rule must run before
  # its "space-punct" counterpart, otherwise a double space would be produced.
  substitutions = (
    (" ' ' ", '" '),
    (" ' ", "'"),
    (" , ", ", "),
    (" . ", ". "),
    (" .", ". "),
    (" ! ", "! "),
    (" !", "! "),
    (" ? ", "? "),
    (" ?", "? "),
    (" - ", "-"),
  )
  cleaned = text
  for spaced, compact in substitutions:
    cleaned = cleaned.replace(spaced, compact)
  return cleaned.strip()

In [129]:
# Load the Task 2 data, normalise punctuation spacing in each sentence, and
# expose the sentence column under the name 'text' used by the prompt loop.
df = (
  pd.read_json('data/llm_task2_hetero.json', orient="records")
  .assign(sentence=lambda frame: frame['sentence'].apply(clean_sentence))
  .rename(columns={'sentence': 'text'})
)
df.head()

Unnamed: 0,text,pun_word
0,In some countries there is a lot of Manuel labor.,Manuel
1,Someone who attempts long distance fraud is kn...,phony
2,"In the delivery room, the unborn infant expect...",berth
3,Beautifully manicured lawns are highly sod after.,sod
4,"I saw something similar to moss the other day,...",lichen


In [130]:
# Use the whole Task 2 frame for evaluation: `test` is another name for `df`
# (same object, no copy is made). The last line displays the row count.
test = df
len(test)

318

In [131]:
# Prompt prefix for Task 2/3 (pun location and interpretation). It holds the
# instructions and two worked examples and ends with an "### INPUT:" marker.
# This is an f-string with no interpolated fields; the doubled braces render as
# literal { } around the JSON examples, and each trailing "\n" escape adds an
# extra newline on top of the physical line break.
# NOTE(review): example 1's sentence says "mad" while its target_sense and the
# explanation say "angry", and the closing instruction sentence is ungrammatical
# ("Identify the pun word in the input sentence is a pun ..."). The text is left
# unchanged because editing it would change what the model is asked; confirm
# before fixing.
prompt = f''' ### INSTRUCTION:
You are to identify the pun word in a given sentence based on the following criteria:
1. Ambiguity -  there exists a word in the sentence that has two similarly likely interpretations.
2. Distinctiveness - the two interpretations are very different from each other i.e. how distinct are the words semantically related to the two interpretations from each other. There needs to be at least one different word in the set of words supporting each interpretation.\n
For a word to be a pun word, it should satisfy BOTH criteria - Ambiguity and Distinctiveness.\n
It is guaranteed that the sentence has a pun word and hence the selected pun word must be from the sentence in the same form as it appears.\n
Also, list all the words that realize each interpretation of the pun word. If there are multiple words that support an interpretation, separate them with a semicolon.\n
Here are two examples of the cases you will encounter:
1. An example with heterographic puns (words that sound similar) - 
The magician got so mad that he pulled his hare out.
{{
  "pun_word": "hare",
  "target_word": "hair",
  "source_sense": "magician",
  "target_sense": "angry;pulled"
}}

In the above sentence, the pun word 'hare' supports two plausible interpretations of 'hare' meaning a rabbit and 'hair' meaning human hair. In the given sentence, the words 'magician' relates to 'hare' while 'angry' and 'pulled' refer to the second interpretation of 'hair'.

2. An example with homographic puns (same words with different senses) -
I used to be a banker but I lost interest.
{{
  "pun_word": "interest",
  "target_word": "interest",
  "source_sense": "banker",
  "target_sense": "used;to;be"
}}

In the above sentence, the pun word 'interest' supports two distinct interpretations of interest meaning 'financial interest' and interest meaning 'desire to do something'. The words 'banker' realize the first sense of financial interest while the words 'used', 'to', 'be' refer to the second sense.

Identify the pun word in the input sentence is a pun and list the target word and the supporting words for each sense the result based on ambiguity and distinctiveness in valid JSON format. Do not explain. Generate a response in the form of a valid JSON object with four keys: pun_word, target_word, source_sense and target_sense.\n
### INPUT:\n
'''

In [132]:
# Run the model over every Task 2 sentence and collect one parsed record per row.
# `generate` (defined earlier in the notebook) chooses its sampling temperature
# from the attempt number. `parseResult` is defined outside this view; this loop
# relies only on it returning None for a rejected completion and, otherwise, an
# object that supports item assignment.
results = []
i = 0  # rows completed so far; also the row label printed for each attempt
for _, row in test.iterrows():
  prepared_prompt = prompt + row['text'] + "\n### OUTPUT:\n"
  json_out = ''
  attempt = 0
  parsedJson = None
  # NOTE(review): retries are unbounded; the loop only ends once a completion
  # is accepted by parseResult.
  while parsedJson is None:
    attempt += 1
    json_out = generate(prepared_prompt, attempt)
    parsedJson = parseResult(json_out, row)
    print(f"{i} -- {attempt}")
  # Keep the cleaned input sentence alongside the parsed model output.
  parsedJson['text'] = row['text']
  results.append(parsedJson)
  i += 1

Llama.generate: prefix-match hit


0 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.92 ms /    52 runs   (    0.15 ms per token,  6561.51 tokens per second)
llama_print_timings: prompt eval time =    6128.85 ms /    17 tokens (  360.52 ms per token,     2.77 tokens per second)
llama_print_timings:        eval time =    2709.51 ms /    51 runs   (   53.13 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    8960.64 ms
Llama.generate: prefix-match hit


1 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.39 ms /    56 runs   (    0.10 ms per token, 10391.54 tokens per second)
llama_print_timings: prompt eval time =     194.59 ms /    20 tokens (    9.73 ms per token,   102.78 tokens per second)
llama_print_timings:        eval time =    2906.99 ms /    55 runs   (   52.85 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    3177.65 ms
Llama.generate: prefix-match hit


2 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.75 ms /    57 runs   (    0.14 ms per token,  7356.74 tokens per second)
llama_print_timings: prompt eval time =     200.27 ms /    24 tokens (    8.34 ms per token,   119.84 tokens per second)
llama_print_timings:        eval time =    2973.79 ms /    56 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3276.50 ms
Llama.generate: prefix-match hit


3 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.36 ms /    57 runs   (    0.09 ms per token, 10632.34 tokens per second)
llama_print_timings: prompt eval time =     195.82 ms /    20 tokens (    9.79 ms per token,   102.14 tokens per second)
llama_print_timings:        eval time =    2958.69 ms /    56 runs   (   52.83 ms per token,    18.93 tokens per second)
llama_print_timings:       total time =    3231.67 ms
Llama.generate: prefix-match hit


4 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.81 ms /    57 runs   (    0.14 ms per token,  7297.40 tokens per second)
llama_print_timings: prompt eval time =     210.32 ms /    30 tokens (    7.01 ms per token,   142.64 tokens per second)
llama_print_timings:        eval time =    2982.88 ms /    56 runs   (   53.27 ms per token,    18.77 tokens per second)
llama_print_timings:       total time =    3296.10 ms
Llama.generate: prefix-match hit


5 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.76 ms /    54 runs   (    0.13 ms per token,  7991.71 tokens per second)
llama_print_timings: prompt eval time =     199.72 ms /    22 tokens (    9.08 ms per token,   110.15 tokens per second)
llama_print_timings:        eval time =    2820.93 ms /    53 runs   (   53.23 ms per token,    18.79 tokens per second)
llama_print_timings:       total time =    3110.69 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.61 ms /    57 runs   (    0.13 ms per token,  7488.18 tokens per second)
llama_print_timings: prompt eval time =     190.63 ms /    16 tokens (   11.91 ms per token,    83.93 tokens per second)
llama_print_timings:        eval time =    2984.69 ms /    56 runs   (   53.30 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    3281.56 ms
Llama.generate: prefix-

6 -- 1
7 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.25 ms /    58 runs   (    0.12 ms per token,  8001.10 tokens per second)
llama_print_timings: prompt eval time =     204.26 ms /    25 tokens (    8.17 ms per token,   122.39 tokens per second)
llama_print_timings:        eval time =    3038.69 ms /    57 runs   (   53.31 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    3338.49 ms
Llama.generate: prefix-match hit


8 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.50 ms /    56 runs   (    0.13 ms per token,  7465.67 tokens per second)
llama_print_timings: prompt eval time =     209.81 ms /    29 tokens (    7.23 ms per token,   138.22 tokens per second)
llama_print_timings:        eval time =    2936.02 ms /    55 runs   (   53.38 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    3246.28 ms
Llama.generate: prefix-match hit


9 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.39 ms /    56 runs   (    0.11 ms per token,  8769.18 tokens per second)
llama_print_timings: prompt eval time =     346.86 ms /    34 tokens (   10.20 ms per token,    98.02 tokens per second)
llama_print_timings:        eval time =    2926.73 ms /    55 runs   (   53.21 ms per token,    18.79 tokens per second)
llama_print_timings:       total time =    3356.57 ms
Llama.generate: prefix-match hit


10 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.09 ms /    53 runs   (    0.11 ms per token,  8697.08 tokens per second)
llama_print_timings: prompt eval time =     200.82 ms /    24 tokens (    8.37 ms per token,   119.51 tokens per second)
llama_print_timings:        eval time =    2773.39 ms /    52 runs   (   53.33 ms per token,    18.75 tokens per second)
llama_print_timings:       total time =    3057.66 ms
Llama.generate: prefix-match hit


11 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.83 ms /    55 runs   (    0.12 ms per token,  8050.35 tokens per second)
llama_print_timings: prompt eval time =     216.72 ms /    31 tokens (    6.99 ms per token,   143.04 tokens per second)
llama_print_timings:        eval time =    2890.77 ms /    54 runs   (   53.53 ms per token,    18.68 tokens per second)
llama_print_timings:       total time =    3197.57 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.44 ms /    56 runs   (    0.11 ms per token,  8698.35 tokens per second)
llama_print_timings: prompt eval time =     199.03 ms /    22 tokens (    9.05 ms per token,   110.54 tokens per second)
llama_print_timings:        eval time =    2930.64 ms /    55 runs   (   53.28 ms per token,    18.77 tokens per second)
llama_print_timings:       total time =    3219.37 ms
Llama.generate: prefix-

12 -- 1
13 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.42 ms /    53 runs   (    0.10 ms per token,  9774.99 tokens per second)
llama_print_timings: prompt eval time =     194.82 ms /    19 tokens (   10.25 ms per token,    97.53 tokens per second)
llama_print_timings:        eval time =    2781.39 ms /    52 runs   (   53.49 ms per token,    18.70 tokens per second)
llama_print_timings:       total time =    3052.89 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.68 ms /    55 runs   (    0.12 ms per token,  8232.30 tokens per second)
llama_print_timings: prompt eval time =     201.77 ms /    24 tokens (    8.41 ms per token,   118.94 tokens per second)
llama_print_timings:        eval time =    2865.48 ms /    54 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3156.28 ms
Llama.generate: prefix-

14 -- 1
15 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.75 ms /    55 runs   (    0.12 ms per token,  8142.12 tokens per second)
llama_print_timings: prompt eval time =     189.19 ms /    28 tokens (    6.76 ms per token,   148.00 tokens per second)
llama_print_timings:        eval time =    2882.84 ms /    54 runs   (   53.39 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    3165.04 ms
Llama.generate: prefix-match hit


16 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.01 ms /    53 runs   (    0.09 ms per token, 10574.62 tokens per second)
llama_print_timings: prompt eval time =     182.60 ms /    10 tokens (   18.26 ms per token,    54.76 tokens per second)
llama_print_timings:        eval time =    2750.45 ms /    52 runs   (   52.89 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3004.93 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.09 ms /    54 runs   (    0.11 ms per token,  8869.91 tokens per second)
llama_print_timings: prompt eval time =     211.22 ms /    30 tokens (    7.04 ms per token,   142.03 tokens per second)
llama_print_timings:        eval time =    2811.74 ms /    53 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3104.33 ms
Llama.generate: prefix-

17 -- 1
18 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.84 ms /    64 runs   (    0.12 ms per token,  8158.06 tokens per second)
llama_print_timings: prompt eval time =     196.66 ms /    20 tokens (    9.83 ms per token,   101.70 tokens per second)
llama_print_timings:        eval time =    3398.61 ms /    63 runs   (   53.95 ms per token,    18.54 tokens per second)
llama_print_timings:       total time =    3709.75 ms
Llama.generate: prefix-match hit


19 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.83 ms /    52 runs   (    0.13 ms per token,  7613.47 tokens per second)
llama_print_timings: prompt eval time =     192.18 ms /    17 tokens (   11.30 ms per token,    88.46 tokens per second)
llama_print_timings:        eval time =    2725.75 ms /    51 runs   (   53.45 ms per token,    18.71 tokens per second)
llama_print_timings:       total time =    3015.79 ms
Llama.generate: prefix-match hit


20 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.15 ms /    52 runs   (    0.12 ms per token,  8456.66 tokens per second)
llama_print_timings: prompt eval time =     201.12 ms /    21 tokens (    9.58 ms per token,   104.42 tokens per second)
llama_print_timings:        eval time =    2755.42 ms /    51 runs   (   54.03 ms per token,    18.51 tokens per second)
llama_print_timings:       total time =    3042.55 ms
Llama.generate: prefix-match hit


21 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.88 ms /    53 runs   (    0.13 ms per token,  7706.85 tokens per second)
llama_print_timings: prompt eval time =     195.27 ms /    19 tokens (   10.28 ms per token,    97.30 tokens per second)
llama_print_timings:        eval time =    2782.39 ms /    52 runs   (   53.51 ms per token,    18.69 tokens per second)
llama_print_timings:       total time =    3072.40 ms
Llama.generate: prefix-match hit


22 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.24 ms /    54 runs   (    0.12 ms per token,  8655.23 tokens per second)
llama_print_timings: prompt eval time =     196.23 ms /    15 tokens (   13.08 ms per token,    76.44 tokens per second)
llama_print_timings:        eval time =    2815.35 ms /    53 runs   (   53.12 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3099.63 ms
Llama.generate: prefix-match hit


23 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.75 ms /    53 runs   (    0.13 ms per token,  7851.85 tokens per second)
llama_print_timings: prompt eval time =     212.61 ms /    31 tokens (    6.86 ms per token,   145.80 tokens per second)
llama_print_timings:        eval time =    2786.56 ms /    52 runs   (   53.59 ms per token,    18.66 tokens per second)
llama_print_timings:       total time =    3087.51 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.30 ms /    59 runs   (    0.12 ms per token,  8083.30 tokens per second)
llama_print_timings: prompt eval time =     189.73 ms /    28 tokens (    6.78 ms per token,   147.58 tokens per second)
llama_print_timings:        eval time =    3083.88 ms /    58 runs   (   53.17 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3374.10 ms
Llama.generate: prefix-

24 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.99 ms /    56 runs   (    0.12 ms per token,  8010.30 tokens per second)
llama_print_timings: prompt eval time =     191.19 ms /    17 tokens (   11.25 ms per token,    88.92 tokens per second)
llama_print_timings:        eval time =    2927.39 ms /    55 runs   (   53.23 ms per token,    18.79 tokens per second)
llama_print_timings:       total time =    3223.59 ms
Llama.generate: prefix-match hit


25 -- 1
26 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.65 ms /    56 runs   (    0.12 ms per token,  8417.26 tokens per second)
llama_print_timings: prompt eval time =     189.72 ms /    28 tokens (    6.78 ms per token,   147.58 tokens per second)
llama_print_timings:        eval time =    2912.76 ms /    55 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3194.65 ms
Llama.generate: prefix-match hit


27 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.52 ms /    52 runs   (    0.13 ms per token,  7979.13 tokens per second)
llama_print_timings: prompt eval time =     197.21 ms /    21 tokens (    9.39 ms per token,   106.49 tokens per second)
llama_print_timings:        eval time =    2708.84 ms /    51 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    2996.14 ms
Llama.generate: prefix-match hit


28 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      10.17 ms /    62 runs   (    0.16 ms per token,  6098.76 tokens per second)
llama_print_timings: prompt eval time =     195.31 ms /    19 tokens (   10.28 ms per token,    97.28 tokens per second)
llama_print_timings:        eval time =    3261.90 ms /    61 runs   (   53.47 ms per token,    18.70 tokens per second)
llama_print_timings:       total time =    3592.56 ms
Llama.generate: prefix-match hit


29 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.17 ms /    51 runs   (    0.10 ms per token,  9866.51 tokens per second)
llama_print_timings: prompt eval time =     195.12 ms /    20 tokens (    9.76 ms per token,   102.50 tokens per second)
llama_print_timings:        eval time =    2655.77 ms /    50 runs   (   53.12 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    2926.21 ms
Llama.generate: prefix-match hit


30 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.14 ms /    53 runs   (    0.10 ms per token, 10311.28 tokens per second)
llama_print_timings: prompt eval time =     200.93 ms /    24 tokens (    8.37 ms per token,   119.44 tokens per second)
llama_print_timings:        eval time =    2753.20 ms /    52 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3025.39 ms
Llama.generate: prefix-match hit


31 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.30 ms /    55 runs   (    0.10 ms per token, 10371.49 tokens per second)
llama_print_timings: prompt eval time =     197.26 ms /    21 tokens (    9.39 ms per token,   106.46 tokens per second)
llama_print_timings:        eval time =    2865.49 ms /    54 runs   (   53.06 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3137.27 ms
Llama.generate: prefix-match hit


32 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.51 ms /    57 runs   (    0.10 ms per token, 10348.58 tokens per second)
llama_print_timings: prompt eval time =     200.54 ms /    23 tokens (    8.72 ms per token,   114.69 tokens per second)
llama_print_timings:        eval time =    2971.33 ms /    56 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3249.38 ms
Llama.generate: prefix-match hit


33 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.76 ms /    59 runs   (    0.10 ms per token, 10244.83 tokens per second)
llama_print_timings: prompt eval time =     205.08 ms /    26 tokens (    7.89 ms per token,   126.78 tokens per second)
llama_print_timings:        eval time =    3071.92 ms /    58 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3356.28 ms
Llama.generate: prefix-match hit


34 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.00 ms /    58 runs   (    0.10 ms per token,  9673.12 tokens per second)
llama_print_timings: prompt eval time =     199.59 ms /    22 tokens (    9.07 ms per token,   110.23 tokens per second)
llama_print_timings:        eval time =    3018.96 ms /    57 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3300.84 ms
Llama.generate: prefix-match hit


35 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.23 ms /    63 runs   (    0.10 ms per token, 10107.49 tokens per second)
llama_print_timings: prompt eval time =     205.49 ms /    32 tokens (    6.42 ms per token,   155.73 tokens per second)
llama_print_timings:        eval time =    3292.95 ms /    62 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3583.47 ms
Llama.generate: prefix-match hit


36 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.72 ms /    60 runs   (    0.10 ms per token, 10482.18 tokens per second)
llama_print_timings: prompt eval time =     197.47 ms /    21 tokens (    9.40 ms per token,   106.34 tokens per second)
llama_print_timings:        eval time =    3123.93 ms /    59 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3401.31 ms
Llama.generate: prefix-match hit


37 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.50 ms /    56 runs   (    0.10 ms per token, 10178.12 tokens per second)
llama_print_timings: prompt eval time =     194.29 ms /    19 tokens (   10.23 ms per token,    97.79 tokens per second)
llama_print_timings:        eval time =    2914.57 ms /    55 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3185.33 ms
Llama.generate: prefix-match hit


38 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.57 ms /    57 runs   (    0.10 ms per token, 10237.07 tokens per second)
llama_print_timings: prompt eval time =     199.48 ms /    22 tokens (    9.07 ms per token,   110.29 tokens per second)
llama_print_timings:        eval time =    2963.00 ms /    56 runs   (   52.91 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3239.04 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      34.16 ms /    61 runs   (    0.56 ms per token,  1785.82 tokens per second)
llama_print_timings: prompt eval time =     189.32 ms /    28 tokens (    6.76 ms per token,   147.90 tokens per second)
llama_print_timings:        eval time =    3293.84 ms /    60 runs   (   54.90 ms per token,    18.22 tokens per second)
llama_print_timings:       total time =    3835.69 ms
Llama.generate: prefix-

39 -- 1
40 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      15.71 ms /    56 runs   (    0.28 ms per token,  3564.15 tokens per second)
llama_print_timings: prompt eval time =     200.52 ms /    22 tokens (    9.11 ms per token,   109.72 tokens per second)
llama_print_timings:        eval time =    2966.62 ms /    55 runs   (   53.94 ms per token,    18.54 tokens per second)
llama_print_timings:       total time =    3344.82 ms
Llama.generate: prefix-match hit


41 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      15.05 ms /    56 runs   (    0.27 ms per token,  3721.67 tokens per second)
llama_print_timings: prompt eval time =     342.54 ms /    33 tokens (   10.38 ms per token,    96.34 tokens per second)
llama_print_timings:        eval time =    3001.40 ms /    55 runs   (   54.57 ms per token,    18.32 tokens per second)
llama_print_timings:       total time =    3580.43 ms
Llama.generate: prefix-match hit


42 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       8.00 ms /    56 runs   (    0.14 ms per token,  6996.50 tokens per second)
llama_print_timings: prompt eval time =     192.95 ms /    18 tokens (   10.72 ms per token,    93.29 tokens per second)
llama_print_timings:        eval time =    2920.48 ms /    55 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3224.65 ms
Llama.generate: prefix-match hit


43 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.46 ms /    59 runs   (    0.11 ms per token,  9140.20 tokens per second)
llama_print_timings: prompt eval time =     197.57 ms /    21 tokens (    9.41 ms per token,   106.29 tokens per second)
llama_print_timings:        eval time =    3073.80 ms /    58 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3359.49 ms
Llama.generate: prefix-match hit


44 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.78 ms /    52 runs   (    0.13 ms per token,  7668.49 tokens per second)
llama_print_timings: prompt eval time =     191.21 ms /    17 tokens (   11.25 ms per token,    88.91 tokens per second)
llama_print_timings:        eval time =    2706.53 ms /    51 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    2993.54 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.38 ms /    52 runs   (    0.10 ms per token,  9661.84 tokens per second)
llama_print_timings: prompt eval time =     194.88 ms /    20 tokens (    9.74 ms per token,   102.63 tokens per second)
llama_print_timings:        eval time =    2699.03 ms /    51 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    2969.57 ms
Llama.generate: prefix-

45 -- 1
46 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.57 ms /    57 runs   (    0.10 ms per token, 10227.88 tokens per second)
llama_print_timings: prompt eval time =     197.45 ms /    21 tokens (    9.40 ms per token,   106.36 tokens per second)
llama_print_timings:        eval time =    2966.45 ms /    56 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3242.97 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.11 ms /    55 runs   (    0.13 ms per token,  7734.50 tokens per second)
llama_print_timings: prompt eval time =     200.78 ms /    24 tokens (    8.37 ms per token,   119.53 tokens per second)
llama_print_timings:        eval time =    2886.71 ms /    54 runs   (   53.46 ms per token,    18.71 tokens per second)
llama_print_timings:       total time =    3187.71 ms
Llama.generate: prefix-

47 -- 1
48 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.75 ms /    54 runs   (    0.14 ms per token,  6966.84 tokens per second)
llama_print_timings: prompt eval time =     205.92 ms /    27 tokens (    7.63 ms per token,   131.12 tokens per second)
llama_print_timings:        eval time =    2819.70 ms /    53 runs   (   53.20 ms per token,    18.80 tokens per second)
llama_print_timings:       total time =    3128.38 ms
Llama.generate: prefix-match hit


49 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.91 ms /    56 runs   (    0.14 ms per token,  7082.33 tokens per second)
llama_print_timings: prompt eval time =     202.07 ms /    25 tokens (    8.08 ms per token,   123.72 tokens per second)
llama_print_timings:        eval time =    2919.95 ms /    55 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3229.38 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       8.31 ms /    62 runs   (    0.13 ms per token,  7464.48 tokens per second)
llama_print_timings: prompt eval time =     194.29 ms /    20 tokens (    9.71 ms per token,   102.94 tokens per second)
llama_print_timings:        eval time =    3255.95 ms /    61 runs   (   53.38 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    3571.52 ms
Llama.generate: prefix-

50 -- 1
51 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.97 ms /    52 runs   (    0.10 ms per token, 10466.99 tokens per second)
llama_print_timings: prompt eval time =     188.60 ms /    16 tokens (   11.79 ms per token,    84.84 tokens per second)
llama_print_timings:        eval time =    2737.07 ms /    51 runs   (   53.67 ms per token,    18.63 tokens per second)
llama_print_timings:       total time =    2997.93 ms
Llama.generate: prefix-match hit


52 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      20.62 ms /    57 runs   (    0.36 ms per token,  2763.77 tokens per second)
llama_print_timings: prompt eval time =     368.40 ms /    49 tokens (    7.52 ms per token,   133.01 tokens per second)
llama_print_timings:        eval time =    3060.21 ms /    56 runs   (   54.65 ms per token,    18.30 tokens per second)
llama_print_timings:       total time =    3716.86 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      27.10 ms /    50 runs   (    0.54 ms per token,  1844.75 tokens per second)
llama_print_timings: prompt eval time =     199.33 ms /    22 tokens (    9.06 ms per token,   110.37 tokens per second)
llama_print_timings:        eval time =    2709.64 ms /    49 runs   (   55.30 ms per token,    18.08 tokens per second)
llama_print_timings:       total time =    3177.46 ms
Llama.generate: prefix-

53 -- 1
54 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      21.07 ms /    52 runs   (    0.41 ms per token,  2467.50 tokens per second)
llama_print_timings: prompt eval time =     199.14 ms /    20 tokens (    9.96 ms per token,   100.43 tokens per second)
llama_print_timings:        eval time =    2819.26 ms /    51 runs   (   55.28 ms per token,    18.09 tokens per second)
llama_print_timings:       total time =    3270.37 ms
Llama.generate: prefix-match hit


55 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.40 ms /    53 runs   (    0.10 ms per token,  9811.18 tokens per second)
llama_print_timings: prompt eval time =     200.55 ms /    24 tokens (    8.36 ms per token,   119.67 tokens per second)
llama_print_timings:        eval time =    2756.22 ms /    52 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3031.07 ms
Llama.generate: prefix-match hit


56 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.97 ms /    54 runs   (    0.13 ms per token,  7745.27 tokens per second)
llama_print_timings: prompt eval time =     193.99 ms /    19 tokens (   10.21 ms per token,    97.94 tokens per second)
llama_print_timings:        eval time =    2815.30 ms /    53 runs   (   53.12 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3097.72 ms
Llama.generate: prefix-match hit


57 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      14.57 ms /    51 runs   (    0.29 ms per token,  3500.58 tokens per second)
llama_print_timings: prompt eval time =     197.47 ms /    21 tokens (    9.40 ms per token,   106.34 tokens per second)
llama_print_timings:        eval time =    2711.97 ms /    50 runs   (   54.24 ms per token,    18.44 tokens per second)
llama_print_timings:       total time =    3097.78 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      25.47 ms /    52 runs   (    0.49 ms per token,  2041.46 tokens per second)
llama_print_timings: prompt eval time =     201.81 ms /    23 tokens (    8.77 ms per token,   113.97 tokens per second)
llama_print_timings:        eval time =    2802.79 ms /    51 runs   (   54.96 ms per token,    18.20 tokens per second)
llama_print_timings:       total time =    3297.62 ms
Llama.generate: prefix-

58 -- 1
59 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.82 ms /    57 runs   (    0.10 ms per token,  9797.18 tokens per second)
llama_print_timings: prompt eval time =     194.71 ms /    19 tokens (   10.25 ms per token,    97.58 tokens per second)
llama_print_timings:        eval time =    2971.93 ms /    56 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3248.27 ms
Llama.generate: prefix-match hit


60 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      10.18 ms /    56 runs   (    0.18 ms per token,  5500.44 tokens per second)
llama_print_timings: prompt eval time =     193.12 ms /    18 tokens (   10.73 ms per token,    93.21 tokens per second)
llama_print_timings:        eval time =    2934.78 ms /    55 runs   (   53.36 ms per token,    18.74 tokens per second)
llama_print_timings:       total time =    3238.15 ms
Llama.generate: prefix-match hit


61 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.44 ms /    57 runs   (    0.10 ms per token, 10485.65 tokens per second)
llama_print_timings: prompt eval time =     191.08 ms /    17 tokens (   11.24 ms per token,    88.97 tokens per second)
llama_print_timings:        eval time =    2968.11 ms /    56 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3235.97 ms
Llama.generate: prefix-match hit


62 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      20.39 ms /    59 runs   (    0.35 ms per token,  2893.01 tokens per second)
llama_print_timings: prompt eval time =     194.14 ms /    19 tokens (   10.22 ms per token,    97.87 tokens per second)
llama_print_timings:        eval time =    3162.60 ms /    58 runs   (   54.53 ms per token,    18.34 tokens per second)
llama_print_timings:       total time =    3610.25 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      32.95 ms /    56 runs   (    0.59 ms per token,  1699.44 tokens per second)
llama_print_timings: prompt eval time =     203.33 ms /    25 tokens (    8.13 ms per token,   122.96 tokens per second)
llama_print_timings:        eval time =    3004.39 ms /    55 runs   (   54.63 ms per token,    18.31 tokens per second)
llama_print_timings:       total time =    3555.41 ms
Llama.generate: prefix-

63 -- 1
64 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       8.60 ms /    56 runs   (    0.15 ms per token,  6510.11 tokens per second)
llama_print_timings: prompt eval time =     190.00 ms /    16 tokens (   11.88 ms per token,    84.21 tokens per second)
llama_print_timings:        eval time =    2929.22 ms /    55 runs   (   53.26 ms per token,    18.78 tokens per second)
llama_print_timings:       total time =    3242.85 ms
Llama.generate: prefix-match hit


65 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      15.78 ms /    60 runs   (    0.26 ms per token,  3801.32 tokens per second)
llama_print_timings: prompt eval time =     198.72 ms /    22 tokens (    9.03 ms per token,   110.71 tokens per second)
llama_print_timings:        eval time =    3194.65 ms /    59 runs   (   54.15 ms per token,    18.47 tokens per second)
llama_print_timings:       total time =    3595.00 ms
Llama.generate: prefix-match hit


66 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      22.67 ms /    55 runs   (    0.41 ms per token,  2426.54 tokens per second)
llama_print_timings: prompt eval time =     189.24 ms /    16 tokens (   11.83 ms per token,    84.55 tokens per second)
llama_print_timings:        eval time =    2964.73 ms /    54 runs   (   54.90 ms per token,    18.21 tokens per second)
llama_print_timings:       total time =    3459.56 ms
Llama.generate: prefix-match hit


67 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.21 ms /    54 runs   (    0.12 ms per token,  8690.05 tokens per second)
llama_print_timings: prompt eval time =     197.78 ms /    21 tokens (    9.42 ms per token,   106.18 tokens per second)
llama_print_timings:        eval time =    2820.45 ms /    53 runs   (   53.22 ms per token,    18.79 tokens per second)
llama_print_timings:       total time =    3106.10 ms
Llama.generate: prefix-match hit


68 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.62 ms /    57 runs   (    0.10 ms per token, 10140.54 tokens per second)
llama_print_timings: prompt eval time =     209.61 ms /    29 tokens (    7.23 ms per token,   138.35 tokens per second)
llama_print_timings:        eval time =    2969.03 ms /    56 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3255.57 ms
Llama.generate: prefix-match hit


69 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.20 ms /    53 runs   (    0.10 ms per token, 10192.31 tokens per second)
llama_print_timings: prompt eval time =     189.55 ms /    15 tokens (   12.64 ms per token,    79.13 tokens per second)
llama_print_timings:        eval time =    2754.22 ms /    52 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3018.06 ms
Llama.generate: prefix-match hit


70 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.23 ms /    52 runs   (    0.10 ms per token,  9942.64 tokens per second)
llama_print_timings: prompt eval time =     191.35 ms /    17 tokens (   11.26 ms per token,    88.84 tokens per second)
llama_print_timings:        eval time =    2698.93 ms /    51 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    2962.27 ms
Llama.generate: prefix-match hit


71 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      13.80 ms /    54 runs   (    0.26 ms per token,  3911.91 tokens per second)
llama_print_timings: prompt eval time =     194.68 ms /    19 tokens (   10.25 ms per token,    97.60 tokens per second)
llama_print_timings:        eval time =    2869.18 ms /    53 runs   (   54.14 ms per token,    18.47 tokens per second)
llama_print_timings:       total time =    3246.40 ms
Llama.generate: prefix-match hit


72 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      11.08 ms /    54 runs   (    0.21 ms per token,  4873.65 tokens per second)
llama_print_timings: prompt eval time =     189.05 ms /    16 tokens (   11.82 ms per token,    84.63 tokens per second)
llama_print_timings:        eval time =    2840.26 ms /    53 runs   (   53.59 ms per token,    18.66 tokens per second)
llama_print_timings:       total time =    3176.25 ms
Llama.generate: prefix-match hit


73 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.55 ms /    60 runs   (    0.09 ms per token, 10806.92 tokens per second)
llama_print_timings: prompt eval time =     197.10 ms /    21 tokens (    9.39 ms per token,   106.54 tokens per second)
llama_print_timings:        eval time =    3128.38 ms /    59 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3405.27 ms
Llama.generate: prefix-match hit


74 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       9.05 ms /    52 runs   (    0.17 ms per token,  5743.32 tokens per second)
llama_print_timings: prompt eval time =     193.07 ms /    18 tokens (   10.73 ms per token,    93.23 tokens per second)
llama_print_timings:        eval time =    2721.32 ms /    51 runs   (   53.36 ms per token,    18.74 tokens per second)
llama_print_timings:       total time =    3029.38 ms
Llama.generate: prefix-match hit


75 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.72 ms /    57 runs   (    0.10 ms per token,  9963.29 tokens per second)
llama_print_timings: prompt eval time =     199.10 ms /    23 tokens (    8.66 ms per token,   115.52 tokens per second)
llama_print_timings:        eval time =    2968.24 ms /    56 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3247.91 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.35 ms /    58 runs   (    0.09 ms per token, 10843.15 tokens per second)
llama_print_timings: prompt eval time =     191.12 ms /    17 tokens (   11.24 ms per token,    88.95 tokens per second)
llama_print_timings:        eval time =    3021.54 ms /    57 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3289.97 ms
Llama.generate: prefix-

76 -- 1
77 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.99 ms /    53 runs   (    0.09 ms per token, 10627.63 tokens per second)
llama_print_timings: prompt eval time =     198.28 ms /    21 tokens (    9.44 ms per token,   105.91 tokens per second)
llama_print_timings:        eval time =    2755.12 ms /    52 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3024.98 ms
Llama.generate: prefix-match hit


78 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.39 ms /    53 runs   (    0.10 ms per token,  9827.55 tokens per second)
llama_print_timings: prompt eval time =     191.02 ms /    17 tokens (   11.24 ms per token,    88.99 tokens per second)
llama_print_timings:        eval time =    2751.93 ms /    52 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3018.77 ms
Llama.generate: prefix-match hit


79 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.63 ms /    58 runs   (    0.11 ms per token,  8746.80 tokens per second)
llama_print_timings: prompt eval time =     195.04 ms /    20 tokens (    9.75 ms per token,   102.54 tokens per second)
llama_print_timings:        eval time =    3048.21 ms /    57 runs   (   53.48 ms per token,    18.70 tokens per second)
llama_print_timings:       total time =    3341.65 ms
Llama.generate: prefix-match hit


80 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.59 ms /    58 runs   (    0.10 ms per token, 10370.11 tokens per second)
llama_print_timings: prompt eval time =     205.93 ms /    27 tokens (    7.63 ms per token,   131.11 tokens per second)
llama_print_timings:        eval time =    3018.74 ms /    57 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3303.32 ms
Llama.generate: prefix-match hit


81 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      11.60 ms /    57 runs   (    0.20 ms per token,  4913.37 tokens per second)
llama_print_timings: prompt eval time =     209.00 ms /    29 tokens (    7.21 ms per token,   138.76 tokens per second)
llama_print_timings:        eval time =    2997.27 ms /    56 runs   (   53.52 ms per token,    18.68 tokens per second)
llama_print_timings:       total time =    3347.99 ms
Llama.generate: prefix-match hit


82 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.90 ms /    53 runs   (    0.09 ms per token, 10807.50 tokens per second)
llama_print_timings: prompt eval time =     569.19 ms /    95 tokens (    5.99 ms per token,   166.90 tokens per second)
llama_print_timings:        eval time =    2772.66 ms /    52 runs   (   53.32 ms per token,    18.75 tokens per second)
llama_print_timings:       total time =    3415.75 ms
Llama.generate: prefix-match hit


83 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.45 ms /    58 runs   (    0.09 ms per token, 10638.30 tokens per second)
llama_print_timings: prompt eval time =     197.08 ms /    21 tokens (    9.38 ms per token,   106.56 tokens per second)
llama_print_timings:        eval time =    3019.44 ms /    57 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3294.73 ms
Llama.generate: prefix-match hit


84 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.98 ms /    59 runs   (    0.10 ms per token,  9872.82 tokens per second)
llama_print_timings: prompt eval time =     189.85 ms /    28 tokens (    6.78 ms per token,   147.48 tokens per second)
llama_print_timings:        eval time =    3074.63 ms /    58 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3347.13 ms
Llama.generate: prefix-match hit


85 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.74 ms /    51 runs   (    0.09 ms per token, 10768.58 tokens per second)
llama_print_timings: prompt eval time =     193.31 ms /    18 tokens (   10.74 ms per token,    93.12 tokens per second)
llama_print_timings:        eval time =    2645.16 ms /    50 runs   (   52.90 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    2907.25 ms
Llama.generate: prefix-match hit


86 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.72 ms /    58 runs   (    0.10 ms per token, 10141.63 tokens per second)
llama_print_timings: prompt eval time =     346.42 ms /    34 tokens (   10.19 ms per token,    98.15 tokens per second)
llama_print_timings:        eval time =    3023.03 ms /    57 runs   (   53.04 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3447.14 ms
Llama.generate: prefix-match hit


87 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.70 ms /    56 runs   (    0.10 ms per token,  9822.84 tokens per second)
llama_print_timings: prompt eval time =     210.42 ms /    30 tokens (    7.01 ms per token,   142.57 tokens per second)
llama_print_timings:        eval time =    2915.20 ms /    55 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3204.85 ms
Llama.generate: prefix-match hit


88 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.17 ms /    53 runs   (    0.10 ms per token, 10245.51 tokens per second)
llama_print_timings: prompt eval time =     195.06 ms /    20 tokens (    9.75 ms per token,   102.53 tokens per second)
llama_print_timings:        eval time =    2753.12 ms /    52 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3019.47 ms
Llama.generate: prefix-match hit


89 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.12 ms /    53 runs   (    0.10 ms per token, 10361.68 tokens per second)
llama_print_timings: prompt eval time =     197.58 ms /    21 tokens (    9.41 ms per token,   106.29 tokens per second)
llama_print_timings:        eval time =    2754.14 ms /    52 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3023.82 ms
Llama.generate: prefix-match hit


90 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.60 ms /    59 runs   (    0.09 ms per token, 10537.60 tokens per second)
llama_print_timings: prompt eval time =     199.00 ms /    22 tokens (    9.05 ms per token,   110.56 tokens per second)
llama_print_timings:        eval time =    3070.13 ms /    58 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3347.67 ms
Llama.generate: prefix-match hit


91 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       8.88 ms /    59 runs   (    0.15 ms per token,  6640.41 tokens per second)
llama_print_timings: prompt eval time =     351.92 ms /    38 tokens (    9.26 ms per token,   107.98 tokens per second)
llama_print_timings:        eval time =    3097.27 ms /    58 runs   (   53.40 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    3577.56 ms
Llama.generate: prefix-match hit


92 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      23.51 ms /    68 runs   (    0.35 ms per token,  2892.26 tokens per second)
llama_print_timings: prompt eval time =     201.04 ms /    24 tokens (    8.38 ms per token,   119.38 tokens per second)
llama_print_timings:        eval time =    3676.73 ms /    67 runs   (   54.88 ms per token,    18.22 tokens per second)
llama_print_timings:       total time =    4195.24 ms
Llama.generate: prefix-match hit


93 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.87 ms /    52 runs   (    0.09 ms per token, 10686.40 tokens per second)
llama_print_timings: prompt eval time =     192.97 ms /    17 tokens (   11.35 ms per token,    88.10 tokens per second)
llama_print_timings:        eval time =    2701.80 ms /    51 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    2965.18 ms
Llama.generate: prefix-match hit


94 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.70 ms /    58 runs   (    0.10 ms per token, 10184.37 tokens per second)
llama_print_timings: prompt eval time =     197.75 ms /    21 tokens (    9.42 ms per token,   106.20 tokens per second)
llama_print_timings:        eval time =    3020.05 ms /    57 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3298.02 ms
Llama.generate: prefix-match hit


95 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.12 ms /    54 runs   (    0.09 ms per token, 10553.06 tokens per second)
llama_print_timings: prompt eval time =     197.48 ms /    21 tokens (    9.40 ms per token,   106.34 tokens per second)
llama_print_timings:        eval time =    2802.21 ms /    53 runs   (   52.87 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3071.70 ms
Llama.generate: prefix-match hit


96 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      14.77 ms /    59 runs   (    0.25 ms per token,  3993.77 tokens per second)
llama_print_timings: prompt eval time =     189.68 ms /    15 tokens (   12.65 ms per token,    79.08 tokens per second)
llama_print_timings:        eval time =    3127.26 ms /    58 runs   (   53.92 ms per token,    18.55 tokens per second)
llama_print_timings:       total time =    3485.97 ms
Llama.generate: prefix-match hit


97 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.13 ms /    54 runs   (    0.13 ms per token,  7576.82 tokens per second)
llama_print_timings: prompt eval time =     194.55 ms /    20 tokens (    9.73 ms per token,   102.80 tokens per second)
llama_print_timings:        eval time =    2849.25 ms /    53 runs   (   53.76 ms per token,    18.60 tokens per second)
llama_print_timings:       total time =    3140.53 ms
Llama.generate: prefix-match hit


98 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.67 ms /    60 runs   (    0.09 ms per token, 10576.41 tokens per second)
llama_print_timings: prompt eval time =     204.72 ms /    26 tokens (    7.87 ms per token,   127.00 tokens per second)
llama_print_timings:        eval time =    3123.07 ms /    59 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3407.50 ms
Llama.generate: prefix-match hit


99 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.09 ms /    61 runs   (    0.10 ms per token, 10021.36 tokens per second)
llama_print_timings: prompt eval time =     357.63 ms /    43 tokens (    8.32 ms per token,   120.24 tokens per second)
llama_print_timings:        eval time =    3182.28 ms /    60 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3624.93 ms
Llama.generate: prefix-match hit


100 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.31 ms /    53 runs   (    0.10 ms per token,  9981.17 tokens per second)
llama_print_timings: prompt eval time =     200.74 ms /    23 tokens (    8.73 ms per token,   114.57 tokens per second)
llama_print_timings:        eval time =    2756.79 ms /    52 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3031.01 ms
Llama.generate: prefix-match hit


101 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.91 ms /    53 runs   (    0.09 ms per token, 10789.90 tokens per second)
llama_print_timings: prompt eval time =     189.44 ms /    15 tokens (   12.63 ms per token,    79.18 tokens per second)
llama_print_timings:        eval time =    2751.20 ms /    52 runs   (   52.91 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3011.49 ms
Llama.generate: prefix-match hit


102 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      26.19 ms /    54 runs   (    0.48 ms per token,  2062.01 tokens per second)
llama_print_timings: prompt eval time =     188.36 ms /    14 tokens (   13.45 ms per token,    74.32 tokens per second)
llama_print_timings:        eval time =    2870.49 ms /    53 runs   (   54.16 ms per token,    18.46 tokens per second)
llama_print_timings:       total time =    3335.35 ms
Llama.generate: prefix-match hit


103 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.86 ms /    55 runs   (    0.12 ms per token,  8021.00 tokens per second)
llama_print_timings: prompt eval time =     193.50 ms /    18 tokens (   10.75 ms per token,    93.02 tokens per second)
llama_print_timings:        eval time =    2864.28 ms /    54 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3150.07 ms
Llama.generate: prefix-match hit


104 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.45 ms /    58 runs   (    0.09 ms per token, 10638.30 tokens per second)
llama_print_timings: prompt eval time =     194.40 ms /    19 tokens (   10.23 ms per token,    97.74 tokens per second)
llama_print_timings:        eval time =    3019.56 ms /    57 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3291.21 ms
Llama.generate: prefix-match hit


105 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       9.64 ms /    56 runs   (    0.17 ms per token,  5806.72 tokens per second)
llama_print_timings: prompt eval time =     193.94 ms /    19 tokens (   10.21 ms per token,    97.97 tokens per second)
llama_print_timings:        eval time =    2945.32 ms /    55 runs   (   53.55 ms per token,    18.67 tokens per second)
llama_print_timings:       total time =    3271.66 ms
Llama.generate: prefix-match hit


106 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.37 ms /    57 runs   (    0.09 ms per token, 10620.46 tokens per second)
llama_print_timings: prompt eval time =     198.68 ms /    22 tokens (    9.03 ms per token,   110.73 tokens per second)
llama_print_timings:        eval time =    2964.32 ms /    56 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3238.99 ms
Llama.generate: prefix-match hit


107 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.53 ms /    50 runs   (    0.09 ms per token, 11027.79 tokens per second)
llama_print_timings: prompt eval time =     190.91 ms /    17 tokens (   11.23 ms per token,    89.05 tokens per second)
llama_print_timings:        eval time =    2590.89 ms /    49 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    2847.99 ms
Llama.generate: prefix-match hit


108 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      15.45 ms /   135 runs   (    0.11 ms per token,  8740.69 tokens per second)
llama_print_timings: prompt eval time =     205.90 ms /    32 tokens (    6.43 ms per token,   155.41 tokens per second)
llama_print_timings:        eval time =    7141.27 ms /   134 runs   (   53.29 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    7571.98 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.78 ms /    58 runs   (    0.10 ms per token, 10036.34 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3073.85 ms /    58 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3151.85 ms
Llama.generate: prefix-

108 -- 2



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      17.00 ms /    58 runs   (    0.29 ms per token,  3410.76 tokens per second)
llama_print_timings: prompt eval time =     188.17 ms /    15 tokens (   12.54 ms per token,    79.71 tokens per second)
llama_print_timings:        eval time =    3095.64 ms /    57 runs   (   54.31 ms per token,    18.41 tokens per second)
llama_print_timings:       total time =    3513.66 ms
Llama.generate: prefix-match hit


109 -- 1
110 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      22.39 ms /    53 runs   (    0.42 ms per token,  2367.55 tokens per second)
llama_print_timings: prompt eval time =     194.14 ms /    18 tokens (   10.79 ms per token,    92.72 tokens per second)
llama_print_timings:        eval time =    2849.27 ms /    52 runs   (   54.79 ms per token,    18.25 tokens per second)
llama_print_timings:       total time =    3364.85 ms
Llama.generate: prefix-match hit


111 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      13.88 ms /    51 runs   (    0.27 ms per token,  3673.03 tokens per second)
llama_print_timings: prompt eval time =     205.24 ms /    26 tokens (    7.89 ms per token,   126.68 tokens per second)
llama_print_timings:        eval time =    2696.85 ms /    50 runs   (   53.94 ms per token,    18.54 tokens per second)
llama_print_timings:       total time =    3082.23 ms
Llama.generate: prefix-match hit


112 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.46 ms /    64 runs   (    0.10 ms per token,  9911.72 tokens per second)
llama_print_timings: prompt eval time =     206.03 ms /    32 tokens (    6.44 ms per token,   155.32 tokens per second)
llama_print_timings:        eval time =    3340.15 ms /    63 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3632.70 ms
Llama.generate: prefix-match hit


113 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.18 ms /    64 runs   (    0.10 ms per token, 10359.34 tokens per second)
llama_print_timings: prompt eval time =     200.39 ms /    23 tokens (    8.71 ms per token,   114.78 tokens per second)
llama_print_timings:        eval time =    3335.58 ms /    63 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3622.55 ms
Llama.generate: prefix-match hit


114 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       9.77 ms /    50 runs   (    0.20 ms per token,  5119.80 tokens per second)
llama_print_timings: prompt eval time =     187.83 ms /    14 tokens (   13.42 ms per token,    74.54 tokens per second)
llama_print_timings:        eval time =    2616.65 ms /    49 runs   (   53.40 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    2936.55 ms
Llama.generate: prefix-match hit


115 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.89 ms /    61 runs   (    0.10 ms per token, 10360.05 tokens per second)
llama_print_timings: prompt eval time =     210.58 ms /    30 tokens (    7.02 ms per token,   142.47 tokens per second)
llama_print_timings:        eval time =    3180.46 ms /    60 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3472.72 ms
Llama.generate: prefix-match hit


116 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      12.61 ms /    53 runs   (    0.24 ms per token,  4204.68 tokens per second)
llama_print_timings: prompt eval time =     186.58 ms /    13 tokens (   14.35 ms per token,    69.68 tokens per second)
llama_print_timings:        eval time =    2796.08 ms /    52 runs   (   53.77 ms per token,    18.60 tokens per second)
llama_print_timings:       total time =    3160.79 ms
Llama.generate: prefix-match hit


117 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.13 ms /    63 runs   (    0.10 ms per token, 10270.62 tokens per second)
llama_print_timings: prompt eval time =     198.98 ms /    22 tokens (    9.04 ms per token,   110.56 tokens per second)
llama_print_timings:        eval time =    3287.23 ms /    62 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3570.85 ms
Llama.generate: prefix-match hit


118 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       8.96 ms /    55 runs   (    0.16 ms per token,  6138.39 tokens per second)
llama_print_timings: prompt eval time =     191.23 ms /    17 tokens (   11.25 ms per token,    88.90 tokens per second)
llama_print_timings:        eval time =    2883.80 ms /    54 runs   (   53.40 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    3211.39 ms
Llama.generate: prefix-match hit


119 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.26 ms /    55 runs   (    0.10 ms per token, 10450.31 tokens per second)
llama_print_timings: prompt eval time =     193.64 ms /    18 tokens (   10.76 ms per token,    92.95 tokens per second)
llama_print_timings:        eval time =    2856.73 ms /    54 runs   (   52.90 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3124.49 ms
Llama.generate: prefix-match hit


120 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.29 ms /    56 runs   (    0.09 ms per token, 10576.02 tokens per second)
llama_print_timings: prompt eval time =     194.64 ms /    20 tokens (    9.73 ms per token,   102.75 tokens per second)
llama_print_timings:        eval time =    2911.18 ms /    55 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3180.87 ms
Llama.generate: prefix-match hit


121 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.60 ms /    57 runs   (    0.10 ms per token, 10185.85 tokens per second)
llama_print_timings: prompt eval time =     201.10 ms /    23 tokens (    8.74 ms per token,   114.37 tokens per second)
llama_print_timings:        eval time =    2969.36 ms /    56 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3248.37 ms
Llama.generate: prefix-match hit


122 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.09 ms /    53 runs   (    0.10 ms per token, 10406.44 tokens per second)
llama_print_timings: prompt eval time =     200.67 ms /    24 tokens (    8.36 ms per token,   119.60 tokens per second)
llama_print_timings:        eval time =    2753.82 ms /    52 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3024.88 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      18.53 ms /    55 runs   (    0.34 ms per token,  2968.64 tokens per second)
llama_print_timings: prompt eval time =     194.35 ms /    19 tokens (   10.23 ms per token,    97.76 tokens per second)
llama_print_timings:        eval time =    2935.28 ms /    54 runs   (   54.36 ms per token,    18.40 tokens per second)
llama_print_timings:       total time =    3386.99 ms
Llama.generate: prefix-

123 -- 1
124 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      17.03 ms /    57 runs   (    0.30 ms per token,  3347.62 tokens per second)
llama_print_timings: prompt eval time =     207.06 ms /    27 tokens (    7.67 ms per token,   130.40 tokens per second)
llama_print_timings:        eval time =    3010.37 ms /    56 runs   (   53.76 ms per token,    18.60 tokens per second)
llama_print_timings:       total time =    3407.17 ms
Llama.generate: prefix-match hit


125 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.32 ms /    53 runs   (    0.10 ms per token,  9966.15 tokens per second)
llama_print_timings: prompt eval time =     202.40 ms /    25 tokens (    8.10 ms per token,   123.52 tokens per second)
llama_print_timings:        eval time =    2754.72 ms /    52 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3030.59 ms
Llama.generate: prefix-match hit


126 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.11 ms /    54 runs   (    0.09 ms per token, 10567.51 tokens per second)
llama_print_timings: prompt eval time =     193.34 ms /    18 tokens (   10.74 ms per token,    93.10 tokens per second)
llama_print_timings:        eval time =    2802.45 ms /    53 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3067.57 ms
Llama.generate: prefix-match hit


127 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.39 ms /    57 runs   (    0.09 ms per token, 10567.30 tokens per second)
llama_print_timings: prompt eval time =     192.87 ms /    18 tokens (   10.71 ms per token,    93.33 tokens per second)
llama_print_timings:        eval time =    2968.26 ms /    56 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3236.52 ms
Llama.generate: prefix-match hit


128 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      12.76 ms /    55 runs   (    0.23 ms per token,  4310.68 tokens per second)
llama_print_timings: prompt eval time =     194.24 ms /    19 tokens (   10.22 ms per token,    97.82 tokens per second)
llama_print_timings:        eval time =    2895.94 ms /    54 runs   (   53.63 ms per token,    18.65 tokens per second)
llama_print_timings:       total time =    3257.42 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.60 ms /    60 runs   (    0.11 ms per token,  9096.42 tokens per second)
llama_print_timings: prompt eval time =     202.22 ms /    25 tokens (    8.09 ms per token,   123.63 tokens per second)
llama_print_timings:        eval time =    3132.34 ms /    59 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3427.17 ms
Llama.generate: prefix-

129 -- 1
130 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      10.30 ms /    56 runs   (    0.18 ms per token,  5439.01 tokens per second)
llama_print_timings: prompt eval time =     191.05 ms /    17 tokens (   11.24 ms per token,    88.98 tokens per second)
llama_print_timings:        eval time =    2935.84 ms /    55 runs   (   53.38 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    3266.34 ms
Llama.generate: prefix-match hit


131 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.55 ms /    59 runs   (    0.09 ms per token, 10622.97 tokens per second)
llama_print_timings: prompt eval time =     191.03 ms /    17 tokens (   11.24 ms per token,    88.99 tokens per second)
llama_print_timings:        eval time =    3069.32 ms /    58 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3339.16 ms
Llama.generate: prefix-match hit


132 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      15.00 ms /    59 runs   (    0.25 ms per token,  3932.28 tokens per second)
llama_print_timings: prompt eval time =     189.12 ms /    28 tokens (    6.75 ms per token,   148.06 tokens per second)
llama_print_timings:        eval time =    3132.59 ms /    58 runs   (   54.01 ms per token,    18.52 tokens per second)
llama_print_timings:       total time =    3539.31 ms
Llama.generate: prefix-match hit


133 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.15 ms /    55 runs   (    0.09 ms per token, 10683.76 tokens per second)
llama_print_timings: prompt eval time =     187.71 ms /    14 tokens (   13.41 ms per token,    74.58 tokens per second)
llama_print_timings:        eval time =    2858.92 ms /    54 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3120.41 ms
Llama.generate: prefix-match hit


134 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      24.21 ms /    54 runs   (    0.45 ms per token,  2230.11 tokens per second)
llama_print_timings: prompt eval time =     197.34 ms /    21 tokens (    9.40 ms per token,   106.41 tokens per second)
llama_print_timings:        eval time =    2914.88 ms /    53 runs   (   55.00 ms per token,    18.18 tokens per second)
llama_print_timings:       total time =    3398.15 ms
Llama.generate: prefix-match hit


135 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       8.32 ms /    57 runs   (    0.15 ms per token,  6853.43 tokens per second)
llama_print_timings: prompt eval time =     199.96 ms /    22 tokens (    9.09 ms per token,   110.02 tokens per second)
llama_print_timings:        eval time =    2981.76 ms /    56 runs   (   53.25 ms per token,    18.78 tokens per second)
llama_print_timings:       total time =    3284.09 ms
Llama.generate: prefix-match hit


136 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      24.17 ms /    53 runs   (    0.46 ms per token,  2192.80 tokens per second)
llama_print_timings: prompt eval time =     204.53 ms /    26 tokens (    7.87 ms per token,   127.12 tokens per second)
llama_print_timings:        eval time =    2880.70 ms /    52 runs   (   55.40 ms per token,    18.05 tokens per second)
llama_print_timings:       total time =    3380.75 ms
Llama.generate: prefix-match hit


137 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.98 ms /    53 runs   (    0.09 ms per token, 10640.43 tokens per second)
llama_print_timings: prompt eval time =     191.04 ms /    17 tokens (   11.24 ms per token,    88.98 tokens per second)
llama_print_timings:        eval time =    2752.32 ms /    52 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3015.02 ms
Llama.generate: prefix-match hit


138 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.28 ms /    55 runs   (    0.11 ms per token,  8764.94 tokens per second)
llama_print_timings: prompt eval time =     194.56 ms /    19 tokens (   10.24 ms per token,    97.66 tokens per second)
llama_print_timings:        eval time =    2864.21 ms /    54 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3146.35 ms
Llama.generate: prefix-match hit


139 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.91 ms /    53 runs   (    0.09 ms per token, 10800.90 tokens per second)
llama_print_timings: prompt eval time =     193.28 ms /    18 tokens (   10.74 ms per token,    93.13 tokens per second)
llama_print_timings:        eval time =    2753.34 ms /    52 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3016.86 ms
Llama.generate: prefix-match hit


140 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      12.44 ms /    50 runs   (    0.25 ms per token,  4019.62 tokens per second)
llama_print_timings: prompt eval time =     184.66 ms /    12 tokens (   15.39 ms per token,    64.98 tokens per second)
llama_print_timings:        eval time =    2631.87 ms /    49 runs   (   53.71 ms per token,    18.62 tokens per second)
llama_print_timings:       total time =    2985.64 ms
Llama.generate: prefix-match hit


141 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.13 ms /    55 runs   (    0.09 ms per token, 10721.25 tokens per second)
llama_print_timings: prompt eval time =     189.37 ms /    15 tokens (   12.62 ms per token,    79.21 tokens per second)
llama_print_timings:        eval time =    2855.58 ms /    54 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3118.94 ms
Llama.generate: prefix-match hit


142 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.25 ms /    54 runs   (    0.10 ms per token, 10281.80 tokens per second)
llama_print_timings: prompt eval time =     193.47 ms /    18 tokens (   10.75 ms per token,    93.04 tokens per second)
llama_print_timings:        eval time =    2806.03 ms /    53 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3072.73 ms
Llama.generate: prefix-match hit


143 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      20.63 ms /    58 runs   (    0.36 ms per token,  2810.89 tokens per second)
llama_print_timings: prompt eval time =     194.57 ms /    19 tokens (   10.24 ms per token,    97.65 tokens per second)
llama_print_timings:        eval time =    3093.84 ms /    57 runs   (   54.28 ms per token,    18.42 tokens per second)
llama_print_timings:       total time =    3564.89 ms
Llama.generate: prefix-match hit


144 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.46 ms /    56 runs   (    0.10 ms per token, 10247.03 tokens per second)
llama_print_timings: prompt eval time =     354.77 ms /    39 tokens (    9.10 ms per token,   109.93 tokens per second)
llama_print_timings:        eval time =    2918.43 ms /    55 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3350.85 ms
Llama.generate: prefix-match hit


145 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.81 ms /    57 runs   (    0.10 ms per token,  9814.05 tokens per second)
llama_print_timings: prompt eval time =     194.08 ms /    19 tokens (   10.21 ms per token,    97.90 tokens per second)
llama_print_timings:        eval time =    2967.82 ms /    56 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3244.00 ms
Llama.generate: prefix-match hit


146 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.37 ms /    55 runs   (    0.10 ms per token, 10251.63 tokens per second)
llama_print_timings: prompt eval time =     188.33 ms /    14 tokens (   13.45 ms per token,    74.34 tokens per second)
llama_print_timings:        eval time =    2856.36 ms /    54 runs   (   52.90 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3121.84 ms
Llama.generate: prefix-match hit


147 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.59 ms /    58 runs   (    0.10 ms per token, 10366.40 tokens per second)
llama_print_timings: prompt eval time =     189.16 ms /    16 tokens (   11.82 ms per token,    84.59 tokens per second)
llama_print_timings:        eval time =    3015.12 ms /    57 runs   (   52.90 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3283.60 ms
Llama.generate: prefix-match hit


148 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.17 ms /    53 runs   (    0.10 ms per token, 10245.51 tokens per second)
llama_print_timings: prompt eval time =     199.45 ms /    22 tokens (    9.07 ms per token,   110.30 tokens per second)
llama_print_timings:        eval time =    2756.35 ms /    52 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3028.67 ms
Llama.generate: prefix-match hit


149 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.76 ms /    59 runs   (    0.10 ms per token, 10246.61 tokens per second)
llama_print_timings: prompt eval time =     200.78 ms /    24 tokens (    8.37 ms per token,   119.53 tokens per second)
llama_print_timings:        eval time =    3072.21 ms /    58 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3351.62 ms
Llama.generate: prefix-match hit


150 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.01 ms /    51 runs   (    0.10 ms per token, 10175.58 tokens per second)
llama_print_timings: prompt eval time =     199.19 ms /    22 tokens (    9.05 ms per token,   110.45 tokens per second)
llama_print_timings:        eval time =    2651.30 ms /    50 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    2920.58 ms
Llama.generate: prefix-match hit


151 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.48 ms /    55 runs   (    0.10 ms per token, 10040.16 tokens per second)
llama_print_timings: prompt eval time =     200.94 ms /    23 tokens (    8.74 ms per token,   114.46 tokens per second)
llama_print_timings:        eval time =    2859.35 ms /    54 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3137.01 ms
Llama.generate: prefix-match hit


152 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      10.37 ms /    57 runs   (    0.18 ms per token,  5495.04 tokens per second)
llama_print_timings: prompt eval time =     189.36 ms /    16 tokens (   11.83 ms per token,    84.50 tokens per second)
llama_print_timings:        eval time =    3031.06 ms /    56 runs   (   54.13 ms per token,    18.48 tokens per second)
llama_print_timings:       total time =    3371.16 ms
Llama.generate: prefix-match hit


153 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =      18.27 ms /    55 runs   (    0.33 ms per token,  3011.06 tokens per second)
llama_print_timings: prompt eval time =     196.68 ms /    21 tokens (    9.37 ms per token,   106.77 tokens per second)
llama_print_timings:        eval time =    2951.46 ms /    54 runs   (   54.66 ms per token,    18.30 tokens per second)
llama_print_timings:       total time =    3409.33 ms
Llama.generate: prefix-match hit


154 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.13 ms /    56 runs   (    0.13 ms per token,  7854.14 tokens per second)
llama_print_timings: prompt eval time =     195.12 ms /    19 tokens (   10.27 ms per token,    97.38 tokens per second)
llama_print_timings:        eval time =    2924.80 ms /    55 runs   (   53.18 ms per token,    18.80 tokens per second)
llama_print_timings:       total time =    3224.33 ms
Llama.generate: prefix-match hit


155 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.73 ms /    55 runs   (    0.10 ms per token,  9598.60 tokens per second)
llama_print_timings: prompt eval time =     201.16 ms /    24 tokens (    8.38 ms per token,   119.31 tokens per second)
llama_print_timings:        eval time =    2865.11 ms /    54 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3145.77 ms
Llama.generate: prefix-match hit


156 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.36 ms /    64 runs   (    0.10 ms per token, 10056.57 tokens per second)
llama_print_timings: prompt eval time =     193.48 ms /    18 tokens (   10.75 ms per token,    93.03 tokens per second)
llama_print_timings:        eval time =    3350.01 ms /    63 runs   (   53.17 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3644.16 ms
Llama.generate: prefix-match hit


157 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.06 ms /    57 runs   (    0.12 ms per token,  8070.23 tokens per second)
llama_print_timings: prompt eval time =     201.58 ms /    23 tokens (    8.76 ms per token,   114.10 tokens per second)
llama_print_timings:        eval time =    2970.83 ms /    56 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3272.12 ms
Llama.generate: prefix-match hit


158 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.02 ms /    58 runs   (    0.10 ms per token,  9631.35 tokens per second)
llama_print_timings: prompt eval time =     206.06 ms /    23 tokens (    8.96 ms per token,   111.62 tokens per second)
llama_print_timings:        eval time =    3068.74 ms /    57 runs   (   53.84 ms per token,    18.57 tokens per second)
llama_print_timings:       total time =    3361.14 ms
Llama.generate: prefix-match hit


159 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.49 ms /    59 runs   (    0.09 ms per token, 10744.86 tokens per second)
llama_print_timings: prompt eval time =     194.01 ms /    18 tokens (   10.78 ms per token,    92.78 tokens per second)
llama_print_timings:        eval time =    3074.54 ms /    58 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3347.95 ms
Llama.generate: prefix-match hit


160 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.28 ms /    53 runs   (    0.10 ms per token, 10047.39 tokens per second)
llama_print_timings: prompt eval time =     204.82 ms /    26 tokens (    7.88 ms per token,   126.94 tokens per second)
llama_print_timings:        eval time =    2754.73 ms /    52 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3033.30 ms
Llama.generate: prefix-match hit


161 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.65 ms /    56 runs   (    0.10 ms per token,  9908.00 tokens per second)
llama_print_timings: prompt eval time =     200.75 ms /    24 tokens (    8.36 ms per token,   119.55 tokens per second)
llama_print_timings:        eval time =    2908.36 ms /    55 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3188.16 ms
Llama.generate: prefix-match hit


162 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.71 ms /    56 runs   (    0.10 ms per token,  9810.79 tokens per second)
llama_print_timings: prompt eval time =     192.98 ms /    18 tokens (   10.72 ms per token,    93.27 tokens per second)
llama_print_timings:        eval time =    2925.14 ms /    55 runs   (   53.18 ms per token,    18.80 tokens per second)
llama_print_timings:       total time =    3198.34 ms
Llama.generate: prefix-match hit


163 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.60 ms /    59 runs   (    0.09 ms per token, 10531.95 tokens per second)
llama_print_timings: prompt eval time =     202.36 ms /    25 tokens (    8.09 ms per token,   123.54 tokens per second)
llama_print_timings:        eval time =    3068.75 ms /    58 runs   (   52.91 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3349.65 ms
Llama.generate: prefix-match hit


164 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.71 ms /    58 runs   (    0.10 ms per token, 10162.96 tokens per second)
llama_print_timings: prompt eval time =     198.00 ms /    21 tokens (    9.43 ms per token,   106.06 tokens per second)
llama_print_timings:        eval time =    3017.19 ms /    57 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3294.03 ms
Llama.generate: prefix-match hit


165 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.73 ms /    52 runs   (    0.09 ms per token, 10998.31 tokens per second)
llama_print_timings: prompt eval time =     194.89 ms /    20 tokens (    9.74 ms per token,   102.62 tokens per second)
llama_print_timings:        eval time =    2698.53 ms /    51 runs   (   52.91 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    2961.83 ms
Llama.generate: prefix-match hit


166 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.27 ms /    58 runs   (    0.11 ms per token,  9248.92 tokens per second)
llama_print_timings: prompt eval time =     192.82 ms /    18 tokens (   10.71 ms per token,    93.35 tokens per second)
llama_print_timings:        eval time =    3019.66 ms /    57 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3300.79 ms
Llama.generate: prefix-match hit


167 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.15 ms /    54 runs   (    0.10 ms per token, 10479.33 tokens per second)
llama_print_timings: prompt eval time =     205.43 ms /    27 tokens (    7.61 ms per token,   131.43 tokens per second)
llama_print_timings:        eval time =    2807.64 ms /    53 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3084.31 ms
Llama.generate: prefix-match hit


168 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.31 ms /    54 runs   (    0.10 ms per token, 10175.24 tokens per second)
llama_print_timings: prompt eval time =     194.74 ms /    20 tokens (    9.74 ms per token,   102.70 tokens per second)
llama_print_timings:        eval time =    2808.80 ms /    53 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3076.34 ms
Llama.generate: prefix-match hit


169 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.51 ms /    58 runs   (    0.09 ms per token, 10532.05 tokens per second)
llama_print_timings: prompt eval time =     194.76 ms /    20 tokens (    9.74 ms per token,   102.69 tokens per second)
llama_print_timings:        eval time =    3017.81 ms /    57 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3288.97 ms
Llama.generate: prefix-match hit


170 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.17 ms /    53 runs   (    0.10 ms per token, 10253.43 tokens per second)
llama_print_timings: prompt eval time =     200.70 ms /    23 tokens (    8.73 ms per token,   114.60 tokens per second)
llama_print_timings:        eval time =    2754.05 ms /    52 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3026.37 ms
Llama.generate: prefix-match hit


171 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.98 ms /    60 runs   (    0.10 ms per token, 10038.48 tokens per second)
llama_print_timings: prompt eval time =     197.03 ms /    21 tokens (    9.38 ms per token,   106.58 tokens per second)
llama_print_timings:        eval time =    3125.07 ms /    59 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3406.09 ms
Llama.generate: prefix-match hit


172 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.66 ms /    54 runs   (    0.10 ms per token,  9544.01 tokens per second)
llama_print_timings: prompt eval time =     201.08 ms /    23 tokens (    8.74 ms per token,   114.38 tokens per second)
llama_print_timings:        eval time =    2814.00 ms /    53 runs   (   53.09 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3089.95 ms
Llama.generate: prefix-match hit


173 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.27 ms /    56 runs   (    0.09 ms per token, 10622.15 tokens per second)
llama_print_timings: prompt eval time =     194.81 ms /    19 tokens (   10.25 ms per token,    97.53 tokens per second)
llama_print_timings:        eval time =    2915.86 ms /    55 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3182.82 ms
Llama.generate: prefix-match hit


174 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.46 ms /    59 runs   (    0.09 ms per token, 10797.95 tokens per second)
llama_print_timings: prompt eval time =     351.27 ms /    38 tokens (    9.24 ms per token,   108.18 tokens per second)
llama_print_timings:        eval time =    3083.19 ms /    58 runs   (   53.16 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3510.25 ms
Llama.generate: prefix-match hit


175 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.30 ms /    56 runs   (    0.09 ms per token, 10570.03 tokens per second)
llama_print_timings: prompt eval time =     197.15 ms /    21 tokens (    9.39 ms per token,   106.52 tokens per second)
llama_print_timings:        eval time =    2919.11 ms /    55 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3189.83 ms
Llama.generate: prefix-match hit


176 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.69 ms /    52 runs   (    0.09 ms per token, 11082.69 tokens per second)
llama_print_timings: prompt eval time =     194.67 ms /    19 tokens (   10.25 ms per token,    97.60 tokens per second)
llama_print_timings:        eval time =    2703.43 ms /    51 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    2964.41 ms
Llama.generate: prefix-match hit


177 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.14 ms /    54 runs   (    0.10 ms per token, 10509.93 tokens per second)
llama_print_timings: prompt eval time =     198.19 ms /    21 tokens (    9.44 ms per token,   105.96 tokens per second)
llama_print_timings:        eval time =    2811.76 ms /    53 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3080.17 ms
Llama.generate: prefix-match hit


178 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.67 ms /    50 runs   (    0.09 ms per token, 10718.11 tokens per second)
llama_print_timings: prompt eval time =     197.35 ms /    21 tokens (    9.40 ms per token,   106.41 tokens per second)
llama_print_timings:        eval time =    2597.69 ms /    49 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    2858.77 ms
Llama.generate: prefix-match hit


179 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.57 ms /    57 runs   (    0.10 ms per token, 10233.39 tokens per second)
llama_print_timings: prompt eval time =     202.77 ms /    25 tokens (    8.11 ms per token,   123.29 tokens per second)
llama_print_timings:        eval time =    2971.16 ms /    56 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3249.89 ms
Llama.generate: prefix-match hit


180 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.18 ms /    57 runs   (    0.09 ms per token, 11005.99 tokens per second)
llama_print_timings: prompt eval time =     195.63 ms /    20 tokens (    9.78 ms per token,   102.23 tokens per second)
llama_print_timings:        eval time =    2972.99 ms /    56 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3241.12 ms
Llama.generate: prefix-match hit


181 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.05 ms /    55 runs   (    0.09 ms per token, 10884.62 tokens per second)
llama_print_timings: prompt eval time =     199.36 ms /    22 tokens (    9.06 ms per token,   110.35 tokens per second)
llama_print_timings:        eval time =    2865.34 ms /    54 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3134.60 ms
Llama.generate: prefix-match hit


182 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.70 ms /    63 runs   (    0.09 ms per token, 11044.88 tokens per second)
llama_print_timings: prompt eval time =     205.50 ms /    26 tokens (    7.90 ms per token,   126.52 tokens per second)
llama_print_timings:        eval time =    3290.38 ms /    62 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3576.76 ms
Llama.generate: prefix-match hit


183 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.21 ms /    55 runs   (    0.09 ms per token, 10550.55 tokens per second)
llama_print_timings: prompt eval time =     190.07 ms /    15 tokens (   12.67 ms per token,    78.92 tokens per second)
llama_print_timings:        eval time =    2859.98 ms /    54 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3122.34 ms
Llama.generate: prefix-match hit


184 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.40 ms /    61 runs   (    0.09 ms per token, 11302.58 tokens per second)
llama_print_timings: prompt eval time =     360.21 ms /    44 tokens (    8.19 ms per token,   122.15 tokens per second)
llama_print_timings:        eval time =    3189.26 ms /    60 runs   (   53.15 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3628.17 ms
Llama.generate: prefix-match hit


185 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.00 ms /    53 runs   (    0.09 ms per token, 10591.53 tokens per second)
llama_print_timings: prompt eval time =     198.30 ms /    22 tokens (    9.01 ms per token,   110.94 tokens per second)
llama_print_timings:        eval time =    2757.87 ms /    52 runs   (   53.04 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3025.10 ms
Llama.generate: prefix-match hit


186 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.13 ms /    56 runs   (    0.09 ms per token, 10918.31 tokens per second)
llama_print_timings: prompt eval time =     195.64 ms /    20 tokens (    9.78 ms per token,   102.23 tokens per second)
llama_print_timings:        eval time =    2917.89 ms /    55 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3185.31 ms
Llama.generate: prefix-match hit


187 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.05 ms /    53 runs   (    0.10 ms per token, 10495.05 tokens per second)
llama_print_timings: prompt eval time =     191.28 ms /    17 tokens (   11.25 ms per token,    88.88 tokens per second)
llama_print_timings:        eval time =    2755.36 ms /    52 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3016.00 ms
Llama.generate: prefix-match hit


188 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.92 ms /    55 runs   (    0.09 ms per token, 11185.68 tokens per second)
llama_print_timings: prompt eval time =     191.17 ms /    17 tokens (   11.25 ms per token,    88.93 tokens per second)
llama_print_timings:        eval time =    2861.90 ms /    54 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3123.47 ms
Llama.generate: prefix-match hit


189 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.77 ms /    61 runs   (    0.09 ms per token, 10566.43 tokens per second)
llama_print_timings: prompt eval time =     205.71 ms /    27 tokens (    7.62 ms per token,   131.26 tokens per second)
llama_print_timings:        eval time =    3182.18 ms /    60 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3466.36 ms
Llama.generate: prefix-match hit


190 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.97 ms /    52 runs   (    0.10 ms per token, 10456.46 tokens per second)
llama_print_timings: prompt eval time =     209.57 ms /    29 tokens (    7.23 ms per token,   138.38 tokens per second)
llama_print_timings:        eval time =    2702.91 ms /    51 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    2979.14 ms
Llama.generate: prefix-match hit


191 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.13 ms /    55 runs   (    0.09 ms per token, 10721.25 tokens per second)
llama_print_timings: prompt eval time =     199.60 ms /    22 tokens (    9.07 ms per token,   110.22 tokens per second)
llama_print_timings:        eval time =    2866.73 ms /    54 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3139.10 ms
Llama.generate: prefix-match hit


192 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.18 ms /    55 runs   (    0.09 ms per token, 10621.86 tokens per second)
llama_print_timings: prompt eval time =     194.58 ms /    19 tokens (   10.24 ms per token,    97.65 tokens per second)
llama_print_timings:        eval time =    2864.30 ms /    54 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3129.72 ms
Llama.generate: prefix-match hit


193 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.68 ms /    52 runs   (    0.09 ms per token, 11113.49 tokens per second)
llama_print_timings: prompt eval time =     193.54 ms /    18 tokens (   10.75 ms per token,    93.00 tokens per second)
llama_print_timings:        eval time =    2705.13 ms /    51 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    2964.99 ms
Llama.generate: prefix-match hit


194 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.26 ms /    62 runs   (    0.10 ms per token,  9897.83 tokens per second)
llama_print_timings: prompt eval time =     213.20 ms /    31 tokens (    6.88 ms per token,   145.40 tokens per second)
llama_print_timings:        eval time =    3242.13 ms /    61 runs   (   53.15 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3539.40 ms
Llama.generate: prefix-match hit


195 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.44 ms /    59 runs   (    0.09 ms per token, 10853.57 tokens per second)
llama_print_timings: prompt eval time =     194.52 ms /    19 tokens (   10.24 ms per token,    97.68 tokens per second)
llama_print_timings:        eval time =    3076.32 ms /    58 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3346.34 ms
Llama.generate: prefix-match hit


196 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.10 ms /    56 runs   (    0.09 ms per token, 10989.01 tokens per second)
llama_print_timings: prompt eval time =     201.00 ms /    23 tokens (    8.74 ms per token,   114.43 tokens per second)
llama_print_timings:        eval time =    2917.43 ms /    55 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3190.38 ms
Llama.generate: prefix-match hit


197 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.26 ms /    56 runs   (    0.09 ms per token, 10646.39 tokens per second)
llama_print_timings: prompt eval time =     189.45 ms /    16 tokens (   11.84 ms per token,    84.45 tokens per second)
llama_print_timings:        eval time =    2914.20 ms /    55 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3177.45 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.12 ms /    54 runs   (    0.09 ms per token, 10546.88 tokens per second)
llama_print_timings: prompt eval time =     205.03 ms /    26 tokens (    7.89 ms per token,   126.81 tokens per second)
llama_print_timings:        eval time =    2810.74 ms /    53 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3084.20 ms
Llama.generate: prefix-

198 -- 1
199 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.30 ms /    58 runs   (    0.09 ms per token, 10935.14 tokens per second)
llama_print_timings: prompt eval time =     195.93 ms /    20 tokens (    9.80 ms per token,   102.08 tokens per second)
llama_print_timings:        eval time =    3025.87 ms /    57 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3296.33 ms
Llama.generate: prefix-match hit


200 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.08 ms /    55 runs   (    0.09 ms per token, 10833.17 tokens per second)
llama_print_timings: prompt eval time =     198.12 ms /    21 tokens (    9.43 ms per token,   106.00 tokens per second)
llama_print_timings:        eval time =    2864.08 ms /    54 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3133.06 ms
Llama.generate: prefix-match hit


201 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.23 ms /    56 runs   (    0.09 ms per token, 10715.65 tokens per second)
llama_print_timings: prompt eval time =     186.80 ms /    13 tokens (   14.37 ms per token,    69.59 tokens per second)
llama_print_timings:        eval time =    2908.77 ms /    55 runs   (   52.89 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3168.85 ms
Llama.generate: prefix-match hit


202 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.14 ms /    55 runs   (    0.09 ms per token, 10694.15 tokens per second)
llama_print_timings: prompt eval time =     195.00 ms /    20 tokens (    9.75 ms per token,   102.57 tokens per second)
llama_print_timings:        eval time =    2857.91 ms /    54 runs   (   52.92 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3125.77 ms
Llama.generate: prefix-match hit


203 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.81 ms /    53 runs   (    0.09 ms per token, 11011.84 tokens per second)
llama_print_timings: prompt eval time =     186.36 ms /    13 tokens (   14.34 ms per token,    69.76 tokens per second)
llama_print_timings:        eval time =    2753.96 ms /    52 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3007.99 ms
Llama.generate: prefix-match hit


204 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.18 ms /    56 runs   (    0.09 ms per token, 10812.90 tokens per second)
llama_print_timings: prompt eval time =     197.66 ms /    21 tokens (    9.41 ms per token,   106.24 tokens per second)
llama_print_timings:        eval time =    2919.41 ms /    55 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3189.74 ms
Llama.generate: prefix-match hit


205 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.93 ms /    53 runs   (    0.09 ms per token, 10750.51 tokens per second)
llama_print_timings: prompt eval time =     197.90 ms /    21 tokens (    9.42 ms per token,   106.11 tokens per second)
llama_print_timings:        eval time =    2756.25 ms /    52 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3022.78 ms
Llama.generate: prefix-match hit


206 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.85 ms /    63 runs   (    0.09 ms per token, 10761.87 tokens per second)
llama_print_timings: prompt eval time =     189.20 ms /    28 tokens (    6.76 ms per token,   147.99 tokens per second)
llama_print_timings:        eval time =    3290.67 ms /    62 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3560.96 ms
Llama.generate: prefix-match hit


207 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.22 ms /    64 runs   (    0.10 ms per token, 10282.78 tokens per second)
llama_print_timings: prompt eval time =     206.10 ms /    27 tokens (    7.63 ms per token,   131.00 tokens per second)
llama_print_timings:        eval time =    3343.53 ms /    63 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3635.05 ms
Llama.generate: prefix-match hit


208 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.53 ms /    61 runs   (    0.09 ms per token, 11026.75 tokens per second)
llama_print_timings: prompt eval time =     204.69 ms /    26 tokens (    7.87 ms per token,   127.02 tokens per second)
llama_print_timings:        eval time =    3183.20 ms /    60 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3466.19 ms
Llama.generate: prefix-match hit


209 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.20 ms /    57 runs   (    0.09 ms per token, 10967.87 tokens per second)
llama_print_timings: prompt eval time =     191.37 ms /    17 tokens (   11.26 ms per token,    88.84 tokens per second)
llama_print_timings:        eval time =    2967.05 ms /    56 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3231.72 ms
Llama.generate: prefix-match hit


210 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.53 ms /    58 runs   (    0.10 ms per token, 10482.56 tokens per second)
llama_print_timings: prompt eval time =     198.19 ms /    21 tokens (    9.44 ms per token,   105.96 tokens per second)
llama_print_timings:        eval time =    3025.08 ms /    57 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3300.25 ms
Llama.generate: prefix-match hit


211 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.41 ms /    55 runs   (    0.10 ms per token, 10172.00 tokens per second)
llama_print_timings: prompt eval time =     197.91 ms /    21 tokens (    9.42 ms per token,   106.11 tokens per second)
llama_print_timings:        eval time =    2865.91 ms /    54 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3137.21 ms
Llama.generate: prefix-match hit


212 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.23 ms /    57 runs   (    0.09 ms per token, 10900.75 tokens per second)
llama_print_timings: prompt eval time =     193.12 ms /    18 tokens (   10.73 ms per token,    93.21 tokens per second)
llama_print_timings:        eval time =    2970.00 ms /    56 runs   (   53.04 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3236.33 ms
Llama.generate: prefix-match hit


213 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.59 ms /    57 runs   (    0.10 ms per token, 10193.13 tokens per second)
llama_print_timings: prompt eval time =     342.03 ms /    33 tokens (   10.36 ms per token,    96.48 tokens per second)
llama_print_timings:        eval time =    2973.19 ms /    56 runs   (   53.09 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3390.99 ms
Llama.generate: prefix-match hit


214 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.39 ms /    56 runs   (    0.10 ms per token, 10391.54 tokens per second)
llama_print_timings: prompt eval time =     201.04 ms /    24 tokens (    8.38 ms per token,   119.38 tokens per second)
llama_print_timings:        eval time =    2916.30 ms /    55 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3189.45 ms
Llama.generate: prefix-match hit


215 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.29 ms /    57 runs   (    0.09 ms per token, 10764.87 tokens per second)
llama_print_timings: prompt eval time =     200.91 ms /    23 tokens (    8.74 ms per token,   114.48 tokens per second)
llama_print_timings:        eval time =    2969.63 ms /    56 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3243.99 ms
Llama.generate: prefix-match hit


216 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.96 ms /    56 runs   (    0.09 ms per token, 11294.88 tokens per second)
llama_print_timings: prompt eval time =     371.81 ms /    52 tokens (    7.15 ms per token,   139.85 tokens per second)
llama_print_timings:        eval time =    2932.25 ms /    55 runs   (   53.31 ms per token,    18.76 tokens per second)
llama_print_timings:       total time =    3378.64 ms
Llama.generate: prefix-match hit


217 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.99 ms /    62 runs   (    0.10 ms per token, 10348.86 tokens per second)
llama_print_timings: prompt eval time =     341.99 ms /    33 tokens (   10.36 ms per token,    96.49 tokens per second)
llama_print_timings:        eval time =    3242.95 ms /    61 runs   (   53.16 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3664.86 ms
Llama.generate: prefix-match hit


218 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.00 ms /    53 runs   (    0.09 ms per token, 10604.24 tokens per second)
llama_print_timings: prompt eval time =     209.40 ms /    29 tokens (    7.22 ms per token,   138.49 tokens per second)
llama_print_timings:        eval time =    2758.48 ms /    52 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3036.12 ms
Llama.generate: prefix-match hit


219 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.85 ms /    53 runs   (    0.09 ms per token, 10918.83 tokens per second)
llama_print_timings: prompt eval time =     189.58 ms /    16 tokens (   11.85 ms per token,    84.40 tokens per second)
llama_print_timings:        eval time =    2754.98 ms /    52 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3013.87 ms
Llama.generate: prefix-match hit


220 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.87 ms /    52 runs   (    0.09 ms per token, 10675.43 tokens per second)
llama_print_timings: prompt eval time =     197.76 ms /    21 tokens (    9.42 ms per token,   106.19 tokens per second)
llama_print_timings:        eval time =    2702.51 ms /    51 runs   (   52.99 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    2967.91 ms
Llama.generate: prefix-match hit


221 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.49 ms /    49 runs   (    0.09 ms per token, 10922.87 tokens per second)
llama_print_timings: prompt eval time =     189.84 ms /    15 tokens (   12.66 ms per token,    79.01 tokens per second)
llama_print_timings:        eval time =    2544.71 ms /    48 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    2797.87 ms
Llama.generate: prefix-match hit


222 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.42 ms /    55 runs   (    0.10 ms per token, 10147.60 tokens per second)
llama_print_timings: prompt eval time =     347.55 ms /    36 tokens (    9.65 ms per token,   103.58 tokens per second)
llama_print_timings:        eval time =    2868.82 ms /    54 runs   (   53.13 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    3289.36 ms
Llama.generate: prefix-match hit


223 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.92 ms /    54 runs   (    0.09 ms per token, 10980.07 tokens per second)
llama_print_timings: prompt eval time =     194.87 ms /    19 tokens (   10.26 ms per token,    97.50 tokens per second)
llama_print_timings:        eval time =    2809.31 ms /    53 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3072.93 ms
Llama.generate: prefix-match hit


224 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.85 ms /    53 runs   (    0.09 ms per token, 10936.86 tokens per second)
llama_print_timings: prompt eval time =     189.78 ms /    15 tokens (   12.65 ms per token,    79.04 tokens per second)
llama_print_timings:        eval time =    2754.11 ms /    52 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3011.59 ms
Llama.generate: prefix-match hit


225 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.16 ms /    54 runs   (    0.10 ms per token, 10473.24 tokens per second)
llama_print_timings: prompt eval time =     194.94 ms /    20 tokens (    9.75 ms per token,   102.60 tokens per second)
llama_print_timings:        eval time =    2810.15 ms /    53 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3076.93 ms
Llama.generate: prefix-match hit


226 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.04 ms /    55 runs   (    0.09 ms per token, 10901.88 tokens per second)
llama_print_timings: prompt eval time =     191.42 ms /    17 tokens (   11.26 ms per token,    88.81 tokens per second)
llama_print_timings:        eval time =    2862.72 ms /    54 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3125.32 ms
Llama.generate: prefix-match hit


227 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.32 ms /    67 runs   (    0.09 ms per token, 10609.66 tokens per second)
llama_print_timings: prompt eval time =     369.56 ms /    50 tokens (    7.39 ms per token,   135.29 tokens per second)
llama_print_timings:        eval time =    3513.89 ms /    66 runs   (   53.24 ms per token,    18.78 tokens per second)
llama_print_timings:       total time =    3974.19 ms
Llama.generate: prefix-match hit


228 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.33 ms /    58 runs   (    0.09 ms per token, 10887.93 tokens per second)
llama_print_timings: prompt eval time =     191.77 ms /    17 tokens (   11.28 ms per token,    88.65 tokens per second)
llama_print_timings:        eval time =    3021.71 ms /    57 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3289.80 ms
Llama.generate: prefix-match hit


229 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.24 ms /    55 runs   (    0.10 ms per token, 10496.18 tokens per second)
llama_print_timings: prompt eval time =     200.78 ms /    24 tokens (    8.37 ms per token,   119.53 tokens per second)
llama_print_timings:        eval time =    2864.91 ms /    54 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3136.01 ms
Llama.generate: prefix-match hit


230 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.21 ms /    55 runs   (    0.09 ms per token, 10564.73 tokens per second)
llama_print_timings: prompt eval time =     345.40 ms /    34 tokens (   10.16 ms per token,    98.44 tokens per second)
llama_print_timings:        eval time =    2867.03 ms /    54 runs   (   53.09 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3283.57 ms
Llama.generate: prefix-match hit


231 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.46 ms /    59 runs   (    0.09 ms per token, 10813.78 tokens per second)
llama_print_timings: prompt eval time =     200.47 ms /    23 tokens (    8.72 ms per token,   114.73 tokens per second)
llama_print_timings:        eval time =    3080.00 ms /    58 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3358.46 ms
Llama.generate: prefix-match hit


232 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       9.76 ms /   109 runs   (    0.09 ms per token, 11164.60 tokens per second)
llama_print_timings: prompt eval time =     517.44 ms /    67 tokens (    7.72 ms per token,   129.48 tokens per second)
llama_print_timings:        eval time =    5766.69 ms /   108 runs   (   53.40 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    6429.11 ms
Llama.generate: prefix-match hit


233 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.07 ms /    53 runs   (    0.10 ms per token, 10453.65 tokens per second)
llama_print_timings: prompt eval time =     193.08 ms /    18 tokens (   10.73 ms per token,    93.22 tokens per second)
llama_print_timings:        eval time =    2752.57 ms /    52 runs   (   52.93 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3014.62 ms
Llama.generate: prefix-match hit


234 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.31 ms /    56 runs   (    0.09 ms per token, 10550.11 tokens per second)
llama_print_timings: prompt eval time =     194.26 ms /    19 tokens (   10.22 ms per token,    97.81 tokens per second)
llama_print_timings:        eval time =    2920.46 ms /    55 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3189.35 ms
Llama.generate: prefix-match hit


235 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.36 ms /    57 runs   (    0.09 ms per token, 10626.40 tokens per second)
llama_print_timings: prompt eval time =     195.30 ms /    20 tokens (    9.77 ms per token,   102.41 tokens per second)
llama_print_timings:        eval time =    2972.09 ms /    56 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3240.97 ms
Llama.generate: prefix-match hit


236 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.04 ms /    56 runs   (    0.09 ms per token, 11119.94 tokens per second)
llama_print_timings: prompt eval time =     187.85 ms /    14 tokens (   13.42 ms per token,    74.53 tokens per second)
llama_print_timings:        eval time =    2910.73 ms /    55 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3170.98 ms
Llama.generate: prefix-match hit


237 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.85 ms /    52 runs   (    0.09 ms per token, 10723.86 tokens per second)
llama_print_timings: prompt eval time =     195.18 ms /    19 tokens (   10.27 ms per token,    97.34 tokens per second)
llama_print_timings:        eval time =    2700.50 ms /    51 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    2965.03 ms
Llama.generate: prefix-match hit


238 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.97 ms /    52 runs   (    0.10 ms per token, 10471.20 tokens per second)
llama_print_timings: prompt eval time =     194.08 ms /    19 tokens (   10.21 ms per token,    97.90 tokens per second)
llama_print_timings:        eval time =    2700.26 ms /    51 runs   (   52.95 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    2964.07 ms
Llama.generate: prefix-match hit


239 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.66 ms /    50 runs   (    0.09 ms per token, 10738.83 tokens per second)
llama_print_timings: prompt eval time =     213.04 ms /    31 tokens (    6.87 ms per token,   145.52 tokens per second)
llama_print_timings:        eval time =    2601.15 ms /    49 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    2879.43 ms
Llama.generate: prefix-match hit


240 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.60 ms /    58 runs   (    0.10 ms per token, 10357.14 tokens per second)
llama_print_timings: prompt eval time =     205.69 ms /    27 tokens (    7.62 ms per token,   131.26 tokens per second)
llama_print_timings:        eval time =    3021.11 ms /    57 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3302.07 ms
Llama.generate: prefix-match hit


241 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.95 ms /    51 runs   (    0.10 ms per token, 10298.87 tokens per second)
llama_print_timings: prompt eval time =     193.74 ms /    18 tokens (   10.76 ms per token,    92.91 tokens per second)
llama_print_timings:        eval time =    2650.61 ms /    50 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    2913.99 ms
Llama.generate: prefix-match hit


242 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.83 ms /    50 runs   (    0.10 ms per token, 10362.69 tokens per second)
llama_print_timings: prompt eval time =     194.29 ms /    19 tokens (   10.23 ms per token,    97.79 tokens per second)
llama_print_timings:        eval time =    2597.05 ms /    49 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    2858.87 ms
Llama.generate: prefix-match hit


243 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.29 ms /    57 runs   (    0.09 ms per token, 10777.08 tokens per second)
llama_print_timings: prompt eval time =     193.73 ms /    18 tokens (   10.76 ms per token,    92.91 tokens per second)
llama_print_timings:        eval time =    2968.14 ms /    56 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3235.20 ms
Llama.generate: prefix-match hit


244 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.11 ms /    54 runs   (    0.09 ms per token, 10565.45 tokens per second)
llama_print_timings: prompt eval time =     193.84 ms /    19 tokens (   10.20 ms per token,    98.02 tokens per second)
llama_print_timings:        eval time =    2807.76 ms /    53 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3073.89 ms
Llama.generate: prefix-match hit


245 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.66 ms /    51 runs   (    0.09 ms per token, 10953.61 tokens per second)
llama_print_timings: prompt eval time =     189.37 ms /    16 tokens (   11.84 ms per token,    84.49 tokens per second)
llama_print_timings:        eval time =    2648.00 ms /    50 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    2902.90 ms
Llama.generate: prefix-match hit


246 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.88 ms /    51 runs   (    0.10 ms per token, 10448.68 tokens per second)
llama_print_timings: prompt eval time =     201.00 ms /    24 tokens (    8.38 ms per token,   119.40 tokens per second)
llama_print_timings:        eval time =    2648.64 ms /    50 runs   (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    2915.67 ms
Llama.generate: prefix-match hit


247 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.16 ms /    57 runs   (    0.09 ms per token, 11044.37 tokens per second)
llama_print_timings: prompt eval time =     353.00 ms /    41 tokens (    8.61 ms per token,   116.15 tokens per second)
llama_print_timings:        eval time =    2974.18 ms /    56 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3402.10 ms
Llama.generate: prefix-match hit


248 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.67 ms /    61 runs   (    0.09 ms per token, 10765.97 tokens per second)
llama_print_timings: prompt eval time =     200.73 ms /    23 tokens (    8.73 ms per token,   114.58 tokens per second)
llama_print_timings:        eval time =    3183.15 ms /    60 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3462.03 ms
Llama.generate: prefix-match hit


249 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.30 ms /    57 runs   (    0.09 ms per token, 10760.81 tokens per second)
llama_print_timings: prompt eval time =     194.97 ms /    19 tokens (   10.26 ms per token,    97.45 tokens per second)
llama_print_timings:        eval time =    2969.60 ms /    56 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3237.09 ms
Llama.generate: prefix-match hit


250 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.22 ms /    54 runs   (    0.10 ms per token, 10350.78 tokens per second)
llama_print_timings: prompt eval time =     194.83 ms /    19 tokens (   10.25 ms per token,    97.52 tokens per second)
llama_print_timings:        eval time =    2810.03 ms /    53 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3075.52 ms
Llama.generate: prefix-match hit


251 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.26 ms /    58 runs   (    0.09 ms per token, 11026.62 tokens per second)
llama_print_timings: prompt eval time =     198.96 ms /    22 tokens (    9.04 ms per token,   110.58 tokens per second)
llama_print_timings:        eval time =    3021.18 ms /    57 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3295.54 ms
Llama.generate: prefix-match hit


252 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.00 ms /    53 runs   (    0.09 ms per token, 10610.61 tokens per second)
llama_print_timings: prompt eval time =     199.03 ms /    22 tokens (    9.05 ms per token,   110.53 tokens per second)
llama_print_timings:        eval time =    2757.50 ms /    52 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3024.97 ms
Llama.generate: prefix-match hit


253 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.25 ms /    53 runs   (    0.10 ms per token, 10087.55 tokens per second)
llama_print_timings: prompt eval time =     199.57 ms /    22 tokens (    9.07 ms per token,   110.24 tokens per second)
llama_print_timings:        eval time =    2757.09 ms /    52 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3028.10 ms
Llama.generate: prefix-match hit


254 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.92 ms /    53 runs   (    0.09 ms per token, 10781.12 tokens per second)
llama_print_timings: prompt eval time =     193.50 ms /    18 tokens (   10.75 ms per token,    93.03 tokens per second)
llama_print_timings:        eval time =    2757.07 ms /    52 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3019.01 ms
Llama.generate: prefix-match hit


255 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.89 ms /    54 runs   (    0.09 ms per token, 11042.94 tokens per second)
llama_print_timings: prompt eval time =     189.94 ms /    15 tokens (   12.66 ms per token,    78.97 tokens per second)
llama_print_timings:        eval time =    2806.97 ms /    53 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3067.51 ms
Llama.generate: prefix-match hit


256 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.20 ms /    55 runs   (    0.09 ms per token, 10574.89 tokens per second)
llama_print_timings: prompt eval time =     199.58 ms /    22 tokens (    9.07 ms per token,   110.23 tokens per second)
llama_print_timings:        eval time =    2867.16 ms /    54 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3138.93 ms
Llama.generate: prefix-match hit


257 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.75 ms /    51 runs   (    0.09 ms per token, 10732.32 tokens per second)
llama_print_timings: prompt eval time =     205.10 ms /    27 tokens (    7.60 ms per token,   131.64 tokens per second)
llama_print_timings:        eval time =    2654.19 ms /    50 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    2925.30 ms
Llama.generate: prefix-match hit


258 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.11 ms /    55 runs   (    0.09 ms per token, 10763.21 tokens per second)
llama_print_timings: prompt eval time =     199.56 ms /    22 tokens (    9.07 ms per token,   110.24 tokens per second)
llama_print_timings:        eval time =    2866.85 ms /    54 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3137.94 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.17 ms /    55 runs   (    0.09 ms per token, 10648.60 tokens per second)
llama_print_timings: prompt eval time =     193.79 ms /    18 tokens (   10.77 ms per token,    92.89 tokens per second)
llama_print_timings:        eval time =    2865.35 ms /    54 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3131.16 ms
Llama.generate: prefix-

259 -- 1
260 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.99 ms /    52 runs   (    0.10 ms per token, 10425.02 tokens per second)
llama_print_timings: prompt eval time =     206.35 ms /    32 tokens (    6.45 ms per token,   155.08 tokens per second)
llama_print_timings:        eval time =    2710.14 ms /    51 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    2983.51 ms
Llama.generate: prefix-match hit


261 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.24 ms /    56 runs   (    0.09 ms per token, 10697.23 tokens per second)
llama_print_timings: prompt eval time =     195.15 ms /    20 tokens (    9.76 ms per token,   102.48 tokens per second)
llama_print_timings:        eval time =    2919.28 ms /    55 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3186.65 ms
Llama.generate: prefix-match hit


262 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.51 ms /    57 runs   (    0.10 ms per token, 10344.83 tokens per second)
llama_print_timings: prompt eval time =     189.46 ms /    28 tokens (    6.77 ms per token,   147.79 tokens per second)
llama_print_timings:        eval time =    2971.65 ms /    56 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3233.98 ms
Llama.generate: prefix-match hit


263 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.16 ms /    56 runs   (    0.09 ms per token, 10863.24 tokens per second)
llama_print_timings: prompt eval time =     189.38 ms /    16 tokens (   11.84 ms per token,    84.49 tokens per second)
llama_print_timings:        eval time =    2916.59 ms /    55 runs   (   53.03 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3178.94 ms
Llama.generate: prefix-match hit


264 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.98 ms /    53 runs   (    0.09 ms per token, 10653.27 tokens per second)
llama_print_timings: prompt eval time =     200.65 ms /    24 tokens (    8.36 ms per token,   119.61 tokens per second)
llama_print_timings:        eval time =    2761.07 ms /    52 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3029.74 ms
Llama.generate: prefix-match hit


265 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.39 ms /    58 runs   (    0.09 ms per token, 10762.66 tokens per second)
llama_print_timings: prompt eval time =     199.44 ms /    22 tokens (    9.07 ms per token,   110.31 tokens per second)
llama_print_timings:        eval time =    3023.77 ms /    57 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3298.61 ms
Llama.generate: prefix-match hit


266 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.27 ms /    55 runs   (    0.10 ms per token, 10428.52 tokens per second)
llama_print_timings: prompt eval time =     194.61 ms /    19 tokens (   10.24 ms per token,    97.63 tokens per second)
llama_print_timings:        eval time =    2864.20 ms /    54 runs   (   53.04 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3132.23 ms
Llama.generate: prefix-match hit


267 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.86 ms /    56 runs   (    0.09 ms per token, 11534.50 tokens per second)
llama_print_timings: prompt eval time =     357.56 ms /    43 tokens (    8.32 ms per token,   120.26 tokens per second)
llama_print_timings:        eval time =    2923.80 ms /    55 runs   (   53.16 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3353.69 ms
Llama.generate: prefix-match hit


268 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.25 ms /    57 runs   (    0.09 ms per token, 10859.21 tokens per second)
llama_print_timings: prompt eval time =     194.73 ms /    19 tokens (   10.25 ms per token,    97.57 tokens per second)
llama_print_timings:        eval time =    2973.28 ms /    56 runs   (   53.09 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3241.38 ms
Llama.generate: prefix-match hit


269 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.74 ms /    63 runs   (    0.09 ms per token, 10971.79 tokens per second)
llama_print_timings: prompt eval time =     345.49 ms /    35 tokens (    9.87 ms per token,   101.31 tokens per second)
llama_print_timings:        eval time =    3297.26 ms /    62 runs   (   53.18 ms per token,    18.80 tokens per second)
llama_print_timings:       total time =    3725.37 ms
Llama.generate: prefix-match hit


270 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.95 ms /    61 runs   (    0.10 ms per token, 10248.66 tokens per second)
llama_print_timings: prompt eval time =     200.94 ms /    23 tokens (    8.74 ms per token,   114.46 tokens per second)
llama_print_timings:        eval time =    3188.26 ms /    60 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    3471.20 ms
Llama.generate: prefix-match hit


271 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.88 ms /    54 runs   (    0.09 ms per token, 11072.38 tokens per second)
llama_print_timings: prompt eval time =     349.90 ms /    37 tokens (    9.46 ms per token,   105.75 tokens per second)
llama_print_timings:        eval time =    2817.58 ms /    53 runs   (   53.16 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3236.74 ms
Llama.generate: prefix-match hit


272 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.05 ms /    52 runs   (    0.10 ms per token, 10301.11 tokens per second)
llama_print_timings: prompt eval time =     206.01 ms /    32 tokens (    6.44 ms per token,   155.33 tokens per second)
llama_print_timings:        eval time =    2705.35 ms /    51 runs   (   53.05 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    2978.21 ms
Llama.generate: prefix-match hit


273 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.29 ms /    56 runs   (    0.09 ms per token, 10596.03 tokens per second)
llama_print_timings: prompt eval time =     194.96 ms /    20 tokens (    9.75 ms per token,   102.59 tokens per second)
llama_print_timings:        eval time =    2915.98 ms /    55 runs   (   53.02 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3185.34 ms
Llama.generate: prefix-match hit


274 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.19 ms /    58 runs   (    0.09 ms per token, 11166.73 tokens per second)
llama_print_timings: prompt eval time =     356.87 ms /    43 tokens (    8.30 ms per token,   120.49 tokens per second)
llama_print_timings:        eval time =    3026.27 ms /    57 runs   (   53.09 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3461.29 ms
Llama.generate: prefix-match hit


275 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.43 ms /    57 runs   (    0.10 ms per token, 10503.04 tokens per second)
llama_print_timings: prompt eval time =     209.40 ms /    29 tokens (    7.22 ms per token,   138.49 tokens per second)
llama_print_timings:        eval time =    2971.52 ms /    56 runs   (   53.06 ms per token,    18.85 tokens per second)
llama_print_timings:       total time =    3255.11 ms
Llama.generate: prefix-match hit


276 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.02 ms /    54 runs   (    0.09 ms per token, 10759.12 tokens per second)
llama_print_timings: prompt eval time =     201.12 ms /    21 tokens (    9.58 ms per token,   104.42 tokens per second)
llama_print_timings:        eval time =    2818.12 ms /    53 runs   (   53.17 ms per token,    18.81 tokens per second)
llama_print_timings:       total time =    3092.55 ms
Llama.generate: prefix-match hit


277 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.12 ms /    53 runs   (    0.12 ms per token,  8660.13 tokens per second)
llama_print_timings: prompt eval time =     198.94 ms /    22 tokens (    9.04 ms per token,   110.59 tokens per second)
llama_print_timings:        eval time =    2804.61 ms /    52 runs   (   53.93 ms per token,    18.54 tokens per second)
llama_print_timings:       total time =    3091.51 ms
Llama.generate: prefix-match hit


278 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       8.71 ms /    65 runs   (    0.13 ms per token,  7460.97 tokens per second)
llama_print_timings: prompt eval time =     202.90 ms /    24 tokens (    8.45 ms per token,   118.29 tokens per second)
llama_print_timings:        eval time =    3425.91 ms /    64 runs   (   53.53 ms per token,    18.68 tokens per second)
llama_print_timings:       total time =    3750.46 ms
Llama.generate: prefix-match hit


279 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.09 ms /    58 runs   (    0.12 ms per token,  8179.38 tokens per second)
llama_print_timings: prompt eval time =     193.74 ms /    19 tokens (   10.20 ms per token,    98.07 tokens per second)
llama_print_timings:        eval time =    3031.85 ms /    57 runs   (   53.19 ms per token,    18.80 tokens per second)
llama_print_timings:       total time =    3324.22 ms
Llama.generate: prefix-match hit


280 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.42 ms /    56 runs   (    0.10 ms per token, 10339.73 tokens per second)
llama_print_timings: prompt eval time =     188.56 ms /    16 tokens (   11.79 ms per token,    84.85 tokens per second)
llama_print_timings:        eval time =    2920.76 ms /    55 runs   (   53.10 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3187.44 ms
Llama.generate: prefix-match hit


281 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.01 ms /    53 runs   (    0.09 ms per token, 10585.18 tokens per second)
llama_print_timings: prompt eval time =     189.90 ms /    15 tokens (   12.66 ms per token,    78.99 tokens per second)
llama_print_timings:        eval time =    2755.00 ms /    52 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3017.76 ms
Llama.generate: prefix-match hit


282 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.08 ms /    56 runs   (    0.09 ms per token, 11012.78 tokens per second)
llama_print_timings: prompt eval time =     189.29 ms /    15 tokens (   12.62 ms per token,    79.24 tokens per second)
llama_print_timings:        eval time =    2913.97 ms /    55 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3176.33 ms
Llama.generate: prefix-match hit


283 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.23 ms /    55 runs   (    0.10 ms per token, 10508.22 tokens per second)
llama_print_timings: prompt eval time =     199.13 ms /    22 tokens (    9.05 ms per token,   110.48 tokens per second)
llama_print_timings:        eval time =    2866.11 ms /    54 runs   (   53.08 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3140.27 ms
Llama.generate: prefix-match hit


284 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.09 ms /    53 runs   (    0.10 ms per token, 10414.62 tokens per second)
llama_print_timings: prompt eval time =     197.46 ms /    21 tokens (    9.40 ms per token,   106.35 tokens per second)
llama_print_timings:        eval time =    2753.89 ms /    52 runs   (   52.96 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3022.75 ms
Llama.generate: prefix-match hit


285 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.16 ms /    54 runs   (    0.10 ms per token, 10471.20 tokens per second)
llama_print_timings: prompt eval time =     199.35 ms /    22 tokens (    9.06 ms per token,   110.36 tokens per second)
llama_print_timings:        eval time =    2807.80 ms /    53 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3079.27 ms
Llama.generate: prefix-match hit


286 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.89 ms /    55 runs   (    0.09 ms per token, 11238.25 tokens per second)
llama_print_timings: prompt eval time =     189.42 ms /    15 tokens (   12.63 ms per token,    79.19 tokens per second)
llama_print_timings:        eval time =    2857.38 ms /    54 runs   (   52.91 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3118.83 ms
Llama.generate: prefix-match hit


287 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.27 ms /    55 runs   (    0.10 ms per token, 10444.36 tokens per second)
llama_print_timings: prompt eval time =     193.39 ms /    18 tokens (   10.74 ms per token,    93.08 tokens per second)
llama_print_timings:        eval time =    2861.84 ms /    54 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3129.49 ms
Llama.generate: prefix-match hit


288 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.95 ms /    53 runs   (    0.09 ms per token, 10709.23 tokens per second)
llama_print_timings: prompt eval time =     194.09 ms /    19 tokens (   10.22 ms per token,    97.89 tokens per second)
llama_print_timings:        eval time =    2752.73 ms /    52 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3015.77 ms
Llama.generate: prefix-match hit


289 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.48 ms /    58 runs   (    0.09 ms per token, 10589.74 tokens per second)
llama_print_timings: prompt eval time =     193.63 ms /    18 tokens (   10.76 ms per token,    92.96 tokens per second)
llama_print_timings:        eval time =    3019.67 ms /    57 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3289.63 ms
Llama.generate: prefix-match hit


290 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.16 ms /    55 runs   (    0.09 ms per token, 10656.85 tokens per second)
llama_print_timings: prompt eval time =     192.90 ms /    18 tokens (   10.72 ms per token,    93.31 tokens per second)
llama_print_timings:        eval time =    2858.64 ms /    54 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3124.79 ms
Llama.generate: prefix-match hit


291 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.43 ms /    55 runs   (    0.10 ms per token, 10136.38 tokens per second)
llama_print_timings: prompt eval time =     198.96 ms /    22 tokens (    9.04 ms per token,   110.57 tokens per second)
llama_print_timings:        eval time =    2861.74 ms /    54 runs   (   53.00 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3136.47 ms
Llama.generate: prefix-match hit


292 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.05 ms /    50 runs   (    0.10 ms per token,  9900.99 tokens per second)
llama_print_timings: prompt eval time =     200.27 ms /    23 tokens (    8.71 ms per token,   114.84 tokens per second)
llama_print_timings:        eval time =    2609.46 ms /    49 runs   (   53.25 ms per token,    18.78 tokens per second)
llama_print_timings:       total time =    2879.17 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.28 ms /    56 runs   (    0.11 ms per token,  8921.46 tokens per second)
llama_print_timings: prompt eval time =     194.18 ms /    19 tokens (   10.22 ms per token,    97.85 tokens per second)
llama_print_timings:        eval time =    2944.25 ms /    55 runs   (   53.53 ms per token,    18.68 tokens per second)
llama_print_timings:       total time =    3237.18 ms
Llama.generate: prefix-

293 -- 1
294 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.29 ms /    53 runs   (    0.14 ms per token,  7271.23 tokens per second)
llama_print_timings: prompt eval time =     189.71 ms /    16 tokens (   11.86 ms per token,    84.34 tokens per second)
llama_print_timings:        eval time =    2794.32 ms /    52 runs   (   53.74 ms per token,    18.61 tokens per second)
llama_print_timings:       total time =    3079.27 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.76 ms /    56 runs   (    0.12 ms per token,  8284.02 tokens per second)
llama_print_timings: prompt eval time =     193.18 ms /    16 tokens (   12.07 ms per token,    82.82 tokens per second)
llama_print_timings:        eval time =    2929.04 ms /    55 runs   (   53.26 ms per token,    18.78 tokens per second)
llama_print_timings:       total time =    3218.08 ms
Llama.generate: prefix-

295 -- 1
296 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.87 ms /    56 runs   (    0.10 ms per token,  9538.41 tokens per second)
llama_print_timings: prompt eval time =     194.33 ms /    19 tokens (   10.23 ms per token,    97.77 tokens per second)
llama_print_timings:        eval time =    2918.71 ms /    55 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    3194.73 ms
Llama.generate: prefix-match hit


297 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.68 ms /    56 runs   (    0.10 ms per token,  9855.68 tokens per second)
llama_print_timings: prompt eval time =     198.63 ms /    22 tokens (    9.03 ms per token,   110.76 tokens per second)
llama_print_timings:        eval time =    2921.29 ms /    55 runs   (   53.11 ms per token,    18.83 tokens per second)
llama_print_timings:       total time =    3201.33 ms
Llama.generate: prefix-match hit


298 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.75 ms /    59 runs   (    0.13 ms per token,  7609.96 tokens per second)
llama_print_timings: prompt eval time =     201.53 ms /    23 tokens (    8.76 ms per token,   114.13 tokens per second)
llama_print_timings:        eval time =    3089.22 ms /    58 runs   (   53.26 ms per token,    18.77 tokens per second)
llama_print_timings:       total time =    3395.35 ms
Llama.generate: prefix-match hit


299 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       8.19 ms /    61 runs   (    0.13 ms per token,  7444.47 tokens per second)
llama_print_timings: prompt eval time =     202.78 ms /    23 tokens (    8.82 ms per token,   113.43 tokens per second)
llama_print_timings:        eval time =    3209.24 ms /    60 runs   (   53.49 ms per token,    18.70 tokens per second)
llama_print_timings:       total time =    3522.84 ms
Llama.generate: prefix-match hit


300 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.00 ms /    51 runs   (    0.12 ms per token,  8495.75 tokens per second)
llama_print_timings: prompt eval time =     189.40 ms /    16 tokens (   11.84 ms per token,    84.48 tokens per second)
llama_print_timings:        eval time =    2653.73 ms /    50 runs   (   53.07 ms per token,    18.84 tokens per second)
llama_print_timings:       total time =    2926.56 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.66 ms /    57 runs   (    0.13 ms per token,  7442.22 tokens per second)
llama_print_timings: prompt eval time =     342.41 ms /    33 tokens (   10.38 ms per token,    96.38 tokens per second)
llama_print_timings:        eval time =    2975.87 ms /    56 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    3420.62 ms
Llama.generate: prefix-

301 -- 1
302 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.60 ms /    54 runs   (    0.12 ms per token,  8176.86 tokens per second)
llama_print_timings: prompt eval time =     197.26 ms /    21 tokens (    9.39 ms per token,   106.46 tokens per second)
llama_print_timings:        eval time =    2816.03 ms /    53 runs   (   53.13 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    3105.35 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.68 ms /    52 runs   (    0.11 ms per token,  9159.77 tokens per second)
llama_print_timings: prompt eval time =     204.76 ms /    26 tokens (    7.88 ms per token,   126.98 tokens per second)
llama_print_timings:        eval time =    2709.87 ms /    51 runs   (   53.13 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    2997.38 ms
Llama.generate: prefix-

303 -- 1
304 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.21 ms /    58 runs   (    0.12 ms per token,  8047.73 tokens per second)
llama_print_timings: prompt eval time =     201.69 ms /    24 tokens (    8.40 ms per token,   119.00 tokens per second)
llama_print_timings:        eval time =    3028.88 ms /    57 runs   (   53.14 ms per token,    18.82 tokens per second)
llama_print_timings:       total time =    3328.68 ms
Llama.generate: prefix-match hit


305 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.67 ms /    57 runs   (    0.13 ms per token,  7431.55 tokens per second)
llama_print_timings: prompt eval time =     193.77 ms /    18 tokens (   10.77 ms per token,    92.89 tokens per second)
llama_print_timings:        eval time =    2978.41 ms /    56 runs   (   53.19 ms per token,    18.80 tokens per second)
llama_print_timings:       total time =    3278.45 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.12 ms /    54 runs   (    0.13 ms per token,  7587.47 tokens per second)
llama_print_timings: prompt eval time =     197.81 ms /    21 tokens (    9.42 ms per token,   106.16 tokens per second)
llama_print_timings:        eval time =    2841.66 ms /    53 runs   (   53.62 ms per token,    18.65 tokens per second)
llama_print_timings:       total time =    3138.54 ms
Llama.generate: prefix-

306 -- 1
307 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.20 ms /    55 runs   (    0.11 ms per token,  8869.54 tokens per second)
llama_print_timings: prompt eval time =     199.56 ms /    22 tokens (    9.07 ms per token,   110.25 tokens per second)
llama_print_timings:        eval time =    2855.32 ms /    54 runs   (   52.88 ms per token,    18.91 tokens per second)
llama_print_timings:       total time =    3139.95 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       7.73 ms /    56 runs   (    0.14 ms per token,  7249.19 tokens per second)
llama_print_timings: prompt eval time =     193.66 ms /    18 tokens (   10.76 ms per token,    92.95 tokens per second)
llama_print_timings:        eval time =    2915.47 ms /    55 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    3219.87 ms
Llama.generate: prefix-

308 -- 1
309 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.22 ms /    56 runs   (    0.11 ms per token,  9003.22 tokens per second)
llama_print_timings: prompt eval time =     216.89 ms /    31 tokens (    7.00 ms per token,   142.93 tokens per second)
llama_print_timings:        eval time =    2911.45 ms /    55 runs   (   52.94 ms per token,    18.89 tokens per second)
llama_print_timings:       total time =    3214.05 ms
Llama.generate: prefix-match hit


310 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.60 ms /    56 runs   (    0.10 ms per token,  9992.86 tokens per second)
llama_print_timings: prompt eval time =     198.62 ms /    22 tokens (    9.03 ms per token,   110.76 tokens per second)
llama_print_timings:        eval time =    2907.43 ms /    55 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    3184.71 ms
Llama.generate: prefix-match hit


311 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.17 ms /    53 runs   (    0.10 ms per token, 10241.55 tokens per second)
llama_print_timings: prompt eval time =     200.94 ms /    23 tokens (    8.74 ms per token,   114.46 tokens per second)
llama_print_timings:        eval time =    2775.97 ms /    52 runs   (   53.38 ms per token,    18.73 tokens per second)
llama_print_timings:       total time =    3054.41 ms
Llama.generate: prefix-match hit


312 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.36 ms /    60 runs   (    0.11 ms per token,  9436.93 tokens per second)
llama_print_timings: prompt eval time =     194.51 ms /    20 tokens (    9.73 ms per token,   102.82 tokens per second)
llama_print_timings:        eval time =    3127.33 ms /    59 runs   (   53.01 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3415.35 ms
Llama.generate: prefix-match hit


313 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.14 ms /    53 runs   (    0.10 ms per token, 10313.29 tokens per second)
llama_print_timings: prompt eval time =     193.98 ms /    19 tokens (   10.21 ms per token,    97.95 tokens per second)
llama_print_timings:        eval time =    2748.71 ms /    52 runs   (   52.86 ms per token,    18.92 tokens per second)
llama_print_timings:       total time =    3013.74 ms
Llama.generate: prefix-match hit


314 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       4.92 ms /    49 runs   (    0.10 ms per token,  9967.45 tokens per second)
llama_print_timings: prompt eval time =     182.61 ms /    10 tokens (   18.26 ms per token,    54.76 tokens per second)
llama_print_timings:        eval time =    2534.94 ms /    48 runs   (   52.81 ms per token,    18.94 tokens per second)
llama_print_timings:       total time =    2789.01 ms
Llama.generate: prefix-match hit


315 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       5.30 ms /    55 runs   (    0.10 ms per token, 10375.40 tokens per second)
llama_print_timings: prompt eval time =     198.78 ms /    22 tokens (    9.04 ms per token,   110.67 tokens per second)
llama_print_timings:        eval time =    2857.47 ms /    54 runs   (   52.92 ms per token,    18.90 tokens per second)
llama_print_timings:       total time =    3130.26 ms
Llama.generate: prefix-match hit


316 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.41 ms /    62 runs   (    0.10 ms per token,  9675.41 tokens per second)
llama_print_timings: prompt eval time =     210.69 ms /    30 tokens (    7.02 ms per token,   142.39 tokens per second)
llama_print_timings:        eval time =    3232.03 ms /    61 runs   (   52.98 ms per token,    18.87 tokens per second)
llama_print_timings:       total time =    3531.06 ms
Llama.generate: prefix-match hit


317 -- 1



llama_print_timings:        load time =    5882.46 ms
llama_print_timings:      sample time =       6.08 ms /    62 runs   (    0.10 ms per token, 10197.37 tokens per second)
llama_print_timings: prompt eval time =     188.53 ms /    28 tokens (    6.73 ms per token,   148.52 tokens per second)
llama_print_timings:        eval time =    3231.61 ms /    61 runs   (   52.98 ms per token,    18.88 tokens per second)
llama_print_timings:       total time =    3504.93 ms


In [133]:
# Gather the collected generations into a frame with one column per extracted
# field, persist them as a JSON list of records, and show how many rows exist.
results_df = pd.DataFrame(
  results,
  columns=[
    'text',
    'pun_word',
    'target_word',
    'source_sense',
    'target_sense',
  ],
)
results_df.to_json(
  'outputs-task2-hetero-openocra7b.json',
  orient="records",
)
results_df.shape[0]

318

In [134]:
def cprf(true, pred):
  """Score predicted pun words against the gold pun words.

  Both frames must provide a 'pun_word' column. The words are compared row by
  row after stripping surrounding whitespace and lower-casing.

  Args:
    true: DataFrame holding the gold annotations.
    pred: DataFrame holding the predictions. Its 'pun_word' Series has to be
      labeled identically to the gold one; pandas raises ValueError on the
      `==` comparison otherwise.

  Returns:
    Tuple (coverage, precision, recall, f1) of floats where
    coverage = len(pred) / len(true), precision = correct / len(pred),
    recall = correct / len(true) and f1 is their harmonic mean. Any ratio
    whose denominator is zero is reported as 0.0 instead of nan or an error.
  """
  def _ratio(numerator, denominator):
    # Guarded division: a zero denominator means "nothing to score" -> 0.0.
    return numerator / denominator if denominator else 0.0

  gold_words = true['pun_word'].str.strip().str.lower()
  pred_words = pred['pun_word'].str.strip().str.lower()
  # int() turns the numpy count into a plain int so the metrics are plain
  # floats and 0/0 cannot silently become nan.
  correct = int((gold_words == pred_words).sum())

  coverage = _ratio(len(pred), len(true))
  precision = _ratio(correct, len(pred))
  recall = _ratio(correct, len(true))
  # Without the guard, zero correct predictions made this 0/0.
  f1 = _ratio(2 * precision * recall, precision + recall)
  return coverage, precision, recall, f1

In [136]:
# Score the predictions against the gold frame and print each metric on its
# own line, rounded to four decimals.
c, p, r, f1 = cprf(test, results_df)
for label, value in (
  ("Coverage", c),
  ("Precision", p),
  ("Recall", r),
  ("F1-Score", f1),
):
  print(f"{label}: {value:.4f}")

Coverage: 1.0000
Precision: 0.6258
Recall: 0.6258
F1-Score: 0.6258
