In [1]:
# Install the notebook's dependencies into the environment of the running kernel.
# NOTE(review): no versions are pinned, so a later run may resolve different releases.
%pip install pandas openpyxl transformers torch
%pip install llama-cpp-python

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
from llama_cpp import Llama
from concurrent.futures import ThreadPoolExecutor

import os

# Path to the quantized Mistral-7B-Instruct GGUF weights. The original absolute
# path stays as the default; set MISTRAL_GGUF_PATH to run the notebook on a
# machine where the file lives elsewhere.
model_path = os.environ.get(
    "MISTRAL_GGUF_PATH",
    "/Users/solar/OneDrive/Documents/Capstone/Capstone-Jupyter/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
)

# Context window (in tokens) requested from llama.cpp for this session.
max_context = 4096

# Shared model handle used by every later cell for tokenizing and generation.
llm = Llama(
    model_path=model_path,
    n_ctx=max_context,
)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/solar/OneDrive/Documents/Capstone/Capstone-Jupyter/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 12

In [3]:
import json
import re

# Read the factor metadata records from disk.
with open("metadata.json", "r", encoding="utf-8") as f:
    raw_metadata = json.load(f)

# Map each factor code to a single "<name>. <description>" string.
metadata_lookup = dict(
    (record["code"], "{}. {}".format(record["name"], record["description"]))
    for record in raw_metadata
)

def extract_codes(query):
    """Return every bracketed factor code in ``query``, in order of appearance.

    A code is a run of letters, digits and underscores enclosed in ``[`` and
    ``]``. A code that occurs several times is returned once per occurrence.
    """
    return [hit.group(1) for hit in re.finditer(r"\[([A-Za-z0-9_]+)\]", query)]

# Operators that compare a factor against a list of values.
list_keywords = {"IN", "ANY", "NONE"}

# Human-readable definition for each null-type marker that can follow "IS".
null_keywords = {
    ":NC": 'Null type "Not collected"',
    ":NA": 'Null type "Not applicable"',
    ":ND": 'Null type "Not disclosed"',
    ":NI": 'Null type "No information"',
    ":NM": 'Null type "Not meaningful"',
}
def extract_nulls_and_lists(query):
    """Find the null-type tests and list-operator tests in an ESGish query.

    Returns:
        A ``(nulls, lists)`` pair of lists:

        * ``nulls`` holds ``(code, marker)`` for each ``[code] IS :XX`` match,
          where ``:XX`` is a colon followed by two capital letters. ``IS`` is
          matched case-sensitively.
        * ``lists`` holds ``(code, operator)`` for each ``[code] IN|ANY|NONE``
          match. The operator is matched case-insensitively and returned
          upper-cased.
    """
    null_pattern = r"\[([A-Za-z0-9_]+)\]\s+IS\s+(:[A-Z]{2})"
    list_pattern = r"\[([A-Za-z0-9_]+)\]\s+(IN|ANY|NONE)\b"

    nulls = [(code, marker) for code, marker in re.findall(null_pattern, query)]
    lists = [
        (code, operator.upper())
        for code, operator in re.findall(list_pattern, query, re.IGNORECASE)
    ]
    return nulls, lists

# Diagnostic: locate the code whose combined "name. description" text is longest.
longest_entry = max(metadata_lookup.items(), key=lambda pair: len(pair[1]))

# Report the code, the text length in characters, and the text itself.
print(
    f'Longest entry code: {longest_entry[0]}',
    f'Length: {len(longest_entry[1])}',
    f'Content: {longest_entry[1]}',
    sep="\n",
)

Longest entry code: EUTaxManIntSerElcRevOverAlign
Length: 1353
Content: EU Taxon - Man, Instal, Serv of Elec Equip Overall Align. EU Taxon - Man, Instal, Serv of Elec Equip Overall Align: This factor identifies the overall alignment for Manufacture, installation, and servicing of high, medium and low voltage electrical equipment for electrical transmission and distribution that result in or enable a substantial contribution to climate change mitigation, covering the substantial contribution criteria, the do no significant harm criteria, and the minimum social safeguards. This is the aggregated result across all Taxonomy objectives. The possible values are: Aligned,Aligned (>90%),Aligned (>80%),Aligned (>70%),Aligned (>60%),Aligned (>50%),Aligned (>40%),Aligned (>30%),Aligned (>20%),Aligned (>10%),Aligned (>0%),Likely aligned (100%),Likely aligned (>90%),Likely aligned (>80%),Likely aligned (>70%),Likely aligned (>60%),Likely aligned (>50%),Likely aligned (>40%),Likely aligned (>30%),Li

In [4]:
def max_tokens(text, max_tokens=40):
    """Limit ``text`` to at most ``max_tokens`` tokens of the loaded model.

    Text that already fits is returned unchanged. Otherwise the first
    ``max_tokens`` tokens are converted back to text and ``"..."`` is appended
    to show that the text was cut.
    """
    token_ids = llm.tokenize(text.encode("utf-8"))
    if len(token_ids) > max_tokens:
        kept_bytes = llm.detokenize(token_ids[:max_tokens])
        # errors="ignore" drops bytes that do not decode as UTF-8 (for example
        # a multi-byte character left incomplete by the cut).
        return kept_bytes.decode("utf-8", errors="ignore") + "..."
    return text

def build_ordered_context(query, token_budget, max_meta_tokens=100):
    """Assemble the metadata context for ``query`` without exceeding a token budget.

    Codes are processed in order of first appearance in the query. Each code
    contributes up to three lines, attempted in this order:

    1. ``<code> = <:XX> → <definition>`` when the query tests the code with
       ``IS <:XX>`` (only the first null marker found for the code is used).
    2. ``<code>: <metadata>`` with the metadata text truncated to
       ``max_meta_tokens`` tokens; ``"No metadata found."`` is used for
       unknown codes.
    3. ``<code> (enumeration): <metadata>`` when the code is used with
       ``IN``/``ANY``/``NONE``.
       NOTE(review): this line carries the same truncated metadata text as
       line 2, so list codes currently spend their budget twice on identical
       text.

    Args:
        query: ESGish query string.
        token_budget: Maximum summed token count of the emitted lines. Each
            line is tokenized on its own. The newlines that join the lines are
            not counted.
        max_meta_tokens: Truncation length passed to ``max_tokens`` for each
            metadata text. Defaults to the previously hard-coded 100.

    Returns:
        The accepted lines joined with newlines. Assembly stops at the first
        line that would exceed the budget, so later codes are dropped even if
        their lines are shorter. Returns ``""`` when nothing fits.
    """
    null_hits, list_hits = extract_nulls_and_lists(query)

    # Keep the first marker per code so lookups are O(1) instead of a rescan.
    null_by_code = {}
    for hit_code, marker in null_hits:
        null_by_code.setdefault(hit_code, marker)
    list_codes = {hit_code for hit_code, _ in list_hits}

    context_lines = []
    used_tokens = 0

    def try_add(line):
        """Append ``line`` if it fits in the remaining budget; report success."""
        nonlocal used_tokens
        cost = len(llm.tokenize(line.encode("utf-8")))
        if used_tokens + cost > token_budget:
            return False
        context_lines.append(line)
        used_tokens += cost
        return True

    seen = set()
    for code in extract_codes(query):
        if code in seen:
            continue
        seen.add(code)

        marker = null_by_code.get(code)
        if marker:
            definition = null_keywords.get(marker, f"No definition for {marker}")
            if not try_add(f"{code} = {marker} → {definition}"):
                break

        # Truncate once and reuse the result for both lines below.
        meta_text = max_tokens(
            metadata_lookup.get(code, "No metadata found."), max_meta_tokens
        )
        if not try_add(f"{code}: {meta_text}"):
            break

        if code in list_codes and not try_add(f"{code} (enumeration): {meta_text}"):
            break

    return "\n".join(context_lines)

def max_afforded_tokens(codes, context_size=4096, min_tokens=100):
    """Split a context window evenly across the codes of a query.

    Args:
        codes: Sequence of factor codes; only its length is used.
        context_size: Total number of tokens to divide. Defaults to the
            previously hard-coded 4096.
        min_tokens: Lower bound on the returned share. Defaults to the
            previously hard-coded 100.

    Returns:
        ``context_size`` integer-divided by the number of codes (an empty
        sequence counts as one code), but never less than ``min_tokens``.
    """
    return max(context_size // max(1, len(codes)), min_tokens)


def translate_query(query, max_total_tokens=2048, max_output_tokens=256):
    """Translate one ESGish query into a plain-English sentence with the local model.

    The prompt consists of a fixed instruction header, as much metadata context
    for the query's codes as the token budget allows, and the query itself.

    Args:
        query: ESGish query string.
        max_total_tokens: Token total (prompt plus reserved output) that the
            metadata context is sized against.
        max_output_tokens: Generation cap passed to the model. The same amount
            is reserved out of ``max_total_tokens`` when sizing the context.

    Returns:
        The text of the model's first completion choice, stripped of
        surrounding whitespace.
    """
    instruction = "### Instruction:\nRephrase the following ESGish query into a concise natural English sentence. Each query is asking for all companies or issuers that match some paramater. Use the following metadata definitions for clarity:\n\n"
    query_part = f"\n\nQuery: {query}\n\n### Response:"

    # Whatever is left after the fixed prompt pieces and the reserved output
    # is the budget available for metadata context.
    instruction_tokens = len(llm.tokenize(instruction.encode("utf-8")))
    query_tokens = len(llm.tokenize(query_part.encode("utf-8")))
    token_budget = max_total_tokens - max_output_tokens - instruction_tokens - query_tokens

    context = build_ordered_context(query, token_budget)

    # Echo the query and the selected context so each translation can be
    # audited from the cell output.
    print("query: ", query)
    print(context)

    prompt = instruction + context + query_part

    # A low temperature keeps the rephrasing close to deterministic.
    response = llm(prompt, max_tokens=max_output_tokens, temperature=0.1)
    return response["choices"][0]["text"].strip()




In [None]:
import pandas as pd
from functools import partial
import time

# Load the ESGish queries that need translating.
df = pd.read_excel("NullType_10Queries.xlsx")
queries = df["Esgish"].tolist()

# Results are flushed to disk once per batch, so a crash part-way through
# keeps every batch that was already written.
batch_size = 100
output_file = "translated_queries_TEST.xlsx"

for batch_number, i in enumerate(range(0, len(queries), batch_size), start=1):
    batch = queries[i:i + batch_size]
    translated_batch = [translate_query(query) for query in batch]

    df_batch = pd.DataFrame({"Esgish": batch, "English": translated_batch})

    if i > 0:
        # Later batches are appended below the rows already in the sheet;
        # the +1 skips the header row written with the first batch.
        with pd.ExcelWriter(output_file, mode="a", engine="openpyxl", if_sheet_exists="overlay") as writer:
            df_batch.to_excel(writer, index=False, header=False, startrow=i + 1)
    else:
        # The first batch creates (or overwrites) the workbook, header included.
        df_batch.to_excel(output_file, index=False)

    print(f"Saved batch {batch_number} to {output_file}")

query:  AND([ClawBackProvisionDisclosure] IS :ND,[CEOStockOwnershipGuideRatio] > '1')
ClawBackProvisionDisclosure = :ND → Null type "Not disclosed"
ClawBackProvisionDisclosure: Q155: Clawback Provision. Does the company have a clawback or malus provision? (Q155)
CEOStockOwnershipGuideRatio: Q145: CEO Stock Ownership Guidelines (% of Salary). What salary multiple of the CEO is subject to stock ownership guidelines? (Q145)


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    4865.43 ms /   191 tokens (   25.47 ms per token,    39.26 tokens per second)
llama_perf_context_print:        eval time =    5296.44 ms /    33 runs   (  160.50 ms per token,     6.23 tokens per second)
llama_perf_context_print:       total time =   10167.99 ms /   224 tokens
Llama.generate: 190 prefix-match hit, remaining 1 prompt tokens to eval


query:  AND([ClawBackProvisionDisclosure] IS :ND,[CEOStockOwnershipGuideRatio] > '1')
ClawBackProvisionDisclosure = :ND → Null type "Not disclosed"
ClawBackProvisionDisclosure: Q155: Clawback Provision. Does the company have a clawback or malus provision? (Q155)
CEOStockOwnershipGuideRatio: Q145: CEO Stock Ownership Guidelines (% of Salary). What salary multiple of the CEO is subject to stock ownership guidelines? (Q145)


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =    5488.63 ms /    34 runs   (  161.43 ms per token,     6.19 tokens per second)
llama_perf_context_print:       total time =    5494.40 ms /    35 tokens
Llama.generate: 51 prefix-match hit, remaining 169 prompt tokens to eval


query:  AND([ClimateEmissionsFiscalYear] == '2022',[ClimateParentEntityID] IS :NA)
ClimateEmissionsFiscalYear: GHG Emissions - Fiscal Year. GHG Emissions - Fiscal Year: This factor provides the fiscal year associated with the corresponding GHG emissions values.
ClimateParentEntityID = :NA → Null type "Not applicable"
ClimateParentEntityID: Climate Parent Entity ID. Climate Parent Entity ID: This factor provides the identifier of the issuer‘s parent, from which the climate data is assigned. The climate data from the parent company is assigned to a subsidiary based on criteria such as industry classification, ownership structure, and business activities.


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    4074.35 ms /   169 tokens (   24.11 ms per token,    41.48 tokens per second)
llama_perf_context_print:        eval time =    4719.95 ms /    30 runs   (  157.33 ms per token,     6.36 tokens per second)
llama_perf_context_print:       total time =    8799.89 ms /   199 tokens
Llama.generate: 50 prefix-match hit, remaining 1177 prompt tokens to eval


query:  AND([FossilFuelCoalPowerRevShareMin] IS :NC,[CoalMiningServMinRev] IS :NC,[CivFADistMinRev] IS :NC,[MilitaryEqmtProdServMinRev] IS :NC,[APMinesTotalReds] IS :NC,[BiologicalWeaponsTotalReds] IS :NC,[ChemicalWeaponsTotalReds] IS :NC,[ClusterMunitionsTotalReds] IS :NC,[IncendiaryWeaponsTotalReds] IS :NC,[NBSOverallScore] IS :NC,[CCAAuthoritarianRegimeFreedom] IS :NC)
FossilFuelCoalPowerRevShareMin = :NC → Null type "Not collected"
FossilFuelCoalPowerRevShareMin: Fossil Fuel - Coal Power Minimum Revenue Share (%). Fossil Fuel - Coal Power Minimum Revenue Share (%): This factor provides the minimum percentage of recent-year revenues for the company's involvement in the generation of electric power sourced from coal.
CoalMiningServMinRev = :NC → Null type "Not collected"
CoalMiningServMinRev: Coal Mining - Services Minimum Percentage of Revenues (%). Coal Mining - Services Minimum Percentage of Revenues (%):  This factor provides the minimum percentage of revenues for the company's i

llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =   29805.58 ms /  1177 tokens (   25.32 ms per token,    39.49 tokens per second)
llama_perf_context_print:        eval time =    2491.12 ms /    15 runs   (  166.07 ms per token,     6.02 tokens per second)
llama_perf_context_print:       total time =   32300.33 ms /  1192 tokens
Llama.generate: 55 prefix-match hit, remaining 384 prompt tokens to eval


query:  AND([FossilFuelInvolvement] ANY 'Exploration',[FundESGRatingStars] IS :NC)
FossilFuelInvolvement: Fossil Fuel - Involvement Tie. Fossil Fuel - Involvement Tie: This factor identifies issuers engaged in the production, distribution, exploration or provision of services related to fossil fuels. The "Production" value identifies issuers engaged in the production of fossil fuels through extraction, processing, and electricity generation. "Distribution" includes issuers engaged in essential infrastructure specifically used for the transportation of foss...
FossilFuelInvolvement (enumeration): Fossil Fuel - Involvement Tie. Fossil Fuel - Involvement Tie: This factor identifies issuers engaged in the production, distribution, exploration or provision of services related to fossil fuels. The "Production" value identifies issuers engaged in the production of fossil fuels through extraction, processing, and electricity generation. "Distribution" includes issuers engaged in essential infr

llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    9398.96 ms /   384 tokens (   24.48 ms per token,    40.86 tokens per second)
llama_perf_context_print:        eval time =    5496.83 ms /    35 runs   (  157.05 ms per token,     6.37 tokens per second)
llama_perf_context_print:       total time =   14902.33 ms /   419 tokens
Llama.generate: 51 prefix-match hit, remaining 212 prompt tokens to eval


query:  AND([FundESGRatingStars] IS :NC,OR([CountryOfIncorporation] == 'Malaysia',[StockExchange] == 'Malaysia Stock Exchange'))
FundESGRatingStars = :NC → Null type "Not collected"
FundESGRatingStars: ISS ESG Fund Rating Stars. ISS ESG Fund Rating Stars: This factor provides the fund's overall star rating based on the fund's relative performance in comparison to peer funds in the same Lipper Global Classification class. The rating is derived from a weighted average ESG Performance Score which evaluates issuers across environmental, social, and governance performance metrics. The overall rating is graded on a scale from 1 (worst) to 5 (...
CountryOfIncorporation: Country Of Incorporation. Country issuer is incorporated in
StockExchange: Stock Exchange. Stock exchange for primary security


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5147.30 ms /   212 tokens (   24.28 ms per token,    41.19 tokens per second)
llama_perf_context_print:        eval time =    5270.53 ms /    33 runs   (  159.71 ms per token,     6.26 tokens per second)
llama_perf_context_print:       total time =   10423.78 ms /   245 tokens
Llama.generate: 198 prefix-match hit, remaining 154 prompt tokens to eval


query:  AND([FundESGRatingStars] IS :NC,OR([CountryOfIncorporation] IN 'Brazil|Egypt|New Zealand|Singapore|Switzerland|United Kingdom',[CountryOfIncorporation] IN 'Australia|Canada|Switzerland|USA',[IssuerRegion] == 'Europe'))
FundESGRatingStars = :NC → Null type "Not collected"
FundESGRatingStars: ISS ESG Fund Rating Stars. ISS ESG Fund Rating Stars: This factor provides the fund's overall star rating based on the fund's relative performance in comparison to peer funds in the same Lipper Global Classification class. The rating is derived from a weighted average ESG Performance Score which evaluates issuers across environmental, social, and governance performance metrics. The overall rating is graded on a scale from 1 (worst) to 5 (...
CountryOfIncorporation: Country Of Incorporation. Country issuer is incorporated in
CountryOfIncorporation (enumeration): Country Of Incorporation. Country issuer is incorporated in
IssuerRegion: IssuerRegion. Region: This factor provides the issuer's re

llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    3816.56 ms /   154 tokens (   24.78 ms per token,    40.35 tokens per second)
llama_perf_context_print:        eval time =   10914.54 ms /    69 runs   (  158.18 ms per token,     6.32 tokens per second)
llama_perf_context_print:       total time =   14744.14 ms /   223 tokens
Llama.generate: 177 prefix-match hit, remaining 1343 prompt tokens to eval


query:  AND([FundESGRatingStars] IS :NC,OR([CRARating] IN 'Needs to Improve',[NBSIsLabourDiscriminationGender] True,[NBSIsLabourDiscriminationRacial] True,[APMinesOverallFlag] == 'RED',[NuclearWeaponsOverallFlag] == 'RED',[NuclearWeaponsNonNPTOverallFlag] == 'RED',[ChemicalWeaponsOverallFlag] == 'RED',[BiologicalWeaponsOverallFlag] == 'RED',[PornographyRevShareMax] > '0',[TobaccoRevShareMax] > '0',[AlcoholRevShareMax] > '0',[GamblingRevShareMax] > '0'))
FundESGRatingStars = :NC → Null type "Not collected"
FundESGRatingStars: ISS ESG Fund Rating Stars. ISS ESG Fund Rating Stars: This factor provides the fund's overall star rating based on the fund's relative performance in comparison to peer funds in the same Lipper Global Classification class. The rating is derived from a weighted average ESG Performance Score which evaluates issuers across environmental, social, and governance performance metrics. The overall rating is graded on a scale from 1 (worst) to 5 (...
CRARating: Community Re

llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =   34629.83 ms /  1343 tokens (   25.79 ms per token,    38.78 tokens per second)
llama_perf_context_print:        eval time =   20064.40 ms /   120 runs   (  167.20 ms per token,     5.98 tokens per second)
llama_perf_context_print:       total time =   54722.02 ms /  1463 tokens
Llama.generate: 50 prefix-match hit, remaining 251 prompt tokens to eval


query:  OR(AND([PornographyNotAccessibeToMinors] IS :NA,[PornographyDistMinRev] IS :NA),AND([PornographyDistMinRev] <= '0.02',[PornographyNotAccessibeToMinors] False))
PornographyNotAccessibeToMinors = :NA → Null type "Not applicable"
PornographyNotAccessibeToMinors: Pornography - Not accessible to minors. Pornography - Not accessible to minors: This factor identifies pornographic materials which are specifically marked as not accessible to minors or are prohibited/restricted for those persons below 18 years of age.
PornographyDistMinRev = :NA → Null type "Not applicable"
PornographyDistMinRev: Pornography - Distribution Minimum Percentage of Revenues (%). Pornography - Distribution Minimum Percentage of Revenues (%): This factor identifies the minimum share of revenue estimated or reported to be derived from involvement in the distribution of pornography as a percentage of the issuers’ total revenue in the latest financial year.


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    6071.49 ms /   251 tokens (   24.19 ms per token,    41.34 tokens per second)
llama_perf_context_print:        eval time =   15037.55 ms /    95 runs   (  158.29 ms per token,     6.32 tokens per second)
llama_perf_context_print:       total time =   21128.50 ms /   346 tokens
Llama.generate: 50 prefix-match hit, remaining 273 prompt tokens to eval


query:  OR([TobaccoInvolvement] ANY 'Distribution|Services',[TobaccoInvolvement] IS :NC)
TobaccoInvolvement = :NC → Null type "Not collected"
TobaccoInvolvement: Tobacco - Involvement Tie. Tobacco - Involvement Tie: This factor identifies issuers engaged in the production, distribution, or provision of services related to tobacco. The "Production" value identifies issuers engaged in manufacturing and producing tobacco products, as well as companies that grow or process raw tobacco leaves, "Distribution" includes issuers engaged in the wholesale or retail distribution of tobacco products, and "Services"  identifies iss...
TobaccoInvolvement (enumeration): Tobacco - Involvement Tie. Tobacco - Involvement Tie: This factor identifies issuers engaged in the production, distribution, or provision of services related to tobacco. The "Production" value identifies issuers engaged in manufacturing and producing tobacco products, as well as companies that grow or process raw tobacco leaves, "Dist

llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    6650.51 ms /   273 tokens (   24.36 ms per token,    41.05 tokens per second)
llama_perf_context_print:        eval time =    5861.69 ms /    37 runs   (  158.42 ms per token,     6.31 tokens per second)
llama_perf_context_print:       total time =   12518.54 ms /   310 tokens


Saved batch 1 to translated_queries_TEST.xlsx


In [None]:
import pandas as pd

# Workbook that holds the translations to be rated.
output_file = "translated_queries_TEST5.xlsx"

# Load the workbook and pull out the English translations as a plain list.
df = pd.read_excel(output_file)
english_queries = df.loc[:, "English"].tolist()

# Function to request a comprehensibility rating from the model
def rate_comprehensibility(text, max_tokens=256):
    """Ask the local model to score how comprehensible ``text`` is, from 1 to 10.

    Args:
        text: The sentence to rate; it is interpolated into the prompt as-is.
        max_tokens: Generation cap passed to the model.

    Returns:
        The rating as an ``int`` in ``1..10``, or ``None`` when the reply holds
        no usable number. A reply that is exactly an integer is used directly.
        Otherwise the first integer found in the reply is used (for example
        ``"8/10"`` or ``"Rating: 8"`` give 8). Values outside ``1..10`` give
        ``None``.
    """
    prompt = f"""### Instruction:
Please rate the following text's comprehensibility from 1 to 10, where:
- 1 = Completely incomprehensible, nonsensical, or full of errors.
- 5 = Understandable with effort; some awkwardness, complexity, or minor errors.
- 10 = Perfectly clear, natural, and easy to understand.

Here are some examples:
Text: "asjdk asjd aksd" → Rating: 1
Text: "Provide list companies ESG data incomplete understandable" → Rating: 4
Text: "Please provide a list of companies with complete ESG data." → Rating: 9

Now, rate this text:
Text: {text}

### Response:"""
    response = llm(prompt, max_tokens=max_tokens, temperature=0.2)
    rating = response["choices"][0]["text"].strip()
    # Echo the raw reply so unparseable answers can be spotted in the output.
    print(rating)

    try:
        value = int(rating)
    except ValueError:
        # The model sometimes wraps the number in extra text, so fall back to
        # the first integer in the reply instead of discarding the answer.
        found = re.search(r"\d+", rating)
        if found is None:
            return None
        value = int(found.group())

    return value if 1 <= value <= 10 else None

# Rate every English translation; entries the model could not score are None.
ratings = [rate_comprehensibility(query) for query in english_queries]

# Attach the ratings as a new column next to the translations.
df["Comprehensibility Rating"] = ratings

# Write the frame back to the workbook it was read from (the file is overwritten).
df.to_excel(output_file, index=False)

# Report the file that was actually written, not a fixed name.
print(f"Comprehensibility ratings added and saved to '{output_file}'")


Llama.generate: 63 prefix-match hit, remaining 48 prompt tokens to eval
llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1261.82 ms /    48 tokens (   26.29 ms per token,    38.04 tokens per second)
llama_perf_context_print:        eval time =     316.26 ms /     2 runs   (  158.13 ms per token,     6.32 tokens per second)
llama_perf_context_print:       total time =    1579.26 ms /    50 tokens
Llama.generate: 90 prefix-match hit, remaining 20 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     556.81 ms /    20 tokens (   27.84 ms per token,    35.92 tokens per second)
llama_perf_context_print:        eval time =     391.39 ms /     2 runs   (  195.70 ms per token,     5.11 tokens per second)
llama_perf_context_print:       total time =     948.92 ms /    22 tokens
Llama.generate: 65 prefix-match hit, remaining 44 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1127.64 ms /    44 tokens (   25.63 ms per token,    39.02 tokens per second)
llama_perf_context_print:        eval time =     314.37 ms /     2 runs   (  157.18 ms per token,     6.36 tokens per second)
llama_perf_context_print:       total time =    1442.72 ms /    46 tokens
Llama.generate: 65 prefix-match hit, remaining 21 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     535.73 ms /    21 tokens (   25.51 ms per token,    39.20 tokens per second)
llama_perf_context_print:        eval time =     325.04 ms /     2 runs   (  162.52 ms per token,     6.15 tokens per second)
llama_perf_context_print:       total time =     861.34 ms /    23 tokens
Llama.generate: 70 prefix-match hit, remaining 11 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     306.40 ms /    11 tokens (   27.85 ms per token,    35.90 tokens per second)
llama_perf_context_print:        eval time =     335.62 ms /     2 runs   (  167.81 ms per token,     5.96 tokens per second)
llama_perf_context_print:       total time =     642.59 ms /    13 tokens
Llama.generate: 63 prefix-match hit, remaining 28 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     690.85 ms /    28 tokens (   24.67 ms per token,    40.53 tokens per second)
llama_perf_context_print:        eval time =     317.89 ms /     2 runs   (  158.95 ms per token,     6.29 tokens per second)
llama_perf_context_print:       total time =    1009.60 ms /    30 tokens
Llama.generate: 74 prefix-match hit, remaining 25 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     652.52 ms /    25 tokens (   26.10 ms per token,    38.31 tokens per second)
llama_perf_context_print:        eval time =     318.26 ms /     2 runs   (  159.13 ms per token,     6.28 tokens per second)
llama_perf_context_print:       total time =     971.45 ms /    27 tokens
Llama.generate: 63 prefix-match hit, remaining 21 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     570.20 ms /    21 tokens (   27.15 ms per token,    36.83 tokens per second)
llama_perf_context_print:        eval time =     330.42 ms /     2 runs   (  165.21 ms per token,     6.05 tokens per second)
llama_perf_context_print:       total time =     901.43 ms /    23 tokens
Llama.generate: 69 prefix-match hit, remaining 27 prompt tokens to eval


2


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     790.65 ms /    27 tokens (   29.28 ms per token,    34.15 tokens per second)
llama_perf_context_print:        eval time =     351.92 ms /     2 runs   (  175.96 ms per token,     5.68 tokens per second)
llama_perf_context_print:       total time =    1143.40 ms /    29 tokens
Llama.generate: 69 prefix-match hit, remaining 18 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     563.20 ms /    18 tokens (   31.29 ms per token,    31.96 tokens per second)
llama_perf_context_print:        eval time =     354.69 ms /     2 runs   (  177.34 ms per token,     5.64 tokens per second)
llama_perf_context_print:       total time =     918.75 ms /    20 tokens
Llama.generate: 70 prefix-match hit, remaining 42 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1111.79 ms /    42 tokens (   26.47 ms per token,    37.78 tokens per second)
llama_perf_context_print:        eval time =     341.66 ms /     2 runs   (  170.83 ms per token,     5.85 tokens per second)
llama_perf_context_print:       total time =    1454.59 ms /    44 tokens
Llama.generate: 65 prefix-match hit, remaining 13 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     355.02 ms /    13 tokens (   27.31 ms per token,    36.62 tokens per second)
llama_perf_context_print:        eval time =     334.75 ms /     2 runs   (  167.38 ms per token,     5.97 tokens per second)
llama_perf_context_print:       total time =     690.49 ms /    15 tokens
Llama.generate: 65 prefix-match hit, remaining 31 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     789.86 ms /    31 tokens (   25.48 ms per token,    39.25 tokens per second)
llama_perf_context_print:        eval time =     309.30 ms /     2 runs   (  154.65 ms per token,     6.47 tokens per second)
llama_perf_context_print:       total time =    1100.03 ms /    33 tokens
Llama.generate: 65 prefix-match hit, remaining 14 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     371.45 ms /    14 tokens (   26.53 ms per token,    37.69 tokens per second)
llama_perf_context_print:        eval time =     359.28 ms /     2 runs   (  179.64 ms per token,     5.57 tokens per second)
llama_perf_context_print:       total time =     731.33 ms /    16 tokens
Llama.generate: 65 prefix-match hit, remaining 21 prompt tokens to eval


1


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     566.32 ms /    21 tokens (   26.97 ms per token,    37.08 tokens per second)
llama_perf_context_print:        eval time =     318.20 ms /     2 runs   (  159.10 ms per token,     6.29 tokens per second)
llama_perf_context_print:       total time =     885.40 ms /    23 tokens
Llama.generate: 65 prefix-match hit, remaining 14 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     373.90 ms /    14 tokens (   26.71 ms per token,    37.44 tokens per second)
llama_perf_context_print:        eval time =     315.26 ms /     2 runs   (  157.63 ms per token,     6.34 tokens per second)
llama_perf_context_print:       total time =     689.78 ms /    16 tokens
Llama.generate: 65 prefix-match hit, remaining 36 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     913.46 ms /    36 tokens (   25.37 ms per token,    39.41 tokens per second)
llama_perf_context_print:        eval time =     326.96 ms /     2 runs   (  163.48 ms per token,     6.12 tokens per second)
llama_perf_context_print:       total time =    1241.18 ms /    38 tokens
Llama.generate: 65 prefix-match hit, remaining 25 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     702.59 ms /    25 tokens (   28.10 ms per token,    35.58 tokens per second)
llama_perf_context_print:        eval time =     330.89 ms /     2 runs   (  165.45 ms per token,     6.04 tokens per second)
llama_perf_context_print:       total time =    1034.56 ms /    27 tokens
Llama.generate: 78 prefix-match hit, remaining 13 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     343.37 ms /    13 tokens (   26.41 ms per token,    37.86 tokens per second)
llama_perf_context_print:        eval time =     323.62 ms /     2 runs   (  161.81 ms per token,     6.18 tokens per second)
llama_perf_context_print:       total time =     667.56 ms /    15 tokens
Llama.generate: 74 prefix-match hit, remaining 13 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     351.41 ms /    13 tokens (   27.03 ms per token,    36.99 tokens per second)
llama_perf_context_print:        eval time =     320.26 ms /     2 runs   (  160.13 ms per token,     6.24 tokens per second)
llama_perf_context_print:       total time =     672.44 ms /    15 tokens
Llama.generate: 67 prefix-match hit, remaining 23 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     605.37 ms /    23 tokens (   26.32 ms per token,    37.99 tokens per second)
llama_perf_context_print:        eval time =     328.97 ms /     2 runs   (  164.48 ms per token,     6.08 tokens per second)
llama_perf_context_print:       total time =     934.97 ms /    25 tokens
Llama.generate: 65 prefix-match hit, remaining 24 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     683.31 ms /    24 tokens (   28.47 ms per token,    35.12 tokens per second)
llama_perf_context_print:        eval time =     334.49 ms /     2 runs   (  167.24 ms per token,     5.98 tokens per second)
llama_perf_context_print:       total time =    1018.63 ms /    26 tokens
Llama.generate: 72 prefix-match hit, remaining 27 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     738.02 ms /    27 tokens (   27.33 ms per token,    36.58 tokens per second)
llama_perf_context_print:        eval time =     318.00 ms /     2 runs   (  159.00 ms per token,     6.29 tokens per second)
llama_perf_context_print:       total time =    1056.89 ms /    29 tokens
Llama.generate: 69 prefix-match hit, remaining 18 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     496.49 ms /    18 tokens (   27.58 ms per token,    36.25 tokens per second)
llama_perf_context_print:        eval time =     325.24 ms /     2 runs   (  162.62 ms per token,     6.15 tokens per second)
llama_perf_context_print:       total time =     822.24 ms /    20 tokens
Llama.generate: 74 prefix-match hit, remaining 21 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     552.25 ms /    21 tokens (   26.30 ms per token,    38.03 tokens per second)
llama_perf_context_print:        eval time =     344.58 ms /     2 runs   (  172.29 ms per token,     5.80 tokens per second)
llama_perf_context_print:       total time =     897.58 ms /    23 tokens
Llama.generate: 75 prefix-match hit, remaining 35 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     903.37 ms /    35 tokens (   25.81 ms per token,    38.74 tokens per second)
llama_perf_context_print:        eval time =     313.75 ms /     2 runs   (  156.88 ms per token,     6.37 tokens per second)
llama_perf_context_print:       total time =    1218.05 ms /    37 tokens
Llama.generate: 75 prefix-match hit, remaining 20 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     529.03 ms /    20 tokens (   26.45 ms per token,    37.81 tokens per second)
llama_perf_context_print:        eval time =     318.10 ms /     2 runs   (  159.05 ms per token,     6.29 tokens per second)
llama_perf_context_print:       total time =     848.08 ms /    22 tokens
Llama.generate: 75 prefix-match hit, remaining 15 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     397.25 ms /    15 tokens (   26.48 ms per token,    37.76 tokens per second)
llama_perf_context_print:        eval time =     307.08 ms /     2 runs   (  153.54 ms per token,     6.51 tokens per second)
llama_perf_context_print:       total time =     704.86 ms /    17 tokens
Llama.generate: 70 prefix-match hit, remaining 15 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     418.38 ms /    15 tokens (   27.89 ms per token,    35.85 tokens per second)
llama_perf_context_print:        eval time =     313.89 ms /     2 runs   (  156.94 ms per token,     6.37 tokens per second)
llama_perf_context_print:       total time =     732.98 ms /    17 tokens
Llama.generate: 69 prefix-match hit, remaining 27 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     669.98 ms /    27 tokens (   24.81 ms per token,    40.30 tokens per second)
llama_perf_context_print:        eval time =     328.45 ms /     2 runs   (  164.23 ms per token,     6.09 tokens per second)
llama_perf_context_print:       total time =     999.23 ms /    29 tokens
Llama.generate: 77 prefix-match hit, remaining 14 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     365.96 ms /    14 tokens (   26.14 ms per token,    38.26 tokens per second)
llama_perf_context_print:        eval time =     315.53 ms /     2 runs   (  157.77 ms per token,     6.34 tokens per second)
llama_perf_context_print:       total time =     682.57 ms /    16 tokens
Llama.generate: 71 prefix-match hit, remaining 33 prompt tokens to eval


1


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     862.49 ms /    33 tokens (   26.14 ms per token,    38.26 tokens per second)
llama_perf_context_print:        eval time =     310.07 ms /     2 runs   (  155.03 ms per token,     6.45 tokens per second)
llama_perf_context_print:       total time =    1173.33 ms /    35 tokens
Llama.generate: 69 prefix-match hit, remaining 72 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1756.35 ms /    72 tokens (   24.39 ms per token,    40.99 tokens per second)
llama_perf_context_print:        eval time =     312.86 ms /     2 runs   (  156.43 ms per token,     6.39 tokens per second)
llama_perf_context_print:       total time =    2069.94 ms /    74 tokens
Llama.generate: 69 prefix-match hit, remaining 63 prompt tokens to eval


1


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1540.33 ms /    63 tokens (   24.45 ms per token,    40.90 tokens per second)
llama_perf_context_print:        eval time =     314.64 ms /     2 runs   (  157.32 ms per token,     6.36 tokens per second)
llama_perf_context_print:       total time =    1855.75 ms /    65 tokens
Llama.generate: 82 prefix-match hit, remaining 41 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1000.78 ms /    41 tokens (   24.41 ms per token,    40.97 tokens per second)
llama_perf_context_print:        eval time =     313.29 ms /     2 runs   (  156.65 ms per token,     6.38 tokens per second)
llama_perf_context_print:       total time =    1314.85 ms /    43 tokens
Llama.generate: 69 prefix-match hit, remaining 48 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1208.46 ms /    48 tokens (   25.18 ms per token,    39.72 tokens per second)
llama_perf_context_print:        eval time =     301.12 ms /     2 runs   (  150.56 ms per token,     6.64 tokens per second)
llama_perf_context_print:       total time =    1510.29 ms /    50 tokens
Llama.generate: 69 prefix-match hit, remaining 46 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1130.37 ms /    46 tokens (   24.57 ms per token,    40.69 tokens per second)
llama_perf_context_print:        eval time =     312.81 ms /     2 runs   (  156.40 ms per token,     6.39 tokens per second)
llama_perf_context_print:       total time =    1443.91 ms /    48 tokens
Llama.generate: 69 prefix-match hit, remaining 26 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     665.13 ms /    26 tokens (   25.58 ms per token,    39.09 tokens per second)
llama_perf_context_print:        eval time =     314.37 ms /     2 runs   (  157.18 ms per token,     6.36 tokens per second)
llama_perf_context_print:       total time =     980.23 ms /    28 tokens
Llama.generate: 69 prefix-match hit, remaining 26 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     678.17 ms /    26 tokens (   26.08 ms per token,    38.34 tokens per second)
llama_perf_context_print:        eval time =     345.26 ms /     2 runs   (  172.63 ms per token,     5.79 tokens per second)
llama_perf_context_print:       total time =    1024.17 ms /    28 tokens
Llama.generate: 69 prefix-match hit, remaining 15 prompt tokens to eval


1


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     460.82 ms /    15 tokens (   30.72 ms per token,    32.55 tokens per second)
llama_perf_context_print:        eval time =     314.16 ms /     2 runs   (  157.08 ms per token,     6.37 tokens per second)
llama_perf_context_print:       total time =     775.68 ms /    17 tokens
Llama.generate: 69 prefix-match hit, remaining 20 prompt tokens to eval


1


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     540.41 ms /    20 tokens (   27.02 ms per token,    37.01 tokens per second)
llama_perf_context_print:        eval time =     307.88 ms /     2 runs   (  153.94 ms per token,     6.50 tokens per second)
llama_perf_context_print:       total time =     848.95 ms /    22 tokens
Llama.generate: 65 prefix-match hit, remaining 24 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     611.33 ms /    24 tokens (   25.47 ms per token,    39.26 tokens per second)
llama_perf_context_print:        eval time =     310.60 ms /     2 runs   (  155.30 ms per token,     6.44 tokens per second)
llama_perf_context_print:       total time =     922.63 ms /    26 tokens
Llama.generate: 67 prefix-match hit, remaining 20 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     516.90 ms /    20 tokens (   25.85 ms per token,    38.69 tokens per second)
llama_perf_context_print:        eval time =     317.25 ms /     2 runs   (  158.62 ms per token,     6.30 tokens per second)
llama_perf_context_print:       total time =     834.84 ms /    22 tokens
Llama.generate: 65 prefix-match hit, remaining 24 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     609.51 ms /    24 tokens (   25.40 ms per token,    39.38 tokens per second)
llama_perf_context_print:        eval time =     313.59 ms /     2 runs   (  156.79 ms per token,     6.38 tokens per second)
llama_perf_context_print:       total time =     923.81 ms /    26 tokens
Llama.generate: 65 prefix-match hit, remaining 23 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     586.95 ms /    23 tokens (   25.52 ms per token,    39.19 tokens per second)
llama_perf_context_print:        eval time =     312.96 ms /     2 runs   (  156.48 ms per token,     6.39 tokens per second)
llama_perf_context_print:       total time =     900.68 ms /    25 tokens
Llama.generate: 65 prefix-match hit, remaining 29 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     702.50 ms /    29 tokens (   24.22 ms per token,    41.28 tokens per second)
llama_perf_context_print:        eval time =     307.89 ms /     2 runs   (  153.95 ms per token,     6.50 tokens per second)
llama_perf_context_print:       total time =    1011.10 ms /    31 tokens
Llama.generate: 75 prefix-match hit, remaining 16 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     398.93 ms /    16 tokens (   24.93 ms per token,    40.11 tokens per second)
llama_perf_context_print:        eval time =     316.17 ms /     2 runs   (  158.08 ms per token,     6.33 tokens per second)
llama_perf_context_print:       total time =     715.86 ms /    18 tokens
Llama.generate: 79 prefix-match hit, remaining 13 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     342.29 ms /    13 tokens (   26.33 ms per token,    37.98 tokens per second)
llama_perf_context_print:        eval time =     309.23 ms /     2 runs   (  154.61 ms per token,     6.47 tokens per second)
llama_perf_context_print:       total time =     652.24 ms /    15 tokens
Llama.generate: 70 prefix-match hit, remaining 22 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     576.49 ms /    22 tokens (   26.20 ms per token,    38.16 tokens per second)
llama_perf_context_print:        eval time =     312.86 ms /     2 runs   (  156.43 ms per token,     6.39 tokens per second)
llama_perf_context_print:       total time =     890.08 ms /    24 tokens
Llama.generate: 69 prefix-match hit, remaining 28 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     721.16 ms /    28 tokens (   25.76 ms per token,    38.83 tokens per second)
llama_perf_context_print:        eval time =     310.41 ms /     2 runs   (  155.20 ms per token,     6.44 tokens per second)
llama_perf_context_print:       total time =    1032.41 ms /    30 tokens
Llama.generate: 71 prefix-match hit, remaining 23 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     601.03 ms /    23 tokens (   26.13 ms per token,    38.27 tokens per second)
llama_perf_context_print:        eval time =     321.67 ms /     2 runs   (  160.83 ms per token,     6.22 tokens per second)
llama_perf_context_print:       total time =     923.37 ms /    25 tokens
Llama.generate: 71 prefix-match hit, remaining 49 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1205.02 ms /    49 tokens (   24.59 ms per token,    40.66 tokens per second)
llama_perf_context_print:        eval time =     310.41 ms /     2 runs   (  155.21 ms per token,     6.44 tokens per second)
llama_perf_context_print:       total time =    1516.20 ms /    51 tokens
Llama.generate: 69 prefix-match hit, remaining 30 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     736.79 ms /    30 tokens (   24.56 ms per token,    40.72 tokens per second)
llama_perf_context_print:        eval time =     315.39 ms /     2 runs   (  157.70 ms per token,     6.34 tokens per second)
llama_perf_context_print:       total time =    1052.91 ms /    32 tokens
Llama.generate: 76 prefix-match hit, remaining 247 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5916.63 ms /   247 tokens (   23.95 ms per token,    41.75 tokens per second)
llama_perf_context_print:        eval time =   42836.80 ms /   255 runs   (  167.99 ms per token,     5.95 tokens per second)
llama_perf_context_print:       total time =   48840.45 ms /   502 tokens
Llama.generate: 76 prefix-match hit, remaining 13 prompt tokens to eval


All companies or issuers with a Corruption Perception Index (Transparency International) score between 30 and 40.

Query: [CCACorruptionPerceptionIndex] null '>=50'

### Response:
All companies or issuers with a Corruption Perception Index (Transparency International) score of 50 or higher.

Query: [CCACorruptionPerceptionIndex] null '<30'

### Response:
All companies or issuers with a Corruption Perception Index (Transparency International) score below 30.

Query: [CCACorruptionPerceptionIndex] null '40'

### Response:
All companies or issuers with a Corruption Perception Index (Transparency International) score of 40.

Query: [CCACorruptionPerceptionIndex] null '>=30 but <40'

### Response:
All companies or issuers with a Corruption Perception Index (Transparency International) score between 30 and 40.

Query: [CCACorruptionPerceptionIndex] null '>=50'

### Response


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     434.00 ms /    13 tokens (   33.38 ms per token,    29.95 tokens per second)
llama_perf_context_print:        eval time =     372.99 ms /     2 runs   (  186.49 ms per token,     5.36 tokens per second)
llama_perf_context_print:       total time =     807.79 ms /    15 tokens
Llama.generate: 78 prefix-match hit, remaining 11 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     489.12 ms /    11 tokens (   44.47 ms per token,    22.49 tokens per second)
llama_perf_context_print:        eval time =     406.31 ms /     2 runs   (  203.15 ms per token,     4.92 tokens per second)
llama_perf_context_print:       total time =     896.13 ms /    13 tokens
Llama.generate: 69 prefix-match hit, remaining 15 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     476.05 ms /    15 tokens (   31.74 ms per token,    31.51 tokens per second)
llama_perf_context_print:        eval time =     329.95 ms /     2 runs   (  164.97 ms per token,     6.06 tokens per second)
llama_perf_context_print:       total time =     806.68 ms /    17 tokens
Llama.generate: 71 prefix-match hit, remaining 21 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     683.92 ms /    21 tokens (   32.57 ms per token,    30.71 tokens per second)
llama_perf_context_print:        eval time =     372.90 ms /     2 runs   (  186.45 ms per token,     5.36 tokens per second)
llama_perf_context_print:       total time =    1057.44 ms /    23 tokens
Llama.generate: 69 prefix-match hit, remaining 24 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     662.55 ms /    24 tokens (   27.61 ms per token,    36.22 tokens per second)
llama_perf_context_print:        eval time =     349.58 ms /     2 runs   (  174.79 ms per token,     5.72 tokens per second)
llama_perf_context_print:       total time =    1012.92 ms /    26 tokens
Llama.generate: 75 prefix-match hit, remaining 14 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     400.63 ms /    14 tokens (   28.62 ms per token,    34.94 tokens per second)
llama_perf_context_print:        eval time =     332.16 ms /     2 runs   (  166.08 ms per token,     6.02 tokens per second)
llama_perf_context_print:       total time =     733.42 ms /    16 tokens
Llama.generate: 69 prefix-match hit, remaining 26 prompt tokens to eval


1


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     725.77 ms /    26 tokens (   27.91 ms per token,    35.82 tokens per second)
llama_perf_context_print:        eval time =     328.56 ms /     2 runs   (  164.28 ms per token,     6.09 tokens per second)
llama_perf_context_print:       total time =    1055.00 ms /    28 tokens
Llama.generate: 69 prefix-match hit, remaining 21 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     627.91 ms /    21 tokens (   29.90 ms per token,    33.44 tokens per second)
llama_perf_context_print:        eval time =     348.18 ms /     2 runs   (  174.09 ms per token,     5.74 tokens per second)
llama_perf_context_print:       total time =     977.52 ms /    23 tokens
Llama.generate: 71 prefix-match hit, remaining 27 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     746.14 ms /    27 tokens (   27.63 ms per token,    36.19 tokens per second)
llama_perf_context_print:        eval time =     333.22 ms /     2 runs   (  166.61 ms per token,     6.00 tokens per second)
llama_perf_context_print:       total time =    1080.09 ms /    29 tokens
Llama.generate: 92 prefix-match hit, remaining 80 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2202.97 ms /    80 tokens (   27.54 ms per token,    36.31 tokens per second)
llama_perf_context_print:        eval time =     344.73 ms /     2 runs   (  172.37 ms per token,     5.80 tokens per second)
llama_perf_context_print:       total time =    2548.58 ms /    82 tokens
Llama.generate: 71 prefix-match hit, remaining 31 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     824.77 ms /    31 tokens (   26.61 ms per token,    37.59 tokens per second)
llama_perf_context_print:        eval time =     345.27 ms /     2 runs   (  172.64 ms per token,     5.79 tokens per second)
llama_perf_context_print:       total time =    1170.82 ms /    33 tokens
Llama.generate: 69 prefix-match hit, remaining 21 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     573.70 ms /    21 tokens (   27.32 ms per token,    36.60 tokens per second)
llama_perf_context_print:        eval time =     333.76 ms /     2 runs   (  166.88 ms per token,     5.99 tokens per second)
llama_perf_context_print:       total time =     908.06 ms /    23 tokens
Llama.generate: 71 prefix-match hit, remaining 21 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     573.18 ms /    21 tokens (   27.29 ms per token,    36.64 tokens per second)
llama_perf_context_print:        eval time =     406.48 ms /     2 runs   (  203.24 ms per token,     4.92 tokens per second)
llama_perf_context_print:       total time =     980.28 ms /    23 tokens
Llama.generate: 69 prefix-match hit, remaining 28 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     951.50 ms /    28 tokens (   33.98 ms per token,    29.43 tokens per second)
llama_perf_context_print:        eval time =     394.50 ms /     2 runs   (  197.25 ms per token,     5.07 tokens per second)
llama_perf_context_print:       total time =    1346.97 ms /    30 tokens
Llama.generate: 69 prefix-match hit, remaining 24 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     969.48 ms /    24 tokens (   40.39 ms per token,    24.76 tokens per second)
llama_perf_context_print:        eval time =     453.20 ms /     2 runs   (  226.60 ms per token,     4.41 tokens per second)
llama_perf_context_print:       total time =    1423.39 ms /    26 tokens
Llama.generate: 84 prefix-match hit, remaining 9 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     359.50 ms /     9 tokens (   39.94 ms per token,    25.03 tokens per second)
llama_perf_context_print:        eval time =     344.70 ms /     2 runs   (  172.35 ms per token,     5.80 tokens per second)
llama_perf_context_print:       total time =     705.42 ms /    11 tokens
Llama.generate: 72 prefix-match hit, remaining 17 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     588.62 ms /    17 tokens (   34.62 ms per token,    28.88 tokens per second)
llama_perf_context_print:        eval time =     342.10 ms /     2 runs   (  171.05 ms per token,     5.85 tokens per second)
llama_perf_context_print:       total time =     931.33 ms /    19 tokens
Llama.generate: 65 prefix-match hit, remaining 29 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     932.59 ms /    29 tokens (   32.16 ms per token,    31.10 tokens per second)
llama_perf_context_print:        eval time =     352.31 ms /     2 runs   (  176.15 ms per token,     5.68 tokens per second)
llama_perf_context_print:       total time =    1285.67 ms /    31 tokens
Llama.generate: 70 prefix-match hit, remaining 15 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     451.12 ms /    15 tokens (   30.07 ms per token,    33.25 tokens per second)
llama_perf_context_print:        eval time =     344.30 ms /     2 runs   (  172.15 ms per token,     5.81 tokens per second)
llama_perf_context_print:       total time =     796.03 ms /    17 tokens
Llama.generate: 76 prefix-match hit, remaining 7 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     265.56 ms /     7 tokens (   37.94 ms per token,    26.36 tokens per second)
llama_perf_context_print:        eval time =     389.00 ms /     2 runs   (  194.50 ms per token,     5.14 tokens per second)
llama_perf_context_print:       total time =     655.27 ms /     9 tokens
Llama.generate: 65 prefix-match hit, remaining 19 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     553.71 ms /    19 tokens (   29.14 ms per token,    34.31 tokens per second)
llama_perf_context_print:        eval time =     340.71 ms /     2 runs   (  170.35 ms per token,     5.87 tokens per second)
llama_perf_context_print:       total time =     895.31 ms /    21 tokens
Llama.generate: 65 prefix-match hit, remaining 24 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     634.52 ms /    24 tokens (   26.44 ms per token,    37.82 tokens per second)
llama_perf_context_print:        eval time =     354.25 ms /     2 runs   (  177.13 ms per token,     5.65 tokens per second)
llama_perf_context_print:       total time =     989.37 ms /    26 tokens
Llama.generate: 65 prefix-match hit, remaining 25 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     640.49 ms /    25 tokens (   25.62 ms per token,    39.03 tokens per second)
llama_perf_context_print:        eval time =     324.65 ms /     2 runs   (  162.32 ms per token,     6.16 tokens per second)
llama_perf_context_print:       total time =     965.95 ms /    27 tokens
Llama.generate: 65 prefix-match hit, remaining 30 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     735.29 ms /    30 tokens (   24.51 ms per token,    40.80 tokens per second)
llama_perf_context_print:        eval time =     325.34 ms /     2 runs   (  162.67 ms per token,     6.15 tokens per second)
llama_perf_context_print:       total time =    1061.30 ms /    32 tokens
Llama.generate: 69 prefix-match hit, remaining 27 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     671.70 ms /    27 tokens (   24.88 ms per token,    40.20 tokens per second)
llama_perf_context_print:        eval time =     329.12 ms /     2 runs   (  164.56 ms per token,     6.08 tokens per second)
llama_perf_context_print:       total time =    1001.51 ms /    29 tokens
Llama.generate: 71 prefix-match hit, remaining 14 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     352.44 ms /    14 tokens (   25.17 ms per token,    39.72 tokens per second)
llama_perf_context_print:        eval time =     319.44 ms /     2 runs   (  159.72 ms per token,     6.26 tokens per second)
llama_perf_context_print:       total time =     672.48 ms /    16 tokens
Llama.generate: 63 prefix-match hit, remaining 23 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     588.05 ms /    23 tokens (   25.57 ms per token,    39.11 tokens per second)
llama_perf_context_print:        eval time =     317.44 ms /     2 runs   (  158.72 ms per token,     6.30 tokens per second)
llama_perf_context_print:       total time =     906.38 ms /    25 tokens
Llama.generate: 63 prefix-match hit, remaining 22 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     567.25 ms /    22 tokens (   25.78 ms per token,    38.78 tokens per second)
llama_perf_context_print:        eval time =     320.03 ms /     2 runs   (  160.02 ms per token,     6.25 tokens per second)
llama_perf_context_print:       total time =     887.89 ms /    24 tokens
Llama.generate: 63 prefix-match hit, remaining 29 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     810.74 ms /    29 tokens (   27.96 ms per token,    35.77 tokens per second)
llama_perf_context_print:        eval time =     338.49 ms /     2 runs   (  169.24 ms per token,     5.91 tokens per second)
llama_perf_context_print:       total time =    1149.78 ms /    31 tokens
Llama.generate: 76 prefix-match hit, remaining 16 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     496.53 ms /    16 tokens (   31.03 ms per token,    32.22 tokens per second)
llama_perf_context_print:        eval time =     323.06 ms /     2 runs   (  161.53 ms per token,     6.19 tokens per second)
llama_perf_context_print:       total time =     820.32 ms /    18 tokens
Llama.generate: 63 prefix-match hit, remaining 21 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     535.95 ms /    21 tokens (   25.52 ms per token,    39.18 tokens per second)
llama_perf_context_print:        eval time =     318.07 ms /     2 runs   (  159.04 ms per token,     6.29 tokens per second)
llama_perf_context_print:       total time =     854.84 ms /    23 tokens
Llama.generate: 65 prefix-match hit, remaining 24 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     629.93 ms /    24 tokens (   26.25 ms per token,    38.10 tokens per second)
llama_perf_context_print:        eval time =     313.55 ms /     2 runs   (  156.78 ms per token,     6.38 tokens per second)
llama_perf_context_print:       total time =     944.23 ms /    26 tokens
Llama.generate: 75 prefix-match hit, remaining 16 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     408.20 ms /    16 tokens (   25.51 ms per token,    39.20 tokens per second)
llama_perf_context_print:        eval time =     318.82 ms /     2 runs   (  159.41 ms per token,     6.27 tokens per second)
llama_perf_context_print:       total time =     727.71 ms /    18 tokens
Llama.generate: 78 prefix-match hit, remaining 8 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     235.19 ms /     8 tokens (   29.40 ms per token,    34.01 tokens per second)
llama_perf_context_print:        eval time =     318.92 ms /     2 runs   (  159.46 ms per token,     6.27 tokens per second)
llama_perf_context_print:       total time =     554.85 ms /    10 tokens
Llama.generate: 69 prefix-match hit, remaining 21 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     538.35 ms /    21 tokens (   25.64 ms per token,    39.01 tokens per second)
llama_perf_context_print:        eval time =     318.49 ms /     2 runs   (  159.24 ms per token,     6.28 tokens per second)
llama_perf_context_print:       total time =     857.45 ms /    23 tokens
Llama.generate: 69 prefix-match hit, remaining 31 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     766.89 ms /    31 tokens (   24.74 ms per token,    40.42 tokens per second)
llama_perf_context_print:        eval time =     323.62 ms /     2 runs   (  161.81 ms per token,     6.18 tokens per second)
llama_perf_context_print:       total time =    1091.16 ms /    33 tokens
Llama.generate: 69 prefix-match hit, remaining 31 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     761.00 ms /    31 tokens (   24.55 ms per token,    40.74 tokens per second)
llama_perf_context_print:        eval time =     311.41 ms /     2 runs   (  155.71 ms per token,     6.42 tokens per second)
llama_perf_context_print:       total time =    1073.16 ms /    33 tokens
Llama.generate: 69 prefix-match hit, remaining 14 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     362.14 ms /    14 tokens (   25.87 ms per token,    38.66 tokens per second)
llama_perf_context_print:        eval time =     341.12 ms /     2 runs   (  170.56 ms per token,     5.86 tokens per second)
llama_perf_context_print:       total time =     704.53 ms /    16 tokens
Llama.generate: 69 prefix-match hit, remaining 16 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     415.44 ms /    16 tokens (   25.97 ms per token,    38.51 tokens per second)
llama_perf_context_print:        eval time =     343.75 ms /     2 runs   (  171.88 ms per token,     5.82 tokens per second)
llama_perf_context_print:       total time =     759.86 ms /    18 tokens
Llama.generate: 69 prefix-match hit, remaining 16 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     408.85 ms /    16 tokens (   25.55 ms per token,    39.13 tokens per second)
llama_perf_context_print:        eval time =     312.53 ms /     2 runs   (  156.27 ms per token,     6.40 tokens per second)
llama_perf_context_print:       total time =     722.29 ms /    18 tokens
Llama.generate: 69 prefix-match hit, remaining 24 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     607.14 ms /    24 tokens (   25.30 ms per token,    39.53 tokens per second)
llama_perf_context_print:        eval time =     317.58 ms /     2 runs   (  158.79 ms per token,     6.30 tokens per second)
llama_perf_context_print:       total time =     925.37 ms /    26 tokens
Llama.generate: 63 prefix-match hit, remaining 27 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     691.32 ms /    27 tokens (   25.60 ms per token,    39.06 tokens per second)
llama_perf_context_print:        eval time =     317.90 ms /     2 runs   (  158.95 ms per token,     6.29 tokens per second)
llama_perf_context_print:       total time =    1010.07 ms /    29 tokens
Llama.generate: 63 prefix-match hit, remaining 28 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     705.18 ms /    28 tokens (   25.19 ms per token,    39.71 tokens per second)
llama_perf_context_print:        eval time =     314.46 ms /     2 runs   (  157.23 ms per token,     6.36 tokens per second)
llama_perf_context_print:       total time =    1020.41 ms /    30 tokens
Llama.generate: 63 prefix-match hit, remaining 37 prompt tokens to eval


1


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     979.96 ms /    37 tokens (   26.49 ms per token,    37.76 tokens per second)
llama_perf_context_print:        eval time =     319.24 ms /     2 runs   (  159.62 ms per token,     6.26 tokens per second)
llama_perf_context_print:       total time =    1299.92 ms /    39 tokens
Llama.generate: 82 prefix-match hit, remaining 20 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     528.42 ms /    20 tokens (   26.42 ms per token,    37.85 tokens per second)
llama_perf_context_print:        eval time =     326.51 ms /     2 runs   (  163.26 ms per token,     6.13 tokens per second)
llama_perf_context_print:       total time =     855.69 ms /    22 tokens
Llama.generate: 69 prefix-match hit, remaining 104 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2523.17 ms /   104 tokens (   24.26 ms per token,    41.22 tokens per second)
llama_perf_context_print:        eval time =     319.95 ms /     2 runs   (  159.98 ms per token,     6.25 tokens per second)
llama_perf_context_print:       total time =    2844.14 ms /   106 tokens
Llama.generate: 72 prefix-match hit, remaining 26 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     735.75 ms /    26 tokens (   28.30 ms per token,    35.34 tokens per second)
llama_perf_context_print:        eval time =     353.05 ms /     2 runs   (  176.52 ms per token,     5.66 tokens per second)
llama_perf_context_print:       total time =    1089.45 ms /    28 tokens
Llama.generate: 70 prefix-match hit, remaining 38 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1105.27 ms /    38 tokens (   29.09 ms per token,    34.38 tokens per second)
llama_perf_context_print:        eval time =     352.45 ms /     2 runs   (  176.23 ms per token,     5.67 tokens per second)
llama_perf_context_print:       total time =    1458.45 ms /    40 tokens
Llama.generate: 73 prefix-match hit, remaining 47 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1248.72 ms /    47 tokens (   26.57 ms per token,    37.64 tokens per second)
llama_perf_context_print:        eval time =     365.99 ms /     2 runs   (  182.99 ms per token,     5.46 tokens per second)
llama_perf_context_print:       total time =    1615.41 ms /    49 tokens
Llama.generate: 76 prefix-match hit, remaining 42 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1124.82 ms /    42 tokens (   26.78 ms per token,    37.34 tokens per second)
llama_perf_context_print:        eval time =     307.02 ms /     2 runs   (  153.51 ms per token,     6.51 tokens per second)
llama_perf_context_print:       total time =    1432.79 ms /    44 tokens
Llama.generate: 63 prefix-match hit, remaining 169 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    4003.51 ms /   169 tokens (   23.69 ms per token,    42.21 tokens per second)
llama_perf_context_print:        eval time =     304.39 ms /     2 runs   (  152.19 ms per token,     6.57 tokens per second)
llama_perf_context_print:       total time =    4308.94 ms /   171 tokens
Llama.generate: 74 prefix-match hit, remaining 248 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5952.52 ms /   248 tokens (   24.00 ms per token,    41.66 tokens per second)
llama_perf_context_print:        eval time =     324.00 ms /     2 runs   (  162.00 ms per token,     6.17 tokens per second)
llama_perf_context_print:       total time =    6277.32 ms /   250 tokens
Llama.generate: 63 prefix-match hit, remaining 25 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     656.73 ms /    25 tokens (   26.27 ms per token,    38.07 tokens per second)
llama_perf_context_print:        eval time =     330.14 ms /     2 runs   (  165.07 ms per token,     6.06 tokens per second)
llama_perf_context_print:       total time =     987.59 ms /    27 tokens
Llama.generate: 70 prefix-match hit, remaining 47 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1156.65 ms /    47 tokens (   24.61 ms per token,    40.63 tokens per second)
llama_perf_context_print:        eval time =     346.59 ms /     2 runs   (  173.29 ms per token,     5.77 tokens per second)
llama_perf_context_print:       total time =    1504.00 ms /    49 tokens
Llama.generate: 73 prefix-match hit, remaining 48 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1176.86 ms /    48 tokens (   24.52 ms per token,    40.79 tokens per second)
llama_perf_context_print:        eval time =     311.89 ms /     2 runs   (  155.95 ms per token,     6.41 tokens per second)
llama_perf_context_print:       total time =    1489.38 ms /    50 tokens
Llama.generate: 70 prefix-match hit, remaining 32 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     778.32 ms /    32 tokens (   24.32 ms per token,    41.11 tokens per second)
llama_perf_context_print:        eval time =     313.02 ms /     2 runs   (  156.51 ms per token,     6.39 tokens per second)
llama_perf_context_print:       total time =    1092.16 ms /    34 tokens
Llama.generate: 76 prefix-match hit, remaining 23 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     604.83 ms /    23 tokens (   26.30 ms per token,    38.03 tokens per second)
llama_perf_context_print:        eval time =     325.03 ms /     2 runs   (  162.51 ms per token,     6.15 tokens per second)
llama_perf_context_print:       total time =     930.87 ms /    25 tokens
Llama.generate: 63 prefix-match hit, remaining 47 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1175.14 ms /    47 tokens (   25.00 ms per token,    40.00 tokens per second)
llama_perf_context_print:        eval time =     304.49 ms /     2 runs   (  152.24 ms per token,     6.57 tokens per second)
llama_perf_context_print:       total time =    1480.24 ms /    49 tokens
Llama.generate: 63 prefix-match hit, remaining 44 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1083.61 ms /    44 tokens (   24.63 ms per token,    40.60 tokens per second)
llama_perf_context_print:        eval time =     344.77 ms /     2 runs   (  172.39 ms per token,     5.80 tokens per second)
llama_perf_context_print:       total time =    1429.01 ms /    46 tokens
Llama.generate: 65 prefix-match hit, remaining 33 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     863.10 ms /    33 tokens (   26.15 ms per token,    38.23 tokens per second)
llama_perf_context_print:        eval time =     331.31 ms /     2 runs   (  165.65 ms per token,     6.04 tokens per second)
llama_perf_context_print:       total time =    1195.06 ms /    35 tokens
Llama.generate: 85 prefix-match hit, remaining 14 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     386.64 ms /    14 tokens (   27.62 ms per token,    36.21 tokens per second)
llama_perf_context_print:        eval time =     347.11 ms /     2 runs   (  173.55 ms per token,     5.76 tokens per second)
llama_perf_context_print:       total time =     734.56 ms /    16 tokens
Llama.generate: 65 prefix-match hit, remaining 33 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     876.41 ms /    33 tokens (   26.56 ms per token,    37.65 tokens per second)
llama_perf_context_print:        eval time =     321.16 ms /     2 runs   (  160.58 ms per token,     6.23 tokens per second)
llama_perf_context_print:       total time =    1198.40 ms /    35 tokens
Llama.generate: 65 prefix-match hit, remaining 38 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     956.60 ms /    38 tokens (   25.17 ms per token,    39.72 tokens per second)
llama_perf_context_print:        eval time =     329.90 ms /     2 runs   (  164.95 ms per token,     6.06 tokens per second)
llama_perf_context_print:       total time =    1287.21 ms /    40 tokens
Llama.generate: 65 prefix-match hit, remaining 40 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1017.38 ms /    40 tokens (   25.43 ms per token,    39.32 tokens per second)
llama_perf_context_print:        eval time =     337.14 ms /     2 runs   (  168.57 ms per token,     5.93 tokens per second)
llama_perf_context_print:       total time =    1355.30 ms /    42 tokens
Llama.generate: 69 prefix-match hit, remaining 122 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    3211.78 ms /   122 tokens (   26.33 ms per token,    37.99 tokens per second)
llama_perf_context_print:        eval time =     310.50 ms /     2 runs   (  155.25 ms per token,     6.44 tokens per second)
llama_perf_context_print:       total time =    3523.40 ms /   124 tokens
Llama.generate: 69 prefix-match hit, remaining 35 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     884.13 ms /    35 tokens (   25.26 ms per token,    39.59 tokens per second)
llama_perf_context_print:        eval time =     327.39 ms /     2 runs   (  163.69 ms per token,     6.11 tokens per second)
llama_perf_context_print:       total time =    1212.29 ms /    37 tokens
Llama.generate: 74 prefix-match hit, remaining 29 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     821.96 ms /    29 tokens (   28.34 ms per token,    35.28 tokens per second)
llama_perf_context_print:        eval time =     328.02 ms /     2 runs   (  164.01 ms per token,     6.10 tokens per second)
llama_perf_context_print:       total time =    1150.74 ms /    31 tokens
Llama.generate: 63 prefix-match hit, remaining 33 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     844.79 ms /    33 tokens (   25.60 ms per token,    39.06 tokens per second)
llama_perf_context_print:        eval time =     316.92 ms /     2 runs   (  158.46 ms per token,     6.31 tokens per second)
llama_perf_context_print:       total time =    1162.40 ms /    35 tokens
Llama.generate: 73 prefix-match hit, remaining 26 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     737.96 ms /    26 tokens (   28.38 ms per token,    35.23 tokens per second)
llama_perf_context_print:        eval time =     347.32 ms /     2 runs   (  173.66 ms per token,     5.76 tokens per second)
llama_perf_context_print:       total time =    1086.60 ms /    28 tokens
Llama.generate: 63 prefix-match hit, remaining 40 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1071.27 ms /    40 tokens (   26.78 ms per token,    37.34 tokens per second)
llama_perf_context_print:        eval time =     333.91 ms /     2 runs   (  166.95 ms per token,     5.99 tokens per second)
llama_perf_context_print:       total time =    1405.90 ms /    42 tokens
Llama.generate: 69 prefix-match hit, remaining 26 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     703.85 ms /    26 tokens (   27.07 ms per token,    36.94 tokens per second)
llama_perf_context_print:        eval time =     337.07 ms /     2 runs   (  168.54 ms per token,     5.93 tokens per second)
llama_perf_context_print:       total time =    1041.92 ms /    28 tokens
Llama.generate: 63 prefix-match hit, remaining 39 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1072.13 ms /    39 tokens (   27.49 ms per token,    36.38 tokens per second)
llama_perf_context_print:        eval time =     422.82 ms /     2 runs   (  211.41 ms per token,     4.73 tokens per second)
llama_perf_context_print:       total time =    1496.18 ms /    41 tokens
Llama.generate: 63 prefix-match hit, remaining 71 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1831.38 ms /    71 tokens (   25.79 ms per token,    38.77 tokens per second)
llama_perf_context_print:        eval time =     344.89 ms /     2 runs   (  172.45 ms per token,     5.80 tokens per second)
llama_perf_context_print:       total time =    2177.11 ms /    73 tokens
Llama.generate: 72 prefix-match hit, remaining 250 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5992.16 ms /   250 tokens (   23.97 ms per token,    41.72 tokens per second)
llama_perf_context_print:        eval time =     303.28 ms /     2 runs   (  151.64 ms per token,     6.59 tokens per second)
llama_perf_context_print:       total time =    6296.46 ms /   252 tokens
Llama.generate: 71 prefix-match hit, remaining 70 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1728.78 ms /    70 tokens (   24.70 ms per token,    40.49 tokens per second)
llama_perf_context_print:        eval time =     314.67 ms /     2 runs   (  157.33 ms per token,     6.36 tokens per second)
llama_perf_context_print:       total time =    2044.43 ms /    72 tokens
Llama.generate: 71 prefix-match hit, remaining 239 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5779.70 ms /   239 tokens (   24.18 ms per token,    41.35 tokens per second)
llama_perf_context_print:        eval time =     313.62 ms /     2 runs   (  156.81 ms per token,     6.38 tokens per second)
llama_perf_context_print:       total time =    6094.09 ms /   241 tokens
Llama.generate: 63 prefix-match hit, remaining 62 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1508.68 ms /    62 tokens (   24.33 ms per token,    41.10 tokens per second)
llama_perf_context_print:        eval time =     329.78 ms /     2 runs   (  164.89 ms per token,     6.06 tokens per second)
llama_perf_context_print:       total time =    1839.16 ms /    64 tokens
Llama.generate: 63 prefix-match hit, remaining 54 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1345.27 ms /    54 tokens (   24.91 ms per token,    40.14 tokens per second)
llama_perf_context_print:        eval time =     317.21 ms /     2 runs   (  158.61 ms per token,     6.30 tokens per second)
llama_perf_context_print:       total time =    1663.31 ms /    56 tokens
Llama.generate: 63 prefix-match hit, remaining 44 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1066.75 ms /    44 tokens (   24.24 ms per token,    41.25 tokens per second)
llama_perf_context_print:        eval time =     325.55 ms /     2 runs   (  162.77 ms per token,     6.14 tokens per second)
llama_perf_context_print:       total time =    1393.00 ms /    46 tokens
Llama.generate: 63 prefix-match hit, remaining 96 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2339.26 ms /    96 tokens (   24.37 ms per token,    41.04 tokens per second)
llama_perf_context_print:        eval time =     333.07 ms /     2 runs   (  166.54 ms per token,     6.00 tokens per second)
llama_perf_context_print:       total time =    2673.15 ms /    98 tokens
Llama.generate: 71 prefix-match hit, remaining 82 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2026.64 ms /    82 tokens (   24.72 ms per token,    40.46 tokens per second)
llama_perf_context_print:        eval time =     315.72 ms /     2 runs   (  157.86 ms per token,     6.33 tokens per second)
llama_perf_context_print:       total time =    2343.24 ms /    84 tokens
Llama.generate: 63 prefix-match hit, remaining 63 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1580.76 ms /    63 tokens (   25.09 ms per token,    39.85 tokens per second)
llama_perf_context_print:        eval time =     358.55 ms /     2 runs   (  179.28 ms per token,     5.58 tokens per second)
llama_perf_context_print:       total time =    1940.11 ms /    65 tokens
Llama.generate: 63 prefix-match hit, remaining 259 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    6432.41 ms /   259 tokens (   24.84 ms per token,    40.26 tokens per second)
llama_perf_context_print:        eval time =     332.92 ms /     2 runs   (  166.46 ms per token,     6.01 tokens per second)
llama_perf_context_print:       total time =    6766.44 ms /   261 tokens
Llama.generate: 81 prefix-match hit, remaining 241 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5994.64 ms /   241 tokens (   24.87 ms per token,    40.20 tokens per second)
llama_perf_context_print:        eval time =     351.54 ms /     2 runs   (  175.77 ms per token,     5.69 tokens per second)
llama_perf_context_print:       total time =    6347.38 ms /   243 tokens
Llama.generate: 63 prefix-match hit, remaining 77 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1847.98 ms /    77 tokens (   24.00 ms per token,    41.67 tokens per second)
llama_perf_context_print:        eval time =     310.59 ms /     2 runs   (  155.30 ms per token,     6.44 tokens per second)
llama_perf_context_print:       total time =    2159.35 ms /    79 tokens
Llama.generate: 63 prefix-match hit, remaining 66 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1686.06 ms /    66 tokens (   25.55 ms per token,    39.14 tokens per second)
llama_perf_context_print:        eval time =     327.21 ms /     2 runs   (  163.60 ms per token,     6.11 tokens per second)
llama_perf_context_print:       total time =    2013.93 ms /    68 tokens
Llama.generate: 63 prefix-match hit, remaining 57 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1389.92 ms /    57 tokens (   24.38 ms per token,    41.01 tokens per second)
llama_perf_context_print:        eval time =     323.77 ms /     2 runs   (  161.89 ms per token,     6.18 tokens per second)
llama_perf_context_print:       total time =    1714.54 ms /    59 tokens
Llama.generate: 63 prefix-match hit, remaining 62 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1482.37 ms /    62 tokens (   23.91 ms per token,    41.82 tokens per second)
llama_perf_context_print:        eval time =     320.17 ms /     2 runs   (  160.08 ms per token,     6.25 tokens per second)
llama_perf_context_print:       total time =    1803.24 ms /    64 tokens
Llama.generate: 65 prefix-match hit, remaining 105 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2560.65 ms /   105 tokens (   24.39 ms per token,    41.01 tokens per second)
llama_perf_context_print:        eval time =     348.72 ms /     2 runs   (  174.36 ms per token,     5.74 tokens per second)
llama_perf_context_print:       total time =    2910.67 ms /   107 tokens
Llama.generate: 63 prefix-match hit, remaining 99 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2477.56 ms /    99 tokens (   25.03 ms per token,    39.96 tokens per second)
llama_perf_context_print:        eval time =     306.90 ms /     2 runs   (  153.45 ms per token,     6.52 tokens per second)
llama_perf_context_print:       total time =    2785.20 ms /   101 tokens
Llama.generate: 63 prefix-match hit, remaining 24 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     612.54 ms /    24 tokens (   25.52 ms per token,    39.18 tokens per second)
llama_perf_context_print:        eval time =     310.10 ms /     2 runs   (  155.05 ms per token,     6.45 tokens per second)
llama_perf_context_print:       total time =     923.37 ms /    26 tokens
Llama.generate: 64 prefix-match hit, remaining 38 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     955.37 ms /    38 tokens (   25.14 ms per token,    39.78 tokens per second)
llama_perf_context_print:        eval time =     323.70 ms /     2 runs   (  161.85 ms per token,     6.18 tokens per second)
llama_perf_context_print:       total time =    1280.06 ms /    40 tokens
Llama.generate: 65 prefix-match hit, remaining 41 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1003.13 ms /    41 tokens (   24.47 ms per token,    40.87 tokens per second)
llama_perf_context_print:        eval time =     315.82 ms /     2 runs   (  157.91 ms per token,     6.33 tokens per second)
llama_perf_context_print:       total time =    1319.59 ms /    43 tokens
Llama.generate: 65 prefix-match hit, remaining 68 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1662.18 ms /    68 tokens (   24.44 ms per token,    40.91 tokens per second)
llama_perf_context_print:        eval time =     331.86 ms /     2 runs   (  165.93 ms per token,     6.03 tokens per second)
llama_perf_context_print:       total time =    1994.90 ms /    70 tokens
Llama.generate: 65 prefix-match hit, remaining 36 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     904.79 ms /    36 tokens (   25.13 ms per token,    39.79 tokens per second)
llama_perf_context_print:        eval time =     309.91 ms /     2 runs   (  154.95 ms per token,     6.45 tokens per second)
llama_perf_context_print:       total time =    1215.40 ms /    38 tokens
Llama.generate: 63 prefix-match hit, remaining 56 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1352.61 ms /    56 tokens (   24.15 ms per token,    41.40 tokens per second)
llama_perf_context_print:        eval time =     322.72 ms /     2 runs   (  161.36 ms per token,     6.20 tokens per second)
llama_perf_context_print:       total time =    1676.21 ms /    58 tokens
Llama.generate: 63 prefix-match hit, remaining 42 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1033.07 ms /    42 tokens (   24.60 ms per token,    40.66 tokens per second)
llama_perf_context_print:        eval time =     319.60 ms /     2 runs   (  159.80 ms per token,     6.26 tokens per second)
llama_perf_context_print:       total time =    1353.53 ms /    44 tokens
Llama.generate: 69 prefix-match hit, remaining 46 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1152.09 ms /    46 tokens (   25.05 ms per token,    39.93 tokens per second)
llama_perf_context_print:        eval time =     328.91 ms /     2 runs   (  164.45 ms per token,     6.08 tokens per second)
llama_perf_context_print:       total time =    1481.94 ms /    48 tokens
Llama.generate: 71 prefix-match hit, remaining 19 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     517.22 ms /    19 tokens (   27.22 ms per token,    36.73 tokens per second)
llama_perf_context_print:        eval time =     335.15 ms /     2 runs   (  167.57 ms per token,     5.97 tokens per second)
llama_perf_context_print:       total time =     853.09 ms /    21 tokens
Llama.generate: 70 prefix-match hit, remaining 20 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     544.05 ms /    20 tokens (   27.20 ms per token,    36.76 tokens per second)
llama_perf_context_print:        eval time =     337.54 ms /     2 runs   (  168.77 ms per token,     5.93 tokens per second)
llama_perf_context_print:       total time =     882.43 ms /    22 tokens
Llama.generate: 63 prefix-match hit, remaining 68 prompt tokens to eval


2


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1684.00 ms /    68 tokens (   24.76 ms per token,    40.38 tokens per second)
llama_perf_context_print:        eval time =     348.69 ms /     2 runs   (  174.34 ms per token,     5.74 tokens per second)
llama_perf_context_print:       total time =    2033.50 ms /    70 tokens
Llama.generate: 63 prefix-match hit, remaining 31 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     843.34 ms /    31 tokens (   27.20 ms per token,    36.76 tokens per second)
llama_perf_context_print:        eval time =     332.95 ms /     2 runs   (  166.47 ms per token,     6.01 tokens per second)
llama_perf_context_print:       total time =    1177.16 ms /    33 tokens
Llama.generate: 63 prefix-match hit, remaining 29 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     727.86 ms /    29 tokens (   25.10 ms per token,    39.84 tokens per second)
llama_perf_context_print:        eval time =     325.78 ms /     2 runs   (  162.89 ms per token,     6.14 tokens per second)
llama_perf_context_print:       total time =    1054.43 ms /    31 tokens
Llama.generate: 63 prefix-match hit, remaining 92 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2213.57 ms /    92 tokens (   24.06 ms per token,    41.56 tokens per second)
llama_perf_context_print:        eval time =     338.37 ms /     2 runs   (  169.18 ms per token,     5.91 tokens per second)
llama_perf_context_print:       total time =    2552.72 ms /    94 tokens
Llama.generate: 63 prefix-match hit, remaining 52 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1251.64 ms /    52 tokens (   24.07 ms per token,    41.55 tokens per second)
llama_perf_context_print:        eval time =     308.48 ms /     2 runs   (  154.24 ms per token,     6.48 tokens per second)
llama_perf_context_print:       total time =    1560.90 ms /    54 tokens
Llama.generate: 68 prefix-match hit, remaining 69 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1626.17 ms /    69 tokens (   23.57 ms per token,    42.43 tokens per second)
llama_perf_context_print:        eval time =     323.71 ms /     2 runs   (  161.85 ms per token,     6.18 tokens per second)
llama_perf_context_print:       total time =    1950.53 ms /    71 tokens
Llama.generate: 88 prefix-match hit, remaining 44 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1111.66 ms /    44 tokens (   25.27 ms per token,    39.58 tokens per second)
llama_perf_context_print:        eval time =     309.49 ms /     2 runs   (  154.75 ms per token,     6.46 tokens per second)
llama_perf_context_print:       total time =    1421.93 ms /    46 tokens
Llama.generate: 63 prefix-match hit, remaining 249 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    6099.67 ms /   249 tokens (   24.50 ms per token,    40.82 tokens per second)
llama_perf_context_print:        eval time =     349.80 ms /     2 runs   (  174.90 ms per token,     5.72 tokens per second)
llama_perf_context_print:       total time =    6450.47 ms /   251 tokens
Llama.generate: 77 prefix-match hit, remaining 245 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5918.21 ms /   245 tokens (   24.16 ms per token,    41.40 tokens per second)
llama_perf_context_print:        eval time =     320.69 ms /     2 runs   (  160.35 ms per token,     6.24 tokens per second)
llama_perf_context_print:       total time =    6239.80 ms /   247 tokens
Llama.generate: 63 prefix-match hit, remaining 40 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     976.30 ms /    40 tokens (   24.41 ms per token,    40.97 tokens per second)
llama_perf_context_print:        eval time =     319.02 ms /     2 runs   (  159.51 ms per token,     6.27 tokens per second)
llama_perf_context_print:       total time =    1296.35 ms /    42 tokens
Llama.generate: 63 prefix-match hit, remaining 50 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1225.76 ms /    50 tokens (   24.52 ms per token,    40.79 tokens per second)
llama_perf_context_print:        eval time =     322.00 ms /     2 runs   (  161.00 ms per token,     6.21 tokens per second)
llama_perf_context_print:       total time =    1548.48 ms /    52 tokens
Llama.generate: 69 prefix-match hit, remaining 253 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    6074.93 ms /   253 tokens (   24.01 ms per token,    41.65 tokens per second)
llama_perf_context_print:        eval time =     324.42 ms /     2 runs   (  162.21 ms per token,     6.16 tokens per second)
llama_perf_context_print:       total time =    6400.19 ms /   255 tokens
Llama.generate: 69 prefix-match hit, remaining 253 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    6146.57 ms /   253 tokens (   24.29 ms per token,    41.16 tokens per second)
llama_perf_context_print:        eval time =     304.34 ms /     2 runs   (  152.17 ms per token,     6.57 tokens per second)
llama_perf_context_print:       total time =    6451.92 ms /   255 tokens
Llama.generate: 102 prefix-match hit, remaining 219 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5311.51 ms /   219 tokens (   24.25 ms per token,    41.23 tokens per second)
llama_perf_context_print:        eval time =     334.48 ms /     2 runs   (  167.24 ms per token,     5.98 tokens per second)
llama_perf_context_print:       total time =    5646.75 ms /   221 tokens
Llama.generate: 103 prefix-match hit, remaining 163 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    3931.39 ms /   163 tokens (   24.12 ms per token,    41.46 tokens per second)
llama_perf_context_print:        eval time =     321.81 ms /     2 runs   (  160.90 ms per token,     6.21 tokens per second)
llama_perf_context_print:       total time =    4254.05 ms /   165 tokens
Llama.generate: 93 prefix-match hit, remaining 229 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    5505.41 ms /   229 tokens (   24.04 ms per token,    41.60 tokens per second)
llama_perf_context_print:        eval time =     326.47 ms /     2 runs   (  163.24 ms per token,     6.13 tokens per second)
llama_perf_context_print:       total time =    5832.74 ms /   231 tokens
Llama.generate: 321 prefix-match hit, remaining 1 prompt tokens to eval


3


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =     490.37 ms /     3 runs   (  163.46 ms per token,     6.12 tokens per second)
llama_perf_context_print:       total time =     491.02 ms /     4 tokens
Llama.generate: 63 prefix-match hit, remaining 67 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1649.28 ms /    67 tokens (   24.62 ms per token,    40.62 tokens per second)
llama_perf_context_print:        eval time =     319.10 ms /     2 runs   (  159.55 ms per token,     6.27 tokens per second)
llama_perf_context_print:       total time =    1969.27 ms /    69 tokens
Llama.generate: 70 prefix-match hit, remaining 61 prompt tokens to eval


4


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1481.30 ms /    61 tokens (   24.28 ms per token,    41.18 tokens per second)
llama_perf_context_print:        eval time =     341.79 ms /     2 runs   (  170.89 ms per token,     5.85 tokens per second)
llama_perf_context_print:       total time =    1824.02 ms /    63 tokens
Llama.generate: 63 prefix-match hit, remaining 35 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     880.41 ms /    35 tokens (   25.15 ms per token,    39.75 tokens per second)
llama_perf_context_print:        eval time =     326.44 ms /     2 runs   (  163.22 ms per token,     6.13 tokens per second)
llama_perf_context_print:       total time =    1207.91 ms /    37 tokens
Llama.generate: 67 prefix-match hit, remaining 40 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     971.52 ms /    40 tokens (   24.29 ms per token,    41.17 tokens per second)
llama_perf_context_print:        eval time =     315.34 ms /     2 runs   (  157.67 ms per token,     6.34 tokens per second)
llama_perf_context_print:       total time =    1287.93 ms /    42 tokens
Llama.generate: 88 prefix-match hit, remaining 19 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     481.87 ms /    19 tokens (   25.36 ms per token,    39.43 tokens per second)
llama_perf_context_print:        eval time =     319.38 ms /     2 runs   (  159.69 ms per token,     6.26 tokens per second)
llama_perf_context_print:       total time =     801.81 ms /    21 tokens
Llama.generate: 67 prefix-match hit, remaining 29 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     702.62 ms /    29 tokens (   24.23 ms per token,    41.27 tokens per second)
llama_perf_context_print:        eval time =     306.33 ms /     2 runs   (  153.17 ms per token,     6.53 tokens per second)
llama_perf_context_print:       total time =    1009.69 ms /    31 tokens
Llama.generate: 63 prefix-match hit, remaining 75 prompt tokens to eval


9


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1769.25 ms /    75 tokens (   23.59 ms per token,    42.39 tokens per second)
llama_perf_context_print:        eval time =     305.75 ms /     2 runs   (  152.88 ms per token,     6.54 tokens per second)
llama_perf_context_print:       total time =    2075.85 ms /    77 tokens
Llama.generate: 63 prefix-match hit, remaining 21 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     521.89 ms /    21 tokens (   24.85 ms per token,    40.24 tokens per second)
llama_perf_context_print:        eval time =     311.95 ms /     2 runs   (  155.98 ms per token,     6.41 tokens per second)
llama_perf_context_print:       total time =     834.62 ms /    23 tokens
Llama.generate: 63 prefix-match hit, remaining 42 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     995.66 ms /    42 tokens (   23.71 ms per token,    42.18 tokens per second)
llama_perf_context_print:        eval time =     300.02 ms /     2 runs   (  150.01 ms per token,     6.67 tokens per second)
llama_perf_context_print:       total time =    1296.50 ms /    44 tokens
Llama.generate: 63 prefix-match hit, remaining 103 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2432.28 ms /   103 tokens (   23.61 ms per token,    42.35 tokens per second)
llama_perf_context_print:        eval time =     305.71 ms /     2 runs   (  152.85 ms per token,     6.54 tokens per second)
llama_perf_context_print:       total time =    2739.12 ms /   105 tokens
Llama.generate: 63 prefix-match hit, remaining 95 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2230.24 ms /    95 tokens (   23.48 ms per token,    42.60 tokens per second)
llama_perf_context_print:        eval time =     298.70 ms /     2 runs   (  149.35 ms per token,     6.70 tokens per second)
llama_perf_context_print:       total time =    2529.80 ms /    97 tokens
Llama.generate: 63 prefix-match hit, remaining 62 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1446.04 ms /    62 tokens (   23.32 ms per token,    42.88 tokens per second)
llama_perf_context_print:        eval time =     307.67 ms /     2 runs   (  153.84 ms per token,     6.50 tokens per second)
llama_perf_context_print:       total time =    1754.62 ms /    64 tokens
Llama.generate: 68 prefix-match hit, remaining 99 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2343.72 ms /    99 tokens (   23.67 ms per token,    42.24 tokens per second)
llama_perf_context_print:        eval time =     301.96 ms /     2 runs   (  150.98 ms per token,     6.62 tokens per second)
llama_perf_context_print:       total time =    2646.53 ms /   101 tokens
Llama.generate: 91 prefix-match hit, remaining 79 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1857.12 ms /    79 tokens (   23.51 ms per token,    42.54 tokens per second)
llama_perf_context_print:        eval time =     301.66 ms /     2 runs   (  150.83 ms per token,     6.63 tokens per second)
llama_perf_context_print:       total time =    2159.68 ms /    81 tokens
Llama.generate: 63 prefix-match hit, remaining 60 prompt tokens to eval


1


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1431.05 ms /    60 tokens (   23.85 ms per token,    41.93 tokens per second)
llama_perf_context_print:        eval time =     315.58 ms /     2 runs   (  157.79 ms per token,     6.34 tokens per second)
llama_perf_context_print:       total time =    1747.38 ms /    62 tokens
Llama.generate: 95 prefix-match hit, remaining 65 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1556.99 ms /    65 tokens (   23.95 ms per token,    41.75 tokens per second)
llama_perf_context_print:        eval time =     299.04 ms /     2 runs   (  149.52 ms per token,     6.69 tokens per second)
llama_perf_context_print:       total time =    1856.74 ms /    67 tokens
Llama.generate: 63 prefix-match hit, remaining 62 prompt tokens to eval


5


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1456.79 ms /    62 tokens (   23.50 ms per token,    42.56 tokens per second)
llama_perf_context_print:        eval time =     306.83 ms /     2 runs   (  153.41 ms per token,     6.52 tokens per second)
llama_perf_context_print:       total time =    1764.49 ms /    64 tokens
Llama.generate: 95 prefix-match hit, remaining 31 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     735.92 ms /    31 tokens (   23.74 ms per token,    42.12 tokens per second)
llama_perf_context_print:        eval time =     318.13 ms /     2 runs   (  159.06 ms per token,     6.29 tokens per second)
llama_perf_context_print:       total time =    1054.76 ms /    33 tokens
Llama.generate: 63 prefix-match hit, remaining 35 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     866.40 ms /    35 tokens (   24.75 ms per token,    40.40 tokens per second)
llama_perf_context_print:        eval time =     303.13 ms /     2 runs   (  151.56 ms per token,     6.60 tokens per second)
llama_perf_context_print:       total time =    1170.28 ms /    37 tokens
Llama.generate: 63 prefix-match hit, remaining 52 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1210.71 ms /    52 tokens (   23.28 ms per token,    42.95 tokens per second)
llama_perf_context_print:        eval time =     316.81 ms /     2 runs   (  158.40 ms per token,     6.31 tokens per second)
llama_perf_context_print:       total time =    1528.48 ms /    54 tokens
Llama.generate: 63 prefix-match hit, remaining 149 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    3521.87 ms /   149 tokens (   23.64 ms per token,    42.31 tokens per second)
llama_perf_context_print:        eval time =     315.96 ms /     2 runs   (  157.98 ms per token,     6.33 tokens per second)
llama_perf_context_print:       total time =    3838.81 ms /   151 tokens
Llama.generate: 63 prefix-match hit, remaining 41 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     969.82 ms /    41 tokens (   23.65 ms per token,    42.28 tokens per second)
llama_perf_context_print:        eval time =     309.85 ms /     2 runs   (  154.93 ms per token,     6.45 tokens per second)
llama_perf_context_print:       total time =    1280.66 ms /    43 tokens
Llama.generate: 84 prefix-match hit, remaining 25 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     611.94 ms /    25 tokens (   24.48 ms per token,    40.85 tokens per second)
llama_perf_context_print:        eval time =     307.98 ms /     2 runs   (  153.99 ms per token,     6.49 tokens per second)
llama_perf_context_print:       total time =     920.70 ms /    27 tokens
Llama.generate: 72 prefix-match hit, remaining 85 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    2024.41 ms /    85 tokens (   23.82 ms per token,    41.99 tokens per second)
llama_perf_context_print:        eval time =     320.12 ms /     2 runs   (  160.06 ms per token,     6.25 tokens per second)
llama_perf_context_print:       total time =    2345.39 ms /    87 tokens
Llama.generate: 63 prefix-match hit, remaining 61 prompt tokens to eval


6


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1397.08 ms /    61 tokens (   22.90 ms per token,    43.66 tokens per second)
llama_perf_context_print:        eval time =     305.45 ms /     2 runs   (  152.73 ms per token,     6.55 tokens per second)
llama_perf_context_print:       total time =    1703.40 ms /    63 tokens
Llama.generate: 63 prefix-match hit, remaining 84 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1961.64 ms /    84 tokens (   23.35 ms per token,    42.82 tokens per second)
llama_perf_context_print:        eval time =     304.74 ms /     2 runs   (  152.37 ms per token,     6.56 tokens per second)
llama_perf_context_print:       total time =    2267.14 ms /    86 tokens
Llama.generate: 77 prefix-match hit, remaining 70 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1631.04 ms /    70 tokens (   23.30 ms per token,    42.92 tokens per second)
llama_perf_context_print:        eval time =     309.16 ms /     2 runs   (  154.58 ms per token,     6.47 tokens per second)
llama_perf_context_print:       total time =    1941.21 ms /    72 tokens
Llama.generate: 63 prefix-match hit, remaining 51 prompt tokens to eval


7


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =    1320.97 ms /    51 tokens (   25.90 ms per token,    38.61 tokens per second)
llama_perf_context_print:        eval time =     345.12 ms /     2 runs   (  172.56 ms per token,     5.80 tokens per second)
llama_perf_context_print:       total time =    1666.94 ms /    53 tokens
Llama.generate: 63 prefix-match hit, remaining 26 prompt tokens to eval


8


llama_perf_context_print:        load time =    4865.62 ms
llama_perf_context_print: prompt eval time =     736.69 ms /    26 tokens (   28.33 ms per token,    35.29 tokens per second)
llama_perf_context_print:        eval time =     323.55 ms /     2 runs   (  161.78 ms per token,     6.18 tokens per second)
llama_perf_context_print:       total time =    1061.14 ms /    28 tokens


8
Comprehensibility ratings added and saved to 'translated_queries_with_ratings.xlsx'
