In [17]:
def benchmark_translation(n_ctx_value, queries, batch_size=20, output_file=None):
    """Time how long a local Mistral model takes to rephrase ESGish queries.

    A ``Llama`` model is loaded with the requested context size, every query
    is sent through it together with a cheat-sheet prompt, and the elapsed
    time of the translation loop (model loading excluded) is summarised.

    Args:
        n_ctx_value: Context window size passed to ``Llama`` as ``n_ctx``.
            A value of exactly 512 selects the abbreviated one-sentence
            prompt; any other value selects the full cheat sheet.
        queries: Sequence of query strings to translate.
        batch_size: Number of queries handled per outer-loop step. Must be
            at least 1. Queries are still translated one at a time.
        output_file: Currently unused; accepted for interface compatibility.
            No file is written.

    Returns:
        A dict with the keys ``"n_ctx"``, ``"total_time_sec"``,
        ``"avg_time_per_query_sec"`` and ``"num_queries"``. For an empty
        ``queries`` the average is reported as ``0.0``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, if the cheat sheet
            plus the 100-token response reserve leaves no room for a query,
            or if an assembled prompt fills the whole context window.
    """
    from llama_cpp import Llama
    import time

    # Fail fast, before the (expensive) model load. A zero step would make
    # range() raise anyway; a negative one would silently translate nothing.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}.")

    llm = Llama(
        model_path="/Users/pierr/Desktop/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        n_ctx=n_ctx_value
    )

    # NOTE: the prompt text below is kept verbatim (spelling included) so that
    # timings stay comparable with earlier runs of this benchmark.
    if n_ctx_value == 512:  # abbreviated form
        cheat_sheet = """Rephrase this ESGish query as a natural English sentence. Each query is asking for all companies or issuers that match some paramater.
            """
    else:
        cheat_sheet = """
            ### ESGish Query Language – Cheat Sheet

            1. Query Structure:
            - A query can be a single condition or a group of conditions using AND, OR, NOT.

            2. Filters:
            - Format: [FieldName] Operator 'Value'
            - Operators: =, >, <, >=, <=, !=
            - Example: [BoardIndependencePct] > '0.7'

            3. Functions:
            - RATIO(F1, F2): ratio of F1 to F2
            - SUM(F1, F2, ...): total of multiple fields
            - CASE_COUNT(Query): number of cases matching a query

            4. Logical Operators:
            - AND(...), OR(...), NOT(...)
            - Used to combine multiple conditions

            5. Syntax Notes:
            - Field names use square brackets []
            - Values are quoted: 'Yes', '0.5', 'High'
            - Fields can be numeric, boolean, date, or string

            6. Your Task:
            - Reprhase the ESGish queries provided as natural English statements
            - Use lead-ins like: "Find companies where...", "Show issuers with...", etc.
            - Output should be clear, concise, and human-friendly
            """

    # The cheat sheet is identical for every query, so its token count (and
    # the resulting query budget) is computed once, outside the timed loop.
    cheat_sheet_token_count = len(llm.tokenize(cheat_sheet.encode("utf-8")))
    max_query_tokens = n_ctx_value - cheat_sheet_token_count - 100  # leave 100 tokens for response
    if max_query_tokens <= 0:
        # A non-positive budget would turn the truncation slice below into a
        # "drop tokens from the end" slice instead of a real truncation.
        raise ValueError(f"Prompt too long for context size {n_ctx_value}. Try increasing n_ctx or shortening input.")

    def translate_query(query):
        """Return the model's English rephrasing of a single query string."""
        # Truncate the query if it would not fit next to the cheat sheet.
        query_tokens = llm.tokenize(query.encode("utf-8"))
        if len(query_tokens) > max_query_tokens:
            query_tokens = query_tokens[:max_query_tokens]
            query = llm.detokenize(query_tokens).decode("utf-8", errors="ignore")

        prompt = f"{cheat_sheet}\n\n{query}\n\n### Response:"
        prompt_token_count = len(llm.tokenize(prompt.encode("utf-8")))

        # Whatever the prompt leaves of the context window is the budget for
        # the generated answer.
        max_response_tokens = n_ctx_value - prompt_token_count
        if max_response_tokens <= 0:
            raise ValueError(f"Prompt too long for context size {n_ctx_value}. Try increasing n_ctx or shortening input.")

        response = llm(prompt, max_tokens=max_response_tokens, temperature=0.1)
        return response["choices"][0]["text"].strip()

    num_queries = len(queries)

    # perf_counter() is a monotonic, high-resolution clock meant for
    # measuring elapsed time; time.time() can jump if the system clock moves.
    start = time.perf_counter()

    for batch_start in range(0, num_queries, batch_size):
        for query in queries[batch_start:batch_start + batch_size]:
            # Only the elapsed time is reported, so the text is not kept.
            translate_query(query)

    duration = time.perf_counter() - start

    # Guard the average against an empty query list (would divide by zero).
    avg_time = duration / num_queries if num_queries else 0.0

    return {
        "n_ctx": n_ctx_value,
        "total_time_sec": duration,
        "avg_time_per_query_sec": avg_time,
        "num_queries": num_queries
    }

In [None]:
import pandas as pd

# Load the queries to benchmark from the "query_text" column of the CSV.
df = pd.read_csv("results2.csv")
queries = df["query_text"].tolist()

# Context window sizes to compare; each one triggers a full benchmark run.
context_sizes = [512, 2048, 3072, 4096]

# Collect one summary dict per context size, in the order listed above.
results = []
for ctx in context_sizes:
    result = benchmark_translation(ctx, queries)
    results += [result]


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Tabulate the benchmark summaries: one row per entry in `results`.
df_results = pd.DataFrame(results)

# Plot average per-query time against context size on a single axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_results["n_ctx"], df_results["avg_time_per_query_sec"], marker='o')
ax.set_title("LLM Performance vs Context Size")
ax.set_xlabel("Context Size (n_ctx)")
ax.set_ylabel("Avg Time per Query (sec)")
ax.grid(True)
plt.show()
