### CPU Inferencing Notebook

#### Install the required packages

In [1]:
import pandas as pd
import re
import torch
import time
from langchain import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain_community.embeddings import SentenceTransformerEmbeddings
from qdrant_client import QdrantClient
from langchain_community.vectorstores import Qdrant
from tqdm import tqdm
from torch import cuda

### Check whether the machine has GPU or not and set the GPU layers

In [2]:
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
print(device)

cpu


In [3]:
from accelerate import Accelerator

if cuda.is_available():
    accelerator = Accelerator()
    gpu_layers = 50

else:
    gpu_layers = 0

print(gpu_layers)

0


### Initialize LLM

In [4]:
def initialize_llm(local_llm):
    return LlamaCpp(
        model_path=local_llm,
        temperature=0.3,
        max_tokens=2048,
        top_p=1,
    )

### Initialize Prompt Template

In [5]:
def initialize_prompt_template():
    return """
    Below is a question posed by healthcare professionals
    including nurses, doctors, and researchers in Malawi,
    all of whom actively engage in disease surveillance efforts.

    Context: {context}

    ### Question:
    {question}

    Offer a response that is both accurate and concise, incorporating
    relevant keywords to address the inquiry effectively.
    """

### Initialize Embeddings

In [6]:
def initialize_embeddings():
    return SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

### Initialize Qdrant Client

In [7]:
def initialize_qdrant_client(url):
    return QdrantClient(url=url, prefer_grpc=False)

### Initialize Qdrant Vector Store

In [8]:
def initialize_qdrant_vector_store(client, embeddings):
    return Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")

### Initialize Prompt

In [9]:
def initialize_prompt(prompt_template):
    return PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

### Initialize Retriever

In [10]:
def initialize_retriever(db):
    return db.as_retriever(search_kwargs={"k":1})

### Regex to extract answer and keywords

In [11]:
def regex_to_preprocess_text_from_biomistral(text):
    answer_text = ""
    keywords = []

    # Define patterns for extracting information
    response_pattern = r"### Answer:\n(.*?)\n\n"
    keywords_pattern = r"### Keywords:\n(.*?)\n\n"

    # Extract information using regular expressions
    response_match = re.search(response_pattern, text)
    keywords_match = re.search(keywords_pattern, text)

    # Check if matches were found and extract information
    if response_match:
        answer_text = response_match.group(1)

    if keywords_match:
        keywords = keywords_match.group(1).strip()

    # Return the extracted information
    return answer_text, keywords

### Regex to obtain paragraphs number

In [12]:
def regex_to_obtain_paragraphs_number(source_document):
    paragraphs_number = None

    # Define the regular expression pattern
    paragraphs_number_pattern = r'(\d+)\n'

    # Search for the pattern in the source document
    paragraphs_number_match = re.search(paragraphs_number_pattern, source_document)

    # Extract the paragraph number or page number if the pattern was found
    if paragraphs_number_match:
        paragraphs_number = paragraphs_number_match.group(1)

    # Return the extracted paragraph number
    return paragraphs_number

### Regex to obtain file name and get booklets

In [13]:
def regex_to_obtain_file_name_and_get_booklet(doc):
    # Define the regular expression pattern
    pattern = r'([^/]+)$'

    # Search for the pattern in the text
    matches = re.search(pattern, doc)

    # Extract the file name if found
    if matches:
        file_name = matches.group(1)

    file_booklet_name = {
        "TG_Booklet_1.xlsx": "TG Booklet 1",
        "TG_Booklet_2.xlsx": "TG Booklet 2",
        "TG_Booklet_3.xlsx": "TG Booklet 3",
        "TG_Booklet_4.xlsx": "TG Booklet 4",
        "TG_Booklet_5.xlsx": "TG Booklet 5",
        "TG_Booklet_6.xlsx": "TG Booklet 6"
    }

    # Return the correspondent booklet name based on the file name
    return file_booklet_name[file_name]

In [14]:
local_llm = "../biomistral-quantized-version/biomistral-finetuned-7b-v2.1-8-bit-gguf-unsloth.Q8_0.gguf"
test_csv = "./Test.csv"
llm = initialize_llm(local_llm)
print("LLM Initialized....")
test_df = pd.read_csv(test_csv)
print("Test Data Loaded....")

prompt_template = initialize_prompt_template()
embeddings = initialize_embeddings()

url = "http://localhost:6333"
client = initialize_qdrant_client(url)

db = initialize_qdrant_vector_store(client, embeddings)
prompt = initialize_prompt(prompt_template)
retriever = initialize_retriever(db)

chain_type_kwargs = {"prompt": prompt}
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True, chain_type_kwargs=chain_type_kwargs, verbose=True)


results = []
keywords = []
sources = []

total_rows = len(test_df)
batch_size = 298
num_batches = (total_rows + batch_size - 1) // batch_size


for batch_index in range(num_batches):
    start_index = batch_index * batch_size
    end_index = min((batch_index + 1) * batch_size, total_rows)
    batch_df = test_df[start_index:end_index]

    for index, row in tqdm(batch_df.iterrows(), total=len(batch_df)):
        try:
            query = row['Question Text']
            response = qa(query)
            answer = response['result']
            answer_, keywords_ = regex_to_preprocess_text_from_biomistral(answer)
            answer_ = answer_.strip()
            source_document = response['source_documents'][0].page_content
            doc = response['source_documents'][0].metadata['source'].split("/")[-1]
            results.append(answer_)
            keywords.append(keywords_)
            sources.append(doc)
        except:
            results.append("Error")
            keywords.append("Error")
            sources.append("Error")

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from ./biomistral-quantized-version/biomistral-finetuned-7b-v2.1-gguf-unsloth.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Adeptschneider
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader:

LLM Initialized....
Test Data Loaded....


  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     170.65 ms /   260 runs   (    0.66 ms per token,  1523.63 tokens per second)
llama_print_timings: prompt eval time =   73046.75 ms /   252 tokens (  289.87 ms per token,     3.45 tokens per second)
llama_print_timings:        eval time =  103726.63 ms /   259 runs   (  400.49 ms per token,     2.50 tokens per second)
llama_print_timings:       total time =  178088.87 ms /   511 tokens
  0%|          | 1/298 [02:58<14:45:21, 178.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     191.05 ms /   267 runs   (    0.72 ms per token,  1397.53 tokens per second)
llama_print_timings: prompt eval time =   49612.57 ms /   195 tokens (  254.42 ms per token,     3.93 tokens per second)
llama_print_timings:        eval time =  124738.09 ms /   266 runs   (  468.94 ms per token,     2.13 tokens per second)
llama_print_timings:       total time =  175947.70 ms /   461 tokens
  1%|          | 2/298 [05:54<14:34:21, 177.24s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     238.21 ms /   344 runs   (    0.69 ms per token,  1444.13 tokens per second)
llama_print_timings: prompt eval time =   42704.89 ms /   119 tokens (  358.86 ms per token,     2.79 tokens per second)
llama_print_timings:        eval time =  146438.86 ms /   343 runs   (  426.94 ms per token,     2.34 tokens per second)
llama_print_timings:       total time =  191013.40 ms /   462 tokens
  1%|          | 3/298 [09:06<15:02:53, 183.64s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     175.62 ms /   277 runs   (    0.63 ms per token,  1577.31 tokens per second)
llama_print_timings: prompt eval time =   49122.42 ms /   187 tokens (  262.69 ms per token,     3.81 tokens per second)
llama_print_timings:        eval time =  103118.39 ms /   276 runs   (  373.62 ms per token,     2.68 tokens per second)
llama_print_timings:       total time =  153656.80 ms /   463 tokens
  1%|▏         | 4/298 [11:40<14:02:04, 171.85s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     202.19 ms /   325 runs   (    0.62 ms per token,  1607.38 tokens per second)
llama_print_timings: prompt eval time =   46741.70 ms /   139 tokens (  336.27 ms per token,     2.97 tokens per second)
llama_print_timings:        eval time =  118611.15 ms /   324 runs   (  366.08 ms per token,     2.73 tokens per second)
llama_print_timings:       total time =  166971.66 ms /   463 tokens
  2%|▏         | 5/298 [14:27<13:50:52, 170.15s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     194.37 ms /   285 runs   (    0.68 ms per token,  1466.30 tokens per second)
llama_print_timings: prompt eval time =   52613.65 ms /   179 tokens (  293.93 ms per token,     3.40 tokens per second)
llama_print_timings:        eval time =  109429.39 ms /   284 runs   (  385.31 ms per token,     2.60 tokens per second)
llama_print_timings:       total time =  163522.41 ms /   463 tokens
  2%|▏         | 6/298 [17:10<13:37:17, 167.94s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     178.55 ms /   267 runs   (    0.67 ms per token,  1495.38 tokens per second)
llama_print_timings: prompt eval time =   51525.97 ms /   197 tokens (  261.55 ms per token,     3.82 tokens per second)
llama_print_timings:        eval time =  101570.82 ms /   266 runs   (  381.85 ms per token,     2.62 tokens per second)
llama_print_timings:       total time =  154490.03 ms /   463 tokens
  2%|▏         | 7/298 [19:45<13:13:22, 163.58s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     269.23 ms /   372 runs   (    0.72 ms per token,  1381.70 tokens per second)
llama_print_timings: prompt eval time =   28396.28 ms /    93 tokens (  305.34 ms per token,     3.28 tokens per second)
llama_print_timings:        eval time =  155572.30 ms /   371 runs   (  419.33 ms per token,     2.38 tokens per second)
llama_print_timings:       total time =  186129.23 ms /   464 tokens
  3%|▎         | 8/298 [22:51<13:45:33, 170.81s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     178.97 ms /   251 runs   (    0.71 ms per token,  1402.44 tokens per second)
llama_print_timings: prompt eval time =   61853.75 ms /   214 tokens (  289.04 ms per token,     3.46 tokens per second)
llama_print_timings:        eval time =  109341.88 ms /   250 runs   (  437.37 ms per token,     2.29 tokens per second)
llama_print_timings:       total time =  172672.60 ms /   464 tokens
  3%|▎         | 9/298 [25:44<13:45:44, 171.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     203.32 ms /   298 runs   (    0.68 ms per token,  1465.68 tokens per second)
llama_print_timings: prompt eval time =   51956.76 ms /   165 tokens (  314.89 ms per token,     3.18 tokens per second)
llama_print_timings:        eval time =  117207.16 ms /   297 runs   (  394.64 ms per token,     2.53 tokens per second)
llama_print_timings:       total time =  170761.84 ms /   462 tokens
  3%|▎         | 10/298 [28:35<13:42:07, 171.28s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     230.93 ms /   306 runs   (    0.75 ms per token,  1325.09 tokens per second)
llama_print_timings: prompt eval time =   13934.05 ms /    39 tokens (  357.28 ms per token,     2.80 tokens per second)
llama_print_timings:        eval time =  134584.14 ms /   305 runs   (  441.26 ms per token,     2.27 tokens per second)
llama_print_timings:       total time =  150261.69 ms /   344 tokens
  4%|▎         | 11/298 [31:05<13:08:39, 164.88s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     167.78 ms /   264 runs   (    0.64 ms per token,  1573.53 tokens per second)
llama_print_timings: prompt eval time =   59980.53 ms /   200 tokens (  299.90 ms per token,     3.33 tokens per second)
llama_print_timings:        eval time =   94681.09 ms /   264 runs   (  358.64 ms per token,     2.79 tokens per second)
llama_print_timings:       total time =  156003.00 ms /   464 tokens
  4%|▍         | 12/298 [33:41<12:53:13, 162.21s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     165.49 ms /   253 runs   (    0.65 ms per token,  1528.76 tokens per second)
llama_print_timings: prompt eval time =   60994.13 ms /   212 tokens (  287.71 ms per token,     3.48 tokens per second)
llama_print_timings:        eval time =   91156.56 ms /   252 runs   (  361.73 ms per token,     2.76 tokens per second)
llama_print_timings:       total time =  153428.87 ms /   464 tokens
  4%|▍         | 13/298 [36:15<12:38:01, 159.59s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     198.83 ms /   259 runs   (    0.77 ms per token,  1302.61 tokens per second)
llama_print_timings: prompt eval time =   56009.54 ms /   206 tokens (  271.89 ms per token,     3.68 tokens per second)
llama_print_timings:        eval time =  113983.17 ms /   258 runs   (  441.80 ms per token,     2.26 tokens per second)
llama_print_timings:       total time =  171568.74 ms /   464 tokens
  5%|▍         | 14/298 [39:07<12:52:42, 163.25s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     180.48 ms /   276 runs   (    0.65 ms per token,  1529.26 tokens per second)
llama_print_timings: prompt eval time =   61636.38 ms /   188 tokens (  327.85 ms per token,     3.05 tokens per second)
llama_print_timings:        eval time =  114391.91 ms /   275 runs   (  415.97 ms per token,     2.40 tokens per second)
llama_print_timings:       total time =  177565.44 ms /   463 tokens
  5%|▌         | 15/298 [42:04<13:10:29, 167.59s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     176.23 ms /   276 runs   (    0.64 ms per token,  1566.15 tokens per second)
llama_print_timings: prompt eval time =    9353.91 ms /    35 tokens (  267.25 ms per token,     3.74 tokens per second)
llama_print_timings:        eval time =  109060.94 ms /   275 runs   (  396.59 ms per token,     2.52 tokens per second)
llama_print_timings:       total time =  119841.04 ms /   310 tokens
  5%|▌         | 16/298 [44:04<12:00:14, 153.24s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     193.61 ms /   289 runs   (    0.67 ms per token,  1492.69 tokens per second)
llama_print_timings: prompt eval time =   49318.52 ms /   176 tokens (  280.22 ms per token,     3.57 tokens per second)
llama_print_timings:        eval time =  112022.58 ms /   288 runs   (  388.97 ms per token,     2.57 tokens per second)
llama_print_timings:       total time =  162881.34 ms /   464 tokens
  6%|▌         | 17/298 [46:47<12:11:24, 156.17s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     285.73 ms /   403 runs   (    0.71 ms per token,  1410.43 tokens per second)
llama_print_timings: prompt eval time =   17242.94 ms /    62 tokens (  278.11 ms per token,     3.60 tokens per second)
llama_print_timings:        eval time =  179137.55 ms /   402 runs   (  445.62 ms per token,     2.24 tokens per second)
llama_print_timings:       total time =  198844.46 ms /   464 tokens
  6%|▌         | 18/298 [50:06<13:08:45, 169.02s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     168.20 ms /   243 runs   (    0.69 ms per token,  1444.71 tokens per second)
llama_print_timings: prompt eval time =   82517.72 ms /   222 tokens (  371.70 ms per token,     2.69 tokens per second)
llama_print_timings:        eval time =  104215.91 ms /   242 runs   (  430.64 ms per token,     2.32 tokens per second)
llama_print_timings:       total time =  188186.39 ms /   464 tokens
  6%|▋         | 19/298 [53:14<13:32:53, 174.82s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     185.90 ms /   280 runs   (    0.66 ms per token,  1506.15 tokens per second)
llama_print_timings: prompt eval time =   51001.27 ms /   183 tokens (  278.70 ms per token,     3.59 tokens per second)
llama_print_timings:        eval time =  108480.51 ms /   279 runs   (  388.82 ms per token,     2.57 tokens per second)
llama_print_timings:       total time =  161041.55 ms /   462 tokens
  7%|▋         | 20/298 [55:56<13:11:01, 170.73s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     181.37 ms /   270 runs   (    0.67 ms per token,  1488.71 tokens per second)
llama_print_timings: prompt eval time =   54086.02 ms /   194 tokens (  278.79 ms per token,     3.59 tokens per second)
llama_print_timings:        eval time =  100900.59 ms /   269 runs   (  375.10 ms per token,     2.67 tokens per second)
llama_print_timings:       total time =  156435.11 ms /   463 tokens
  7%|▋         | 21/298 [58:32<12:48:32, 166.47s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     175.23 ms /   287 runs   (    0.61 ms per token,  1637.85 tokens per second)
llama_print_timings: prompt eval time =   47033.50 ms /   176 tokens (  267.24 ms per token,     3.74 tokens per second)
llama_print_timings:        eval time =   92762.27 ms /   287 runs   (  323.21 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  141152.39 ms /   463 tokens
  7%|▋         | 22/298 [1:00:54<12:11:22, 158.99s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     174.20 ms /   274 runs   (    0.64 ms per token,  1572.90 tokens per second)
llama_print_timings: prompt eval time =   46994.06 ms /   190 tokens (  247.34 ms per token,     4.04 tokens per second)
llama_print_timings:        eval time =   91245.61 ms /   273 runs   (  334.23 ms per token,     2.99 tokens per second)
llama_print_timings:       total time =  139539.55 ms /   463 tokens
  8%|▊         | 23/298 [1:03:13<11:42:06, 153.19s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     147.26 ms /   232 runs   (    0.63 ms per token,  1575.46 tokens per second)
llama_print_timings: prompt eval time =   59080.06 ms /   232 tokens (  254.66 ms per token,     3.93 tokens per second)
llama_print_timings:        eval time =   75362.64 ms /   231 runs   (  326.25 ms per token,     3.07 tokens per second)
llama_print_timings:       total time =  135587.33 ms /   463 tokens
  8%|▊         | 24/298 [1:05:29<11:15:36, 147.94s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     245.34 ms /   386 runs   (    0.64 ms per token,  1573.30 tokens per second)
llama_print_timings: prompt eval time =   18594.23 ms /    79 tokens (  235.37 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =  128801.63 ms /   385 runs   (  334.55 ms per token,     2.99 tokens per second)
llama_print_timings:       total time =  149171.24 ms /   464 tokens
  8%|▊         | 25/298 [1:07:59<11:15:13, 148.40s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     226.71 ms /   344 runs   (    0.66 ms per token,  1517.36 tokens per second)
llama_print_timings: prompt eval time =   32052.21 ms /   120 tokens (  267.10 ms per token,     3.74 tokens per second)
llama_print_timings:        eval time =  126153.94 ms /   344 runs   (  366.73 ms per token,     2.73 tokens per second)
llama_print_timings:       total time =  159847.04 ms /   464 tokens
  9%|▊         | 26/298 [1:10:39<11:28:26, 151.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     268.91 ms /   381 runs   (    0.71 ms per token,  1416.83 tokens per second)
llama_print_timings: prompt eval time =   22786.05 ms /    84 tokens (  271.26 ms per token,     3.69 tokens per second)
llama_print_timings:        eval time =  175548.70 ms /   380 runs   (  461.97 ms per token,     2.16 tokens per second)
llama_print_timings:       total time =  200627.88 ms /   464 tokens
  9%|▉         | 27/298 [1:13:59<12:32:09, 166.53s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     206.60 ms /   261 runs   (    0.79 ms per token,  1263.30 tokens per second)
llama_print_timings: prompt eval time =   76876.30 ms /   204 tokens (  376.84 ms per token,     2.65 tokens per second)
llama_print_timings:        eval time =  111695.41 ms /   260 runs   (  429.60 ms per token,     2.33 tokens per second)
llama_print_timings:       total time =  190172.80 ms /   464 tokens
  9%|▉         | 28/298 [1:17:10<13:01:57, 173.77s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     241.11 ms /   361 runs   (    0.67 ms per token,  1497.26 tokens per second)
llama_print_timings: prompt eval time =   33848.54 ms /   103 tokens (  328.63 ms per token,     3.04 tokens per second)
llama_print_timings:        eval time =  134308.73 ms /   360 runs   (  373.08 ms per token,     2.68 tokens per second)
llama_print_timings:       total time =  169954.17 ms /   463 tokens
 10%|▉         | 29/298 [1:20:00<12:54:04, 172.66s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     154.01 ms /   248 runs   (    0.62 ms per token,  1610.30 tokens per second)
llama_print_timings: prompt eval time =   53772.19 ms /   216 tokens (  248.95 ms per token,     4.02 tokens per second)
llama_print_timings:        eval time =   83809.32 ms /   247 runs   (  339.31 ms per token,     2.95 tokens per second)
llama_print_timings:       total time =  138765.26 ms /   463 tokens
 10%|█         | 30/298 [1:22:19<12:05:55, 162.52s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     208.73 ms /   345 runs   (    0.61 ms per token,  1652.88 tokens per second)
llama_print_timings: prompt eval time =   37144.57 ms /   119 tokens (  312.14 ms per token,     3.20 tokens per second)
llama_print_timings:        eval time =  115880.95 ms /   344 runs   (  336.86 ms per token,     2.97 tokens per second)
llama_print_timings:       total time =  154632.56 ms /   463 tokens
 10%|█         | 31/298 [1:24:54<11:52:49, 160.19s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     274.47 ms /   394 runs   (    0.70 ms per token,  1435.47 tokens per second)
llama_print_timings: prompt eval time =   22419.21 ms /    71 tokens (  315.76 ms per token,     3.17 tokens per second)
llama_print_timings:        eval time =  160554.63 ms /   393 runs   (  408.54 ms per token,     2.45 tokens per second)
llama_print_timings:       total time =  185067.13 ms /   464 tokens
 11%|█         | 32/298 [1:27:59<12:23:24, 167.69s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     219.90 ms /   279 runs   (    0.79 ms per token,  1268.74 tokens per second)
llama_print_timings: prompt eval time =   70729.23 ms /   186 tokens (  380.26 ms per token,     2.63 tokens per second)
llama_print_timings:        eval time =  147963.41 ms /   278 runs   (  532.24 ms per token,     1.88 tokens per second)
llama_print_timings:       total time =  220621.97 ms /   464 tokens
 11%|█         | 33/298 [1:31:40<13:30:56, 183.61s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     201.11 ms /   273 runs   (    0.74 ms per token,  1357.47 tokens per second)
llama_print_timings: prompt eval time =   72659.55 ms /   191 tokens (  380.42 ms per token,     2.63 tokens per second)
llama_print_timings:        eval time =  138654.08 ms /   272 runs   (  509.76 ms per token,     1.96 tokens per second)
llama_print_timings:       total time =  213020.26 ms /   463 tokens
 11%|█▏        | 34/298 [1:35:13<14:06:55, 192.48s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     160.48 ms /   217 runs   (    0.74 ms per token,  1352.18 tokens per second)
llama_print_timings: prompt eval time =   92384.34 ms /   247 tokens (  374.03 ms per token,     2.67 tokens per second)
llama_print_timings:        eval time =  111671.87 ms /   216 runs   (  517.00 ms per token,     1.93 tokens per second)
llama_print_timings:       total time =  205503.86 ms /   463 tokens
 12%|█▏        | 35/298 [1:38:39<14:21:25, 196.52s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     265.25 ms /   344 runs   (    0.77 ms per token,  1296.91 tokens per second)
llama_print_timings: prompt eval time =   42989.50 ms /   118 tokens (  364.32 ms per token,     2.74 tokens per second)
llama_print_timings:        eval time =  177757.57 ms /   343 runs   (  518.24 ms per token,     1.93 tokens per second)
llama_print_timings:       total time =  222969.40 ms /   461 tokens
 12%|█▏        | 36/298 [1:42:22<14:53:03, 204.52s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     259.42 ms /   347 runs   (    0.75 ms per token,  1337.61 tokens per second)
llama_print_timings: prompt eval time =   43654.10 ms /   118 tokens (  369.95 ms per token,     2.70 tokens per second)
llama_print_timings:        eval time =  178057.87 ms /   346 runs   (  514.62 ms per token,     1.94 tokens per second)
llama_print_timings:       total time =  223990.74 ms /   464 tokens
 12%|█▏        | 37/298 [1:46:06<15:15:20, 210.43s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     199.38 ms /   259 runs   (    0.77 ms per token,  1299.06 tokens per second)
llama_print_timings: prompt eval time =   76561.41 ms /   206 tokens (  371.66 ms per token,     2.69 tokens per second)
llama_print_timings:        eval time =  133743.62 ms /   258 runs   (  518.39 ms per token,     1.93 tokens per second)
llama_print_timings:       total time =  212018.75 ms /   464 tokens
 13%|█▎        | 38/298 [1:49:38<15:14:13, 210.97s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     203.48 ms /   272 runs   (    0.75 ms per token,  1336.73 tokens per second)
llama_print_timings: prompt eval time =   72335.22 ms /   192 tokens (  376.75 ms per token,     2.65 tokens per second)
llama_print_timings:        eval time =  142377.39 ms /   271 runs   (  525.38 ms per token,     1.90 tokens per second)
llama_print_timings:       total time =  216515.64 ms /   463 tokens
 13%|█▎        | 39/298 [1:53:15<15:18:05, 212.68s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     155.47 ms /   224 runs   (    0.69 ms per token,  1440.79 tokens per second)
llama_print_timings: prompt eval time =   82802.56 ms /   240 tokens (  345.01 ms per token,     2.90 tokens per second)
llama_print_timings:        eval time =  105320.85 ms /   223 runs   (  472.29 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  189469.67 ms /   463 tokens
 13%|█▎        | 40/298 [1:56:25<14:44:49, 205.78s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     208.07 ms /   273 runs   (    0.76 ms per token,  1312.07 tokens per second)
llama_print_timings: prompt eval time =   65763.24 ms /   191 tokens (  344.31 ms per token,     2.90 tokens per second)
llama_print_timings:        eval time =  128026.53 ms /   272 runs   (  470.69 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  195471.84 ms /   463 tokens
 14%|█▍        | 41/298 [1:59:40<14:28:23, 202.74s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     244.52 ms /   337 runs   (    0.73 ms per token,  1378.20 tokens per second)
llama_print_timings: prompt eval time =   43815.40 ms /   127 tokens (  345.00 ms per token,     2.90 tokens per second)
llama_print_timings:        eval time =  157141.91 ms /   336 runs   (  467.68 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  203033.27 ms /   463 tokens
 14%|█▍        | 42/298 [2:03:04<14:25:38, 202.88s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     271.11 ms /   384 runs   (    0.71 ms per token,  1416.42 tokens per second)
llama_print_timings: prompt eval time =   27782.95 ms /    80 tokens (  347.29 ms per token,     2.88 tokens per second)
llama_print_timings:        eval time =  180650.29 ms /   383 runs   (  471.67 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  210726.35 ms /   463 tokens
 14%|█▍        | 43/298 [2:06:34<14:32:30, 205.30s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     169.10 ms /   247 runs   (    0.68 ms per token,  1460.69 tokens per second)
llama_print_timings: prompt eval time =   72859.97 ms /   216 tokens (  337.31 ms per token,     2.96 tokens per second)
llama_print_timings:        eval time =  114732.07 ms /   246 runs   (  466.39 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  189065.01 ms /   462 tokens
 15%|█▍        | 44/298 [2:09:44<14:08:41, 200.48s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     241.89 ms /   337 runs   (    0.72 ms per token,  1393.19 tokens per second)
llama_print_timings: prompt eval time =   42985.37 ms /   127 tokens (  338.47 ms per token,     2.95 tokens per second)
llama_print_timings:        eval time =  158817.37 ms /   336 runs   (  472.67 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  203836.65 ms /   463 tokens
 15%|█▌        | 45/298 [2:13:08<14:10:07, 201.61s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     183.48 ms /   263 runs   (    0.70 ms per token,  1433.40 tokens per second)
llama_print_timings: prompt eval time =   67301.22 ms /   200 tokens (  336.51 ms per token,     2.97 tokens per second)
llama_print_timings:        eval time =  125669.55 ms /   263 runs   (  477.83 ms per token,     2.09 tokens per second)
llama_print_timings:       total time =  194590.55 ms /   463 tokens
 15%|█▌        | 46/298 [2:16:23<13:58:10, 199.57s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     259.90 ms /   354 runs   (    0.73 ms per token,  1362.06 tokens per second)
llama_print_timings: prompt eval time =   38743.31 ms /   111 tokens (  349.04 ms per token,     2.87 tokens per second)
llama_print_timings:        eval time =  164708.55 ms /   353 runs   (  466.60 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  205537.04 ms /   464 tokens
 16%|█▌        | 47/298 [2:19:48<14:02:38, 201.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     219.44 ms /   299 runs   (    0.73 ms per token,  1362.58 tokens per second)
llama_print_timings: prompt eval time =   56317.32 ms /   166 tokens (  339.26 ms per token,     2.95 tokens per second)
llama_print_timings:        eval time =  141483.83 ms /   298 runs   (  474.78 ms per token,     2.11 tokens per second)
llama_print_timings:       total time =  199603.12 ms /   464 tokens
 16%|█▌        | 48/298 [2:23:08<13:57:12, 200.93s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     251.87 ms /   360 runs   (    0.70 ms per token,  1429.34 tokens per second)
llama_print_timings: prompt eval time =   34710.66 ms /   102 tokens (  340.30 ms per token,     2.94 tokens per second)
llama_print_timings:        eval time =  168619.89 ms /   359 runs   (  469.69 ms per token,     2.13 tokens per second)
llama_print_timings:       total time =  205476.67 ms /   461 tokens
 16%|█▋        | 49/298 [2:26:34<13:59:45, 202.35s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     252.31 ms /   352 runs   (    0.72 ms per token,  1395.13 tokens per second)
llama_print_timings: prompt eval time =   39187.22 ms /   112 tokens (  349.89 ms per token,     2.86 tokens per second)
llama_print_timings:        eval time =  165055.43 ms /   352 runs   (  468.91 ms per token,     2.13 tokens per second)
llama_print_timings:       total time =  206374.01 ms /   464 tokens
 17%|█▋        | 50/298 [2:30:01<14:01:45, 203.65s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     209.65 ms /   304 runs   (    0.69 ms per token,  1450.06 tokens per second)
llama_print_timings: prompt eval time =   55290.72 ms /   160 tokens (  345.57 ms per token,     2.89 tokens per second)
llama_print_timings:        eval time =  142524.33 ms /   304 runs   (  468.83 ms per token,     2.13 tokens per second)
llama_print_timings:       total time =  199628.01 ms /   464 tokens
 17%|█▋        | 51/298 [2:33:20<13:53:38, 202.50s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     164.31 ms /   248 runs   (    0.66 ms per token,  1509.31 tokens per second)
llama_print_timings: prompt eval time =   72963.56 ms /   216 tokens (  337.79 ms per token,     2.96 tokens per second)
llama_print_timings:        eval time =  114246.22 ms /   247 runs   (  462.54 ms per token,     2.16 tokens per second)
llama_print_timings:       total time =  188680.16 ms /   463 tokens
 17%|█▋        | 52/298 [2:36:29<13:33:27, 198.41s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     232.38 ms /   328 runs   (    0.71 ms per token,  1411.45 tokens per second)
llama_print_timings: prompt eval time =   47482.43 ms /   136 tokens (  349.14 ms per token,     2.86 tokens per second)
llama_print_timings:        eval time =  152262.56 ms /   327 runs   (  465.63 ms per token,     2.15 tokens per second)
llama_print_timings:       total time =  201713.95 ms /   463 tokens
 18%|█▊        | 53/298 [2:39:51<13:34:28, 199.46s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     184.86 ms /   251 runs   (    0.74 ms per token,  1357.77 tokens per second)
llama_print_timings: prompt eval time =   75125.24 ms /   212 tokens (  354.36 ms per token,     2.82 tokens per second)
llama_print_timings:        eval time =  117833.68 ms /   250 runs   (  471.33 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  194474.31 ms /   462 tokens
 18%|█▊        | 54/298 [2:43:06<13:25:16, 198.02s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     245.45 ms /   341 runs   (    0.72 ms per token,  1389.28 tokens per second)
llama_print_timings: prompt eval time =   35959.55 ms /   109 tokens (  329.90 ms per token,     3.03 tokens per second)
llama_print_timings:        eval time =  160183.45 ms /   340 runs   (  471.13 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  198133.89 ms /   449 tokens
 18%|█▊        | 55/298 [2:46:24<13:22:18, 198.10s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     192.17 ms /   271 runs   (    0.71 ms per token,  1410.19 tokens per second)
llama_print_timings: prompt eval time =   65732.32 ms /   194 tokens (  338.83 ms per token,     2.95 tokens per second)
llama_print_timings:        eval time =  127309.31 ms /   270 runs   (  471.52 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  194673.14 ms /   464 tokens
 19%|█▉        | 56/298 [2:49:39<13:15:05, 197.13s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     250.47 ms /   350 runs   (    0.72 ms per token,  1397.39 tokens per second)
llama_print_timings: prompt eval time =   40355.27 ms /   115 tokens (  350.92 ms per token,     2.85 tokens per second)
llama_print_timings:        eval time =  166086.78 ms /   349 runs   (  475.89 ms per token,     2.10 tokens per second)
llama_print_timings:       total time =  208547.47 ms /   464 tokens
 19%|█▉        | 57/298 [2:53:08<13:25:52, 200.63s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     167.20 ms /   234 runs   (    0.71 ms per token,  1399.50 tokens per second)
llama_print_timings: prompt eval time =   79145.08 ms /   231 tokens (  342.62 ms per token,     2.92 tokens per second)
llama_print_timings:        eval time =  110948.07 ms /   233 runs   (  476.17 ms per token,     2.10 tokens per second)
llama_print_timings:       total time =  191562.45 ms /   464 tokens
 19%|█▉        | 58/298 [2:56:20<13:11:51, 197.97s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     152.20 ms /   218 runs   (    0.70 ms per token,  1432.35 tokens per second)
llama_print_timings: prompt eval time =   83586.74 ms /   246 tokens (  339.78 ms per token,     2.94 tokens per second)
llama_print_timings:        eval time =  100835.77 ms /   217 runs   (  464.68 ms per token,     2.15 tokens per second)
llama_print_timings:       total time =  185759.14 ms /   463 tokens
 20%|█▉        | 59/298 [2:59:26<12:54:13, 194.36s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     273.44 ms /   384 runs   (    0.71 ms per token,  1404.35 tokens per second)
llama_print_timings: prompt eval time =   27324.07 ms /    80 tokens (  341.55 ms per token,     2.93 tokens per second)
llama_print_timings:        eval time =  181350.28 ms /   383 runs   (  473.50 ms per token,     2.11 tokens per second)
llama_print_timings:       total time =  211001.70 ms /   463 tokens
 20%|██        | 60/298 [3:02:57<13:11:01, 199.42s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     193.91 ms /   248 runs   (    0.78 ms per token,  1278.96 tokens per second)
llama_print_timings: prompt eval time =   73343.51 ms /   216 tokens (  339.55 ms per token,     2.95 tokens per second)
llama_print_timings:        eval time =  116973.95 ms /   248 runs   (  471.67 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  191904.07 ms /   464 tokens
 20%|██        | 61/298 [3:06:09<12:59:03, 197.23s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.57 ms /   209 runs   (    0.69 ms per token,  1445.66 tokens per second)
llama_print_timings: prompt eval time =   91575.96 ms /   256 tokens (  357.72 ms per token,     2.80 tokens per second)
llama_print_timings:        eval time =   95505.13 ms /   208 runs   (  459.16 ms per token,     2.18 tokens per second)
llama_print_timings:       total time =  188317.64 ms /   464 tokens
 21%|██        | 62/298 [3:09:17<12:45:28, 194.61s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     152.84 ms /   217 runs   (    0.70 ms per token,  1419.79 tokens per second)
llama_print_timings: prompt eval time =   19494.43 ms /    55 tokens (  354.44 ms per token,     2.82 tokens per second)
llama_print_timings:        eval time =  103687.71 ms /   216 runs   (  480.04 ms per token,     2.08 tokens per second)
llama_print_timings:       total time =  124410.28 ms /   271 tokens
 21%|██        | 63/298 [3:11:22<11:19:58, 173.61s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     272.14 ms /   383 runs   (    0.71 ms per token,  1407.36 tokens per second)
llama_print_timings: prompt eval time =   28723.29 ms /    80 tokens (  359.04 ms per token,     2.79 tokens per second)
llama_print_timings:        eval time =  180927.43 ms /   383 runs   (  472.40 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  211904.81 ms /   463 tokens
 21%|██▏       | 64/298 [3:14:54<12:02:06, 185.16s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     110.05 ms /   150 runs   (    0.73 ms per token,  1363.04 tokens per second)
llama_print_timings: prompt eval time =  108333.18 ms /   314 tokens (  345.01 ms per token,     2.90 tokens per second)
llama_print_timings:        eval time =   70963.78 ms /   149 runs   (  476.27 ms per token,     2.10 tokens per second)
llama_print_timings:       total time =  180303.77 ms /   463 tokens
 22%|██▏       | 65/298 [3:17:55<11:53:36, 183.76s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     163.36 ms /   232 runs   (    0.70 ms per token,  1420.17 tokens per second)
llama_print_timings: prompt eval time =   79453.07 ms /   232 tokens (  342.47 ms per token,     2.92 tokens per second)
llama_print_timings:        eval time =  106256.55 ms /   231 runs   (  459.99 ms per token,     2.17 tokens per second)
llama_print_timings:       total time =  187101.16 ms /   463 tokens
 22%|██▏       | 66/298 [3:21:02<11:54:39, 184.83s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      21.24 ms /    30 runs   (    0.71 ms per token,  1412.70 tokens per second)
llama_print_timings: prompt eval time =  150092.27 ms /   434 tokens (  345.83 ms per token,     2.89 tokens per second)
llama_print_timings:        eval time =   12796.25 ms /    29 runs   (  441.25 ms per token,     2.27 tokens per second)
llama_print_timings:       total time =  163226.71 ms /   463 tokens
 22%|██▏       | 67/298 [3:23:45<11:26:52, 178.41s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     213.94 ms /   293 runs   (    0.73 ms per token,  1369.51 tokens per second)
llama_print_timings: prompt eval time =   55626.30 ms /   171 tokens (  325.30 ms per token,     3.07 tokens per second)
llama_print_timings:        eval time =  141471.95 ms /   292 runs   (  484.49 ms per token,     2.06 tokens per second)
llama_print_timings:       total time =  198898.50 ms /   463 tokens
 23%|██▎       | 68/298 [3:27:04<11:47:39, 184.61s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     185.03 ms /   253 runs   (    0.73 ms per token,  1367.34 tokens per second)
llama_print_timings: prompt eval time =   69681.01 ms /   211 tokens (  330.24 ms per token,     3.03 tokens per second)
llama_print_timings:        eval time =  118099.63 ms /   252 runs   (  468.65 ms per token,     2.13 tokens per second)
llama_print_timings:       total time =  189355.58 ms /   463 tokens
 23%|██▎       | 69/298 [3:30:14<11:50:15, 186.09s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     203.48 ms /   292 runs   (    0.70 ms per token,  1435.02 tokens per second)
llama_print_timings: prompt eval time =   57786.97 ms /   172 tokens (  335.97 ms per token,     2.98 tokens per second)
llama_print_timings:        eval time =  137562.82 ms /   291 runs   (  472.72 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  197086.24 ms /   463 tokens
 23%|██▎       | 70/298 [3:33:31<11:59:53, 189.45s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     278.16 ms /   387 runs   (    0.72 ms per token,  1391.31 tokens per second)
llama_print_timings: prompt eval time =   26113.24 ms /    78 tokens (  334.79 ms per token,     2.99 tokens per second)
llama_print_timings:        eval time =  180346.42 ms /   386 runs   (  467.22 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  208839.40 ms /   464 tokens
 24%|██▍       | 71/298 [3:37:00<12:19:04, 195.35s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     216.05 ms /   310 runs   (    0.70 ms per token,  1434.84 tokens per second)
llama_print_timings: prompt eval time =   53581.85 ms /   155 tokens (  345.69 ms per token,     2.89 tokens per second)
llama_print_timings:        eval time =  144538.38 ms /   309 runs   (  467.76 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  199989.27 ms /   464 tokens
 24%|██▍       | 72/298 [3:40:21<12:21:15, 196.79s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     171.03 ms /   230 runs   (    0.74 ms per token,  1344.77 tokens per second)
llama_print_timings: prompt eval time =   78443.21 ms /   232 tokens (  338.12 ms per token,     2.96 tokens per second)
llama_print_timings:        eval time =  108667.44 ms /   230 runs   (  472.47 ms per token,     2.12 tokens per second)
llama_print_timings:       total time =  188559.56 ms /   462 tokens
 24%|██▍       | 73/298 [3:43:29<12:08:57, 194.39s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     252.02 ms /   357 runs   (    0.71 ms per token,  1416.56 tokens per second)
llama_print_timings: prompt eval time =   37402.22 ms /   108 tokens (  346.32 ms per token,     2.89 tokens per second)
llama_print_timings:        eval time =  165867.39 ms /   356 runs   (  465.92 ms per token,     2.15 tokens per second)
llama_print_timings:       total time =  205413.52 ms /   464 tokens
 25%|██▍       | 74/298 [3:46:55<12:18:14, 197.74s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     166.17 ms /   233 runs   (    0.71 ms per token,  1402.21 tokens per second)
llama_print_timings: prompt eval time =   80061.70 ms /   232 tokens (  345.09 ms per token,     2.90 tokens per second)
llama_print_timings:        eval time =  108272.12 ms /   232 runs   (  466.69 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  189741.23 ms /   464 tokens
 25%|██▌       | 75/298 [3:50:05<12:06:14, 195.40s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     238.43 ms /   335 runs   (    0.71 ms per token,  1405.05 tokens per second)
llama_print_timings: prompt eval time =   23840.60 ms /    68 tokens (  350.60 ms per token,     2.85 tokens per second)
llama_print_timings:        eval time =  158128.59 ms /   334 runs   (  473.44 ms per token,     2.11 tokens per second)
llama_print_timings:       total time =  184001.71 ms /   402 tokens
 26%|██▌       | 76/298 [3:53:09<11:50:43, 192.09s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     166.06 ms /   234 runs   (    0.71 ms per token,  1409.16 tokens per second)
llama_print_timings: prompt eval time =   79370.91 ms /   231 tokens (  343.60 ms per token,     2.91 tokens per second)
llama_print_timings:        eval time =  108784.88 ms /   233 runs   (  466.89 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  189620.76 ms /   464 tokens
 26%|██▌       | 77/298 [3:56:19<11:45:01, 191.41s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     171.01 ms /   243 runs   (    0.70 ms per token,  1420.94 tokens per second)
llama_print_timings: prompt eval time =   75270.13 ms /   222 tokens (  339.05 ms per token,     2.95 tokens per second)
llama_print_timings:        eval time =  113847.27 ms /   242 runs   (  470.44 ms per token,     2.13 tokens per second)
llama_print_timings:       total time =  190664.75 ms /   464 tokens
 26%|██▌       | 78/298 [3:59:30<11:41:33, 191.33s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     187.99 ms /   269 runs   (    0.70 ms per token,  1430.90 tokens per second)
llama_print_timings: prompt eval time =   23039.38 ms /    67 tokens (  343.87 ms per token,     2.91 tokens per second)
llama_print_timings:        eval time =  125096.56 ms /   268 runs   (  466.78 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  149755.69 ms /   335 tokens
 27%|██▋       | 79/298 [4:02:00<10:53:04, 178.92s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     214.03 ms /   287 runs   (    0.75 ms per token,  1340.96 tokens per second)
llama_print_timings: prompt eval time =   59378.07 ms /   178 tokens (  333.58 ms per token,     3.00 tokens per second)
llama_print_timings:        eval time =  133862.55 ms /   286 runs   (  468.05 ms per token,     2.14 tokens per second)
llama_print_timings:       total time =  195026.91 ms /   464 tokens
 27%|██▋       | 80/298 [4:05:15<11:07:54, 183.83s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     160.35 ms /   238 runs   (    0.67 ms per token,  1484.25 tokens per second)
llama_print_timings: prompt eval time =   75260.20 ms /   227 tokens (  331.54 ms per token,     3.02 tokens per second)
llama_print_timings:        eval time =   92823.23 ms /   237 runs   (  391.66 ms per token,     2.55 tokens per second)
llama_print_timings:       total time =  169407.20 ms /   464 tokens
 27%|██▋       | 81/298 [4:08:05<10:49:23, 179.56s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     231.83 ms /   355 runs   (    0.65 ms per token,  1531.27 tokens per second)
llama_print_timings: prompt eval time =   25744.16 ms /   110 tokens (  234.04 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =  112453.51 ms /   354 runs   (  317.67 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  139817.64 ms /   464 tokens
 28%|██▊       | 82/298 [4:10:25<10:03:37, 167.68s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     169.04 ms /   268 runs   (    0.63 ms per token,  1585.42 tokens per second)
llama_print_timings: prompt eval time =   46730.80 ms /   197 tokens (  237.21 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   88073.66 ms /   267 runs   (  329.86 ms per token,     3.03 tokens per second)
llama_print_timings:       total time =  136084.10 ms /   464 tokens
 28%|██▊       | 83/298 [4:12:41<9:27:00, 158.24s/it] Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     149.88 ms /   248 runs   (    0.60 ms per token,  1654.68 tokens per second)
llama_print_timings: prompt eval time =   51706.41 ms /   216 tokens (  239.38 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =   78065.91 ms /   247 runs   (  316.06 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  130933.15 ms /   463 tokens
 28%|██▊       | 84/298 [4:14:52<8:55:16, 150.08s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     162.69 ms /   260 runs   (    0.63 ms per token,  1598.15 tokens per second)
llama_print_timings: prompt eval time =   48884.25 ms /   204 tokens (  239.63 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   82390.89 ms /   259 runs   (  318.11 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  132505.92 ms /   463 tokens
 29%|██▊       | 85/298 [4:17:05<8:34:12, 144.85s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     182.58 ms /   296 runs   (    0.62 ms per token,  1621.17 tokens per second)
llama_print_timings: prompt eval time =   38786.91 ms /   167 tokens (  232.26 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =   93725.95 ms /   295 runs   (  317.72 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  133851.89 ms /   462 tokens
 29%|██▉       | 86/298 [4:19:19<8:20:15, 141.58s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     133.39 ms /   219 runs   (    0.61 ms per token,  1641.81 tokens per second)
llama_print_timings: prompt eval time =   57835.64 ms /   244 tokens (  237.03 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   69020.81 ms /   218 runs   (  316.61 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  127866.72 ms /   462 tokens
 29%|██▉       | 87/298 [4:21:27<8:03:31, 137.50s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     140.00 ms /   230 runs   (    0.61 ms per token,  1642.83 tokens per second)
llama_print_timings: prompt eval time =   56146.33 ms /   234 tokens (  239.94 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   71716.37 ms /   229 runs   (  313.17 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  128927.70 ms /   463 tokens
 30%|██▉       | 88/298 [4:23:36<7:52:21, 134.96s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     148.80 ms /   225 runs   (    0.66 ms per token,  1512.10 tokens per second)
llama_print_timings: prompt eval time =   56477.51 ms /   239 tokens (  236.31 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   71391.73 ms /   224 runs   (  318.71 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  128939.24 ms /   463 tokens
 30%|██▉       | 89/298 [4:25:45<7:43:56, 133.19s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     217.12 ms /   349 runs   (    0.62 ms per token,  1607.38 tokens per second)
llama_print_timings: prompt eval time =   27180.79 ms /   116 tokens (  234.32 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =  109191.21 ms /   348 runs   (  313.77 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  137960.28 ms /   464 tokens
 30%|███       | 90/298 [4:28:03<7:46:51, 134.67s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     151.63 ms /   238 runs   (    0.64 ms per token,  1569.58 tokens per second)
llama_print_timings: prompt eval time =   54385.94 ms /   227 tokens (  239.59 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   74866.57 ms /   237 runs   (  315.89 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  130367.53 ms /   464 tokens
 31%|███       | 91/298 [4:30:14<7:40:24, 133.45s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     214.47 ms /   341 runs   (    0.63 ms per token,  1589.94 tokens per second)
llama_print_timings: prompt eval time =   29007.30 ms /   122 tokens (  237.76 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =  106707.79 ms /   340 runs   (  313.85 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  137273.91 ms /   462 tokens
 31%|███       | 92/298 [4:32:31<7:42:13, 134.63s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     245.38 ms /   396 runs   (    0.62 ms per token,  1613.85 tokens per second)
llama_print_timings: prompt eval time =   17541.48 ms /    69 tokens (  254.22 ms per token,     3.93 tokens per second)
llama_print_timings:        eval time =  123037.93 ms /   395 runs   (  311.49 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  142317.06 ms /   464 tokens
 31%|███       | 93/298 [4:34:53<7:48:01, 136.98s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     246.27 ms /   398 runs   (    0.62 ms per token,  1616.11 tokens per second)
llama_print_timings: prompt eval time =   16120.31 ms /    67 tokens (  240.60 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  123747.03 ms /   397 runs   (  311.71 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  141624.38 ms /   464 tokens
 32%|███▏      | 94/298 [4:37:15<7:50:37, 138.42s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     222.11 ms /   358 runs   (    0.62 ms per token,  1611.84 tokens per second)
llama_print_timings: prompt eval time =   25620.07 ms /   107 tokens (  239.44 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =  111261.88 ms /   357 runs   (  311.66 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  138487.27 ms /   464 tokens
 32%|███▏      | 95/298 [4:39:34<7:48:30, 138.47s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     222.02 ms /   355 runs   (    0.63 ms per token,  1598.93 tokens per second)
llama_print_timings: prompt eval time =   10406.57 ms /    39 tokens (  266.84 ms per token,     3.75 tokens per second)
llama_print_timings:        eval time =  110426.35 ms /   354 runs   (  311.94 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  122458.28 ms /   393 tokens
 32%|███▏      | 96/298 [4:41:36<7:30:11, 133.72s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     216.61 ms /   357 runs   (    0.61 ms per token,  1648.15 tokens per second)
llama_print_timings: prompt eval time =   26392.17 ms /   108 tokens (  244.37 ms per token,     4.09 tokens per second)
llama_print_timings:        eval time =  110180.37 ms /   356 runs   (  309.50 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  138137.88 ms /   464 tokens
 33%|███▎      | 97/298 [4:43:55<7:32:31, 135.08s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     250.57 ms /   398 runs   (    0.63 ms per token,  1588.37 tokens per second)
llama_print_timings: prompt eval time =   16137.56 ms /    67 tokens (  240.86 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =  124451.74 ms /   397 runs   (  313.48 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  142389.10 ms /   464 tokens
 33%|███▎      | 98/298 [4:46:17<7:37:41, 137.31s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     219.16 ms /   337 runs   (    0.65 ms per token,  1537.67 tokens per second)
llama_print_timings: prompt eval time =   30387.64 ms /   128 tokens (  237.40 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =  105099.14 ms /   336 runs   (  312.80 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  137054.16 ms /   464 tokens
 33%|███▎      | 99/298 [4:48:34<7:35:17, 137.27s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     142.52 ms /   232 runs   (    0.61 ms per token,  1627.86 tokens per second)
llama_print_timings: prompt eval time =   53589.47 ms /   232 tokens (  230.99 ms per token,     4.33 tokens per second)
llama_print_timings:        eval time =   71768.70 ms /   232 runs   (  309.35 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  126412.67 ms /   464 tokens
 34%|███▎      | 100/298 [4:50:41<7:22:22, 134.05s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     146.56 ms /   228 runs   (    0.64 ms per token,  1555.69 tokens per second)
llama_print_timings: prompt eval time =   11786.28 ms /    45 tokens (  261.92 ms per token,     3.82 tokens per second)
llama_print_timings:        eval time =   72870.65 ms /   227 runs   (  321.02 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =   85692.69 ms /   272 tokens
 34%|███▍      | 101/298 [4:52:07<6:32:38, 119.59s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     214.21 ms /   339 runs   (    0.63 ms per token,  1582.57 tokens per second)
llama_print_timings: prompt eval time =   28746.74 ms /   126 tokens (  228.15 ms per token,     4.38 tokens per second)
llama_print_timings:        eval time =  108063.34 ms /   338 runs   (  319.71 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  138368.82 ms /   464 tokens
 34%|███▍      | 102/298 [4:54:25<6:49:10, 125.26s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     254.28 ms /   397 runs   (    0.64 ms per token,  1561.30 tokens per second)
llama_print_timings: prompt eval time =   16157.38 ms /    68 tokens (  237.61 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =  132038.71 ms /   396 runs   (  333.43 ms per token,     3.00 tokens per second)
llama_print_timings:       total time =  150056.17 ms /   464 tokens
 35%|███▍      | 103/298 [4:56:55<7:11:22, 132.73s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     166.09 ms /   259 runs   (    0.64 ms per token,  1559.40 tokens per second)
llama_print_timings: prompt eval time =   48721.79 ms /   206 tokens (  236.51 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   83266.68 ms /   258 runs   (  322.74 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  133257.56 ms /   464 tokens
 35%|███▍      | 104/298 [4:59:09<7:09:48, 132.93s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     170.28 ms /   259 runs   (    0.66 ms per token,  1521.01 tokens per second)
llama_print_timings: prompt eval time =   48926.29 ms /   203 tokens (  241.02 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   93365.48 ms /   258 runs   (  361.88 ms per token,     2.76 tokens per second)
llama_print_timings:       total time =  143618.21 ms /   461 tokens
 35%|███▌      | 105/298 [5:01:33<7:18:02, 136.18s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     161.81 ms /   256 runs   (    0.63 ms per token,  1582.07 tokens per second)
llama_print_timings: prompt eval time =   10790.17 ms /    46 tokens (  234.57 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   79917.98 ms /   255 runs   (  313.40 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =   91821.87 ms /   301 tokens
 36%|███▌      | 106/298 [5:03:05<6:33:23, 122.94s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     148.16 ms /   246 runs   (    0.60 ms per token,  1660.41 tokens per second)
llama_print_timings: prompt eval time =   51859.72 ms /   216 tokens (  240.09 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   76536.82 ms /   245 runs   (  312.40 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  129499.61 ms /   461 tokens
 36%|███▌      | 107/298 [5:05:14<6:37:42, 124.94s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     149.66 ms /   245 runs   (    0.61 ms per token,  1637.08 tokens per second)
llama_print_timings: prompt eval time =   55026.13 ms /   219 tokens (  251.26 ms per token,     3.98 tokens per second)
llama_print_timings:        eval time =   76914.78 ms /   244 runs   (  315.22 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  133063.74 ms /   463 tokens
 36%|███▌      | 108/298 [5:07:27<6:43:28, 127.41s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     130.85 ms /   216 runs   (    0.61 ms per token,  1650.72 tokens per second)
llama_print_timings: prompt eval time =   58380.20 ms /   247 tokens (  236.36 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   67929.24 ms /   215 runs   (  315.95 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  127322.35 ms /   462 tokens
 37%|███▋      | 109/298 [5:09:35<6:41:22, 127.42s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     143.52 ms /   242 runs   (    0.59 ms per token,  1686.15 tokens per second)
llama_print_timings: prompt eval time =   50006.68 ms /   222 tokens (  225.26 ms per token,     4.44 tokens per second)
llama_print_timings:        eval time =   76109.31 ms /   241 runs   (  315.81 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  127227.16 ms /   463 tokens
 37%|███▋      | 110/298 [5:11:42<6:39:11, 127.40s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     245.46 ms /   395 runs   (    0.62 ms per token,  1609.22 tokens per second)
llama_print_timings: prompt eval time =   16349.41 ms /    69 tokens (  236.95 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =  122943.25 ms /   394 runs   (  312.04 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  141067.19 ms /   463 tokens
 37%|███▋      | 111/298 [5:14:03<6:49:56, 131.53s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     247.69 ms /   398 runs   (    0.62 ms per token,  1606.85 tokens per second)
llama_print_timings: prompt eval time =   15210.77 ms /    67 tokens (  227.03 ms per token,     4.40 tokens per second)
llama_print_timings:        eval time =  124173.95 ms /   397 runs   (  312.78 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  141165.01 ms /   464 tokens
 38%|███▊      | 112/298 [5:16:25<6:56:49, 134.46s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     250.31 ms /   400 runs   (    0.63 ms per token,  1598.02 tokens per second)
llama_print_timings: prompt eval time =   16102.15 ms /    64 tokens (  251.60 ms per token,     3.97 tokens per second)
llama_print_timings:        eval time =  123914.26 ms /   400 runs   (  309.79 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  141798.70 ms /   464 tokens
 38%|███▊      | 113/298 [5:18:47<7:01:28, 136.69s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     237.21 ms /   378 runs   (    0.63 ms per token,  1593.55 tokens per second)
llama_print_timings: prompt eval time =   20256.73 ms /    86 tokens (  235.54 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =  118044.11 ms /   377 runs   (  313.11 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  139997.73 ms /   463 tokens
 38%|███▊      | 114/298 [5:21:07<7:02:22, 137.73s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     161.47 ms /   262 runs   (    0.62 ms per token,  1622.63 tokens per second)
llama_print_timings: prompt eval time =   48944.63 ms /   203 tokens (  241.11 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   80692.38 ms /   261 runs   (  309.17 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  130840.30 ms /   464 tokens
 39%|███▊      | 115/298 [5:23:18<6:53:54, 135.71s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     231.95 ms /   381 runs   (    0.61 ms per token,  1642.57 tokens per second)
llama_print_timings: prompt eval time =   19876.14 ms /    84 tokens (  236.62 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =  118543.13 ms /   380 runs   (  311.96 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  140114.17 ms /   464 tokens
 39%|███▉      | 116/298 [5:25:38<6:55:48, 137.08s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     236.47 ms /   384 runs   (    0.62 ms per token,  1623.86 tokens per second)
llama_print_timings: prompt eval time =   19218.57 ms /    80 tokens (  240.23 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  119911.30 ms /   384 runs   (  312.27 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  140855.05 ms /   464 tokens
 39%|███▉      | 117/298 [5:27:59<6:57:04, 138.26s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     202.62 ms /   331 runs   (    0.61 ms per token,  1633.62 tokens per second)
llama_print_timings: prompt eval time =   32178.45 ms /   134 tokens (  240.14 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  101770.58 ms /   330 runs   (  308.40 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  135432.67 ms /   464 tokens
 40%|███▉      | 118/298 [5:30:15<6:52:24, 137.47s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     168.48 ms /   272 runs   (    0.62 ms per token,  1614.43 tokens per second)
llama_print_timings: prompt eval time =   43996.69 ms /   190 tokens (  231.56 ms per token,     4.32 tokens per second)
llama_print_timings:        eval time =   85355.34 ms /   271 runs   (  314.96 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  130580.05 ms /   461 tokens
 40%|███▉      | 119/298 [5:32:25<6:44:03, 135.44s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     175.29 ms /   277 runs   (    0.63 ms per token,  1580.20 tokens per second)
llama_print_timings: prompt eval time =   44237.27 ms /   188 tokens (  235.30 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   85890.47 ms /   276 runs   (  311.20 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  131402.52 ms /   464 tokens
 40%|████      | 120/298 [5:34:37<6:38:18, 134.26s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     169.25 ms /   269 runs   (    0.63 ms per token,  1589.34 tokens per second)
llama_print_timings: prompt eval time =   46185.70 ms /   196 tokens (  235.64 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   84252.40 ms /   268 runs   (  314.37 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  131663.65 ms /   464 tokens
 41%|████      | 121/298 [5:36:49<6:33:53, 133.52s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     161.90 ms /   260 runs   (    0.62 ms per token,  1605.91 tokens per second)
llama_print_timings: prompt eval time =   47638.15 ms /   205 tokens (  232.38 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   79449.52 ms /   259 runs   (  306.75 ms per token,     3.26 tokens per second)
llama_print_timings:       total time =  128244.21 ms /   464 tokens
 41%|████      | 122/298 [5:38:57<6:27:08, 131.98s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     151.25 ms /   245 runs   (    0.62 ms per token,  1619.81 tokens per second)
llama_print_timings: prompt eval time =   53341.03 ms /   219 tokens (  243.57 ms per token,     4.11 tokens per second)
llama_print_timings:        eval time =   74165.17 ms /   244 runs   (  303.96 ms per token,     3.29 tokens per second)
llama_print_timings:       total time =  128601.82 ms /   463 tokens
 41%|████▏     | 123/298 [5:41:06<6:22:06, 131.01s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     151.55 ms /   244 runs   (    0.62 ms per token,  1609.99 tokens per second)
llama_print_timings: prompt eval time =   52890.07 ms /   220 tokens (  240.41 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   75678.39 ms /   243 runs   (  311.43 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  129675.78 ms /   463 tokens
 42%|████▏     | 124/298 [5:43:16<6:18:52, 130.65s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.78 ms /   243 runs   (    0.65 ms per token,  1549.92 tokens per second)
llama_print_timings: prompt eval time =   52517.26 ms /   221 tokens (  237.63 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   75729.36 ms /   242 runs   (  312.93 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  129381.30 ms /   463 tokens
 42%|████▏     | 125/298 [5:45:25<6:15:43, 130.31s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     246.07 ms /   389 runs   (    0.63 ms per token,  1580.83 tokens per second)
llama_print_timings: prompt eval time =   18042.91 ms /    74 tokens (  243.82 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =  120419.25 ms /   388 runs   (  310.36 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  140189.58 ms /   462 tokens
 42%|████▏     | 126/298 [5:47:45<6:22:09, 133.31s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     196.92 ms /   341 runs   (    0.58 ms per token,  1731.70 tokens per second)
llama_print_timings: prompt eval time =   28702.73 ms /   123 tokens (  233.36 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =  105303.08 ms /   340 runs   (  309.71 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  135535.23 ms /   463 tokens
 43%|████▎     | 127/298 [5:50:01<6:21:57, 134.02s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     244.56 ms /   389 runs   (    0.63 ms per token,  1590.59 tokens per second)
llama_print_timings: prompt eval time =   18943.39 ms /    75 tokens (  252.58 ms per token,     3.96 tokens per second)
llama_print_timings:        eval time =  122523.53 ms /   388 runs   (  315.78 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  143235.80 ms /   463 tokens
 43%|████▎     | 128/298 [5:52:24<6:27:40, 136.82s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     185.10 ms /   309 runs   (    0.60 ms per token,  1669.37 tokens per second)
llama_print_timings: prompt eval time =   37288.42 ms /   155 tokens (  240.57 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   95238.98 ms /   308 runs   (  309.22 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  133903.52 ms /   463 tokens
 43%|████▎     | 129/298 [5:54:38<6:23:02, 135.99s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     153.46 ms /   235 runs   (    0.65 ms per token,  1531.32 tokens per second)
llama_print_timings: prompt eval time =   53800.62 ms /   229 tokens (  234.94 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   73990.04 ms /   234 runs   (  316.20 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  128892.83 ms /   463 tokens
 44%|████▎     | 130/298 [5:56:47<6:14:54, 133.90s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     141.53 ms /   222 runs   (    0.64 ms per token,  1568.55 tokens per second)
llama_print_timings: prompt eval time =   55489.84 ms /   243 tokens (  228.35 ms per token,     4.38 tokens per second)
llama_print_timings:        eval time =   70405.97 ms /   221 runs   (  318.58 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  126952.42 ms /   464 tokens
 44%|████▍     | 131/298 [5:58:55<6:06:59, 131.85s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     194.51 ms /   317 runs   (    0.61 ms per token,  1629.75 tokens per second)
llama_print_timings: prompt eval time =   33528.62 ms /   148 tokens (  226.54 ms per token,     4.41 tokens per second)
llama_print_timings:        eval time =   97810.41 ms /   316 runs   (  309.53 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  132725.75 ms /   464 tokens
 44%|████▍     | 132/298 [6:01:08<6:05:45, 132.20s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     204.81 ms /   323 runs   (    0.63 ms per token,  1577.05 tokens per second)
llama_print_timings: prompt eval time =   32761.68 ms /   141 tokens (  232.35 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =  100578.01 ms /   322 runs   (  312.35 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  134783.14 ms /   463 tokens
 45%|████▍     | 133/298 [6:03:22<6:05:46, 133.01s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     235.25 ms /   379 runs   (    0.62 ms per token,  1611.07 tokens per second)
llama_print_timings: prompt eval time =   19805.88 ms /    85 tokens (  233.01 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =  116514.63 ms /   378 runs   (  308.24 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  138020.01 ms /   463 tokens
 45%|████▍     | 134/298 [6:05:41<6:07:46, 134.55s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     162.40 ms /   257 runs   (    0.63 ms per token,  1582.56 tokens per second)
llama_print_timings: prompt eval time =   48947.22 ms /   207 tokens (  236.46 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   80327.92 ms /   256 runs   (  313.78 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  130462.27 ms /   463 tokens
 45%|████▌     | 135/298 [6:07:51<6:02:19, 133.37s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     166.09 ms /   265 runs   (    0.63 ms per token,  1595.49 tokens per second)
llama_print_timings: prompt eval time =   47550.51 ms /   199 tokens (  238.95 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   81521.77 ms /   264 runs   (  308.79 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  130273.66 ms /   463 tokens
 46%|████▌     | 136/298 [6:10:02<5:57:41, 132.48s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     165.84 ms /   267 runs   (    0.62 ms per token,  1609.96 tokens per second)
llama_print_timings: prompt eval time =   45616.36 ms /   197 tokens (  231.56 ms per token,     4.32 tokens per second)
llama_print_timings:        eval time =   83691.13 ms /   266 runs   (  314.63 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  130511.75 ms /   463 tokens
 46%|████▌     | 137/298 [6:12:12<5:54:00, 131.93s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     200.94 ms /   319 runs   (    0.63 ms per token,  1587.54 tokens per second)
llama_print_timings: prompt eval time =   33417.86 ms /   144 tokens (  232.07 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =   99380.22 ms /   319 runs   (  311.54 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  134244.71 ms /   463 tokens
 46%|████▋     | 138/298 [6:14:27<5:53:47, 132.67s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     169.12 ms /   267 runs   (    0.63 ms per token,  1578.81 tokens per second)
llama_print_timings: prompt eval time =   45905.25 ms /   197 tokens (  233.02 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   84046.14 ms /   266 runs   (  315.96 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  131180.18 ms /   463 tokens
 47%|████▋     | 139/298 [6:16:38<5:50:30, 132.27s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     166.12 ms /   259 runs   (    0.64 ms per token,  1559.07 tokens per second)
llama_print_timings: prompt eval time =   48138.65 ms /   205 tokens (  234.82 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   79790.56 ms /   258 runs   (  309.27 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  129132.19 ms /   463 tokens
 47%|████▋     | 140/298 [6:18:47<5:45:55, 131.36s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     141.68 ms /   227 runs   (    0.62 ms per token,  1602.25 tokens per second)
llama_print_timings: prompt eval time =   55669.29 ms /   238 tokens (  233.90 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =   70403.49 ms /   226 runs   (  311.52 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  127120.88 ms /   464 tokens
 47%|████▋     | 141/298 [6:20:55<5:40:29, 130.13s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     141.17 ms /   227 runs   (    0.62 ms per token,  1607.98 tokens per second)
llama_print_timings: prompt eval time =   55638.60 ms /   238 tokens (  233.78 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =   70131.17 ms /   226 runs   (  310.31 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  126844.49 ms /   464 tokens
 48%|████▊     | 142/298 [6:23:01<5:35:51, 129.18s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     225.11 ms /   377 runs   (    0.60 ms per token,  1674.74 tokens per second)
llama_print_timings: prompt eval time =   20543.87 ms /    88 tokens (  233.45 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =  115995.00 ms /   376 runs   (  308.50 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  138181.81 ms /   464 tokens
 48%|████▊     | 143/298 [6:25:20<5:40:47, 131.92s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     194.65 ms /   328 runs   (    0.59 ms per token,  1685.10 tokens per second)
llama_print_timings: prompt eval time =   34756.96 ms /   136 tokens (  255.57 ms per token,     3.91 tokens per second)
llama_print_timings:        eval time =  101883.11 ms /   327 runs   (  311.57 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  138118.67 ms /   463 tokens
 48%|████▊     | 144/298 [6:27:38<5:43:33, 133.85s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     157.62 ms /   268 runs   (    0.59 ms per token,  1700.30 tokens per second)
llama_print_timings: prompt eval time =   46899.30 ms /   196 tokens (  239.28 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =   84061.65 ms /   267 runs   (  314.84 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  132159.23 ms /   463 tokens
 49%|████▊     | 145/298 [6:29:50<5:40:06, 133.38s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     155.10 ms /   265 runs   (    0.59 ms per token,  1708.58 tokens per second)
llama_print_timings: prompt eval time =   11745.23 ms /    48 tokens (  244.69 ms per token,     4.09 tokens per second)
llama_print_timings:        eval time =   81809.79 ms /   265 runs   (  308.72 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =   94677.18 ms /   313 tokens
 49%|████▉     | 146/298 [6:31:25<5:08:35, 121.82s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.40 ms /   267 runs   (    0.59 ms per token,  1707.16 tokens per second)
llama_print_timings: prompt eval time =   11298.41 ms /    47 tokens (  240.39 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   82087.19 ms /   266 runs   (  308.60 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =   94545.03 ms /   313 tokens
 49%|████▉     | 147/298 [6:33:00<4:46:05, 113.68s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     208.93 ms /   337 runs   (    0.62 ms per token,  1612.95 tokens per second)
llama_print_timings: prompt eval time =   29054.44 ms /   127 tokens (  228.78 ms per token,     4.37 tokens per second)
llama_print_timings:        eval time =  104388.88 ms /   336 runs   (  310.68 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  134977.34 ms /   463 tokens
 50%|████▉     | 148/298 [6:35:15<5:00:17, 120.12s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     173.52 ms /   287 runs   (    0.60 ms per token,  1654.03 tokens per second)
llama_print_timings: prompt eval time =   45511.82 ms /   176 tokens (  258.59 ms per token,     3.87 tokens per second)
llama_print_timings:        eval time =   87947.38 ms /   287 runs   (  306.44 ms per token,     3.26 tokens per second)
llama_print_timings:       total time =  134760.29 ms /   463 tokens
 50%|█████     | 149/298 [6:37:30<5:09:18, 124.55s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     158.92 ms /   262 runs   (    0.61 ms per token,  1648.62 tokens per second)
llama_print_timings: prompt eval time =   48165.22 ms /   203 tokens (  237.27 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   80853.65 ms /   261 runs   (  309.78 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  130210.33 ms /   464 tokens
 50%|█████     | 150/298 [6:39:40<5:11:31, 126.29s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     174.87 ms /   276 runs   (    0.63 ms per token,  1578.34 tokens per second)
llama_print_timings: prompt eval time =   44882.46 ms /   189 tokens (  237.47 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   85182.46 ms /   275 runs   (  309.75 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  131326.01 ms /   464 tokens
 51%|█████     | 151/298 [6:41:52<5:13:12, 127.84s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     179.98 ms /   295 runs   (    0.61 ms per token,  1639.08 tokens per second)
llama_print_timings: prompt eval time =   39865.04 ms /   170 tokens (  234.50 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   90762.93 ms /   294 runs   (  308.72 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  131957.09 ms /   464 tokens
 51%|█████     | 152/298 [6:44:04<5:14:11, 129.12s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     157.83 ms /   252 runs   (    0.63 ms per token,  1596.63 tokens per second)
llama_print_timings: prompt eval time =   48289.47 ms /   212 tokens (  227.78 ms per token,     4.39 tokens per second)
llama_print_timings:        eval time =   78804.48 ms /   251 runs   (  313.96 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  128282.99 ms /   463 tokens
 51%|█████▏    | 153/298 [6:46:12<5:11:31, 128.91s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     159.01 ms /   258 runs   (    0.62 ms per token,  1622.53 tokens per second)
llama_print_timings: prompt eval time =   49401.56 ms /   206 tokens (  239.81 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   80027.71 ms /   257 runs   (  311.39 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  130625.88 ms /   463 tokens
 52%|█████▏    | 154/298 [6:48:23<5:10:42, 129.46s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     234.15 ms /   371 runs   (    0.63 ms per token,  1584.46 tokens per second)
llama_print_timings: prompt eval time =   22281.01 ms /    94 tokens (  237.03 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =  114949.10 ms /   370 runs   (  310.67 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  138881.88 ms /   464 tokens
 52%|█████▏    | 155/298 [6:50:42<5:15:23, 132.33s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     157.11 ms /   260 runs   (    0.60 ms per token,  1654.88 tokens per second)
llama_print_timings: prompt eval time =   50979.81 ms /   205 tokens (  248.68 ms per token,     4.02 tokens per second)
llama_print_timings:        eval time =   80375.51 ms /   259 runs   (  310.33 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  132543.20 ms /   464 tokens
 52%|█████▏    | 156/298 [6:52:55<5:13:26, 132.44s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     133.10 ms /   223 runs   (    0.60 ms per token,  1675.44 tokens per second)
llama_print_timings: prompt eval time =   55783.45 ms /   240 tokens (  232.43 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   69657.41 ms /   223 runs   (  312.37 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  126506.55 ms /   463 tokens
 53%|█████▎    | 157/298 [6:55:01<5:07:07, 130.69s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     145.65 ms /   232 runs   (    0.63 ms per token,  1592.82 tokens per second)
llama_print_timings: prompt eval time =   53596.33 ms /   231 tokens (  232.02 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =   72875.30 ms /   231 runs   (  315.48 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  127568.85 ms /   462 tokens
 53%|█████▎    | 158/298 [6:57:09<5:02:51, 129.79s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     132.54 ms /   223 runs   (    0.59 ms per token,  1682.54 tokens per second)
llama_print_timings: prompt eval time =   55902.92 ms /   240 tokens (  232.93 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   68468.54 ms /   222 runs   (  308.42 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  125401.72 ms /   462 tokens
 53%|█████▎    | 159/298 [6:59:15<4:57:43, 128.51s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.55 ms /   254 runs   (    0.62 ms per token,  1622.52 tokens per second)
llama_print_timings: prompt eval time =   48372.85 ms /   208 tokens (  232.56 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   79151.66 ms /   254 runs   (  311.62 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  128709.20 ms /   462 tokens
 54%|█████▎    | 160/298 [7:01:23<4:55:48, 128.61s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     208.28 ms /   343 runs   (    0.61 ms per token,  1646.80 tokens per second)
llama_print_timings: prompt eval time =   28596.52 ms /   122 tokens (  234.40 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =  105358.59 ms /   342 runs   (  308.07 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  135476.36 ms /   464 tokens
 54%|█████▍    | 161/298 [7:03:39<4:58:27, 130.71s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     138.71 ms /   222 runs   (    0.62 ms per token,  1600.45 tokens per second)
llama_print_timings: prompt eval time =   56624.14 ms /   243 tokens (  233.02 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   69006.96 ms /   221 runs   (  312.25 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  126656.55 ms /   464 tokens
 54%|█████▍    | 162/298 [7:05:46<4:53:37, 129.54s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     142.73 ms /   236 runs   (    0.60 ms per token,  1653.47 tokens per second)
llama_print_timings: prompt eval time =   50848.30 ms /   226 tokens (  224.99 ms per token,     4.44 tokens per second)
llama_print_timings:        eval time =   73098.92 ms /   235 runs   (  311.06 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  125030.48 ms /   461 tokens
 55%|█████▍    | 163/298 [7:07:51<4:48:30, 128.22s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     140.22 ms /   231 runs   (    0.61 ms per token,  1647.46 tokens per second)
llama_print_timings: prompt eval time =   54867.97 ms /   232 tokens (  236.50 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   71500.80 ms /   230 runs   (  310.87 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  127424.49 ms /   462 tokens
 55%|█████▌    | 164/298 [7:09:59<4:45:58, 128.05s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     158.97 ms /   254 runs   (    0.63 ms per token,  1597.83 tokens per second)
llama_print_timings: prompt eval time =   48187.53 ms /   210 tokens (  229.46 ms per token,     4.36 tokens per second)
llama_print_timings:        eval time =   77702.15 ms /   253 runs   (  307.12 ms per token,     3.26 tokens per second)
llama_print_timings:       total time =  127077.84 ms /   463 tokens
 55%|█████▌    | 165/298 [7:12:06<4:43:17, 127.80s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     136.09 ms /   222 runs   (    0.61 ms per token,  1631.32 tokens per second)
llama_print_timings: prompt eval time =   57258.29 ms /   243 tokens (  235.63 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   69809.33 ms /   221 runs   (  315.88 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  128084.03 ms /   464 tokens
 56%|█████▌    | 166/298 [7:14:14<4:41:27, 127.93s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     145.98 ms /   242 runs   (    0.60 ms per token,  1657.76 tokens per second)
llama_print_timings: prompt eval time =   52145.23 ms /   223 tokens (  233.84 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =   73976.51 ms /   241 runs   (  306.96 ms per token,     3.26 tokens per second)
llama_print_timings:       total time =  127250.18 ms /   464 tokens
 56%|█████▌    | 167/298 [7:16:22<4:38:58, 127.77s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     230.53 ms /   370 runs   (    0.62 ms per token,  1604.97 tokens per second)
llama_print_timings: prompt eval time =   21833.66 ms /    94 tokens (  232.27 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =  115258.69 ms /   369 runs   (  312.35 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  138758.34 ms /   463 tokens
 56%|█████▋    | 168/298 [7:18:40<4:44:04, 131.11s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     162.64 ms /   285 runs   (    0.57 ms per token,  1752.33 tokens per second)
llama_print_timings: prompt eval time =   40741.62 ms /   176 tokens (  231.49 ms per token,     4.32 tokens per second)
llama_print_timings:        eval time =   87173.10 ms /   285 runs   (  305.87 ms per token,     3.27 tokens per second)
llama_print_timings:       total time =  129188.86 ms /   461 tokens
 57%|█████▋    | 169/298 [7:20:50<4:40:43, 130.57s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     214.88 ms /   350 runs   (    0.61 ms per token,  1628.79 tokens per second)
llama_print_timings: prompt eval time =   26826.14 ms /   115 tokens (  233.27 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =  109256.41 ms /   349 runs   (  313.06 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  137628.69 ms /   464 tokens
 57%|█████▋    | 170/298 [7:23:08<4:43:09, 132.73s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     229.62 ms /   355 runs   (    0.65 ms per token,  1546.04 tokens per second)
llama_print_timings: prompt eval time =   25678.38 ms /   110 tokens (  233.44 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =  110571.09 ms /   354 runs   (  312.35 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  137846.14 ms /   464 tokens
 57%|█████▋    | 171/298 [7:25:26<4:44:17, 134.31s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     160.55 ms /   259 runs   (    0.62 ms per token,  1613.25 tokens per second)
llama_print_timings: prompt eval time =   47768.99 ms /   206 tokens (  231.89 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =   81277.02 ms /   258 runs   (  315.03 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  130241.12 ms /   464 tokens
 58%|█████▊    | 172/298 [7:27:36<4:39:35, 133.14s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     206.84 ms /   331 runs   (    0.62 ms per token,  1600.28 tokens per second)
llama_print_timings: prompt eval time =   30419.02 ms /   134 tokens (  227.01 ms per token,     4.41 tokens per second)
llama_print_timings:        eval time =  101643.23 ms /   330 runs   (  308.01 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  133541.67 ms /   464 tokens
 58%|█████▊    | 173/298 [7:29:50<4:37:43, 133.30s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     191.90 ms /   317 runs   (    0.61 ms per token,  1651.88 tokens per second)
llama_print_timings: prompt eval time =   35744.62 ms /   148 tokens (  241.52 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   97105.93 ms /   316 runs   (  307.30 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  134276.27 ms /   464 tokens
 58%|█████▊    | 174/298 [7:32:04<4:36:10, 133.64s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     214.67 ms /   342 runs   (    0.63 ms per token,  1593.11 tokens per second)
llama_print_timings: prompt eval time =   29256.26 ms /   123 tokens (  237.86 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =  104190.02 ms /   341 runs   (  305.54 ms per token,     3.27 tokens per second)
llama_print_timings:       total time =  134971.76 ms /   464 tokens
 59%|█████▊    | 175/298 [7:34:19<4:34:51, 134.08s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     127.95 ms /   227 runs   (    0.56 ms per token,  1774.09 tokens per second)
llama_print_timings: prompt eval time =   55499.60 ms /   238 tokens (  233.19 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   70660.23 ms /   226 runs   (  312.66 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  127195.00 ms /   464 tokens
 59%|█████▉    | 176/298 [7:36:26<4:28:32, 132.07s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     165.42 ms /   279 runs   (    0.59 ms per token,  1686.63 tokens per second)
llama_print_timings: prompt eval time =   43092.22 ms /   184 tokens (  234.20 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =   86777.07 ms /   278 runs   (  312.15 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  131167.45 ms /   462 tokens
 59%|█████▉    | 177/298 [7:38:38<4:25:52, 131.84s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     163.67 ms /   261 runs   (    0.63 ms per token,  1594.63 tokens per second)
llama_print_timings: prompt eval time =   47653.94 ms /   204 tokens (  233.60 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =   81621.21 ms /   260 runs   (  313.93 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  130478.36 ms /   464 tokens
 60%|█████▉    | 178/298 [7:40:48<4:22:56, 131.47s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     154.20 ms /   245 runs   (    0.63 ms per token,  1588.81 tokens per second)
llama_print_timings: prompt eval time =   52226.68 ms /   220 tokens (  237.39 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   78866.42 ms /   244 runs   (  323.22 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  132292.63 ms /   464 tokens
 60%|██████    | 179/298 [7:43:01<4:21:18, 131.75s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.67 ms /   237 runs   (    0.61 ms per token,  1638.17 tokens per second)
llama_print_timings: prompt eval time =   55394.64 ms /   227 tokens (  244.03 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   72398.03 ms /   236 runs   (  306.77 ms per token,     3.26 tokens per second)
llama_print_timings:       total time =  128874.40 ms /   463 tokens
 60%|██████    | 180/298 [7:45:10<4:17:29, 130.93s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     157.20 ms /   255 runs   (    0.62 ms per token,  1622.14 tokens per second)
llama_print_timings: prompt eval time =   50502.61 ms /   208 tokens (  242.80 ms per token,     4.12 tokens per second)
llama_print_timings:        eval time =   78462.95 ms /   255 runs   (  307.70 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  130130.57 ms /   463 tokens
 61%|██████    | 181/298 [7:47:20<4:14:55, 130.73s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     192.53 ms /   300 runs   (    0.64 ms per token,  1558.20 tokens per second)
llama_print_timings: prompt eval time =   38838.08 ms /   165 tokens (  235.38 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   92201.64 ms /   299 runs   (  308.37 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  132444.04 ms /   464 tokens
 61%|██████    | 182/298 [7:49:33<4:13:48, 131.28s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     172.04 ms /   277 runs   (    0.62 ms per token,  1610.12 tokens per second)
llama_print_timings: prompt eval time =   43649.93 ms /   188 tokens (  232.18 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =   85044.89 ms /   276 runs   (  308.13 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  129957.34 ms /   464 tokens
 61%|██████▏   | 183/298 [7:51:43<4:10:55, 130.92s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     145.88 ms /   239 runs   (    0.61 ms per token,  1638.30 tokens per second)
llama_print_timings: prompt eval time =   53865.10 ms /   224 tokens (  240.47 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   74292.36 ms /   239 runs   (  310.85 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  129253.06 ms /   463 tokens
 62%|██████▏   | 184/298 [7:53:52<4:07:53, 130.47s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     170.68 ms /   268 runs   (    0.64 ms per token,  1570.23 tokens per second)
llama_print_timings: prompt eval time =   46447.59 ms /   197 tokens (  235.77 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   82079.74 ms /   267 runs   (  307.41 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  129799.39 ms /   464 tokens
 62%|██████▏   | 185/298 [7:56:02<4:05:26, 130.32s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     218.79 ms /   359 runs   (    0.61 ms per token,  1640.84 tokens per second)
llama_print_timings: prompt eval time =   25841.31 ms /   106 tokens (  243.79 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =  111094.07 ms /   358 runs   (  310.32 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  138516.81 ms /   464 tokens
 62%|██████▏   | 186/298 [7:58:21<4:07:57, 132.84s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     180.97 ms /   304 runs   (    0.60 ms per token,  1679.88 tokens per second)
llama_print_timings: prompt eval time =   30671.90 ms /   135 tokens (  227.20 ms per token,     4.40 tokens per second)
llama_print_timings:        eval time =   93751.40 ms /   303 runs   (  309.41 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  125746.87 ms /   438 tokens
 63%|██████▎   | 187/298 [8:00:27<4:01:53, 130.75s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     170.48 ms /   277 runs   (    0.62 ms per token,  1624.86 tokens per second)
llama_print_timings: prompt eval time =   45611.25 ms /   187 tokens (  243.91 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   84961.69 ms /   276 runs   (  307.83 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  131811.34 ms /   463 tokens
 63%|██████▎   | 188/298 [8:02:39<4:00:21, 131.11s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     200.59 ms /   330 runs   (    0.61 ms per token,  1645.11 tokens per second)
llama_print_timings: prompt eval time =   30534.01 ms /   134 tokens (  227.87 ms per token,     4.39 tokens per second)
llama_print_timings:        eval time =  101962.91 ms /   329 runs   (  309.92 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  133995.97 ms /   463 tokens
 63%|██████▎   | 189/298 [8:04:53<3:59:50, 132.02s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.78 ms /   234 runs   (    0.62 ms per token,  1616.19 tokens per second)
llama_print_timings: prompt eval time =   53679.08 ms /   229 tokens (  234.41 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =   72509.28 ms /   233 runs   (  311.20 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  127260.11 ms /   462 tokens
 64%|██████▍   | 190/298 [8:07:00<3:55:08, 130.63s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     167.00 ms /   283 runs   (    0.59 ms per token,  1694.66 tokens per second)
llama_print_timings: prompt eval time =   41446.97 ms /   180 tokens (  230.26 ms per token,     4.34 tokens per second)
llama_print_timings:        eval time =   87714.65 ms /   282 runs   (  311.04 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  130401.34 ms /   462 tokens
 64%|██████▍   | 191/298 [8:09:11<3:52:55, 130.61s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     132.31 ms /   224 runs   (    0.59 ms per token,  1692.99 tokens per second)
llama_print_timings: prompt eval time =   55856.00 ms /   240 tokens (  232.73 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   68365.19 ms /   223 runs   (  306.57 ms per token,     3.26 tokens per second)
llama_print_timings:       total time =  125252.51 ms /   463 tokens
 64%|██████▍   | 192/298 [8:11:16<3:47:58, 129.05s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     198.08 ms /   326 runs   (    0.61 ms per token,  1645.83 tokens per second)
llama_print_timings: prompt eval time =   33294.04 ms /   138 tokens (  241.26 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =  102239.61 ms /   325 runs   (  314.58 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  136979.38 ms /   463 tokens
 65%|██████▍   | 193/298 [8:13:33<3:50:04, 131.47s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     182.12 ms /   281 runs   (    0.65 ms per token,  1542.94 tokens per second)
llama_print_timings: prompt eval time =   41664.94 ms /   184 tokens (  226.44 ms per token,     4.42 tokens per second)
llama_print_timings:        eval time =   88103.64 ms /   280 runs   (  314.66 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  131062.34 ms /   464 tokens
 65%|██████▌   | 194/298 [8:15:45<3:47:44, 131.39s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     225.59 ms /   369 runs   (    0.61 ms per token,  1635.73 tokens per second)
llama_print_timings: prompt eval time =   22107.55 ms /    96 tokens (  230.29 ms per token,     4.34 tokens per second)
llama_print_timings:        eval time =  112230.52 ms /   368 runs   (  304.97 ms per token,     3.28 tokens per second)
llama_print_timings:       total time =  135972.24 ms /   464 tokens
 65%|██████▌   | 195/298 [8:18:01<3:47:58, 132.80s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     206.09 ms /   349 runs   (    0.59 ms per token,  1693.39 tokens per second)
llama_print_timings: prompt eval time =   27065.21 ms /   116 tokens (  233.32 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =  107430.95 ms /   348 runs   (  308.71 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  136100.24 ms /   464 tokens
 66%|██████▌   | 196/298 [8:20:17<3:47:29, 133.82s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     152.17 ms /   252 runs   (    0.60 ms per token,  1656.02 tokens per second)
llama_print_timings: prompt eval time =   38090.84 ms /   158 tokens (  241.08 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   77410.63 ms /   251 runs   (  308.41 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =  116654.11 ms /   409 tokens
 66%|██████▌   | 197/298 [8:22:14<3:36:39, 128.71s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     225.44 ms /   367 runs   (    0.61 ms per token,  1627.91 tokens per second)
llama_print_timings: prompt eval time =   22842.72 ms /    98 tokens (  233.09 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =  112171.52 ms /   366 runs   (  306.48 ms per token,     3.26 tokens per second)
llama_print_timings:       total time =  136658.86 ms /   464 tokens
 66%|██████▋   | 198/298 [8:24:30<3:38:32, 131.13s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     161.78 ms /   270 runs   (    0.60 ms per token,  1668.89 tokens per second)
llama_print_timings: prompt eval time =   46609.35 ms /   195 tokens (  239.02 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =   84027.61 ms /   269 runs   (  312.37 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  131849.92 ms /   464 tokens
 67%|██████▋   | 199/298 [8:26:42<3:36:48, 131.40s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     227.14 ms /   367 runs   (    0.62 ms per token,  1615.75 tokens per second)
llama_print_timings: prompt eval time =   23564.84 ms /    98 tokens (  240.46 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  112765.25 ms /   366 runs   (  308.10 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  137987.52 ms /   464 tokens
 67%|██████▋   | 200/298 [8:29:00<3:37:54, 133.41s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     159.64 ms /   260 runs   (    0.61 ms per token,  1628.65 tokens per second)
llama_print_timings: prompt eval time =   36641.65 ms /   159 tokens (  230.45 ms per token,     4.34 tokens per second)
llama_print_timings:        eval time =   81127.81 ms /   259 runs   (  313.23 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  118947.67 ms /   418 tokens
 67%|██████▋   | 201/298 [8:31:00<3:28:43, 129.11s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     166.86 ms /   279 runs   (    0.60 ms per token,  1672.08 tokens per second)
llama_print_timings: prompt eval time =   42909.41 ms /   184 tokens (  233.20 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   86371.49 ms /   279 runs   (  309.58 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  130530.66 ms /   463 tokens
 68%|██████▊   | 202/298 [8:33:10<3:27:18, 129.57s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     231.44 ms /   369 runs   (    0.63 ms per token,  1594.33 tokens per second)
llama_print_timings: prompt eval time =   21947.46 ms /    96 tokens (  228.62 ms per token,     4.37 tokens per second)
llama_print_timings:        eval time =  114459.15 ms /   368 runs   (  311.03 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  138094.87 ms /   464 tokens
 68%|██████▊   | 203/298 [8:35:28<3:29:15, 132.16s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     169.31 ms /   275 runs   (    0.62 ms per token,  1624.25 tokens per second)
llama_print_timings: prompt eval time =   43495.52 ms /   190 tokens (  228.92 ms per token,     4.37 tokens per second)
llama_print_timings:        eval time =   86166.34 ms /   274 runs   (  314.48 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  130934.12 ms /   464 tokens
 68%|██████▊   | 204/298 [8:37:39<3:26:32, 131.83s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     197.67 ms /   329 runs   (    0.60 ms per token,  1664.36 tokens per second)
llama_print_timings: prompt eval time =   31249.06 ms /   134 tokens (  233.20 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   99745.04 ms /   328 runs   (  304.10 ms per token,     3.29 tokens per second)
llama_print_timings:       total time =  132471.84 ms /   462 tokens
 69%|██████▉   | 205/298 [8:39:52<3:24:41, 132.06s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     182.27 ms /   297 runs   (    0.61 ms per token,  1629.49 tokens per second)
llama_print_timings: prompt eval time =   39834.26 ms /   167 tokens (  238.53 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   90989.48 ms /   296 runs   (  307.40 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  132164.11 ms /   463 tokens
 69%|██████▉   | 206/298 [8:42:04<3:22:36, 132.13s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     192.94 ms /   326 runs   (    0.59 ms per token,  1689.65 tokens per second)
llama_print_timings: prompt eval time =   32538.46 ms /   138 tokens (  235.79 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =  100690.69 ms /   325 runs   (  309.82 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  134750.58 ms /   463 tokens
 69%|██████▉   | 207/298 [8:44:19<3:21:39, 132.96s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     119.71 ms /   193 runs   (    0.62 ms per token,  1612.26 tokens per second)
llama_print_timings: prompt eval time =   63184.11 ms /   272 tokens (  232.29 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   59089.29 ms /   192 runs   (  307.76 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  123165.01 ms /   464 tokens
 70%|██████▉   | 208/298 [8:46:23<3:15:07, 130.09s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     140.09 ms /   222 runs   (    0.63 ms per token,  1584.70 tokens per second)
llama_print_timings: prompt eval time =   55485.23 ms /   243 tokens (  228.33 ms per token,     4.38 tokens per second)
llama_print_timings:        eval time =   69380.78 ms /   221 runs   (  313.94 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  125878.94 ms /   464 tokens
 70%|███████   | 209/298 [8:48:29<3:11:10, 128.88s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     174.24 ms /   279 runs   (    0.62 ms per token,  1601.26 tokens per second)
llama_print_timings: prompt eval time =   43169.79 ms /   186 tokens (  232.10 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =   86806.49 ms /   278 runs   (  312.25 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  131286.27 ms /   464 tokens
 70%|███████   | 210/298 [8:50:40<3:10:09, 129.65s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     177.02 ms /   293 runs   (    0.60 ms per token,  1655.15 tokens per second)
llama_print_timings: prompt eval time =   40547.40 ms /   171 tokens (  237.12 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   90923.34 ms /   292 runs   (  311.38 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  132794.70 ms /   463 tokens
 71%|███████   | 211/298 [8:52:53<3:09:26, 130.64s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     141.13 ms /   220 runs   (    0.64 ms per token,  1558.85 tokens per second)
llama_print_timings: prompt eval time =   57438.06 ms /   245 tokens (  234.44 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =   67829.80 ms /   219 runs   (  309.73 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  126299.34 ms /   464 tokens
 71%|███████   | 212/298 [8:55:00<3:05:28, 129.40s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     237.31 ms /   393 runs   (    0.60 ms per token,  1656.05 tokens per second)
llama_print_timings: prompt eval time =   17063.77 ms /    72 tokens (  237.00 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =  120780.46 ms /   392 runs   (  308.11 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  139603.62 ms /   464 tokens
 71%|███████▏  | 213/298 [8:57:19<3:07:42, 132.50s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     124.50 ms /   196 runs   (    0.64 ms per token,  1574.26 tokens per second)
llama_print_timings: prompt eval time =   62630.70 ms /   269 tokens (  232.83 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   60478.02 ms /   195 runs   (  310.14 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  124053.04 ms /   464 tokens
 72%|███████▏  | 214/298 [8:59:24<3:02:00, 130.00s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     158.97 ms /   268 runs   (    0.59 ms per token,  1685.87 tokens per second)
llama_print_timings: prompt eval time =   46436.97 ms /   197 tokens (  235.72 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   81759.97 ms /   267 runs   (  306.22 ms per token,     3.27 tokens per second)
llama_print_timings:       total time =  129414.78 ms /   464 tokens
 72%|███████▏  | 215/298 [9:01:33<2:59:38, 129.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     235.63 ms /   373 runs   (    0.63 ms per token,  1582.96 tokens per second)
llama_print_timings: prompt eval time =   20868.23 ms /    88 tokens (  237.14 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =  114922.99 ms /   373 runs   (  308.10 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  137505.04 ms /   461 tokens
 72%|███████▏  | 216/298 [9:03:51<3:00:40, 132.20s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     158.31 ms /   268 runs   (    0.59 ms per token,  1692.88 tokens per second)
llama_print_timings: prompt eval time =   45183.55 ms /   194 tokens (  232.90 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   83584.94 ms /   267 runs   (  313.05 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  129979.26 ms /   461 tokens
 73%|███████▎  | 217/298 [9:06:01<2:57:38, 131.58s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.57 ms /   270 runs   (    0.58 ms per token,  1724.50 tokens per second)
llama_print_timings: prompt eval time =    8536.36 ms /    36 tokens (  237.12 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   83969.58 ms /   269 runs   (  312.15 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =   93651.48 ms /   305 tokens
 73%|███████▎  | 218/298 [9:07:35<2:40:19, 120.24s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     209.39 ms /   339 runs   (    0.62 ms per token,  1619.01 tokens per second)
llama_print_timings: prompt eval time =   29870.71 ms /   126 tokens (  237.07 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =  103858.08 ms /   338 runs   (  307.27 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =  135278.88 ms /   464 tokens
 73%|███████▎  | 219/298 [9:09:50<2:44:18, 124.79s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     243.39 ms /   390 runs   (    0.62 ms per token,  1602.35 tokens per second)
llama_print_timings: prompt eval time =   19282.53 ms /    75 tokens (  257.10 ms per token,     3.89 tokens per second)
llama_print_timings:        eval time =  120824.15 ms /   389 runs   (  310.60 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =  141850.12 ms /   464 tokens
 74%|███████▍  | 220/298 [9:12:12<2:48:56, 129.95s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     231.15 ms /   375 runs   (    0.62 ms per token,  1622.32 tokens per second)
llama_print_timings: prompt eval time =   19982.84 ms /    88 tokens (  227.08 ms per token,     4.40 tokens per second)
llama_print_timings:        eval time =  115779.79 ms /   374 runs   (  309.57 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  137425.04 ms /   462 tokens
 74%|███████▍  | 221/298 [9:14:30<2:49:42, 132.24s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     112.08 ms /   184 runs   (    0.61 ms per token,  1641.66 tokens per second)
llama_print_timings: prompt eval time =   66628.04 ms /   280 tokens (  237.96 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   56940.58 ms /   184 runs   (  309.46 ms per token,     3.23 tokens per second)
llama_print_timings:       total time =  124426.31 ms /   464 tokens
 74%|███████▍  | 222/298 [9:16:34<2:44:34, 129.93s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     163.76 ms /   270 runs   (    0.61 ms per token,  1648.77 tokens per second)
llama_print_timings: prompt eval time =   45988.92 ms /   195 tokens (  235.84 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   93446.99 ms /   269 runs   (  347.39 ms per token,     2.88 tokens per second)
llama_print_timings:       total time =  140757.65 ms /   464 tokens
 75%|███████▍  | 223/298 [9:18:55<2:46:30, 133.21s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     170.08 ms /   270 runs   (    0.63 ms per token,  1587.45 tokens per second)
llama_print_timings: prompt eval time =   12316.32 ms /    36 tokens (  342.12 ms per token,     2.92 tokens per second)
llama_print_timings:        eval time =  102825.13 ms /   269 runs   (  382.25 ms per token,     2.62 tokens per second)
llama_print_timings:       total time =  116463.62 ms /   305 tokens
 75%|███████▌  | 224/298 [9:20:52<2:38:09, 128.24s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     252.23 ms /   394 runs   (    0.64 ms per token,  1562.05 tokens per second)
llama_print_timings: prompt eval time =   21648.90 ms /    69 tokens (  313.75 ms per token,     3.19 tokens per second)
llama_print_timings:        eval time =  149474.52 ms /   393 runs   (  380.34 ms per token,     2.63 tokens per second)
llama_print_timings:       total time =  173111.45 ms /   462 tokens
 76%|███████▌  | 225/298 [9:23:45<2:52:27, 141.74s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     261.26 ms /   380 runs   (    0.69 ms per token,  1454.51 tokens per second)
llama_print_timings: prompt eval time =   21385.25 ms /    82 tokens (  260.80 ms per token,     3.83 tokens per second)
llama_print_timings:        eval time =  145826.15 ms /   379 runs   (  384.77 ms per token,     2.60 tokens per second)
llama_print_timings:       total time =  169223.06 ms /   461 tokens
 76%|███████▌  | 226/298 [9:26:34<3:00:03, 150.04s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     216.62 ms /   341 runs   (    0.64 ms per token,  1574.16 tokens per second)
llama_print_timings: prompt eval time =   37043.19 ms /   124 tokens (  298.74 ms per token,     3.35 tokens per second)
llama_print_timings:        eval time =  117296.35 ms /   340 runs   (  344.99 ms per token,     2.90 tokens per second)
llama_print_timings:       total time =  156018.60 ms /   464 tokens
 76%|███████▌  | 227/298 [9:29:10<2:59:42, 151.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     171.49 ms /   269 runs   (    0.64 ms per token,  1568.58 tokens per second)
llama_print_timings: prompt eval time =   66582.80 ms /   196 tokens (  339.71 ms per token,     2.94 tokens per second)
llama_print_timings:        eval time =  108716.09 ms /   268 runs   (  405.66 ms per token,     2.47 tokens per second)
llama_print_timings:       total time =  176824.41 ms /   464 tokens
 77%|███████▋  | 228/298 [9:32:07<3:05:56, 159.38s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     259.10 ms /   401 runs   (    0.65 ms per token,  1547.66 tokens per second)
llama_print_timings: prompt eval time =   18077.75 ms /    63 tokens (  286.95 ms per token,     3.48 tokens per second)
llama_print_timings:        eval time =  155491.86 ms /   400 runs   (  388.73 ms per token,     2.57 tokens per second)
llama_print_timings:       total time =  175687.34 ms /   463 tokens
 77%|███████▋  | 229/298 [9:35:03<3:08:57, 164.32s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     249.85 ms /   401 runs   (    0.62 ms per token,  1604.99 tokens per second)
llama_print_timings: prompt eval time =   17672.17 ms /    62 tokens (  285.03 ms per token,     3.51 tokens per second)
llama_print_timings:        eval time =  140609.94 ms /   400 runs   (  351.52 ms per token,     2.84 tokens per second)
llama_print_timings:       total time =  160279.72 ms /   462 tokens
 77%|███████▋  | 230/298 [9:37:44<3:04:53, 163.15s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     244.63 ms /   382 runs   (    0.64 ms per token,  1561.57 tokens per second)
llama_print_timings: prompt eval time =   22119.42 ms /    80 tokens (  276.49 ms per token,     3.62 tokens per second)
llama_print_timings:        eval time =  127479.68 ms /   381 runs   (  334.59 ms per token,     2.99 tokens per second)
llama_print_timings:       total time =  151487.47 ms /   461 tokens
 78%|███████▊  | 231/298 [9:40:15<2:58:19, 159.69s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     107.84 ms /   176 runs   (    0.61 ms per token,  1632.09 tokens per second)
llama_print_timings: prompt eval time =   76220.00 ms /   288 tokens (  264.65 ms per token,     3.78 tokens per second)
llama_print_timings:        eval time =   58094.43 ms /   176 runs   (  330.08 ms per token,     3.03 tokens per second)
llama_print_timings:       total time =  135219.76 ms /   464 tokens
 78%|███████▊  | 232/298 [9:42:31<2:47:37, 152.38s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     247.15 ms /   380 runs   (    0.65 ms per token,  1537.51 tokens per second)
llama_print_timings: prompt eval time =   20725.35 ms /    85 tokens (  243.83 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =  125705.21 ms /   379 runs   (  331.68 ms per token,     3.01 tokens per second)
llama_print_timings:       total time =  148287.84 ms /   464 tokens
 78%|███████▊  | 233/298 [9:44:59<2:43:47, 151.20s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     244.26 ms /   382 runs   (    0.64 ms per token,  1563.91 tokens per second)
llama_print_timings: prompt eval time =   21072.82 ms /    80 tokens (  263.41 ms per token,     3.80 tokens per second)
llama_print_timings:        eval time =  126102.65 ms /   382 runs   (  330.11 ms per token,     3.03 tokens per second)
llama_print_timings:       total time =  148993.15 ms /   462 tokens
 79%|███████▊  | 234/298 [9:47:28<2:40:37, 150.59s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     236.91 ms /   385 runs   (    0.62 ms per token,  1625.06 tokens per second)
llama_print_timings: prompt eval time =   19094.96 ms /    77 tokens (  247.99 ms per token,     4.03 tokens per second)
llama_print_timings:        eval time =  125695.43 ms /   384 runs   (  327.33 ms per token,     3.06 tokens per second)
llama_print_timings:       total time =  146594.78 ms /   461 tokens
 79%|███████▉  | 235/298 [9:49:55<2:36:54, 149.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     160.44 ms /   267 runs   (    0.60 ms per token,  1664.12 tokens per second)
llama_print_timings: prompt eval time =   51792.94 ms /   197 tokens (  262.91 ms per token,     3.80 tokens per second)
llama_print_timings:        eval time =   90088.95 ms /   266 runs   (  338.68 ms per token,     2.95 tokens per second)
llama_print_timings:       total time =  143215.91 ms /   463 tokens
 79%|███████▉  | 236/298 [9:52:18<2:32:31, 147.61s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     210.16 ms /   342 runs   (    0.61 ms per token,  1627.31 tokens per second)
llama_print_timings: prompt eval time =   30461.20 ms /   122 tokens (  249.68 ms per token,     4.01 tokens per second)
llama_print_timings:        eval time =  112087.74 ms /   341 runs   (  328.70 ms per token,     3.04 tokens per second)
llama_print_timings:       total time =  144209.74 ms /   463 tokens
 80%|███████▉  | 237/298 [9:54:43<2:29:05, 146.65s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     232.77 ms /   384 runs   (    0.61 ms per token,  1649.67 tokens per second)
llama_print_timings: prompt eval time =   19892.10 ms /    78 tokens (  255.03 ms per token,     3.92 tokens per second)
llama_print_timings:        eval time =  126962.61 ms /   383 runs   (  331.50 ms per token,     3.02 tokens per second)
llama_print_timings:       total time =  148672.47 ms /   461 tokens
 80%|███████▉  | 238/298 [9:57:11<2:27:17, 147.29s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     102.31 ms /   149 runs   (    0.69 ms per token,  1456.34 tokens per second)
llama_print_timings: prompt eval time =   79876.85 ms /   314 tokens (  254.38 ms per token,     3.93 tokens per second)
llama_print_timings:        eval time =   48166.06 ms /   148 runs   (  325.45 ms per token,     3.07 tokens per second)
llama_print_timings:       total time =  128855.51 ms /   462 tokens
 80%|████████  | 239/298 [9:59:20<2:19:26, 141.80s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     113.77 ms /   180 runs   (    0.63 ms per token,  1582.13 tokens per second)
llama_print_timings: prompt eval time =   57106.24 ms /   232 tokens (  246.15 ms per token,     4.06 tokens per second)
llama_print_timings:        eval time =   59181.25 ms /   180 runs   (  328.78 ms per token,     3.04 tokens per second)
llama_print_timings:       total time =  117187.25 ms /   412 tokens
 81%|████████  | 240/298 [10:01:18<2:09:58, 134.46s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     188.36 ms /   304 runs   (    0.62 ms per token,  1613.96 tokens per second)
llama_print_timings: prompt eval time =   41786.32 ms /   160 tokens (  261.16 ms per token,     3.83 tokens per second)
llama_print_timings:        eval time =  100242.07 ms /   304 runs   (  329.74 ms per token,     3.03 tokens per second)
llama_print_timings:       total time =  143504.54 ms /   464 tokens
 81%|████████  | 241/298 [10:03:41<2:10:20, 137.21s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     182.09 ms /   287 runs   (    0.63 ms per token,  1576.17 tokens per second)
llama_print_timings: prompt eval time =   21098.24 ms /    86 tokens (  245.33 ms per token,     4.08 tokens per second)
llama_print_timings:        eval time =   94968.46 ms /   286 runs   (  332.06 ms per token,     3.01 tokens per second)
llama_print_timings:       total time =  117450.55 ms /   372 tokens
 81%|████████  | 242/298 [10:05:39<2:02:33, 131.32s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     164.56 ms /   267 runs   (    0.62 ms per token,  1622.54 tokens per second)
llama_print_timings: prompt eval time =   49868.36 ms /   198 tokens (  251.86 ms per token,     3.97 tokens per second)
llama_print_timings:        eval time =   86598.54 ms /   266 runs   (  325.56 ms per token,     3.07 tokens per second)
llama_print_timings:       total time =  137761.45 ms /   464 tokens
 82%|████████▏ | 243/298 [10:07:57<2:02:11, 133.30s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     163.85 ms /   263 runs   (    0.62 ms per token,  1605.14 tokens per second)
llama_print_timings: prompt eval time =   50084.70 ms /   200 tokens (  250.42 ms per token,     3.99 tokens per second)
llama_print_timings:        eval time =   86830.59 ms /   263 runs   (  330.15 ms per token,     3.03 tokens per second)
llama_print_timings:       total time =  138211.78 ms /   463 tokens
 82%|████████▏ | 245/298 [10:10:16<1:23:25, 94.45s/it] 


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     158.90 ms /   271 runs   (    0.59 ms per token,  1705.43 tokens per second)
llama_print_timings: prompt eval time =   48430.35 ms /   192 tokens (  252.24 ms per token,     3.96 tokens per second)
llama_print_timings:        eval time =   88307.37 ms /   271 runs   (  325.86 ms per token,     3.07 tokens per second)
llama_print_timings:       total time =  138050.21 ms /   463 tokens
 83%|████████▎ | 246/298 [10:12:34<1:33:13, 107.57s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     226.37 ms /   353 runs   (    0.64 ms per token,  1559.43 tokens per second)
llama_print_timings: prompt eval time =   30223.44 ms /   111 tokens (  272.28 ms per token,     3.67 tokens per second)
llama_print_timings:        eval time =  130345.66 ms /   352 runs   (  370.30 ms per token,     2.70 tokens per second)
llama_print_timings:       total time =  162326.31 ms /   463 tokens
 83%|████████▎ | 248/298 [10:15:16<1:12:23, 86.87s/it] 


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


 84%|████████▎ | 249/298 [10:15:17<49:41, 60.84s/it]  Llama.generate: prefix-match hit




[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     249.59 ms /   376 runs   (    0.66 ms per token,  1506.47 tokens per second)
llama_print_timings: prompt eval time =   22149.42 ms /    88 tokens (  251.70 ms per token,     3.97 tokens per second)
llama_print_timings:        eval time =  141938.92 ms /   376 runs   (  377.50 ms per token,     2.65 tokens per second)
llama_print_timings:       total time =  166088.50 ms /   464 tokens
 84%|████████▍ | 251/298 [10:18:03<50:44, 64.78s/it]  


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     214.37 ms /   355 runs   (    0.60 ms per token,  1656.02 tokens per second)
llama_print_timings: prompt eval time =   26449.14 ms /   110 tokens (  240.45 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  112472.95 ms /   354 runs   (  317.72 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  140543.86 ms /   464 tokens
 85%|████████▍ | 252/298 [10:20:24<1:07:09, 87.59s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     176.50 ms /   293 runs   (    0.60 ms per token,  1660.03 tokens per second)
llama_print_timings: prompt eval time =   40003.08 ms /   172 tokens (  232.58 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   93982.18 ms /   292 runs   (  321.86 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  135338.43 ms /   464 tokens
 86%|████████▌ | 255/298 [10:22:39<42:17, 59.02s/it]  


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


 86%|████████▌ | 256/298 [10:22:40<30:31, 43.62s/it]



[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     175.02 ms /   276 runs   (    0.63 ms per token,  1576.97 tokens per second)
llama_print_timings: prompt eval time =   23991.16 ms /   102 tokens (  235.21 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   87717.47 ms /   275 runs   (  318.97 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  112954.09 ms /   377 tokens
 86%|████████▌ | 257/298 [10:24:33<42:44, 62.54s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     128.96 ms /   223 runs   (    0.58 ms per token,  1729.25 tokens per second)
llama_print_timings: prompt eval time =   58080.01 ms /   242 tokens (  240.00 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   70996.46 ms /   222 runs   (  319.80 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  130136.52 ms /   464 tokens
 87%|████████▋ | 258/298 [10:26:43<54:19, 81.49s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     207.98 ms /   349 runs   (    0.60 ms per token,  1678.04 tokens per second)
llama_print_timings: prompt eval time =   28381.02 ms /   116 tokens (  244.66 ms per token,     4.09 tokens per second)
llama_print_timings:        eval time =  110276.20 ms /   348 runs   (  316.89 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  140224.00 ms /   464 tokens
 87%|████████▋ | 259/298 [10:29:03<1:03:53, 98.29s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     244.34 ms /   382 runs   (    0.64 ms per token,  1563.39 tokens per second)
llama_print_timings: prompt eval time =   20418.84 ms /    83 tokens (  246.01 ms per token,     4.06 tokens per second)
llama_print_timings:        eval time =  121320.79 ms /   381 runs   (  318.43 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  143484.34 ms /   464 tokens
 87%|████████▋ | 260/298 [10:31:27<1:10:34, 111.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     223.34 ms /   367 runs   (    0.61 ms per token,  1643.22 tokens per second)
llama_print_timings: prompt eval time =   22538.04 ms /    98 tokens (  229.98 ms per token,     4.35 tokens per second)
llama_print_timings:        eval time =  115615.58 ms /   366 runs   (  315.89 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  139800.74 ms /   464 tokens
 88%|████████▊ | 262/298 [10:33:47<50:41, 84.49s/it]   


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


 88%|████████▊ | 263/298 [10:33:47<34:42, 59.50s/it]Llama.generate: prefix-match hit




[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     213.11 ms /   342 runs   (    0.62 ms per token,  1604.81 tokens per second)
llama_print_timings: prompt eval time =   28743.01 ms /   123 tokens (  233.68 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =  108809.13 ms /   341 runs   (  319.09 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  139124.11 ms /   464 tokens
 89%|████████▉ | 266/298 [10:36:07<24:01, 45.04s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     231.17 ms /   370 runs   (    0.62 ms per token,  1600.55 tokens per second)
llama_print_timings: prompt eval time =   22128.10 ms /    94 tokens (  235.41 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =  117211.52 ms /   369 runs   (  317.65 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  141045.27 ms /   463 tokens
 90%|████████▉ | 267/298 [10:38:28<35:32, 68.79s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     213.78 ms /   354 runs   (    0.60 ms per token,  1655.92 tokens per second)
llama_print_timings: prompt eval time =   27467.76 ms /   111 tokens (  247.46 ms per token,     4.04 tokens per second)
llama_print_timings:        eval time =  111591.11 ms /   353 runs   (  316.12 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  140665.34 ms /   464 tokens
 90%|████████▉ | 268/298 [10:40:49<43:47, 87.57s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     179.20 ms /   297 runs   (    0.60 ms per token,  1657.38 tokens per second)
llama_print_timings: prompt eval time =   38759.14 ms /   168 tokens (  230.71 ms per token,     4.33 tokens per second)
llama_print_timings:        eval time =   94740.82 ms /   296 runs   (  320.07 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  134880.90 ms /   464 tokens
 90%|█████████ | 269/298 [10:43:04<48:33, 100.45s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     130.84 ms /   223 runs   (    0.59 ms per token,  1704.37 tokens per second)
llama_print_timings: prompt eval time =   57994.68 ms /   240 tokens (  241.64 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   71613.04 ms /   222 runs   (  322.58 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  130646.60 ms /   462 tokens
 91%|█████████ | 270/298 [10:45:14<50:49, 108.92s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     227.24 ms /   371 runs   (    0.61 ms per token,  1632.61 tokens per second)
llama_print_timings: prompt eval time =   22400.25 ms /    93 tokens (  240.86 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =  117958.60 ms /   370 runs   (  318.81 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  142034.63 ms /   463 tokens
 91%|█████████ | 271/298 [10:47:37<53:16, 118.41s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     241.12 ms /   395 runs   (    0.61 ms per token,  1638.18 tokens per second)
llama_print_timings: prompt eval time =   17143.26 ms /    69 tokens (  248.45 ms per token,     4.02 tokens per second)
llama_print_timings:        eval time =  124235.19 ms /   394 runs   (  315.32 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  143161.29 ms /   463 tokens
 91%|█████████▏| 272/298 [10:50:00<54:25, 125.60s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     130.61 ms /   204 runs   (    0.64 ms per token,  1561.94 tokens per second)
llama_print_timings: prompt eval time =   63155.15 ms /   261 tokens (  241.97 ms per token,     4.13 tokens per second)
llama_print_timings:        eval time =   65370.64 ms /   203 runs   (  322.02 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  129514.21 ms /   464 tokens
 92%|█████████▏| 273/298 [10:52:09<52:49, 126.78s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     122.65 ms /   192 runs   (    0.64 ms per token,  1565.40 tokens per second)
llama_print_timings: prompt eval time =   63974.29 ms /   272 tokens (  235.20 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   61354.01 ms /   192 runs   (  319.55 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  126240.57 ms /   464 tokens
 92%|█████████▏| 274/298 [10:54:16<50:39, 126.66s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.44 ms /   232 runs   (    0.62 ms per token,  1606.17 tokens per second)
llama_print_timings: prompt eval time =   54566.78 ms /   232 tokens (  235.20 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   74645.98 ms /   232 runs   (  321.75 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  130316.61 ms /   464 tokens
 92%|█████████▏| 275/298 [10:56:26<48:58, 127.78s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     202.51 ms /   327 runs   (    0.62 ms per token,  1614.75 tokens per second)
llama_print_timings: prompt eval time =   31740.90 ms /   138 tokens (  230.01 ms per token,     4.35 tokens per second)
llama_print_timings:        eval time =  107096.08 ms /   326 runs   (  328.52 ms per token,     3.04 tokens per second)
llama_print_timings:       total time =  140398.31 ms /   464 tokens
 93%|█████████▎| 276/298 [10:58:47<48:14, 131.57s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     166.64 ms /   263 runs   (    0.63 ms per token,  1578.25 tokens per second)
llama_print_timings: prompt eval time =   47811.71 ms /   200 tokens (  239.06 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =   93939.44 ms /   263 runs   (  357.18 ms per token,     2.80 tokens per second)
llama_print_timings:       total time =  143052.58 ms /   463 tokens
 93%|█████████▎| 277/298 [11:01:10<47:15, 135.04s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     122.39 ms /   192 runs   (    0.64 ms per token,  1568.82 tokens per second)
llama_print_timings: prompt eval time =   68041.19 ms /   272 tokens (  250.15 ms per token,     4.00 tokens per second)
llama_print_timings:        eval time =   61336.29 ms /   192 runs   (  319.46 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  130295.97 ms /   464 tokens
 93%|█████████▎| 278/298 [11:03:20<44:33, 133.65s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     127.73 ms /   201 runs   (    0.64 ms per token,  1573.62 tokens per second)
llama_print_timings: prompt eval time =   61949.03 ms /   264 tokens (  234.66 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   63540.63 ms /   200 runs   (  317.70 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  126462.19 ms /   464 tokens
 94%|█████████▎| 279/298 [11:05:27<41:39, 131.54s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     182.54 ms /   285 runs   (    0.64 ms per token,  1561.28 tokens per second)
llama_print_timings: prompt eval time =   41496.61 ms /   180 tokens (  230.54 ms per token,     4.34 tokens per second)
llama_print_timings:        eval time =   90984.35 ms /   284 runs   (  320.37 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  133810.07 ms /   464 tokens
 94%|█████████▍| 280/298 [11:07:41<39:40, 132.25s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     145.98 ms /   229 runs   (    0.64 ms per token,  1568.70 tokens per second)
llama_print_timings: prompt eval time =   55562.98 ms /   236 tokens (  235.44 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   71857.44 ms /   228 runs   (  315.16 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  128518.67 ms /   464 tokens
 94%|█████████▍| 281/298 [11:09:49<37:09, 131.16s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     128.64 ms /   206 runs   (    0.62 ms per token,  1601.37 tokens per second)
llama_print_timings: prompt eval time =   61522.74 ms /   259 tokens (  237.54 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   65568.68 ms /   205 runs   (  319.85 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  128089.56 ms /   464 tokens
 95%|█████████▍| 282/298 [11:11:58<34:44, 130.27s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     111.64 ms /   180 runs   (    0.62 ms per token,  1612.28 tokens per second)
llama_print_timings: prompt eval time =   67467.89 ms /   285 tokens (  236.73 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   57011.76 ms /   179 runs   (  318.50 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  125336.50 ms /   464 tokens
 95%|█████████▍| 283/298 [11:14:03<32:12, 128.82s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     131.51 ms /   208 runs   (    0.63 ms per token,  1581.59 tokens per second)
llama_print_timings: prompt eval time =   60703.72 ms /   256 tokens (  237.12 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   67399.31 ms /   208 runs   (  324.04 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  129118.83 ms /   464 tokens
 95%|█████████▌| 284/298 [11:16:12<30:05, 128.94s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     139.81 ms /   222 runs   (    0.63 ms per token,  1587.85 tokens per second)
llama_print_timings: prompt eval time =   57415.97 ms /   243 tokens (  236.28 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   69415.87 ms /   221 runs   (  314.10 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  127871.50 ms /   464 tokens
 96%|█████████▌| 285/298 [11:18:20<27:52, 128.65s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     124.21 ms /   193 runs   (    0.64 ms per token,  1553.86 tokens per second)
llama_print_timings: prompt eval time =   64307.37 ms /   272 tokens (  236.42 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   60649.94 ms /   192 runs   (  315.89 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  125883.90 ms /   464 tokens
 96%|█████████▌| 286/298 [11:20:26<25:34, 127.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     137.41 ms /   217 runs   (    0.63 ms per token,  1579.26 tokens per second)
llama_print_timings: prompt eval time =   58294.39 ms /   248 tokens (  235.06 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   68998.98 ms /   216 runs   (  319.44 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  128318.06 ms /   464 tokens
 96%|█████████▋| 287/298 [11:22:35<23:28, 128.02s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     239.90 ms /   385 runs   (    0.62 ms per token,  1604.82 tokens per second)
llama_print_timings: prompt eval time =   19614.44 ms /    80 tokens (  245.18 ms per token,     4.08 tokens per second)
llama_print_timings:        eval time =  121544.24 ms /   384 runs   (  316.52 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  142879.28 ms /   464 tokens
 97%|█████████▋| 288/298 [11:24:58<22:05, 132.51s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     221.98 ms /   358 runs   (    0.62 ms per token,  1612.75 tokens per second)
llama_print_timings: prompt eval time =   24731.25 ms /   107 tokens (  231.13 ms per token,     4.33 tokens per second)
llama_print_timings:        eval time =  113141.19 ms /   357 runs   (  316.92 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  139532.93 ms /   464 tokens
 97%|█████████▋| 289/298 [11:27:17<20:11, 134.64s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     242.14 ms /   386 runs   (    0.63 ms per token,  1594.11 tokens per second)
llama_print_timings: prompt eval time =   18212.44 ms /    79 tokens (  230.54 ms per token,     4.34 tokens per second)
llama_print_timings:        eval time =  121911.04 ms /   385 runs   (  316.65 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  141901.41 ms /   464 tokens
 97%|█████████▋| 290/298 [11:29:39<18:15, 136.90s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     120.39 ms /   192 runs   (    0.63 ms per token,  1594.78 tokens per second)
llama_print_timings: prompt eval time =   63515.98 ms /   272 tokens (  233.51 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =   60806.68 ms /   192 runs   (  316.70 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  125244.83 ms /   464 tokens
 98%|█████████▊| 291/298 [11:31:45<15:34, 133.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.11 ms /   231 runs   (    0.62 ms per token,  1602.93 tokens per second)
llama_print_timings: prompt eval time =   55283.49 ms /   234 tokens (  236.25 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   72329.68 ms /   230 runs   (  314.48 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  128695.83 ms /   464 tokens
 98%|█████████▊| 292/298 [11:33:54<13:12, 132.04s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     124.12 ms /   208 runs   (    0.60 ms per token,  1675.77 tokens per second)
llama_print_timings: prompt eval time =   60107.19 ms /   256 tokens (  234.79 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   65691.15 ms /   208 runs   (  315.82 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  126784.74 ms /   464 tokens
 98%|█████████▊| 293/298 [11:36:00<10:52, 130.49s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     151.68 ms /   247 runs   (    0.61 ms per token,  1628.42 tokens per second)
llama_print_timings: prompt eval time =   50830.25 ms /   218 tokens (  233.17 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   77460.05 ms /   246 runs   (  314.88 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  129445.83 ms /   464 tokens
 99%|█████████▊| 294/298 [11:38:10<08:40, 130.21s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     143.17 ms /   226 runs   (    0.63 ms per token,  1578.53 tokens per second)
llama_print_timings: prompt eval time =   56208.62 ms /   239 tokens (  235.18 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   71759.14 ms /   225 runs   (  318.93 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  129031.61 ms /   464 tokens
 99%|█████████▉| 295/298 [11:40:19<06:29, 129.89s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.61 ms /   257 runs   (    0.61 ms per token,  1640.98 tokens per second)
llama_print_timings: prompt eval time =   49619.00 ms /   208 tokens (  238.55 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   82345.92 ms /   256 runs   (  321.66 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  133166.64 ms /   464 tokens
 99%|█████████▉| 296/298 [11:42:32<04:21, 130.91s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     157.61 ms /   251 runs   (    0.63 ms per token,  1592.56 tokens per second)
llama_print_timings: prompt eval time =   13006.54 ms /    51 tokens (  255.03 ms per token,     3.92 tokens per second)
llama_print_timings:        eval time =   78896.65 ms /   250 runs   (  315.59 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =   92996.89 ms /   301 tokens
100%|█████████▉| 297/298 [11:44:06<01:59, 119.56s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     190.70 ms /   291 runs   (    0.66 ms per token,  1525.93 tokens per second)
llama_print_timings: prompt eval time =   41362.94 ms /   174 tokens (  237.72 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   90986.41 ms /   290 runs   (  313.75 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  133725.69 ms /   464 tokens
100%|██████████| 298/298 [11:46:19<00:00, 142.21s/it]



[1m> Finished chain.[0m


  0%|          | 0/201 [00:00<?, ?it/s]Llama.generate: prefix-match hit




[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     107.83 ms /   171 runs   (    0.63 ms per token,  1585.83 tokens per second)
llama_print_timings: prompt eval time =   70012.16 ms /   294 tokens (  238.14 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   53802.19 ms /   170 runs   (  316.48 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  124653.41 ms /   464 tokens
  0%|          | 1/201 [02:04<6:55:55, 124.78s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     106.06 ms /   172 runs   (    0.62 ms per token,  1621.72 tokens per second)
llama_print_timings: prompt eval time =   69015.51 ms /   293 tokens (  235.55 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   53223.02 ms /   171 runs   (  311.25 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  123068.88 ms /   464 tokens
  1%|          | 2/201 [04:07<6:50:44, 123.84s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     229.22 ms /   381 runs   (    0.60 ms per token,  1662.17 tokens per second)
llama_print_timings: prompt eval time =   21272.41 ms /    84 tokens (  253.24 ms per token,     3.95 tokens per second)
llama_print_timings:        eval time =  119456.75 ms /   380 runs   (  314.36 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  142433.82 ms /   464 tokens
  1%|▏         | 3/201 [06:30<7:16:53, 132.39s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     108.05 ms /   173 runs   (    0.62 ms per token,  1601.11 tokens per second)
llama_print_timings: prompt eval time =   70628.48 ms /   292 tokens (  241.88 ms per token,     4.13 tokens per second)
llama_print_timings:        eval time =   54634.52 ms /   172 runs   (  317.64 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  126129.11 ms /   464 tokens
  2%|▏         | 4/201 [08:36<7:06:43, 129.97s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      96.86 ms /   157 runs   (    0.62 ms per token,  1620.90 tokens per second)
llama_print_timings: prompt eval time =   72540.92 ms /   308 tokens (  235.52 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   49164.86 ms /   156 runs   (  315.16 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  122497.80 ms /   464 tokens
  2%|▏         | 5/201 [10:39<6:55:54, 127.32s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      73.01 ms /   123 runs   (    0.59 ms per token,  1684.61 tokens per second)
llama_print_timings: prompt eval time =   80256.32 ms /   342 tokens (  234.67 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   39217.39 ms /   122 runs   (  321.45 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  120110.10 ms /   464 tokens
  3%|▎         | 6/201 [12:39<6:45:57, 124.91s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     236.59 ms /   378 runs   (    0.63 ms per token,  1597.67 tokens per second)
llama_print_timings: prompt eval time =   19955.00 ms /    87 tokens (  229.37 ms per token,     4.36 tokens per second)
llama_print_timings:        eval time =  119091.19 ms /   377 runs   (  315.89 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  140746.52 ms /   464 tokens
  3%|▎         | 7/201 [15:00<7:00:45, 130.13s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     230.43 ms /   372 runs   (    0.62 ms per token,  1614.35 tokens per second)
llama_print_timings: prompt eval time =   14432.56 ms /    60 tokens (  240.54 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  116692.30 ms /   371 runs   (  314.53 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  132827.98 ms /   431 tokens
  4%|▍         | 8/201 [17:13<7:01:27, 131.02s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     128.81 ms /   209 runs   (    0.62 ms per token,  1622.49 tokens per second)
llama_print_timings: prompt eval time =   61619.87 ms /   256 tokens (  240.70 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   65763.35 ms /   208 runs   (  316.17 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  128391.21 ms /   464 tokens
  4%|▍         | 9/201 [19:21<6:56:45, 130.24s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     120.25 ms /   199 runs   (    0.60 ms per token,  1654.95 tokens per second)
llama_print_timings: prompt eval time =   62030.69 ms /   266 tokens (  233.20 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   63221.90 ms /   198 runs   (  319.30 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  126198.44 ms /   464 tokens
  5%|▍         | 10/201 [21:28<6:50:44, 129.03s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     210.71 ms /   346 runs   (    0.61 ms per token,  1642.06 tokens per second)
llama_print_timings: prompt eval time =   28526.57 ms /   119 tokens (  239.72 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =  109715.21 ms /   345 runs   (  318.02 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  139807.03 ms /   464 tokens
  5%|▌         | 11/201 [23:48<6:59:08, 132.36s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     159.73 ms /   269 runs   (    0.59 ms per token,  1684.12 tokens per second)
llama_print_timings: prompt eval time =   46094.49 ms /   196 tokens (  235.18 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   85555.20 ms /   268 runs   (  319.24 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  132897.21 ms /   464 tokens
  6%|▌         | 12/201 [26:01<6:57:33, 132.56s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     147.20 ms /   234 runs   (    0.63 ms per token,  1589.65 tokens per second)
llama_print_timings: prompt eval time =   54624.78 ms /   230 tokens (  237.50 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   74215.62 ms /   233 runs   (  318.52 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  129949.04 ms /   463 tokens
  6%|▋         | 13/201 [28:11<6:53:00, 131.81s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     244.49 ms /   386 runs   (    0.63 ms per token,  1578.83 tokens per second)
llama_print_timings: prompt eval time =   19104.79 ms /    78 tokens (  244.93 ms per token,     4.08 tokens per second)
llama_print_timings:        eval time =  122476.71 ms /   385 runs   (  318.12 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  143369.32 ms /   463 tokens
  7%|▋         | 14/201 [30:34<7:01:47, 135.33s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     193.77 ms /   310 runs   (    0.63 ms per token,  1599.81 tokens per second)
llama_print_timings: prompt eval time =   37219.15 ms /   155 tokens (  240.12 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   98112.53 ms /   309 runs   (  317.52 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  136770.99 ms /   464 tokens
  7%|▋         | 15/201 [32:51<7:00:59, 135.80s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     107.31 ms /   168 runs   (    0.64 ms per token,  1565.63 tokens per second)
llama_print_timings: prompt eval time =   72161.34 ms /   296 tokens (  243.79 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   52727.40 ms /   168 runs   (  313.85 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  125733.92 ms /   464 tokens
  8%|▊         | 16/201 [34:57<6:49:29, 132.81s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     139.18 ms /   229 runs   (    0.61 ms per token,  1645.40 tokens per second)
llama_print_timings: prompt eval time =   55668.27 ms /   236 tokens (  235.88 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   72739.15 ms /   228 runs   (  319.03 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  129456.87 ms /   464 tokens
  8%|▊         | 17/201 [37:07<6:44:17, 131.84s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     155.40 ms /   259 runs   (    0.60 ms per token,  1666.68 tokens per second)
llama_print_timings: prompt eval time =   48740.96 ms /   206 tokens (  236.61 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   81786.68 ms /   258 runs   (  317.00 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  131718.34 ms /   464 tokens
  9%|▉         | 18/201 [39:18<6:42:07, 131.84s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     108.82 ms /   170 runs   (    0.64 ms per token,  1562.20 tokens per second)
llama_print_timings: prompt eval time =   70289.56 ms /   295 tokens (  238.27 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   54314.99 ms /   169 runs   (  321.39 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  125453.41 ms /   464 tokens
  9%|▉         | 19/201 [41:24<6:34:13, 129.96s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      74.69 ms /   121 runs   (    0.62 ms per token,  1619.96 tokens per second)
llama_print_timings: prompt eval time =   81025.21 ms /   344 tokens (  235.54 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   38019.52 ms /   120 runs   (  316.83 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  119670.23 ms /   464 tokens
 10%|▉         | 20/201 [43:24<6:22:50, 126.91s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     129.62 ms /   210 runs   (    0.62 ms per token,  1620.16 tokens per second)
llama_print_timings: prompt eval time =   60626.36 ms /   255 tokens (  237.75 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   66234.61 ms /   209 runs   (  316.91 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  127848.33 ms /   464 tokens
 10%|█         | 21/201 [45:32<6:21:42, 127.24s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     237.95 ms /   377 runs   (    0.63 ms per token,  1584.35 tokens per second)
llama_print_timings: prompt eval time =   20801.25 ms /    88 tokens (  236.38 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =  120932.45 ms /   376 runs   (  321.63 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  143482.77 ms /   464 tokens
 11%|█         | 22/201 [47:55<6:34:14, 132.15s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     123.19 ms /   200 runs   (    0.62 ms per token,  1623.52 tokens per second)
llama_print_timings: prompt eval time =   65047.80 ms /   264 tokens (  246.39 ms per token,     4.06 tokens per second)
llama_print_timings:        eval time =   62748.80 ms /   200 runs   (  313.74 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  128765.43 ms /   464 tokens
 11%|█▏        | 23/201 [50:04<6:29:08, 131.17s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     165.38 ms /   273 runs   (    0.61 ms per token,  1650.70 tokens per second)
llama_print_timings: prompt eval time =   45404.96 ms /   192 tokens (  236.48 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   85285.64 ms /   272 runs   (  313.55 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  131912.65 ms /   464 tokens
 12%|█▏        | 24/201 [52:16<6:27:42, 131.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     171.25 ms /   280 runs   (    0.61 ms per token,  1635.02 tokens per second)
llama_print_timings: prompt eval time =   30532.45 ms /   127 tokens (  240.41 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   86863.64 ms /   279 runs   (  311.34 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  118673.02 ms /   406 tokens
 12%|█▏        | 25/201 [54:15<6:14:23, 127.63s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     194.25 ms /   302 runs   (    0.64 ms per token,  1554.67 tokens per second)
llama_print_timings: prompt eval time =   38798.73 ms /   163 tokens (  238.03 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   95916.31 ms /   301 runs   (  318.66 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  136125.62 ms /   464 tokens
 13%|█▎        | 26/201 [56:31<6:19:47, 130.21s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     130.79 ms /   209 runs   (    0.63 ms per token,  1597.98 tokens per second)
llama_print_timings: prompt eval time =   61526.35 ms /   256 tokens (  240.34 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   66067.78 ms /   208 runs   (  317.63 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  128587.85 ms /   464 tokens
 13%|█▎        | 27/201 [58:40<6:16:18, 129.76s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     140.25 ms /   228 runs   (    0.62 ms per token,  1625.71 tokens per second)
llama_print_timings: prompt eval time =   55286.33 ms /   237 tokens (  233.28 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   73456.45 ms /   227 runs   (  323.60 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  129792.48 ms /   464 tokens
 14%|█▍        | 28/201 [1:00:50<6:14:16, 129.81s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     125.47 ms /   221 runs   (    0.57 ms per token,  1761.43 tokens per second)
llama_print_timings: prompt eval time =   56983.73 ms /   244 tokens (  233.54 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =   70483.04 ms /   220 runs   (  320.38 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  128494.48 ms /   464 tokens
 14%|█▍        | 29/201 [1:02:59<6:11:04, 129.45s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     193.45 ms /   307 runs   (    0.63 ms per token,  1586.99 tokens per second)
llama_print_timings: prompt eval time =   36082.64 ms /   158 tokens (  228.37 ms per token,     4.38 tokens per second)
llama_print_timings:        eval time =   96352.82 ms /   306 runs   (  314.88 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  133850.85 ms /   464 tokens
 15%|█▍        | 30/201 [1:05:13<6:12:47, 130.81s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.30 ms /   245 runs   (    0.64 ms per token,  1567.47 tokens per second)
llama_print_timings: prompt eval time =   51538.35 ms /   220 tokens (  234.27 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =   78467.13 ms /   244 runs   (  321.59 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  131197.85 ms /   464 tokens
 15%|█▌        | 31/201 [1:07:24<6:11:02, 130.96s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     139.04 ms /   239 runs   (    0.58 ms per token,  1718.88 tokens per second)
llama_print_timings: prompt eval time =   53660.57 ms /   226 tokens (  237.44 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   74660.01 ms /   238 runs   (  313.70 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  129430.64 ms /   464 tokens
 16%|█▌        | 32/201 [1:09:33<6:07:40, 130.53s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     133.15 ms /   209 runs   (    0.64 ms per token,  1569.63 tokens per second)
llama_print_timings: prompt eval time =   60045.31 ms /   256 tokens (  234.55 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   66755.00 ms /   208 runs   (  320.94 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  127815.06 ms /   464 tokens
 16%|█▋        | 33/201 [1:11:41<6:03:19, 129.76s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     200.64 ms /   331 runs   (    0.61 ms per token,  1649.75 tokens per second)
llama_print_timings: prompt eval time =   32783.97 ms /   134 tokens (  244.66 ms per token,     4.09 tokens per second)
llama_print_timings:        eval time =  103288.76 ms /   330 runs   (  313.00 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  137555.20 ms /   464 tokens
 17%|█▋        | 34/201 [1:13:59<6:07:47, 132.14s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     223.41 ms /   355 runs   (    0.63 ms per token,  1589.01 tokens per second)
llama_print_timings: prompt eval time =   26321.11 ms /   110 tokens (  239.28 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =  112110.36 ms /   354 runs   (  316.70 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  140080.28 ms /   464 tokens
 17%|█▋        | 35/201 [1:16:19<6:12:18, 134.57s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     143.33 ms /   239 runs   (    0.60 ms per token,  1667.52 tokens per second)
llama_print_timings: prompt eval time =   52504.83 ms /   226 tokens (  232.32 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   75615.67 ms /   238 runs   (  317.71 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  129273.86 ms /   464 tokens
 18%|█▊        | 36/201 [1:18:29<6:05:54, 133.06s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     232.30 ms /   368 runs   (    0.63 ms per token,  1584.19 tokens per second)
llama_print_timings: prompt eval time =   22466.82 ms /    96 tokens (  234.03 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =  117253.57 ms /   368 runs   (  318.62 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  141547.98 ms /   464 tokens
 18%|█▊        | 37/201 [1:20:50<6:10:44, 135.64s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     117.87 ms /   191 runs   (    0.62 ms per token,  1620.47 tokens per second)
llama_print_timings: prompt eval time =   66598.12 ms /   274 tokens (  243.06 ms per token,     4.11 tokens per second)
llama_print_timings:        eval time =   60365.09 ms /   190 runs   (  317.71 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  127884.87 ms /   464 tokens
 19%|█▉        | 38/201 [1:22:59<6:02:16, 133.35s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     105.76 ms /   163 runs   (    0.65 ms per token,  1541.18 tokens per second)
llama_print_timings: prompt eval time =   71073.19 ms /   302 tokens (  235.34 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   52159.19 ms /   162 runs   (  321.97 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  124063.43 ms /   464 tokens
 19%|█▉        | 39/201 [1:25:03<5:52:40, 130.62s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     119.02 ms /   185 runs   (    0.64 ms per token,  1554.35 tokens per second)
llama_print_timings: prompt eval time =   66195.11 ms /   280 tokens (  236.41 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   57749.38 ms /   184 runs   (  313.86 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  124861.73 ms /   464 tokens
 20%|█▉        | 40/201 [1:27:08<5:46:00, 128.95s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     131.76 ms /   202 runs   (    0.65 ms per token,  1533.13 tokens per second)
llama_print_timings: prompt eval time =   62784.71 ms /   263 tokens (  238.73 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   66225.13 ms /   201 runs   (  329.48 ms per token,     3.04 tokens per second)
llama_print_timings:       total time =  130002.80 ms /   464 tokens
 20%|██        | 41/201 [1:29:18<5:44:48, 129.30s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     151.52 ms /   244 runs   (    0.62 ms per token,  1610.35 tokens per second)
llama_print_timings: prompt eval time =   51862.44 ms /   221 tokens (  234.67 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   76834.21 ms /   243 runs   (  316.19 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  129836.33 ms /   464 tokens
 21%|██        | 42/201 [1:31:28<5:43:11, 129.51s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     120.28 ms /   188 runs   (    0.64 ms per token,  1563.06 tokens per second)
llama_print_timings: prompt eval time =   66384.36 ms /   277 tokens (  239.65 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   59070.48 ms /   187 runs   (  315.88 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  126359.73 ms /   464 tokens
 21%|██▏       | 43/201 [1:33:34<5:38:39, 128.60s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     133.95 ms /   222 runs   (    0.60 ms per token,  1657.33 tokens per second)
llama_print_timings: prompt eval time =   58022.22 ms /   243 tokens (  238.77 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   70175.42 ms /   221 runs   (  317.54 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  129250.38 ms /   464 tokens
 22%|██▏       | 44/201 [1:35:44<5:37:05, 128.83s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     145.98 ms /   231 runs   (    0.63 ms per token,  1582.37 tokens per second)
llama_print_timings: prompt eval time =   56000.82 ms /   234 tokens (  239.32 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =   73400.64 ms /   230 runs   (  319.13 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  130501.14 ms /   464 tokens
 22%|██▏       | 45/201 [1:37:54<5:36:23, 129.38s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     153.58 ms /   243 runs   (    0.63 ms per token,  1582.24 tokens per second)
llama_print_timings: prompt eval time =   51736.35 ms /   222 tokens (  233.05 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   76420.87 ms /   242 runs   (  315.79 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  129329.15 ms /   464 tokens
 23%|██▎       | 46/201 [1:40:04<5:34:16, 129.40s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     146.18 ms /   231 runs   (    0.63 ms per token,  1580.21 tokens per second)
llama_print_timings: prompt eval time =   55243.52 ms /   234 tokens (  236.08 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   72597.87 ms /   230 runs   (  315.64 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  128930.40 ms /   464 tokens
 23%|██▎       | 47/201 [1:42:13<5:31:51, 129.30s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     147.82 ms /   248 runs   (    0.60 ms per token,  1677.73 tokens per second)
llama_print_timings: prompt eval time =   53204.18 ms /   216 tokens (  246.32 ms per token,     4.06 tokens per second)
llama_print_timings:        eval time =   78405.03 ms /   248 runs   (  316.15 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  132754.73 ms /   464 tokens
 24%|██▍       | 48/201 [1:44:26<5:32:25, 130.36s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     175.57 ms /   293 runs   (    0.60 ms per token,  1668.85 tokens per second)
llama_print_timings: prompt eval time =   41361.68 ms /   172 tokens (  240.47 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   91965.62 ms /   292 runs   (  314.95 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  134658.68 ms /   464 tokens
 24%|██▍       | 49/201 [1:46:41<5:33:36, 131.69s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     148.01 ms /   237 runs   (    0.62 ms per token,  1601.22 tokens per second)
llama_print_timings: prompt eval time =   54283.01 ms /   228 tokens (  238.08 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   75064.79 ms /   236 runs   (  318.07 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  130433.79 ms /   464 tokens
 25%|██▍       | 50/201 [1:48:51<5:30:32, 131.34s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     154.82 ms /   251 runs   (    0.62 ms per token,  1621.28 tokens per second)
llama_print_timings: prompt eval time =   50816.25 ms /   214 tokens (  237.46 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   78209.82 ms /   250 runs   (  312.84 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  130207.43 ms /   464 tokens
 25%|██▌       | 51/201 [1:51:01<5:27:34, 131.03s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     160.36 ms /   271 runs   (    0.59 ms per token,  1689.99 tokens per second)
llama_print_timings: prompt eval time =   46081.37 ms /   194 tokens (  237.53 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   86384.75 ms /   270 runs   (  319.94 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  133728.13 ms /   464 tokens
 26%|██▌       | 52/201 [1:53:15<5:27:29, 131.87s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     180.30 ms /   288 runs   (    0.63 ms per token,  1597.36 tokens per second)
llama_print_timings: prompt eval time =   40824.90 ms /   176 tokens (  231.96 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =   91142.53 ms /   288 runs   (  316.47 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  133300.10 ms /   464 tokens
 26%|██▋       | 53/201 [1:55:29<5:26:27, 132.35s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     134.78 ms /   233 runs   (    0.58 ms per token,  1728.70 tokens per second)
llama_print_timings: prompt eval time =   55175.48 ms /   232 tokens (  237.83 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   73441.75 ms /   232 runs   (  316.56 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  129695.91 ms /   464 tokens
 27%|██▋       | 54/201 [1:57:38<5:22:23, 131.59s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.29 ms /   229 runs   (    0.63 ms per token,  1587.08 tokens per second)
llama_print_timings: prompt eval time =   56334.88 ms /   236 tokens (  238.71 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   71909.79 ms /   228 runs   (  315.39 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  129335.31 ms /   464 tokens
 27%|██▋       | 55/201 [1:59:48<5:18:37, 130.94s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     159.00 ms /   269 runs   (    0.59 ms per token,  1691.80 tokens per second)
llama_print_timings: prompt eval time =   46068.22 ms /   196 tokens (  235.04 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   84641.48 ms /   268 runs   (  315.83 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  131954.85 ms /   464 tokens
 28%|██▊       | 56/201 [2:02:00<5:17:14, 131.28s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     218.79 ms /   355 runs   (    0.62 ms per token,  1622.53 tokens per second)
llama_print_timings: prompt eval time =   25479.40 ms /   110 tokens (  231.63 ms per token,     4.32 tokens per second)
llama_print_timings:        eval time =  113636.46 ms /   354 runs   (  321.01 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  140718.71 ms /   464 tokens
 28%|██▊       | 57/201 [2:04:21<5:21:56, 134.14s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     164.23 ms /   282 runs   (    0.58 ms per token,  1717.06 tokens per second)
llama_print_timings: prompt eval time =   43236.85 ms /   183 tokens (  236.27 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   89425.46 ms /   281 runs   (  318.24 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  133940.58 ms /   464 tokens
 29%|██▉       | 58/201 [2:06:35<5:19:38, 134.11s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     147.96 ms /   231 runs   (    0.64 ms per token,  1561.23 tokens per second)
llama_print_timings: prompt eval time =   55534.73 ms /   234 tokens (  237.33 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   73515.52 ms /   230 runs   (  319.63 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  130151.95 ms /   464 tokens
 29%|██▉       | 59/201 [2:08:45<5:14:40, 132.96s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     120.53 ms /   193 runs   (    0.62 ms per token,  1601.33 tokens per second)
llama_print_timings: prompt eval time =   63358.91 ms /   272 tokens (  232.94 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   62055.05 ms /   192 runs   (  323.20 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  126320.86 ms /   464 tokens
 30%|██▉       | 60/201 [2:10:52<5:07:52, 131.01s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     122.10 ms /   200 runs   (    0.61 ms per token,  1637.99 tokens per second)
llama_print_timings: prompt eval time =   62940.08 ms /   264 tokens (  238.41 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   63249.68 ms /   200 runs   (  316.25 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  127136.82 ms /   464 tokens
 30%|███       | 61/201 [2:12:59<5:03:06, 129.90s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     120.65 ms /   198 runs   (    0.61 ms per token,  1641.06 tokens per second)
llama_print_timings: prompt eval time =   15952.46 ms /    66 tokens (  241.70 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   62424.09 ms /   197 runs   (  316.87 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =   79222.90 ms /   263 tokens
 31%|███       | 62/201 [2:14:18<4:25:48, 114.74s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     212.52 ms /   344 runs   (    0.62 ms per token,  1618.65 tokens per second)
llama_print_timings: prompt eval time =   28159.61 ms /   120 tokens (  234.66 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =  109055.25 ms /   344 runs   (  317.02 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  138763.11 ms /   464 tokens
 31%|███▏      | 63/201 [2:16:37<4:40:33, 121.99s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     165.07 ms /   277 runs   (    0.60 ms per token,  1678.06 tokens per second)
llama_print_timings: prompt eval time =   44849.26 ms /   188 tokens (  238.56 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   88020.58 ms /   276 runs   (  318.92 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  134140.65 ms /   464 tokens
 32%|███▏      | 64/201 [2:18:51<4:46:55, 125.66s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     181.60 ms /   288 runs   (    0.63 ms per token,  1585.93 tokens per second)
llama_print_timings: prompt eval time =   41848.74 ms /   176 tokens (  237.78 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   92138.20 ms /   287 runs   (  321.04 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  135310.89 ms /   463 tokens
 32%|███▏      | 65/201 [2:21:07<4:51:28, 128.59s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     213.60 ms /   348 runs   (    0.61 ms per token,  1629.19 tokens per second)
llama_print_timings: prompt eval time =   27330.71 ms /   117 tokens (  233.60 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =  109061.97 ms /   347 runs   (  314.30 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  137947.42 ms /   464 tokens
 33%|███▎      | 66/201 [2:23:25<4:55:43, 131.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     132.41 ms /   219 runs   (    0.60 ms per token,  1653.97 tokens per second)
llama_print_timings: prompt eval time =   59455.08 ms /   246 tokens (  241.69 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   69323.02 ms /   218 runs   (  318.00 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  129791.73 ms /   464 tokens
 33%|███▎      | 67/201 [2:25:35<4:52:31, 130.98s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     143.12 ms /   227 runs   (    0.63 ms per token,  1586.14 tokens per second)
llama_print_timings: prompt eval time =   56353.69 ms /   238 tokens (  236.78 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   72127.98 ms /   226 runs   (  319.15 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  129581.52 ms /   464 tokens
 34%|███▍      | 68/201 [2:27:45<4:49:28, 130.59s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     162.14 ms /   253 runs   (    0.64 ms per token,  1560.35 tokens per second)
llama_print_timings: prompt eval time =   50692.85 ms /   212 tokens (  239.12 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =   82014.60 ms /   252 runs   (  325.45 ms per token,     3.07 tokens per second)
llama_print_timings:       total time =  133928.12 ms /   464 tokens
 34%|███▍      | 69/201 [2:29:59<4:49:34, 131.63s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     148.26 ms /   250 runs   (    0.59 ms per token,  1686.25 tokens per second)
llama_print_timings: prompt eval time =   50790.38 ms /   215 tokens (  236.23 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   78449.35 ms /   249 runs   (  315.06 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  130398.97 ms /   464 tokens
 35%|███▍      | 70/201 [2:32:09<4:46:38, 131.29s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     129.11 ms /   202 runs   (    0.64 ms per token,  1564.61 tokens per second)
llama_print_timings: prompt eval time =   62320.79 ms /   263 tokens (  236.96 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   64014.72 ms /   201 runs   (  318.48 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  127326.50 ms /   464 tokens
 35%|███▌      | 71/201 [2:34:16<4:41:57, 130.14s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     160.47 ms /   255 runs   (    0.63 ms per token,  1589.13 tokens per second)
llama_print_timings: prompt eval time =   49714.34 ms /   210 tokens (  236.73 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   81082.04 ms /   254 runs   (  319.22 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  131991.24 ms /   464 tokens
 36%|███▌      | 72/201 [2:36:29<4:41:03, 130.72s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     163.73 ms /   276 runs   (    0.59 ms per token,  1685.69 tokens per second)
llama_print_timings: prompt eval time =   44970.37 ms /   189 tokens (  237.94 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   88535.07 ms /   275 runs   (  321.95 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  134818.46 ms /   464 tokens
 36%|███▋      | 73/201 [2:38:44<4:41:33, 131.98s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     116.64 ms /   182 runs   (    0.64 ms per token,  1560.32 tokens per second)
llama_print_timings: prompt eval time =   68359.47 ms /   283 tokens (  241.55 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   57631.60 ms /   181 runs   (  318.41 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  126923.61 ms /   464 tokens
 37%|███▋      | 74/201 [2:40:51<4:36:12, 130.49s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     135.43 ms /   215 runs   (    0.63 ms per token,  1587.49 tokens per second)
llama_print_timings: prompt eval time =   59147.46 ms /   250 tokens (  236.59 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   66922.72 ms /   214 runs   (  312.72 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  127091.71 ms /   464 tokens
 37%|███▋      | 75/201 [2:42:58<4:31:58, 129.51s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     135.41 ms /   223 runs   (    0.61 ms per token,  1646.86 tokens per second)
llama_print_timings: prompt eval time =   58635.60 ms /   242 tokens (  242.30 ms per token,     4.13 tokens per second)
llama_print_timings:        eval time =   70404.90 ms /   222 runs   (  317.14 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  130095.72 ms /   464 tokens
 38%|███▊      | 76/201 [2:45:08<4:30:14, 129.72s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     160.99 ms /   271 runs   (    0.59 ms per token,  1683.33 tokens per second)
llama_print_timings: prompt eval time =   46243.13 ms /   194 tokens (  238.37 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   85489.04 ms /   270 runs   (  316.63 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  132988.47 ms /   464 tokens
 38%|███▊      | 77/201 [2:47:21<4:30:10, 130.73s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     220.27 ms /   356 runs   (    0.62 ms per token,  1616.20 tokens per second)
llama_print_timings: prompt eval time =   26226.85 ms /   109 tokens (  240.61 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  111820.74 ms /   355 runs   (  314.99 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  139700.76 ms /   464 tokens
 39%|███▉      | 78/201 [2:49:41<4:33:35, 133.46s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     132.35 ms /   217 runs   (    0.61 ms per token,  1639.53 tokens per second)
llama_print_timings: prompt eval time =   57940.82 ms /   248 tokens (  233.63 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =   67771.18 ms /   216 runs   (  313.76 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  126746.35 ms /   464 tokens
 39%|███▉      | 79/201 [2:51:48<4:27:20, 131.48s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     218.79 ms /   348 runs   (    0.63 ms per token,  1590.54 tokens per second)
llama_print_timings: prompt eval time =   28526.70 ms /   117 tokens (  243.82 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =  110425.07 ms /   347 runs   (  318.23 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  140533.11 ms /   464 tokens
 40%|███▉      | 80/201 [2:54:08<4:30:41, 134.23s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     161.10 ms /   244 runs   (    0.66 ms per token,  1514.54 tokens per second)
llama_print_timings: prompt eval time =   53118.75 ms /   221 tokens (  240.36 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   77320.42 ms /   243 runs   (  318.19 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  131601.26 ms /   464 tokens
 40%|████      | 81/201 [2:56:20<4:26:56, 133.47s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     138.29 ms /   215 runs   (    0.64 ms per token,  1554.74 tokens per second)
llama_print_timings: prompt eval time =   58482.63 ms /   250 tokens (  233.93 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =   74275.53 ms /   214 runs   (  347.08 ms per token,     2.88 tokens per second)
llama_print_timings:       total time =  133835.49 ms /   464 tokens
 41%|████      | 82/201 [2:58:34<4:25:01, 133.63s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     212.64 ms /   346 runs   (    0.61 ms per token,  1627.16 tokens per second)
llama_print_timings: prompt eval time =   28360.22 ms /   119 tokens (  238.32 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =  110499.39 ms /   345 runs   (  320.29 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  140453.68 ms /   464 tokens
 41%|████▏     | 83/201 [3:00:55<4:26:53, 135.71s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     139.09 ms /   219 runs   (    0.64 ms per token,  1574.46 tokens per second)
llama_print_timings: prompt eval time =   58790.07 ms /   246 tokens (  238.98 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =   69721.19 ms /   218 runs   (  319.82 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  129542.89 ms /   464 tokens
 42%|████▏     | 84/201 [3:03:04<4:21:04, 133.89s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     173.01 ms /   272 runs   (    0.64 ms per token,  1572.20 tokens per second)
llama_print_timings: prompt eval time =   45723.29 ms /   192 tokens (  238.14 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   87218.39 ms /   272 runs   (  320.66 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  134232.36 ms /   464 tokens
 42%|████▏     | 85/201 [3:05:19<4:19:06, 134.03s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     237.49 ms /   382 runs   (    0.62 ms per token,  1608.52 tokens per second)
llama_print_timings: prompt eval time =   20053.46 ms /    83 tokens (  241.61 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =  120505.82 ms /   381 runs   (  316.29 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  142292.76 ms /   464 tokens
 43%|████▎     | 86/201 [3:07:41<4:21:41, 136.53s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     236.52 ms /   377 runs   (    0.63 ms per token,  1593.97 tokens per second)
llama_print_timings: prompt eval time =   21158.06 ms /    88 tokens (  240.43 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  120871.06 ms /   376 runs   (  321.47 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  143761.39 ms /   464 tokens
 43%|████▎     | 87/201 [3:10:05<4:23:35, 138.73s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     204.01 ms /   324 runs   (    0.63 ms per token,  1588.16 tokens per second)
llama_print_timings: prompt eval time =   24095.41 ms /   102 tokens (  236.23 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =  101956.42 ms /   323 runs   (  315.65 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  127545.69 ms /   425 tokens
 44%|████▍     | 88/201 [3:12:13<4:15:01, 135.41s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      96.99 ms /   158 runs   (    0.61 ms per token,  1629.08 tokens per second)
llama_print_timings: prompt eval time =   76672.40 ms /   307 tokens (  249.75 ms per token,     4.00 tokens per second)
llama_print_timings:        eval time =   49057.12 ms /   157 runs   (  312.47 ms per token,     3.20 tokens per second)
llama_print_timings:       total time =  126516.06 ms /   464 tokens
 44%|████▍     | 89/201 [3:14:19<4:07:51, 132.78s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      97.35 ms /   159 runs   (    0.61 ms per token,  1633.30 tokens per second)
llama_print_timings: prompt eval time =   12782.07 ms /    50 tokens (  255.64 ms per token,     3.91 tokens per second)
llama_print_timings:        eval time =   49069.39 ms /   158 runs   (  310.57 ms per token,     3.22 tokens per second)
llama_print_timings:       total time =   62527.21 ms /   208 tokens
 45%|████▍     | 90/201 [3:15:22<3:26:42, 111.74s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     142.66 ms /   226 runs   (    0.63 ms per token,  1584.23 tokens per second)
llama_print_timings: prompt eval time =   56510.57 ms /   239 tokens (  236.45 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   70500.21 ms /   225 runs   (  313.33 ms per token,     3.19 tokens per second)
llama_print_timings:       total time =  128095.89 ms /   464 tokens
 45%|████▌     | 91/201 [3:17:30<3:33:54, 116.68s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     161.56 ms /   253 runs   (    0.64 ms per token,  1566.02 tokens per second)
llama_print_timings: prompt eval time =   50557.47 ms /   212 tokens (  238.48 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   80905.27 ms /   252 runs   (  321.05 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  132669.13 ms /   464 tokens
 46%|████▌     | 92/201 [3:19:43<3:40:43, 121.50s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     155.18 ms /   249 runs   (    0.62 ms per token,  1604.55 tokens per second)
llama_print_timings: prompt eval time =   50826.22 ms /   216 tokens (  235.31 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   79815.67 ms /   248 runs   (  321.84 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  131801.86 ms /   464 tokens
 46%|████▋     | 93/201 [3:21:55<3:44:19, 124.63s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     132.55 ms /   231 runs   (    0.57 ms per token,  1742.76 tokens per second)
llama_print_timings: prompt eval time =   55159.43 ms /   234 tokens (  235.72 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   72523.45 ms /   230 runs   (  315.32 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  128766.39 ms /   464 tokens
 47%|████▋     | 94/201 [3:24:04<3:44:31, 125.90s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     132.77 ms /   230 runs   (    0.58 ms per token,  1732.32 tokens per second)
llama_print_timings: prompt eval time =   11184.25 ms /    47 tokens (  237.96 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   74239.16 ms /   229 runs   (  324.19 ms per token,     3.08 tokens per second)
llama_print_timings:       total time =   86427.56 ms /   276 tokens
 47%|████▋     | 95/201 [3:25:30<3:21:33, 114.09s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     143.45 ms /   231 runs   (    0.62 ms per token,  1610.32 tokens per second)
llama_print_timings: prompt eval time =   55580.93 ms /   234 tokens (  237.53 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   72375.82 ms /   230 runs   (  314.68 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  129043.31 ms /   464 tokens
 48%|████▊     | 96/201 [3:27:39<3:27:33, 118.60s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     159.53 ms /   253 runs   (    0.63 ms per token,  1585.87 tokens per second)
llama_print_timings: prompt eval time =   50198.46 ms /   212 tokens (  236.79 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   80633.27 ms /   252 runs   (  319.97 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  132082.21 ms /   464 tokens
 48%|████▊     | 97/201 [3:29:51<3:32:39, 122.69s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     150.66 ms /   240 runs   (    0.63 ms per token,  1592.94 tokens per second)
llama_print_timings: prompt eval time =   52513.26 ms /   224 tokens (  234.43 ms per token,     4.27 tokens per second)
llama_print_timings:        eval time =   78911.25 ms /   240 runs   (  328.80 ms per token,     3.04 tokens per second)
llama_print_timings:       total time =  132560.11 ms /   464 tokens
 49%|████▉     | 98/201 [3:32:04<3:35:47, 125.70s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     142.75 ms /   230 runs   (    0.62 ms per token,  1611.19 tokens per second)
llama_print_timings: prompt eval time =   55933.49 ms /   235 tokens (  238.01 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   73040.52 ms /   229 runs   (  318.95 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  130048.61 ms /   464 tokens
 49%|████▉     | 99/201 [3:34:14<3:35:58, 127.04s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     145.65 ms /   231 runs   (    0.63 ms per token,  1585.98 tokens per second)
llama_print_timings: prompt eval time =   55346.15 ms /   234 tokens (  236.52 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   73253.81 ms /   230 runs   (  318.49 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  129691.16 ms /   464 tokens
 50%|████▉     | 100/201 [3:36:24<3:35:14, 127.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     178.85 ms /   283 runs   (    0.63 ms per token,  1582.30 tokens per second)
llama_print_timings: prompt eval time =   43248.46 ms /   182 tokens (  237.63 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   89260.39 ms /   282 runs   (  316.53 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  133816.58 ms /   464 tokens
 50%|█████     | 101/201 [3:38:38<3:36:07, 129.68s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     180.55 ms /   280 runs   (    0.64 ms per token,  1550.81 tokens per second)
llama_print_timings: prompt eval time =   10966.72 ms /    46 tokens (  238.41 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   89386.22 ms /   279 runs   (  320.38 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  101616.99 ms /   325 tokens
 51%|█████     | 102/201 [3:40:20<3:20:07, 121.29s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.64 ms /   236 runs   (    0.61 ms per token,  1631.64 tokens per second)
llama_print_timings: prompt eval time =   53712.33 ms /   229 tokens (  234.55 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   74263.12 ms /   235 runs   (  316.01 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  129091.16 ms /   464 tokens
 51%|█████     | 103/201 [3:42:29<3:21:58, 123.66s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     139.54 ms /   220 runs   (    0.63 ms per token,  1576.65 tokens per second)
llama_print_timings: prompt eval time =   59455.01 ms /   245 tokens (  242.67 ms per token,     4.12 tokens per second)
llama_print_timings:        eval time =   69581.47 ms /   219 runs   (  317.72 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  130079.99 ms /   464 tokens
 52%|█████▏    | 104/201 [3:44:39<3:23:04, 125.62s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     164.54 ms /   243 runs   (    0.68 ms per token,  1476.85 tokens per second)
llama_print_timings: prompt eval time =   53526.55 ms /   222 tokens (  241.11 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   76940.98 ms /   242 runs   (  317.94 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  131669.71 ms /   464 tokens
 52%|█████▏    | 105/201 [3:46:51<3:23:57, 127.47s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     142.26 ms /   229 runs   (    0.62 ms per token,  1609.73 tokens per second)
llama_print_timings: prompt eval time =   55629.11 ms /   236 tokens (  235.72 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   74127.75 ms /   228 runs   (  325.12 ms per token,     3.08 tokens per second)
llama_print_timings:       total time =  130849.02 ms /   464 tokens
 53%|█████▎    | 106/201 [3:49:02<3:23:29, 128.52s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     138.27 ms /   223 runs   (    0.62 ms per token,  1612.81 tokens per second)
llama_print_timings: prompt eval time =   57127.57 ms /   242 tokens (  236.06 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   70329.25 ms /   222 runs   (  316.80 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  128485.44 ms /   464 tokens
 53%|█████▎    | 107/201 [3:51:10<3:21:22, 128.54s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     141.21 ms /   224 runs   (    0.63 ms per token,  1586.28 tokens per second)
llama_print_timings: prompt eval time =   57857.30 ms /   240 tokens (  241.07 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   71003.62 ms /   224 runs   (  316.98 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  129950.74 ms /   464 tokens
 54%|█████▎    | 108/201 [3:53:21<3:19:56, 129.00s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     232.42 ms /   364 runs   (    0.64 ms per token,  1566.14 tokens per second)
llama_print_timings: prompt eval time =   24753.70 ms /   101 tokens (  245.09 ms per token,     4.08 tokens per second)
llama_print_timings:        eval time =  116004.45 ms /   363 runs   (  319.57 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  142432.41 ms /   464 tokens
 54%|█████▍    | 109/201 [3:55:43<3:24:01, 133.06s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     139.49 ms /   221 runs   (    0.63 ms per token,  1584.33 tokens per second)
llama_print_timings: prompt eval time =   57243.47 ms /   244 tokens (  234.60 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =   69859.76 ms /   220 runs   (  317.54 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  128150.19 ms /   464 tokens
 55%|█████▍    | 110/201 [3:57:51<3:19:37, 131.62s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     117.86 ms /   192 runs   (    0.61 ms per token,  1629.07 tokens per second)
llama_print_timings: prompt eval time =   66432.96 ms /   272 tokens (  244.24 ms per token,     4.09 tokens per second)
llama_print_timings:        eval time =   60603.22 ms /   192 runs   (  315.64 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  127942.83 ms /   464 tokens
 55%|█████▌    | 111/201 [3:59:59<3:15:49, 130.55s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     120.42 ms /   188 runs   (    0.64 ms per token,  1561.23 tokens per second)
llama_print_timings: prompt eval time =   65455.34 ms /   277 tokens (  236.30 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   59752.27 ms /   187 runs   (  319.53 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  126126.86 ms /   464 tokens
 56%|█████▌    | 112/201 [4:02:06<3:11:43, 129.25s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     163.94 ms /   256 runs   (    0.64 ms per token,  1561.51 tokens per second)
llama_print_timings: prompt eval time =   49811.39 ms /   210 tokens (  237.20 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   80210.65 ms /   255 runs   (  314.55 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  131194.78 ms /   465 tokens
 56%|█████▌    | 113/201 [4:04:17<3:10:28, 129.87s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     114.18 ms /   178 runs   (    0.64 ms per token,  1558.90 tokens per second)
llama_print_timings: prompt eval time =   68440.76 ms /   287 tokens (  238.47 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   56781.10 ms /   177 runs   (  320.80 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  126105.95 ms /   464 tokens
 57%|█████▋    | 114/201 [4:06:23<3:06:43, 128.78s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     152.81 ms /   261 runs   (    0.59 ms per token,  1708.04 tokens per second)
llama_print_timings: prompt eval time =   48918.64 ms /   204 tokens (  239.80 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   81908.96 ms /   260 runs   (  315.03 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  131999.07 ms /   464 tokens
 57%|█████▋    | 115/201 [4:08:35<3:06:01, 129.78s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     147.27 ms /   245 runs   (    0.60 ms per token,  1663.64 tokens per second)
llama_print_timings: prompt eval time =   51262.52 ms /   218 tokens (  235.15 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   78198.50 ms /   244 runs   (  320.49 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  130612.56 ms /   462 tokens
 58%|█████▊    | 116/201 [4:10:46<3:04:15, 130.06s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     123.10 ms /   194 runs   (    0.63 ms per token,  1575.90 tokens per second)
llama_print_timings: prompt eval time =   64286.63 ms /   271 tokens (  237.22 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   61872.25 ms /   193 runs   (  320.58 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  127072.26 ms /   464 tokens
 58%|█████▊    | 117/201 [4:12:53<3:00:53, 129.20s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     225.95 ms /   359 runs   (    0.63 ms per token,  1588.86 tokens per second)
llama_print_timings: prompt eval time =   25448.79 ms /   106 tokens (  240.08 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =  114318.06 ms /   358 runs   (  319.32 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  141442.75 ms /   464 tokens
 59%|█████▊    | 118/201 [4:15:15<3:03:51, 132.91s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     120.59 ms /   190 runs   (    0.63 ms per token,  1575.64 tokens per second)
llama_print_timings: prompt eval time =   65199.76 ms /   275 tokens (  237.09 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   61619.53 ms /   189 runs   (  326.03 ms per token,     3.07 tokens per second)
llama_print_timings:       total time =  127745.39 ms /   464 tokens
 59%|█████▉    | 119/201 [4:17:23<2:59:34, 131.39s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.18 ms /   264 runs   (    0.59 ms per token,  1690.36 tokens per second)
llama_print_timings: prompt eval time =   47132.36 ms /   200 tokens (  235.66 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   83274.04 ms /   264 runs   (  315.43 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  131642.78 ms /   464 tokens
 60%|█████▉    | 120/201 [4:19:34<2:57:31, 131.50s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     235.59 ms /   390 runs   (    0.60 ms per token,  1655.43 tokens per second)
llama_print_timings: prompt eval time =   17563.14 ms /    74 tokens (  237.34 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =  123161.86 ms /   389 runs   (  316.61 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  142519.15 ms /   463 tokens
 60%|██████    | 121/201 [4:21:57<2:59:47, 134.84s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     123.07 ms /   193 runs   (    0.64 ms per token,  1568.21 tokens per second)
llama_print_timings: prompt eval time =   67064.54 ms /   272 tokens (  246.56 ms per token,     4.06 tokens per second)
llama_print_timings:        eval time =   61566.26 ms /   192 runs   (  320.66 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  129566.48 ms /   464 tokens
 61%|██████    | 122/201 [4:24:07<2:55:30, 133.30s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     148.13 ms /   230 runs   (    0.64 ms per token,  1552.68 tokens per second)
llama_print_timings: prompt eval time =   55446.82 ms /   235 tokens (  235.94 ms per token,     4.24 tokens per second)
llama_print_timings:        eval time =   73401.92 ms /   229 runs   (  320.53 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  129950.34 ms /   464 tokens
 61%|██████    | 123/201 [4:26:17<2:52:01, 132.33s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     118.28 ms /   193 runs   (    0.61 ms per token,  1631.68 tokens per second)
llama_print_timings: prompt eval time =   64344.62 ms /   272 tokens (  236.56 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   61510.56 ms /   192 runs   (  320.37 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  126790.31 ms /   464 tokens
 62%|██████▏   | 124/201 [4:28:24<2:47:44, 130.71s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     212.11 ms /   348 runs   (    0.61 ms per token,  1640.65 tokens per second)
llama_print_timings: prompt eval time =   28736.67 ms /   117 tokens (  245.61 ms per token,     4.07 tokens per second)
llama_print_timings:        eval time =  111013.20 ms /   347 runs   (  319.92 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  141355.78 ms /   464 tokens
 62%|██████▏   | 125/201 [4:30:45<2:49:39, 133.94s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.41 ms /   262 runs   (    0.60 ms per token,  1675.12 tokens per second)
llama_print_timings: prompt eval time =   49677.48 ms /   202 tokens (  245.93 ms per token,     4.07 tokens per second)
llama_print_timings:        eval time =   81995.50 ms /   261 runs   (  314.16 ms per token,     3.18 tokens per second)
llama_print_timings:       total time =  132881.69 ms /   463 tokens
 63%|██████▎   | 126/201 [4:32:58<2:47:04, 133.66s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     131.18 ms /   216 runs   (    0.61 ms per token,  1646.55 tokens per second)
llama_print_timings: prompt eval time =   61578.58 ms /   248 tokens (  248.30 ms per token,     4.03 tokens per second)
llama_print_timings:        eval time =   68971.33 ms /   216 runs   (  319.31 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  131577.54 ms /   464 tokens
 63%|██████▎   | 127/201 [4:35:10<2:44:07, 133.07s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     167.08 ms /   283 runs   (    0.59 ms per token,  1693.81 tokens per second)
llama_print_timings: prompt eval time =   43207.54 ms /   182 tokens (  237.40 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   89683.11 ms /   282 runs   (  318.03 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  134223.72 ms /   464 tokens
 64%|██████▎   | 128/201 [4:37:24<2:42:22, 133.45s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     211.09 ms /   346 runs   (    0.61 ms per token,  1639.15 tokens per second)
llama_print_timings: prompt eval time =   28332.91 ms /   118 tokens (  240.11 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =  109187.01 ms /   345 runs   (  316.48 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  139113.38 ms /   463 tokens
 64%|██████▍   | 129/201 [4:39:43<2:42:13, 135.18s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     181.19 ms /   288 runs   (    0.63 ms per token,  1589.52 tokens per second)
llama_print_timings: prompt eval time =   41685.92 ms /   176 tokens (  236.85 ms per token,     4.22 tokens per second)
llama_print_timings:        eval time =   91592.49 ms /   287 runs   (  319.14 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  134641.30 ms /   463 tokens
 65%|██████▍   | 130/201 [4:41:58<2:39:48, 135.06s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     223.57 ms /   365 runs   (    0.61 ms per token,  1632.61 tokens per second)
llama_print_timings: prompt eval time =   23262.95 ms /    99 tokens (  234.98 ms per token,     4.26 tokens per second)
llama_print_timings:        eval time =  114804.49 ms /   364 runs   (  315.40 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  139731.92 ms /   463 tokens
 65%|██████▌   | 131/201 [4:44:18<2:39:14, 136.50s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     151.40 ms /   249 runs   (    0.61 ms per token,  1644.62 tokens per second)
llama_print_timings: prompt eval time =   51985.61 ms /   215 tokens (  241.79 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   79548.35 ms /   248 runs   (  320.76 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  132665.50 ms /   463 tokens
 66%|██████▌   | 132/201 [4:46:31<2:35:41, 135.38s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     162.38 ms /   258 runs   (    0.63 ms per token,  1588.85 tokens per second)
llama_print_timings: prompt eval time =   49186.90 ms /   206 tokens (  238.77 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   82517.64 ms /   257 runs   (  321.08 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  132924.79 ms /   463 tokens
 66%|██████▌   | 133/201 [4:48:44<2:32:37, 134.67s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.28 ms /   241 runs   (    0.60 ms per token,  1670.41 tokens per second)
llama_print_timings: prompt eval time =   54712.08 ms /   223 tokens (  245.35 ms per token,     4.08 tokens per second)
llama_print_timings:        eval time =   75745.56 ms /   240 runs   (  315.61 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  131594.73 ms /   463 tokens
 67%|██████▋   | 134/201 [4:50:56<2:29:23, 133.78s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     179.64 ms /   290 runs   (    0.62 ms per token,  1614.38 tokens per second)
llama_print_timings: prompt eval time =   41825.89 ms /   174 tokens (  240.38 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   92694.62 ms /   289 runs   (  320.74 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  135844.50 ms /   463 tokens
 67%|██████▋   | 135/201 [4:53:12<2:27:57, 134.50s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     189.76 ms /   296 runs   (    0.64 ms per token,  1559.84 tokens per second)
llama_print_timings: prompt eval time =   39493.06 ms /   168 tokens (  235.08 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   94073.55 ms /   296 runs   (  317.82 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  134994.99 ms /   464 tokens
 68%|██████▊   | 136/201 [4:55:27<2:25:53, 134.68s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     159.25 ms /   269 runs   (    0.59 ms per token,  1689.15 tokens per second)
llama_print_timings: prompt eval time =   47948.33 ms /   196 tokens (  244.63 ms per token,     4.09 tokens per second)
llama_print_timings:        eval time =   85411.27 ms /   268 runs   (  318.70 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  134601.41 ms /   464 tokens
 68%|██████▊   | 137/201 [4:57:42<2:23:40, 134.69s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     147.88 ms /   238 runs   (    0.62 ms per token,  1609.36 tokens per second)
llama_print_timings: prompt eval time =   55200.67 ms /   227 tokens (  243.17 ms per token,     4.11 tokens per second)
llama_print_timings:        eval time =   75717.11 ms /   237 runs   (  319.48 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  132038.37 ms /   464 tokens
 69%|██████▊   | 138/201 [4:59:54<2:20:37, 133.93s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     153.27 ms /   246 runs   (    0.62 ms per token,  1604.98 tokens per second)
llama_print_timings: prompt eval time =   53989.18 ms /   219 tokens (  246.53 ms per token,     4.06 tokens per second)
llama_print_timings:        eval time =   78533.77 ms /   245 runs   (  320.55 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  133653.23 ms /   464 tokens
 69%|██████▉   | 139/201 [5:02:07<2:18:20, 133.88s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     212.81 ms /   343 runs   (    0.62 ms per token,  1611.80 tokens per second)
llama_print_timings: prompt eval time =   29973.16 ms /   120 tokens (  249.78 ms per token,     4.00 tokens per second)
llama_print_timings:        eval time =  108814.52 ms /   343 runs   (  317.24 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  140399.13 ms /   463 tokens
 70%|██████▉   | 140/201 [5:04:28<2:18:07, 135.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     220.68 ms /   365 runs   (    0.60 ms per token,  1654.02 tokens per second)
llama_print_timings: prompt eval time =   24133.26 ms /   100 tokens (  241.33 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =  116517.50 ms /   364 runs   (  320.10 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  142310.96 ms /   464 tokens
 70%|███████   | 141/201 [5:06:50<2:17:49, 137.83s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     155.99 ms /   252 runs   (    0.62 ms per token,  1615.53 tokens per second)
llama_print_timings: prompt eval time =   51742.11 ms /   213 tokens (  242.92 ms per token,     4.12 tokens per second)
llama_print_timings:        eval time =   80004.73 ms /   251 runs   (  318.74 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  132960.92 ms /   464 tokens
 71%|███████   | 142/201 [5:09:03<2:14:07, 136.40s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     256.27 ms /   402 runs   (    0.64 ms per token,  1568.63 tokens per second)
llama_print_timings: prompt eval time =   14607.13 ms /    63 tokens (  231.86 ms per token,     4.31 tokens per second)
llama_print_timings:        eval time =  128628.44 ms /   401 runs   (  320.77 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  145128.28 ms /   464 tokens
 71%|███████   | 143/201 [5:11:29<2:14:25, 139.06s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     172.48 ms /   273 runs   (    0.63 ms per token,  1582.82 tokens per second)
llama_print_timings: prompt eval time =   47033.18 ms /   192 tokens (  244.96 ms per token,     4.08 tokens per second)
llama_print_timings:        eval time =   86617.82 ms /   272 runs   (  318.45 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  134945.97 ms /   464 tokens
 72%|███████▏  | 144/201 [5:13:44<2:10:57, 137.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     164.63 ms /   254 runs   (    0.65 ms per token,  1542.89 tokens per second)
llama_print_timings: prompt eval time =   51444.87 ms /   211 tokens (  243.81 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   81703.31 ms /   253 runs   (  322.94 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  134358.18 ms /   464 tokens
 72%|███████▏  | 145/201 [5:15:58<2:07:43, 136.84s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     240.08 ms /   384 runs   (    0.63 ms per token,  1599.47 tokens per second)
llama_print_timings: prompt eval time =   18671.29 ms /    80 tokens (  233.39 ms per token,     4.28 tokens per second)
llama_print_timings:        eval time =  122742.45 ms /   384 runs   (  319.64 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  143157.55 ms /   464 tokens
 73%|███████▎  | 146/201 [5:18:22<2:07:12, 138.77s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     149.71 ms /   248 runs   (    0.60 ms per token,  1656.55 tokens per second)
llama_print_timings: prompt eval time =   51081.56 ms /   216 tokens (  236.49 ms per token,     4.23 tokens per second)
llama_print_timings:        eval time =   79673.46 ms /   248 runs   (  321.26 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  131944.08 ms /   464 tokens
 73%|███████▎  | 147/201 [5:20:34<2:03:04, 136.75s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     154.83 ms /   246 runs   (    0.63 ms per token,  1588.89 tokens per second)
llama_print_timings: prompt eval time =   53075.43 ms /   218 tokens (  243.47 ms per token,     4.11 tokens per second)
llama_print_timings:        eval time =   77398.62 ms /   245 runs   (  315.91 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  131650.92 ms /   463 tokens
 74%|███████▎  | 148/201 [5:22:45<1:59:28, 135.26s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     172.01 ms /   271 runs   (    0.63 ms per token,  1575.46 tokens per second)
llama_print_timings: prompt eval time =   46778.84 ms /   192 tokens (  243.64 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   87346.98 ms /   271 runs   (  322.31 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  135414.89 ms /   463 tokens
 74%|███████▍  | 149/201 [5:25:01<1:57:17, 135.34s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     254.39 ms /   384 runs   (    0.66 ms per token,  1509.47 tokens per second)
llama_print_timings: prompt eval time =   18987.44 ms /    80 tokens (  237.34 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =  122682.54 ms /   384 runs   (  319.49 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  143466.88 ms /   464 tokens
 75%|███████▍  | 150/201 [5:27:24<1:57:08, 137.81s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     169.78 ms /   264 runs   (    0.64 ms per token,  1555.00 tokens per second)
llama_print_timings: prompt eval time =   48355.35 ms /   200 tokens (  241.78 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   85751.30 ms /   264 runs   (  324.82 ms per token,     3.08 tokens per second)
llama_print_timings:       total time =  135392.47 ms /   464 tokens
 75%|███████▌  | 151/201 [5:29:40<1:54:15, 137.12s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     163.94 ms /   260 runs   (    0.63 ms per token,  1585.99 tokens per second)
llama_print_timings: prompt eval time =   48497.02 ms /   204 tokens (  237.73 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   83790.65 ms /   259 runs   (  323.52 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  133536.65 ms /   463 tokens
 76%|███████▌  | 152/201 [5:31:54<1:51:07, 136.07s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     154.96 ms /   254 runs   (    0.61 ms per token,  1639.17 tokens per second)
llama_print_timings: prompt eval time =   49665.06 ms /   208 tokens (  238.77 ms per token,     4.19 tokens per second)
llama_print_timings:        eval time =   81607.33 ms /   254 runs   (  321.29 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  132494.40 ms /   462 tokens
 76%|███████▌  | 153/201 [5:34:06<1:48:01, 135.03s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     220.04 ms /   354 runs   (    0.62 ms per token,  1608.81 tokens per second)
llama_print_timings: prompt eval time =   27466.84 ms /   110 tokens (  249.70 ms per token,     4.00 tokens per second)
llama_print_timings:        eval time =  112489.01 ms /   353 runs   (  318.67 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  141582.80 ms /   463 tokens
 77%|███████▋  | 154/201 [5:36:28<1:47:20, 137.03s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     159.98 ms /   253 runs   (    0.63 ms per token,  1581.48 tokens per second)
llama_print_timings: prompt eval time =   50589.91 ms /   211 tokens (  239.76 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   82779.68 ms /   252 runs   (  328.49 ms per token,     3.04 tokens per second)
llama_print_timings:       total time =  134592.71 ms /   463 tokens
 77%|███████▋  | 155/201 [5:38:43<1:44:31, 136.34s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     149.30 ms /   248 runs   (    0.60 ms per token,  1661.13 tokens per second)
llama_print_timings: prompt eval time =   51391.63 ms /   216 tokens (  237.92 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   79303.03 ms /   247 runs   (  321.06 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  131875.31 ms /   463 tokens
 78%|███████▊  | 156/201 [5:40:55<1:41:16, 135.03s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     175.47 ms /   286 runs   (    0.61 ms per token,  1629.92 tokens per second)
llama_print_timings: prompt eval time =   42302.95 ms /   176 tokens (  240.36 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   91268.33 ms /   286 runs   (  319.12 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  134927.30 ms /   462 tokens
 78%|███████▊  | 157/201 [5:43:10<1:39:05, 135.13s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     154.61 ms /   244 runs   (    0.63 ms per token,  1578.21 tokens per second)
llama_print_timings: prompt eval time =   52771.94 ms /   219 tokens (  240.97 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   77672.92 ms /   243 runs   (  319.64 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  131613.67 ms /   462 tokens
 79%|███████▊  | 158/201 [5:45:22<1:36:07, 134.12s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     174.58 ms /   287 runs   (    0.61 ms per token,  1643.94 tokens per second)
llama_print_timings: prompt eval time =   42464.83 ms /   176 tokens (  241.28 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   91660.98 ms /   286 runs   (  320.49 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  135442.01 ms /   462 tokens
 79%|███████▉  | 159/201 [5:47:37<1:34:11, 134.55s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     175.19 ms /   289 runs   (    0.61 ms per token,  1649.64 tokens per second)
llama_print_timings: prompt eval time =   11136.48 ms /    42 tokens (  265.15 ms per token,     3.77 tokens per second)
llama_print_timings:        eval time =   91598.96 ms /   288 runs   (  318.05 ms per token,     3.14 tokens per second)
llama_print_timings:       total time =  104039.19 ms /   330 tokens
 80%|███████▉  | 160/201 [5:49:21<1:25:42, 125.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     180.63 ms /   283 runs   (    0.64 ms per token,  1566.71 tokens per second)
llama_print_timings: prompt eval time =   44453.58 ms /   181 tokens (  245.60 ms per token,     4.07 tokens per second)
llama_print_timings:        eval time =   89036.90 ms /   282 runs   (  315.73 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  134832.08 ms /   463 tokens
 80%|████████  | 161/201 [5:51:36<1:25:31, 128.29s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      44.20 ms /    73 runs   (    0.61 ms per token,  1651.47 tokens per second)
llama_print_timings: prompt eval time =   96148.18 ms /   391 tokens (  245.90 ms per token,     4.07 tokens per second)
llama_print_timings:        eval time =   22438.00 ms /    72 runs   (  311.64 ms per token,     3.21 tokens per second)
llama_print_timings:       total time =  119033.94 ms /   463 tokens
 81%|████████  | 162/201 [5:53:36<1:21:36, 125.55s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     226.53 ms /   368 runs   (    0.62 ms per token,  1624.48 tokens per second)
llama_print_timings: prompt eval time =   23029.40 ms /    95 tokens (  242.41 ms per token,     4.13 tokens per second)
llama_print_timings:        eval time =  117367.19 ms /   367 runs   (  319.80 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  142123.68 ms /   462 tokens
 81%|████████  | 163/201 [5:55:58<1:22:41, 130.56s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     165.11 ms /   260 runs   (    0.64 ms per token,  1574.73 tokens per second)
llama_print_timings: prompt eval time =   47502.23 ms /   204 tokens (  232.85 ms per token,     4.29 tokens per second)
llama_print_timings:        eval time =   83553.34 ms /   259 runs   (  322.60 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  132313.01 ms /   463 tokens
 82%|████████▏ | 164/201 [5:58:10<1:20:51, 131.11s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     138.98 ms /   228 runs   (    0.61 ms per token,  1640.48 tokens per second)
llama_print_timings: prompt eval time =   57069.60 ms /   235 tokens (  242.85 ms per token,     4.12 tokens per second)
llama_print_timings:        eval time =   71644.98 ms /   227 runs   (  315.62 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  129811.85 ms /   462 tokens
 82%|████████▏ | 165/201 [6:00:20<1:18:27, 130.77s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     154.62 ms /   244 runs   (    0.63 ms per token,  1578.05 tokens per second)
llama_print_timings: prompt eval time =   53194.09 ms /   221 tokens (  240.70 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   77929.93 ms /   243 runs   (  320.70 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  132296.01 ms /   464 tokens
 83%|████████▎ | 166/201 [6:02:33<1:16:34, 131.27s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     140.62 ms /   239 runs   (    0.59 ms per token,  1699.58 tokens per second)
llama_print_timings: prompt eval time =   55401.19 ms /   226 tokens (  245.14 ms per token,     4.08 tokens per second)
llama_print_timings:        eval time =   75434.49 ms /   238 runs   (  316.95 ms per token,     3.16 tokens per second)
llama_print_timings:       total time =  131944.53 ms /   464 tokens
 83%|████████▎ | 167/201 [6:04:45<1:14:31, 131.51s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     138.63 ms /   235 runs   (    0.59 ms per token,  1695.18 tokens per second)
llama_print_timings: prompt eval time =   56973.00 ms /   228 tokens (  249.88 ms per token,     4.00 tokens per second)
llama_print_timings:        eval time =   73741.38 ms /   234 runs   (  315.13 ms per token,     3.17 tokens per second)
llama_print_timings:       total time =  131797.17 ms /   462 tokens
 84%|████████▎ | 168/201 [6:06:57<1:12:24, 131.64s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     250.80 ms /   396 runs   (    0.63 ms per token,  1578.98 tokens per second)
llama_print_timings: prompt eval time =   17832.14 ms /    69 tokens (  258.44 ms per token,     3.87 tokens per second)
llama_print_timings:        eval time =  127697.64 ms /   395 runs   (  323.29 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  147359.62 ms /   464 tokens
 84%|████████▍ | 169/201 [6:09:24<1:12:44, 136.39s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     227.90 ms /   359 runs   (    0.63 ms per token,  1575.28 tokens per second)
llama_print_timings: prompt eval time =   24452.86 ms /   106 tokens (  230.69 ms per token,     4.33 tokens per second)
llama_print_timings:        eval time =  115309.04 ms /   358 runs   (  322.09 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  141437.92 ms /   464 tokens
 85%|████████▍ | 170/201 [6:11:46<1:11:16, 137.94s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      69.56 ms /   113 runs   (    0.62 ms per token,  1624.50 tokens per second)
llama_print_timings: prompt eval time =   84976.81 ms /   352 tokens (  241.41 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   35861.51 ms /   112 runs   (  320.19 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  121452.08 ms /   464 tokens
 85%|████████▌ | 171/201 [6:13:47<1:06:30, 133.03s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     167.28 ms /   261 runs   (    0.64 ms per token,  1560.30 tokens per second)
llama_print_timings: prompt eval time =   48079.17 ms /   202 tokens (  238.02 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =   82950.98 ms /   260 runs   (  319.04 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  132311.70 ms /   462 tokens
 86%|████████▌ | 172/201 [6:16:00<1:04:12, 132.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     166.69 ms /   259 runs   (    0.64 ms per token,  1553.78 tokens per second)
llama_print_timings: prompt eval time =   50786.44 ms /   205 tokens (  247.74 ms per token,     4.04 tokens per second)
llama_print_timings:        eval time =   82788.70 ms /   258 runs   (  320.89 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  134783.33 ms /   463 tokens
 86%|████████▌ | 173/201 [6:18:15<1:02:17, 133.47s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     215.06 ms /   342 runs   (    0.63 ms per token,  1590.29 tokens per second)
llama_print_timings: prompt eval time =   29038.89 ms /   122 tokens (  238.02 ms per token,     4.20 tokens per second)
llama_print_timings:        eval time =  108796.33 ms /   341 runs   (  319.05 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  139463.31 ms /   463 tokens
 87%|████████▋ | 174/201 [6:20:34<1:00:53, 135.31s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     143.72 ms /   243 runs   (    0.59 ms per token,  1690.81 tokens per second)
llama_print_timings: prompt eval time =   54376.72 ms /   221 tokens (  246.05 ms per token,     4.06 tokens per second)
llama_print_timings:        eval time =   76939.51 ms /   242 runs   (  317.93 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  132461.28 ms /   463 tokens
 87%|████████▋ | 175/201 [6:22:47<58:17, 134.52s/it]  Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =      73.62 ms /   116 runs   (    0.63 ms per token,  1575.68 tokens per second)
llama_print_timings: prompt eval time =   83654.16 ms /   347 tokens (  241.08 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   37361.05 ms /   115 runs   (  324.88 ms per token,     3.08 tokens per second)
llama_print_timings:       total time =  121648.92 ms /   462 tokens
 88%|████████▊ | 176/201 [6:24:49<54:27, 130.69s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     190.18 ms /   303 runs   (    0.63 ms per token,  1593.24 tokens per second)
llama_print_timings: prompt eval time =   37684.58 ms /   160 tokens (  235.53 ms per token,     4.25 tokens per second)
llama_print_timings:        eval time =   96986.14 ms /   302 runs   (  321.15 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  136084.65 ms /   462 tokens
 88%|████████▊ | 177/201 [6:27:05<52:56, 132.34s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     156.97 ms /   242 runs   (    0.65 ms per token,  1541.65 tokens per second)
llama_print_timings: prompt eval time =   54611.24 ms /   220 tokens (  248.23 ms per token,     4.03 tokens per second)
llama_print_timings:        eval time =   79525.09 ms /   241 runs   (  329.98 ms per token,     3.03 tokens per second)
llama_print_timings:       total time =  135319.16 ms /   461 tokens
 89%|████████▊ | 178/201 [6:29:20<51:05, 133.28s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     172.90 ms /   277 runs   (    0.62 ms per token,  1602.06 tokens per second)
llama_print_timings: prompt eval time =   44860.41 ms /   187 tokens (  239.90 ms per token,     4.17 tokens per second)
llama_print_timings:        eval time =   88625.76 ms /   276 runs   (  321.11 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  134761.95 ms /   463 tokens
 89%|████████▉ | 179/201 [6:31:35<49:02, 133.76s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     173.91 ms /   279 runs   (    0.62 ms per token,  1604.24 tokens per second)
llama_print_timings: prompt eval time =   12946.45 ms /    51 tokens (  253.85 ms per token,     3.94 tokens per second)
llama_print_timings:        eval time =   89781.58 ms /   278 runs   (  322.96 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  104006.67 ms /   329 tokens
 90%|████████▉ | 180/201 [6:33:19<43:42, 124.87s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     170.24 ms /   273 runs   (    0.62 ms per token,  1603.58 tokens per second)
llama_print_timings: prompt eval time =   13670.74 ms /    56 tokens (  244.12 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   87537.65 ms /   272 runs   (  321.83 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  102435.79 ms /   328 tokens
 90%|█████████ | 181/201 [6:35:02<39:23, 118.18s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     176.29 ms /   280 runs   (    0.63 ms per token,  1588.26 tokens per second)
llama_print_timings: prompt eval time =   12482.06 ms /    50 tokens (  249.64 ms per token,     4.01 tokens per second)
llama_print_timings:        eval time =   89876.96 ms /   279 runs   (  322.14 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  103662.39 ms /   329 tokens
 91%|█████████ | 182/201 [6:36:46<36:03, 113.86s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     159.04 ms /   254 runs   (    0.63 ms per token,  1597.09 tokens per second)
llama_print_timings: prompt eval time =   52096.85 ms /   210 tokens (  248.08 ms per token,     4.03 tokens per second)
llama_print_timings:        eval time =   81234.85 ms /   253 runs   (  321.09 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  134523.67 ms /   463 tokens
 91%|█████████ | 183/201 [6:39:00<36:01, 120.09s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     117.04 ms /   184 runs   (    0.64 ms per token,  1572.17 tokens per second)
llama_print_timings: prompt eval time =   68958.42 ms /   280 tokens (  246.28 ms per token,     4.06 tokens per second)
llama_print_timings:        eval time =   59161.60 ms /   183 runs   (  323.29 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  129039.01 ms /   463 tokens
 92%|█████████▏| 184/201 [6:41:09<34:47, 122.81s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     183.74 ms /   289 runs   (    0.64 ms per token,  1572.92 tokens per second)
llama_print_timings: prompt eval time =   40948.93 ms /   176 tokens (  232.66 ms per token,     4.30 tokens per second)
llama_print_timings:        eval time =   92431.63 ms /   288 runs   (  320.94 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  134742.66 ms /   464 tokens
 92%|█████████▏| 185/201 [6:43:24<33:42, 126.43s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     142.93 ms /   242 runs   (    0.59 ms per token,  1693.14 tokens per second)
llama_print_timings: prompt eval time =   54025.01 ms /   223 tokens (  242.26 ms per token,     4.13 tokens per second)
llama_print_timings:        eval time =   77741.93 ms /   241 runs   (  322.58 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  132891.34 ms /   464 tokens
 93%|█████████▎| 186/201 [6:45:37<32:06, 128.41s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     182.34 ms /   266 runs   (    0.69 ms per token,  1458.82 tokens per second)
llama_print_timings: prompt eval time =   48651.97 ms /   199 tokens (  244.48 ms per token,     4.09 tokens per second)
llama_print_timings:        eval time =   85070.87 ms /   265 runs   (  321.02 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  135016.50 ms /   464 tokens
 93%|█████████▎| 187/201 [6:47:52<30:25, 130.42s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     157.83 ms /   255 runs   (    0.62 ms per token,  1615.64 tokens per second)
llama_print_timings: prompt eval time =   50603.50 ms /   210 tokens (  240.97 ms per token,     4.15 tokens per second)
llama_print_timings:        eval time =   80674.18 ms /   254 runs   (  317.61 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  132457.36 ms /   464 tokens
 94%|█████████▎| 188/201 [6:50:05<28:24, 131.08s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     163.62 ms /   259 runs   (    0.63 ms per token,  1582.92 tokens per second)
llama_print_timings: prompt eval time =   50008.55 ms /   205 tokens (  243.94 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   82659.77 ms /   258 runs   (  320.39 ms per token,     3.12 tokens per second)
llama_print_timings:       total time =  133924.11 ms /   463 tokens
 94%|█████████▍| 189/201 [6:52:19<26:23, 131.97s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     168.44 ms /   263 runs   (    0.64 ms per token,  1561.43 tokens per second)
llama_print_timings: prompt eval time =   47508.08 ms /   200 tokens (  237.54 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =   85387.59 ms /   262 runs   (  325.91 ms per token,     3.07 tokens per second)
llama_print_timings:       total time =  134167.43 ms /   462 tokens
 95%|█████████▍| 190/201 [6:54:33<24:19, 132.66s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     152.73 ms /   246 runs   (    0.62 ms per token,  1610.64 tokens per second)
llama_print_timings: prompt eval time =   52581.21 ms /   216 tokens (  243.43 ms per token,     4.11 tokens per second)
llama_print_timings:        eval time =   79528.06 ms /   246 runs   (  323.28 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  133292.58 ms /   462 tokens
 95%|█████████▌| 191/201 [6:56:47<22:08, 132.88s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     189.50 ms /   289 runs   (    0.66 ms per token,  1525.05 tokens per second)
llama_print_timings: prompt eval time =   40782.04 ms /   176 tokens (  231.72 ms per token,     4.32 tokens per second)
llama_print_timings:        eval time =   93249.94 ms /   288 runs   (  323.78 ms per token,     3.09 tokens per second)
llama_print_timings:       total time =  135436.44 ms /   464 tokens
 96%|█████████▌| 192/201 [6:59:02<20:03, 133.68s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     143.72 ms /   229 runs   (    0.63 ms per token,  1593.38 tokens per second)
llama_print_timings: prompt eval time =   57605.73 ms /   236 tokens (  244.09 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   73509.36 ms /   228 runs   (  322.41 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  132190.30 ms /   464 tokens
 96%|█████████▌| 193/201 [7:01:15<17:46, 133.27s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     144.47 ms /   237 runs   (    0.61 ms per token,  1640.46 tokens per second)
llama_print_timings: prompt eval time =   55281.93 ms /   227 tokens (  243.53 ms per token,     4.11 tokens per second)
llama_print_timings:        eval time =   74906.90 ms /   236 runs   (  317.40 ms per token,     3.15 tokens per second)
llama_print_timings:       total time =  131304.69 ms /   463 tokens
 97%|█████████▋| 194/201 [7:03:26<15:29, 132.72s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     165.69 ms /   248 runs   (    0.67 ms per token,  1496.77 tokens per second)
llama_print_timings: prompt eval time =   25105.44 ms /   104 tokens (  241.40 ms per token,     4.14 tokens per second)
llama_print_timings:        eval time =   79828.88 ms /   248 runs   (  321.89 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  106112.23 ms /   352 tokens
 97%|█████████▋| 195/201 [7:05:12<12:28, 124.77s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     213.16 ms /   338 runs   (    0.63 ms per token,  1585.68 tokens per second)
llama_print_timings: prompt eval time =   30179.67 ms /   127 tokens (  237.64 ms per token,     4.21 tokens per second)
llama_print_timings:        eval time =  108479.48 ms /   337 runs   (  321.90 ms per token,     3.11 tokens per second)
llama_print_timings:       total time =  140246.48 ms /   464 tokens
 98%|█████████▊| 196/201 [7:07:33<10:47, 129.45s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     133.13 ms /   218 runs   (    0.61 ms per token,  1637.45 tokens per second)
llama_print_timings: prompt eval time =   59787.00 ms /   245 tokens (  244.03 ms per token,     4.10 tokens per second)
llama_print_timings:        eval time =   70554.22 ms /   217 runs   (  325.13 ms per token,     3.08 tokens per second)
llama_print_timings:       total time =  131388.79 ms /   462 tokens
 98%|█████████▊| 197/201 [7:09:44<08:40, 130.07s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     133.92 ms /   219 runs   (    0.61 ms per token,  1635.30 tokens per second)
llama_print_timings: prompt eval time =   11831.15 ms /    45 tokens (  262.91 ms per token,     3.80 tokens per second)
llama_print_timings:        eval time =   69537.74 ms /   218 runs   (  318.98 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =   82357.08 ms /   263 tokens
 99%|█████████▊| 198/201 [7:11:07<05:47, 115.79s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     167.84 ms /   268 runs   (    0.63 ms per token,  1596.74 tokens per second)
llama_print_timings: prompt eval time =   46459.80 ms /   194 tokens (  239.48 ms per token,     4.18 tokens per second)
llama_print_timings:        eval time =   86224.92 ms /   267 runs   (  322.94 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  133945.57 ms /   461 tokens
 99%|█████████▉| 199/201 [7:13:21<04:02, 121.27s/it]Llama.generate: prefix-match hit



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m



llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     105.85 ms /   176 runs   (    0.60 ms per token,  1662.73 tokens per second)
llama_print_timings: prompt eval time =   60095.90 ms /   240 tokens (  250.40 ms per token,     3.99 tokens per second)
llama_print_timings:        eval time =   55869.44 ms /   175 runs   (  319.25 ms per token,     3.13 tokens per second)
llama_print_timings:       total time =  116808.17 ms /   415 tokens
100%|█████████▉| 200/201 [7:15:18<01:59, 119.97s/it]


[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit

llama_print_timings:        load time =    2183.06 ms
llama_print_timings:      sample time =     124.43 ms /   204 runs   (    0.61 ms per token,  1639.42 tokens per second)
llama_print_timings: prompt eval time =   62774.09 ms /   261 tokens (  240.51 ms per token,     4.16 tokens per second)
llama_print_timings:        eval time =   65510.24 ms /   203 runs   (  322.71 ms per token,     3.10 tokens per second)
llama_print_timings:       total time =  129286.71 ms /   464 tokens
100%|██████████| 201/201 [7:17:27<00:00, 130.59s/it]


[1m> Finished chain.[0m





In [15]:
mysub = test_df.copy()
mysub["Answers"] = results
mysub["Keywords"] = keywords
mysub["Source files"] = sources
mysub.to_csv("full_test.csv", index=False)

#### Extracting Keywords and Paragraph

In [16]:
test_set = pd.read_csv("full_test.csv")
test_set

Unnamed: 0,ID,Question Text,Answers,Source files
0,Q4,"What is the definition of ""unusual event""",An unusual event is defined as any event that ...,TG_Booklet_1.xlsx
1,Q5,What is Community Based Surveillance (CBS)?,CBS is the process of collecting and reporting...,TG_Booklet_1.xlsx
2,Q9,What kind of training should members of VHC re...,Members of VHC should receive training on how ...,TG_Booklet_1.xlsx
3,Q10,What is indicator based surveillance (IBS)?,Indicator based surveillance (IBS) is the stan...,TG_Booklet_1.xlsx
4,Q13,What is Case based surveillance?,Case-based surveillance involves the ongoing a...,TG_Booklet_1.xlsx
...,...,...,...,...
494,Q1229,Where should completeness be evaluated in the ...,Completeness should be evaluated at the nation...,TG_Booklet_4.xlsx
495,Q1230,Which dimensions of completeness are crucial i...,The crucial dimensions of completeeness in sur...,TG_Booklet_4.xlsx
496,Q1236,How can the completeness of case reporting be ...,The completeness of case reporting can be moni...,TG_Booklet_4.xlsx
497,Q1239,Where should completeness and timeliness of re...,Completeeness and timeliness of reports should...,TG_Booklet_4.xlsx


In [17]:
!pip install nltk

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [26]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import os
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Download NLTK resources (run only once)
nltk.download('punkt')
nltk.download('stopwords')


def extract_keywords(provided_text):
    # Tokenize the text
    tokens = word_tokenize(provided_text)

    # Convert tokens to lowercase
    tokens = [token.lower() for token in tokens]

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [token.title() for token in tokens if token not in stop_words]

    # Remove punctuation and non-alphabetic characters
    keywords = [token for token in filtered_tokens if token.isalpha()]

    # Remove duplicate keywords
    unique_keywords = list(set(keywords))

    return ', '.join(unique_keywords)





def find_matching_paragraphs(csv_filepath, text_to_check, threshold=0.9):
    # Load the DataFrame
    data_dir = os.path.join(".", "data", "MWTGBookletsExcel")
    file_path = os.path.join(data_dir, csv_filepath)
    df = pd.read_excel(file_path, names=["paragraph", "text"])
    df.fillna('', inplace=True)
    # Concatenate all text from the 'text' column in the DataFrame
    all_text = ' '.join(df['text'].astype(str).values.tolist())

    # Combine the provided text and all text from the DataFrame
    combined_text = [text_to_check, all_text]

    # Initialize TfidfVectorizer
    tfidf_vectorizer = TfidfVectorizer()

    # Fit and transform the text in the DataFrame
    tfidf_matrix = tfidf_vectorizer.fit_transform(df['text'])

    # Transform the provided text
    provided_text_tfidf = tfidf_vectorizer.transform([text_to_check])

    # Calculate cosine similarity between the provided text and each paragraph in the DataFrame
    cosine_similarities = cosine_similarity(provided_text_tfidf, tfidf_matrix).flatten()

    # Find paragraphs that meet or exceed the threshold
    matching_paragraph_indices = [i for i, score in enumerate(cosine_similarities) if score >= threshold]

    if matching_paragraph_indices:
        # Get the corresponding paragraph numbers
        matching_paragraph_numbers = df.iloc[matching_paragraph_indices]['paragraph'].tolist()
        matching_paragraph_numbers = [str(int(i)) for i in matching_paragraph_numbers]
        return ', '.join(matching_paragraph_numbers)

    else:
        # If no paragraphs meet the threshold, fallback to selecting the paragraph with the highest similarity
        closest_paragraph_index = cosine_similarities.argmax()
        closest_paragraph_number = df.iloc[closest_paragraph_index]['paragraph']
        return ', '.join([str(closest_paragraph_number)])

[nltk_data] Downloading package punkt to
[nltk_data]     /home/adeptschneiderthedev/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/adeptschneiderthedev/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [24]:
test_set[test_set["Answers"] == "Error"]

Unnamed: 0,ID,Question Text,Answers,Source files
244,Q584,"Does the TG mention CBIS as an abbreviation, a...",Error,Error
247,Q590,"Is there an abbreviation CHT in the TG, and wh...",Error,Error
248,Q593,"What does DDO stand for, and is it explained i...",Error,Error
250,Q597,"Does the TG mention DPC as an abbreviation, an...",Error,Error
252,Q606,"Is HCF an abbreviation mentioned in the TG, an...",Error,Error
254,Q613,"Does the TG include IBS as an abbreviation, an...",Error,Error
255,Q615,"Is IEC an abbreviation mentioned in the TG, an...",Error,Error
261,Q628,"What does MOA stand for, and is it explained i...",Error,Error
262,Q629,"Is MTI an abbreviation mentioned in the TG, an...",Error,Error
264,Q631,"Is NNT an abbreviation mentioned in the TG, an...",Error,Error


In [28]:
# Check for NaN values in the 'Answers' column and replace them with 'Target not found'
test_set['Answers'].fillna('Target not found', inplace=True)

In [30]:
# Check for unique values in Source files column
test_set['Source files'].unique()

array(['TG_Booklet_1.xlsx', 'TG_Booklet_2.xlsx', 'TG_Booklet_6.xlsx',
       'TG_Booklet_4.xlsx', 'TG_Booklet_3.xlsx', 'TG_Booklet_5.xlsx',
       'Error'], dtype=object)

In [31]:
ID = []
Target = []

for index, row in tqdm(test_set.iterrows(), total=len(test_set)):
    if row["Answers"]== "Error":
        ID.append(row["ID"]+"_keywords")
        Target.append(extract_keywords(row["Question Text"]))
        ID.append(row["ID"]+"_paragraph(s)_number")
        Target.append(find_matching_paragraphs("TG_Booklet_1.xlsx", row["Question Text"], threshold=0.9))
        ID.append(row["ID"]+"_question_answer")
        Target.append(" ")
        ID.append(row["ID"]+"_reference_document")
        Target.append("TG Booklet 1")

    else:
        ID.append(row["ID"]+"_keywords")
        Target.append(row["Keywords"])
        ID.append(row["ID"]+"_paragraph(s)_number")
        Target.append(find_matching_paragraphs(row["Source files"], row["Answers"], threshold=0.9))
        ID.append(row["ID"]+"_question_answer")
        Target.append(row["Answers"])
        ID.append(row["ID"]+"_reference_document")
        Target.append(row["Source files"].split(".xlsx")[0])

100%|██████████| 499/499 [00:56<00:00,  8.88it/s]


In [32]:
ss = pd.read_csv("./SampleSubmission.csv")
ss

Unnamed: 0,ID,Target
0,Q1000_keywords,
1,Q1000_paragraph(s)_number,
2,Q1000_question_answer,
3,Q1000_reference_document,
4,Q1002_keywords,
...,...,...
1991,Q999_reference_document,
1992,Q9_keywords,
1993,Q9_paragraph(s)_number,
1994,Q9_question_answer,


In [33]:
ss["ID"] = ID
ss["Target"] = Target
ss["Target"] = ss["Target"].fillna(" ")

In [34]:
# Look for Target values with TG_Booklet_1, TG_Booklet_2, TG_Booklet_3, TG_Booklet_4, TG_Booklet_5, TG_Booklet_6 and replace them with TG Booklet 1, TG Booklet 2, TG Booklet 3, TG Booklet 4, TG Booklet 5, TG Booklet 6
ss["Target"] = ss["Target"].str.replace("TG_Booklet_1", "TG Booklet 1")
ss["Target"] = ss["Target"].str.replace("TG_Booklet_2", "TG Booklet 2")
ss["Target"] = ss["Target"].str.replace("TG_Booklet_3", "TG Booklet 3")
ss["Target"] = ss["Target"].str.replace("TG_Booklet_4", "TG Booklet 4")
ss["Target"] = ss["Target"].str.replace("TG_Booklet_5", "TG Booklet 5")
ss["Target"] = ss["Target"].str.replace("TG_Booklet_6", "TG Booklet 6")
ss.to_csv("cpu_submission.csv", index=False)

In [None]:
ss