# Laboratory work #6 (QA)

In [None]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

from db_utils import ChromaDataBase

In [None]:
# Hugging Face Hub repository that hosts the GGUF model files.
model_name = "TheBloke/Mistral-7B-OpenOrca-GGUF"
# Name of the file to download from that repository.
model_file = "mistral-7b-openorca.Q4_K_M.gguf"

In [None]:
# Download the selected model file from the Hub; the returned path is later
# passed to Llama as model_path.
model_path = hf_hub_download(
    repo_id=model_name,
    filename=model_file,
)

In [None]:
# Constructor options forwarded to Llama.
model_kwargs = {
    # Context length to use.
    "n_ctx": 4096,
    # Number of CPU threads to use.
    "n_threads": 4,
    # Number of model layers to offload to GPU; 0 means CPU only.
    "n_gpu_layers": 0,
}

# Instantiate the model from the downloaded file.
llm = Llama(model_path=model_path, **model_kwargs)

In [None]:
# Options passed to every llm(...) generation call.
generation_kwargs = {
    # Maximum number of new tokens to generate.
    "max_tokens": 200,
    # Text sequences on which generation stops.
    "stop": ["<|endoftext|>", "</s>"],
    # Whether to echo the prompt in the output.
    "echo": False,
    # top_k=1 is essentially greedy decoding: the highest-probability token
    # is always chosen. Set this value > 1 for sampling decoding.
    "top_k": 1,
}

# Free-form prompt used for a quick smoke test of the model.
prompt = "The meaning of life is "

In [None]:
# Run the model on the smoke-test prompt with the generation options above.
res = llm(prompt, **generation_kwargs) # res is a dictionary

## Unpack the generated text of the first choice from the response dictionary and print it
print(res["choices"][0]["text"])

In [None]:
# Create the database wrapper imported from the project's db_utils module.
db = ChromaDataBase()
# Sanity check: count the items in the wrapped collection.
# NOTE(review): presumably displayed as the notebook cell output - confirm.
db.collection.count()

In [None]:
def answer_on(question, verbose=True):
    """Answer *question* with the LLM, using retrieved documents as context.

    The question is sent to ``db.query`` (10 results, filtered with
    ``where={'class': 1}``). The returned documents are joined with spaces
    and placed, together with the question, into a prompt that is passed to
    ``llm`` along with ``generation_kwargs``.

    Args:
        question: Question text; used as the retrieval query and in the prompt.
        verbose: When true, print the full prompt before generation.

    Returns:
        The ``"text"`` field of the first choice in the model's response.
    """
    retrieved = db.query(query_texts=[question], n_results=10, where={'class': 1})

    # A single query text was sent, so use the first list of documents.
    documents = retrieved['documents'][0]
    context = ' '.join(documents)
    prompt = f'Here is the context: {context}. Here is the question: {question}. Here is the answer to this question:'

    if verbose:
        print(f'Prompt: {prompt}')

    completion = llm(prompt, **generation_kwargs)
    first_choice = completion["choices"][0]
    return first_choice["text"]

In [None]:
# Evaluation questions that are passed to answer_on() one by one.
questions = [
    'Where is the Catalan pro-independence party?',
    'Who was the president of US in 2018?',
    'Who was the president of US in 2016?',
    'How much forest is there in Slovenia?',
    'Is Donald Trump Republican Party nominee?',
    'Is Donald Trump Democratic Party nominee?',
    'What does the new U.S. tax code target?',
    'Who is Peru’s President in 2017?',
    'Did Google and Apple joined court papers?',
    'Was Silvio Berlusconi an Italian prime minister?'
]
# Reference answers, aligned by position with `questions`; they are printed
# next to the model's output for manual comparison.
answers = [
    'Catalonia, Spain',
    'Donald Trump',
    'Barack Obama',
    'About 60 percent of Slovenia is covered by forests',
    'Yes',
    'No',
    'The new U.S. tax code targets high-tax states',
    'Pedro Pablo Kuczynski',
    'Yes',
    'Yes'
]

# Ask every question and print the model's answer next to the reference
# answer. The two lists are paired by position; zip() stops at the shorter
# one, so keep `questions` and `answers` the same length.
for i, (question, expected) in enumerate(zip(questions, answers)):
    output = answer_on(question, verbose=True)
    print(f'\nQuestion #{i}:\n{question}\nPossible answer: {expected}\nLLM\'s answer: {output}\n')