In [1]:
from CreateDocuments import load_documents
import RAG_utils
%load_ext autoreload
%autoreload 2

In [3]:
# Create the vector-store handle through the project helper (presumably a
# Chroma collection, going by the helper's name -- confirm in RAG_utils).
# `db` is what the later cells query with similarity search.
db = RAG_utils.create_chroma_db()

Number of documents: 10
There are 10 in the collection


# Example

In [3]:
# Question put to the retrieval-augmented pipeline.
question = "When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?"

# Prompt pieces: a fixed system instruction plus a user template that is
# filled with the retrieved context and the question.
system_message = """You are a helpful assistant. Answer the user's question in one sentence based on the provided context. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Do NOT start your response with "According to the provided context." """
user_message_template = """Context: {context} Question: {question}"""

# Retrieve the 5 closest hits. Only element 0 of each hit is forwarded to the
# formatter (the remaining element is presumably the relevance score -- confirm
# against the vector-store API).
documents = db.similarity_search_with_relevance_scores(question, k=5)
retrieved_docs = [hit[0] for hit in documents]
context = RAG_utils.format_docs(retrieved_docs)
user_message = user_message_template.format(question=question, context=context)

# Fixed seed and near-zero temperature are passed through as generation options.
generation_options = {'seed': 0, 'temperature': 0.01}
answer = RAG_utils.gen_text_ollama(
    sys_msg=system_message,
    user_msg=user_message,
    options=generation_options,
)

# Question and answer on consecutive lines.
print(question, answer, sep='\n')

When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?
The house at 3524 Redwing Ct, Naperville, IL 60564 was last sold in October 2013 for $595,000.


### Alternative - format entire prompt as user message

In [4]:
# Same question as the previous example, sent this time as one hand-built prompt.
question = "When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?"

# Prompt layout reference: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
# NOTE(review): the variable is named for Llama 3, but the markup below is the
# Llama 2 [INST] / <<SYS>> format from the linked post -- confirm this is what
# the served model expects.
llama_3_prompt_template = """<s>[INST] <<SYS>>:
You are a helpful assistant. Answer the user's question in one sentence based on the provided context. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. Do not preface with "According to the provided context."
<</SYS>>

Context: {context} Question: {question} [/INST]"""

# Retrieve the 5 closest hits and keep element 0 of each for the context.
documents = db.similarity_search_with_relevance_scores(question, k=5)
top_docs = [match[0] for match in documents]
context = RAG_utils.format_docs(top_docs)
prompt_text = llama_3_prompt_template.format(question=question, context=context)

# The whole prompt goes in as a single argument; no options are passed here.
answer = RAG_utils.gen_text_ollama_user_only(prompt_text)

# Question and answer on consecutive lines.
print(question, answer, sep='\n')

When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?
The house at 3524 Redwing Ct, Naperville, IL 60564 was last sold in October 2013 for $595,000.


# Testing options

In [5]:
import ollama

In [6]:
# Sweep the `num_predict` generation option over 0, 5, ..., 95 with temperature
# and seed held fixed, printing each value next to the length of the returned
# text.
# The prompt is kept verbatim (typo included) so results stay comparable with
# the recorded output.
prompt_text = 'tell me aboout your day'
for num_predict in range(0, 100, 5):
    # The loop variable is spelled out instead of `np`: at notebook top level it
    # becomes a kernel-wide name, and `np` would clobber the conventional numpy
    # alias with an int.
    response = ollama.generate(
        model='llama3',
        prompt=prompt_text,
        options={'temperature': 0.1, 'seed': 1, 'num_predict': num_predict},
    )
    print(num_predict, len(response['response']))

0 1
5 14
10 28
15 63
20 81
25 109
30 136
35 157
40 174
45 187
50 231
55 232
60 262
65 265
70 287
75 324
80 355
85 361
90 413
95 400


In [7]:
# Sweep the sampling temperature over 0.00, 0.05, ..., 0.95 with a fixed seed
# and report the length of each generated response.
prompt_text = 'tell me aboout your day'
for temp in range(0, 100, 5):
    # Integer steps are scaled to a float once and reused below.
    temperature = temp / 100
    response = ollama.generate(
        model='llama3',
        prompt=prompt_text,
        options={'temperature': temperature, 'seed': 11},
    )
    print(f"Temperature: {temperature}")
    print(temperature, len(response['response']))

Temperature: 0.0
0.0 757
Temperature: 0.05
0.05 757
Temperature: 0.1
0.1 771
Temperature: 0.15
0.15 815
Temperature: 0.2
0.2 651
Temperature: 0.25
0.25 1019
Temperature: 0.3
0.3 706
Temperature: 0.35
0.35 1099
Temperature: 0.4
0.4 866
Temperature: 0.45
0.45 949
Temperature: 0.5
0.5 880
Temperature: 0.55
0.55 975
Temperature: 0.6
0.6 822
Temperature: 0.65
0.65 804
Temperature: 0.7
0.7 843
Temperature: 0.75
0.75 992
Temperature: 0.8
0.8 890
Temperature: 0.85
0.85 974
Temperature: 0.9
0.9 761
Temperature: 0.95
0.95 1240


In [None]:
# Reference listing of generation/runtime option keys with example values,
# kept as a valid Python dict so the cell runs under Restart & Run All.
# (The original cell was a pasted JSON fragment: a string annotation target
# plus lowercase true/false, which Python rejects.)
# NOTE(review): this appears to mirror the "options" example from the Ollama
# API docs -- confirm key support against the installed Ollama version.
# The values are illustrative only; no cell shown here passes this dict to a call.
OLLAMA_OPTIONS_EXAMPLE = {
    "num_keep": 5,
    "seed": 42,
    "num_predict": 100,
    "top_k": 20,
    "top_p": 0.9,
    "tfs_z": 0.5,
    "typical_p": 0.7,
    "repeat_last_n": 33,
    "temperature": 0.8,
    "repeat_penalty": 1.2,
    "presence_penalty": 1.5,
    "frequency_penalty": 1.0,
    "mirostat": 1,
    "mirostat_tau": 0.8,
    "mirostat_eta": 0.6,
    "penalize_newline": True,
    "stop": ["\n", "user:"],
    "numa": False,
    "num_ctx": 1024,
    "num_batch": 2,
    "num_gqa": 1,
    "num_gpu": 1,
    "main_gpu": 0,
    "low_vram": False,
    "f16_kv": True,
    "vocab_only": False,
    "use_mmap": True,
    "use_mlock": False,
    "rope_frequency_base": 1.1,
    "rope_frequency_scale": 0.8,
    "num_thread": 8,
}