In [41]:
from CreateDocuments import load_documents
import RAG_utils
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Setup

### Create chroma database

In [42]:
db = RAG_utils.create_chroma_db()
db._collection.peek()['documents']

Number of documents: 10
There are 10 in the collection


['- 24337 W Boulevard De John3 beds, 2 baths\n1,734 sqft, 22,651 sqft lot, built in 1974\n- 3304 Mistflower Ln4 beds, 5 baths\n3,114 sqft, 10,123 sqft lot, built in 2003\nRentals\n- 24330 W Boulevard De John3 beds, 2 baths\n-- sqft, -- sqft lot, built in --\n- 3412 Minito Ct5 beds, 5 baths\n4,950 sqft, -- sqft lot, built in 2005\n- 3007 Saganashkee Ln2 beds, 3 baths\n1,850 sqft, -- sqft lot, built in 2003\n- 3007 Saganashkee Ln # 30072 beds, 2.5 baths\n1,850 sqft, -- sqft lot, built in 2003\n- 3443 Breitwieser Ln4 beds, 3.5 baths\n3,118 sqft, 10,100 sqft lot, built in 2001\n- 2809 Wheatland Ct2 beds, 2.5 baths\n1,550 sqft, -- sqft lot, built in 1998\n- 2817 Ledgestone Ct3 beds, 2.5 baths\n1,500 sqft, -- sqft lot, built in 1998\n- 2812 Cantore Rd3 beds, 2.5 baths\n1,502 sqft, -- sqft lot, built in 1997\n- 2876 Cedar Glade Dr3 beds, 2.5 baths\n1,624 sqft, -- sqft lot, built in 1997\n- 11336 Highland Dr S4 beds, 2.5 baths\n2,200 sqft, -- sqft lot, built in 2005\n- 11338 Highland Dr S4 bed

# Tests

### Example vector search of database given a question

In [43]:
question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'
docs = db.similarity_search_with_relevance_scores(question, k=5)
for i, doc in enumerate(docs):
    print('doc:', i+1, '='*100)
    print(doc[0].page_content)

- Roof type: Asphalt
- Room count: 11
- Stories: 2
- Structure type: Other
- Unit count: 1
Other
- Floor size: 3,591 sqft
- Heating: Gas
- Laundry: In Unit
- Parcel #: 0701093130290000
- Zillow Home ID: 556842K.
Mortgages
Neighborhood
Market guideZillow predicts 60564 home values will fall 1% next year, compared to a 1.1% decrease for Naperville as a whole. Among 60564 homes, this home is valued 49.3% more than the midpoint (median) home, and is valued 11.5% more per square.
Learn more about forecast calculations or 60564 home values.… More Less
For Sale
- 3540 Redwing Ct5 beds, 5 baths
3,986 sqft, 6,372 sqft lot, built in 2004
- 3459 Redwing Dr4 beds, 3.5 baths
2,865 sqft, 10,001 sqft lot, built in 2001
- 3451 Redwing Dr5 beds, 5 baths
3,553 sqft, 10,890 sqft lot, built in 2003
- 3312 Danlaur Ct4 beds, 3.5 baths
4,410 sqft, 12,196 sqft lot, built in 2005
- 3727 Nicanoa Ln5 beds, 4.5 baths
3,700 sqft, 11,003 sqft lot, built in 2003
- 3508 Tall Grass Dr5 beds, 3.5 baths
3524 Redwing Ct,

Note: the document with the correct context is ranked 2nd in the list.

### HuggingFaceH4/zephyr-7b-beta via langchain HF API (langchain_community.llms.HuggingFaceHub)

In [44]:
question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

prompt_template1 = """Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])
prompt_text = prompt_template1.format(context=context, question=question)

answer = RAG_utils.gen_text_hf_api(lm_name='HuggingFaceH4/zephyr-7b-beta', prompt_text=prompt_text)
print(question)
print(answer)

When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?

The house at 3524 Redwing Ct, Naperville, IL 60564 was last sold in October 2013 for $595,000.


The answer is exactly as expected.

In [45]:
question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

prompt_template1 = """You're are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])
prompt_text = prompt_template1.format(context=context, question=question)

answer = RAG_utils.gen_text_hf_api(lm_name='HuggingFaceH4/zephyr-7b-beta', prompt_text=prompt_text, temp=0.5)
print(question)
print(answer)

When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?

The house at 3524 Redwing Ct, Naperville, IL 60564 was last sold on Oct 2013 for $595,000.


### HuggingFaceH4/zephyr-7b-beta via transformers.AutoModelForCausalLM

In [46]:
question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

prompt_template1  = """Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])

lm, tokenizer = RAG_utils.load_lm_and_tokenizer('HuggingFaceH4/zephyr-7b-beta', config_updates={'do_sample': True,
                                                                                                'max_new_tokens': 250, 
                                                                                                'top_k': 30,
                                                                                                'temperature': 1,
                                                                                                'repetition_penalty': 1.03,}) # Note: config setting does not appear to make a difference
prompt_text = RAG_utils.format_prompt(context, question, prompt_template1)
answer = RAG_utils.gen_text_hf_local(lm, tokenizer, prompt_text)

print(question)
print(answer)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?
<ocker. is 2 bath5 baths,
,770 sq.
  mft lot
 Built in 2979
$ 3 story55 Sedar Aveade Dr, is, 2.5 baths
1,514 sqft, -- sqft lot, built in 1997
- 2111 C Cland Dr33 beds, 3.5 baths
1,140 sqft, -- sqft lot, built in 1000
- 11336 Highland Dr S4 beds, 2.5 baths
2Based 1101 Cbury Dr Drt S beds, 2 bath5 baths
2,106 sqft, --0.,000 sqft lot, built in 2005
- 3311 DanWor Drn4 beds, 3 bath5 baths
4,980 sqft, --12,700 sqft lot, built in 2005
- 3711 Nadass Dr5 beds, 4 bath5 baths
3,750 sqft, 1,750 sqft lot, built in 2004
- 3815 K Nies Drt5 beds, 3.s
2,986 sqft, --6,,200 sqft lot, built in 1998
- 2511 Ning M Drd4 beds, 2 bath5 baths
3,100 sqft, 1,744 sqft lot, built in 2004
- 3211 Rollahrel Dr4 beds, 3.5 baths
4,785 sqft, 80,008 sqft lot, built in 2003
- 3411 Nicanoa Ln5 beds, 4 bath5 baths
4,220 sqft, 1,706 sqft lot, built in 2003
- 3714 Nale Drn6 beds, 3 bath5 baths
4,240 sqft, 10,048 sqft lot, built in 2004
- 

In [47]:
question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

prompt_template1  = """Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])

lm, tokenizer = RAG_utils.load_lm_and_tokenizer('HuggingFaceH4/zephyr-7b-beta', config_updates={'do_sample': True,
                                                                                                'max_new_tokens': 250, 
                                                                                                'top_k': 30,
                                                                                                'temperature': 0.1,
                                                                                                'repetition_penalty': 1.03,}) # Note: config setting does not appear to make a difference
prompt_text = RAG_utils.format_prompt(context, question, prompt_template1)
answer = RAG_utils.gen_text_hf_local(lm, tokenizer, prompt_text)

print(question)
print(answer)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?
<ocker. is 2 bath5 baths,
,770 sq.
  mft lot
 Built in 2979
$ 3 story55 Sedar Aveade Dr, is, 2.5 baths
1,514 sqft, -- sqft lot, built in 1997
- 2111 C Cland Dr33 beds, 3.5 baths
1,140 sqft, -- sqft lot, built in 1000
- 11336 Highland Dr S4 beds, 2.5 baths
2Based 1101 Cbury Dr Drt S beds, 2 bath5 baths
2,106 sqft, --0.,000 sqft lot, built in 2005
- 3311 DanWor Drn4 beds, 3 bath5 baths
4,980 sqft, --12,700 sqft lot, built in 2005
- 3711 Nadass Dr5 beds, 4 bath5 baths
3,750 sqft, 1,750 sqft lot, built in 2004
- 3815 K Nies Drt5 beds, 3.s
2,986 sqft, --6,,200 sqft lot, built in 1998
- 2511 Ning M Drd4 beds, 2 bath5 baths
3,100 sqft, 1,744 sqft lot, built in 2004
- 3211 Rollahrel Dr4 beds, 3.5 baths
4,785 sqft, 80,008 sqft lot, built in 2003
- 3411 Nicanoa Ln5 beds, 4 bath5 baths
4,220 sqft, 1,706 sqft lot, built in 2003
- 3714 Nale Drn6 beds, 3 bath5 baths
4,240 sqft, 10,048 sqft lot, built in 2004
- 

Loading the model directly from HF is not working properly. It runs, but the generated responses often do not even include the right answer at all, and are usually too long.

### HF API same context, different question

In [48]:
docs[1][0].page_content # the relevant document

"3524 Redwing Ct, Naperville, IL 60564\n4 beds5 baths3,591 sqft Edit\nA Zestimate® home valuation is Zillow's estimated market value. It is not an appraisal. Use it as a starting point to determine a home's value. Learn more\nFacts\n- Single Family\n- Built in 2000\n- Views: 773 all time views\n- Cooling: Central, Other\n- Heating: Forced air, Other\n- Last sold: Oct 2013 for $595,000\n- Last sale price/sqft: $166\nFeatures\n- Ceiling Fan\n- Deck\n- Fireplace\n- Flooring: Carpet, Hardwood\n- Mother-in-Law\n- Parking: Garage - Attached, 3 spaces, 704 sqft\n- Security System\n- Vaulted Ceiling\nAppliances Included\n- Dishwasher\n- Dryer\n- Garbage disposal\n- Microwave\n- Range / Oven\n- Refrigerator\n- Washer\nRoom Types\n- Dining room\n- Family room\n- Office\n- Recreation room\nConstruction\n- Exterior material: Brick\n- Roof type: Asphalt\n- Room count: 11\n- Stories: 2\n- Structure type: Other\n- Unit count: 1\nOther\n- Floor size: 3,591 sqft\n- Heating: Gas\n- Laundry: In Unit\n- P

In [49]:
question = "What was the address of the house sold for $595,000 in October 2013?"

prompt_template1 = """Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])
prompt_text = prompt_template1.format(context=context, question=question)

answer = RAG_utils.gen_text_hf_api(lm_name='HuggingFaceH4/zephyr-7b-beta', prompt_text=prompt_text)
print(question)
print(answer)



What was the address of the house sold for $595,000 in October 2013?

The address of the house sold for $595,000 in October 2013 is not explicitly stated in the given context. However, the context mentions that the house at 3524 Redwing Ct, Naperville, IL 60564 was last sold for $595,000 in October 2013. Therefore, it can be assumed that the address of the sold house is 3524 Redwing Ct, Naperville, IL 60564.


It cannot answer the question accurately given the correct context.

### HF API - RAG context from row 7771

In [50]:
question = "What does the multi-colored set of gemstone dice represent in the Death Saves / Norse Foundry Arkhan the Cruel™ dice set?"

prompt_template1 = """Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])
prompt_text = prompt_template1.format(context=context, question=question)

answer = RAG_utils.gen_text_hf_api(lm_name='HuggingFaceH4/zephyr-7b-beta', prompt_text=prompt_text)
print(question)
print(answer)
true_answer = 'The multi-colored set of gemstone dice represent the power of the five races of the Chromatic Dragons.'
print('True answer:', true_answer)



What does the multi-colored set of gemstone dice represent in the Death Saves / Norse Foundry Arkhan the Cruel™ dice set?

The multi-colored set of gemstone dice in the Death Saves / Norse Foundry Arkhan the Cruel™ dice set represents the power of the five races of the Chromatic Dragons and harks back to the very first Creative Publications / Holmes polyhedral dice ever made in the early to mid 1970s. The numbers on the D20 are configured into two sets of 0–9 and 0–9, and a gold dot is placed on one half of the numbers to indicate that you should add +10. The five-pronged symbol of Arkhan's Dragon Goddess is placed not only on the high number of the D20 but also on the high numbers of the other four dice.
True answer: The multi-colored set of gemstone dice represent the power of the five races of the Chromatic Dragons.


It answers this question correctly, but then adds unnecessary context.

### HF API - RAG context from row 7937

In [51]:
question = "Where was a yellow-billed cuckoo seen on Friday, 06/23?"

prompt_template1 = """You're are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])
prompt_text = prompt_template1.format(context=context, question=question)

answer = RAG_utils.gen_text_hf_api(lm_name='HuggingFaceH4/zephyr-7b-beta', prompt_text=prompt_text)
print('Question:', question)
print('Answer:', answer)
true_answer = 'A YELLOW-BILLED CUCKOO was seen in the trees at the Fielding-Garr Ranch at Antelope Island SP on Friday, 06/23'
print('True answer:', true_answer)



KeyboardInterrupt: 

The response is correct, but then it keeps going.

### Prompt engineering

In [None]:
prompt_text = 'Say hello.'
answer = RAG_utils.gen_text_hf_api(lm_name='HuggingFaceH4/zephyr-7b-beta', prompt_text=prompt_text)
print('Answer:', answer[0:100])

prompt_text = 'You are a friendly chat bot. Please say hello.'
answer = RAG_utils.gen_text_hf_api(lm_name='HuggingFaceH4/zephyr-7b-beta', prompt_text=prompt_text)
print('Answer:', answer[0:100])

Answer: 

We’re a full-service marketing agency that specializes in helping businesses grow. Our team of exp
Answer: 

Hello! I'm your friendly chatbot, here to assist you with any questions or requests you may have. 


In [None]:
question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

prompt_template1  = """Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])

lm, tokenizer = RAG_utils.load_lm_and_tokenizer('HuggingFaceH4/zephyr-7b-beta', config_updates={'do_sample': True,
                                                                                                'max_new_tokens': 250, 
                                                                                                'top_k': 30,
                                                                                                'temperature': 0.1,
                                                                                                'repetition_penalty': 1.03,})

answer = RAG_utils.gen_text_hf_local(lm, tokenizer, prompt_template1, context, question)

print(question)
print(answer)

Loading checkpoint shards: 100%|██████████| 8/8 [00:21<00:00,  2.65s/it]


When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?
< in.am are the205ms,
,700 sq.,  mft lot
 built in 2999
$ 2 car1 days sq Nland Dr,,,, 2.5 baths
1,140 sqft, -- sqft lot, built in 1000
- 11348 Highland Dr S4 beds, 2.5 baths
2Based 1115 Highbury Dr Drt S beds, 2 bath5 baths
2,106 sqft, --0.,000 sqft lot, built in 1005
- 3315 HighWor Drn4 beds, 3 bath5 baths
4,880 sqft, --12,400 sqft lot, built in 2005
- 3711 Nadass Dr5 beds, 4 bath5 baths
3,700 sqft, 1,750 sqft lot, built in 2004
- 3815 N Nies Drt5 beds, 3.s
2,986 sqft, --6,,200 sqft lot, built in 1998
- 2511 Ning M Drd4 beds, 2 bath5 baths
2,100 sqft, 1,704 sqft lot, built in 2004
- 3211 Tallahrel Dr5 beds, 3.5 baths
4,785 sqft, 80,008 sqft lot, built in 2000
- 3511 Nicanoa Ln5 beds, 4 bath5 baths
4,220 sqft, 1,806 sqft lot, built in 2003
- 3518 Nau Drn5 beds, 3 bath5 baths
4,250 sqft, 10,048 sqft lot, built in 2004
- 3611 Kaddleyside Drt5 beds, 3 bath5 baths
4,980 sqft, 10,275 sqft lot, built in

# New tests

### meta-llama/Meta-Llama-3-8B

In [None]:
question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

prompt_template1 = """Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])
prompt_text = prompt_template1.format(context=context, question=question)

# answer = RAG_utils.gen_text_hf_api(lm_name='meta-llama/Meta-Llama-3-8B', prompt_text=prompt_text)
# 'MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF'ArithmeticError
answer = RAG_utils.gen_text_hf_api(lm_name="MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF", prompt_text=prompt_text)
print(question)
print(answer)

HfHubHTTPError: 404 Client Error: Not Found for url: https://api-inference.huggingface.co/models/MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF (Request ID: 3AjyHcnHIwURUZnhK_fte)

Model MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1 does not exist

## Llama

In [None]:
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain_experimental.chat_models import Llama2Chat

In [None]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)

llama_template_messages = [
    SystemMessage(content="You are a helpful assistant."),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{text}"),
]
llama_prompt_template = ChatPromptTemplate.from_messages(template_messages)

NameError: name 'template_messages' is not defined

In [None]:
from langchain_community.llms import HuggingFaceTextGenInference

llm = HuggingFaceTextGenInference(
    inference_server_url="http://127.0.0.1:8080/",
    max_new_tokens=512,
    top_k=50,
    temperature=0.1,
    repetition_penalty=1.03,
)

model = Llama2Chat(llm=llm)

In [None]:
question

'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

In [None]:
RAG_utils.format_prompt(prompt_template1, context, question)

"Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:\n- Roof type: Asphalt\n- Room count: 11\n- Stories: 2\n- Structure type: Other\n- Unit count: 1\nOther\n- Floor size: 3,591 sqft\n- Heating: Gas\n- Laundry: In Unit\n- Parcel #: 0701093130290000\n- Zillow Home ID: 556842K.\nMortgages\nNeighborhood\nMarket guideZillow predicts 60564 home values will fall 1% next year, compared to a 1.1% decrease for Naperville as a whole. Among 60564 homes, this home is valued 49.3% more than the midpoint (median) home, and is valued 11.5% more per square.\nLearn more about forecast calculations or 60564 home values.… More Less\nFor Sale\n- 3540 Redwing Ct5 beds, 5 baths\n3,986 sqft, 6,372 sqft lot, built in 2004\n- 3459 Redwing Dr4 beds, 3.5 baths\n2,865 sqft, 10,001 sqft lot, built in 2001\n- 3451 Redwing Dr5 beds, 5 baths\n3,553 sqft, 10,890 sqft lot, built in 2003\n- 3312 Danlaur Ct4 beds, 3.5 baths\n4,410 sqft, 12,196 sqft lot, bui

In [None]:
from RAG_utils import *
lm = HuggingFaceHub(
    repo_id="MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF",
    #repo_id="microsoft/DialoGPT-medium",
    task="text-generation",
    huggingfacehub_api_token = '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',
    model_kwargs={
        "max_new_tokens": 250,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)
generated_text = lm.invoke(prompt_text)[len(prompt_text):]

HfHubHTTPError: 404 Client Error: Not Found for url: https://api-inference.huggingface.co/models/MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF (Request ID: ufQzzPF-KKlBYbo8mLec3)

Model MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1 does not exist

In [None]:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF")

ValueError: The checkpoint you are trying to load has model type `llama-3` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF")

ValueError: The checkpoint you are trying to load has model type `llama-3` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.

In [None]:
from os.path import expanduser

from langchain_community.llms import LlamaCpp

model_path = expanduser("ragenv/lib/python3.8/site-packages/transformers/models/llama/")

llm = LlamaCpp(
    model_path=model_path,
    streaming=False,
)
model = Llama2Chat(llm=llm)

ValidationError: 1 validation error for LlamaCpp
__root__
  Could not load Llama model from path: ragenv/lib/python3.8/site-packages/transformers/models/llama/. Received error Model path does not exist: ragenv/lib/python3.8/site-packages/transformers/models/llama/ (type=value_error)

In [None]:
from llama_cpp import Llama

In [None]:


MODEL_Q8_0 = Llama(
    model_path="../models/llama-2-7b-chat.ggmlv3.q8_0.bin",
    n_ctx=2048)

ValueError: Model path does not exist: ../models/llama-2-7b-chat.ggmlv3.q8_0.bin

## Different Llama model

In [None]:
# Load model directly
from transformers import AutoModel, LlamaForCausalLM
model = LlamaForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML")

OSError: TheBloke/Llama-2-7B-Chat-GGML does not appear to have a file named pytorch_model.bin, tf_model.h5, model.ckpt or flax_model.msgpack.

## Phi 3

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attenton` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

prompt_template1 = """Your are a helpful assistant. Please answer in one sentence. Answer the question based only on the following context:
{context}
Question: {question}
Answer: 
"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])
prompt_text = prompt_template1.format(context=context, question=question)

# answer = RAG_utils.gen_text_hf_api(lm_name='meta-llama/Meta-Llama-3-8B', prompt_text=prompt_text)
# 'MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF'ArithmeticError
answer = RAG_utils.gen_text_hf_api(lm_name="microsoft/Phi-3-mini-4k-instruct", prompt_text=prompt_text)
print(question)
print(answer)

When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?
- Answer



- Answer

- Answer.

.
- Answer.
- What.

- Answer.
.



- About.
-2. Question.





-1.



===

===

















































.



.

.




.



.

.

.

.
.
.
.
.
.
.
.
. Based.
.
.
.
.
. The following.
. <|end|> ., from me.
2. The question.
. Based. The question.
. The following.
: What. The question.
.
.
.
.
.
.
.
.
: 
.
.

.
.
.
.
.
.
 and12
.

.
.


In [52]:
from langchain_community.llms import HuggingFaceEndpoint
from getpass import getpass

question = 'When was the house at 3524 Redwing Ct, Naperville, IL 60564 last sold and for what price?'

phi3_prompt_template = """<|system|>
You are a helpful assistant.<|end|>
<|user|>
Please answer the following question in one sentence based only on the following context:
{context}
Question: {question}
<|end|>
<|assistant|>

"""

documents = db.similarity_search_with_relevance_scores(question, k=5)
context = RAG_utils.format_docs([doc[0] for doc in documents])
prompt_text = prompt_template1.format(context=context, question=question)


# source: https://python.langchain.com/docs/integrations/llms/huggingface_endpoint/

HUGGINGFACEHUB_API_TOKEN = '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'
repo_id = "microsoft/Phi-3-mini-4k-instruct"

llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
)
llm_chain = LLMChain(prompt=prompt_text, llm=llm)
print(llm_chain.run(question))


                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.


ValidationError: 1 validation error for HuggingFaceEndpoint
__root__
  Could not authenticate with huggingface_hub. Please check your API token. (type=value_error)

In [None]:
# https://huggingface.co/inference-api/serverless
import requests

# def query(payload, model_id, api_token):
# 	headers = {"Authorization": f"Bearer {api_token}"}
# 	API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
# 	response = requests.post(API_URL, headers=headers, json=payload)
# 	return response.json()


def qa_query(question, context, model_id, api_token):
	headers = {"Authorization": f"Bearer {api_token}"}
	API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
	payload = {"inputs": {"question": question, "context": context}}
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

model_id = "distilbert-base-uncased"
#model_id = "microsoft/Phi-3-mini-4k-instruct"
#api_token = "hf_XXXXXXXX" # get yours at hf.co/settings/tokens
HUGGINGFACEHUB_API_TOKEN = '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'

# data = query("The goal of life is [MASK].", model_id, HUGGINGFACEHUB_API_TOKEN)
# print(data)
data = qa_query(question, context, model_id, HUGGINGFACEHUB_API_TOKEN)
print(data)

