In [1]:
import json

# Load the original JSON file.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default text encoding.
with open('project.json', 'r', encoding='utf-8') as f:
    original_data = json.load(f)

# Dataset-level metadata; fall back to placeholders when a key is absent.
dataset_name = original_data.get("dataset", "unknown_dataset")
description = original_data.get("description", "No description available")

# Flatten every question into a self-contained record that repeats the
# dataset-level metadata, so each record can be indexed on its own.
curious_data = [
    {
        "dataset": dataset_name,
        "description": description,
        "question": item.get("question", "unknown_question"),
        "answer": item.get("answer", "unknown_answer"),
        "source": item.get("source", "unknown_source"),
    }
    for item in original_data.get("questions", [])
]


In [2]:
# Inspect the first reformatted record to sanity-check its fields.
curious_data[0]

{'dataset': 'im-feeling-curious',
 'description': 'A collection of interesting Q&A pairs.',
 'question': 'Who said a journey of a thousand miles begins with a single step?',
 'answer': 'In this quote, Lao Tzu is trying to express that great things start from humble beginnings. In the original, the text refers to "1,000 li journey". A li is an old Chinese measure of distance which converts to 360 miles or 576 km.e.',
 'source': 'http://www.bbc.co.uk/worldservice/learningenglish/movingwords/shortlist/laotzu.shtml'}

In [3]:
# Number of Q&A records prepared for indexing.
len(curious_data)

100

In [4]:
from elasticsearch import Elasticsearch

# Connect to the Elasticsearch node on localhost:9200 and display its
# cluster/version info as a quick connectivity check.
es = Elasticsearch("http://localhost:9200")
es.info()

ObjectApiResponse({'name': '3e071fe0cf03', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'X4q0FcT7TlmIiW9wJfOyVg', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [32]:
# Index definition: a single shard with no replicas, plus explicit field
# mappings for the Q&A records.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            # keyword fields are matched as exact values (used for filtering)
            "dataset": {"type": "keyword"},
            "description": {"type": "text"},
            "question": {"type": "text"},
            "answer": {"type": "text"},
            "source": {"type": "keyword"}
        }
    }
}

index_name = "im-curious-question"

# Make the cell re-runnable: creating an index that already exists raises, and
# re-indexing into a leftover index would duplicate every document because the
# indexing step lets Elasticsearch auto-generate ids. Drop any stale index
# first; ignore_unavailable avoids an error when there is nothing to delete.
es.indices.delete(index=index_name, ignore_unavailable=True)
response = es.indices.create(index=index_name, body=index_settings)

response

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'im-curious-question'})

In [33]:
from tqdm.auto import tqdm

# Index the records one request at a time, with a progress bar.
# No explicit id is passed to es.index for these documents.
for doc in tqdm(curious_data):
    es.index(index=index_name, document=doc)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 38.88it/s]


In [34]:
# Sample question used to exercise the search and prompting steps.
user_question = 'Which finger nail grows the fastest?'

In [35]:
# Full-text query over the indexed Q&A records, restricted to one dataset.
search_query = {
    "size": 5,  # return at most five hits
    "query": {
        "bool": {
            # Scored clause: match the user's question against several fields;
            # the "^3" suffix boosts matches in the question field.
            "must": {
                "multi_match": {
                    "query": user_question,
                    "fields": ["question^3", "answer", "source"],
                    "type": "best_fields"
                }
            },
            # Exact-value restriction on the keyword-mapped dataset field.
            "filter": {
                "term": {
                    "dataset": "im-feeling-curious"
                }
            }
        }
    }
}

In [36]:
# Run the query against the index and show the raw Elasticsearch response.
response = es.search(index=index_name, body=search_query)
response

ObjectApiResponse({'took': 15, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 95, 'relation': 'eq'}, 'max_score': 62.97205, 'hits': [{'_index': 'im-curious-question', '_id': 'rRDhDZABiE16chhd2SKG', '_score': 62.97205, '_source': {'dataset': 'im-feeling-curious', 'description': 'A collection of interesting Q&A pairs.', 'question': 'Which finger nail grows the fastest?', 'answer': "On average, fingernails grow about one tenth of an inch in a month. The fastest growing nail is the one on your middle finger. The slowest growing nail is your thumb nail. Freshly cut nails grow faster than nails that aren't cut regularly.", 'source': 'http://www.cutexnails.com/nail-care-center/nail-facts'}}, {'_index': 'im-curious-question', '_id': '5RDhDZABiE16chhd3yId', '_score': 12.886654, '_source': {'dataset': 'im-feeling-curious', 'description': 'A collection of interesting Q&A pairs.', 'question': 'Why is the wedding ring worn on the

In [40]:
# Print the stored source URL, question and answer of every hit.
for hit in response['hits']['hits']:
    doc = hit['_source']  # the document body as it was indexed
    print(f"Source: {doc['source']}\nQuestion: {doc['question']}\nAnswer: {doc['answer']}\n\n")

Source: http://www.cutexnails.com/nail-care-center/nail-facts
Question: Which finger nail grows the fastest?
Answer: On average, fingernails grow about one tenth of an inch in a month. The fastest growing nail is the one on your middle finger. The slowest growing nail is your thumb nail. Freshly cut nails grow faster than nails that aren't cut regularly.


Source: http://www.todayifoundout.com/index.php/2010/09/the-origin-of-wedding-rings-and-why-theyre-worn-on-the-4th-finger-of-the-left-hand/
Question: Why is the wedding ring worn on the ring finger?
Answer: According to a tradition believed to have been derived from the Romans, the wedding ring is worn on the left hand ring finger because there was thought to be a vein in the finger, referred to as the 'Vena Amoris' or the 'Vein of Love' said to be directly connected to the heart.


Source: http://en.m.wikipedia.org/wiki/Ewok
Question: Which Star Wars has the Ewoks?
Answer: They first appeared in the film Star Wars Episode VI: Return

#### Putting it in a function

In [42]:
# Module-level Elasticsearch client; retrieve_documents reads this global.
es = Elasticsearch("http://localhost:9200")

def retrieve_documents(query, index_name="im-curious-question", max_results=5,
                       dataset="im-feeling-curious"):
    """Search the Q&A index and return the stored documents of the top hits.

    Uses the module-level ``es`` client.

    Args:
        query: Free-text question to match against the indexed documents.
        index_name: Name of the Elasticsearch index to search.
        max_results: Maximum number of hits to return.
        dataset: Value the ``dataset`` field must equal for a document to be
            returned. Defaults to the dataset this notebook indexes, so
            existing callers behave exactly as before.

    Returns:
        A list with the ``_source`` dict of each hit, in the order
        Elasticsearch returned them.
    """
    search_query = {
        "size": max_results,
        "query": {
            "bool": {
                # Scored clause; "^3" boosts matches in the question field.
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "answer", "source"],
                        "type": "best_fields"
                    }
                },
                # Exact-value restriction to a single dataset.
                "filter": {
                    "term": {
                        "dataset": dataset
                    }
                }
            }
        }
    }

    response = es.search(index=index_name, body=search_query)
    return [hit['_source'] for hit in response['hits']['hits']]

In [44]:
# Exercise retrieve_documents with the sample question and print each result.
user_question = "Which finger nail grows the fastest?"

# Note: here `response` is the list of document dicts returned by
# retrieve_documents, not a raw Elasticsearch response.
response = retrieve_documents(user_question)

for doc in response:
    print(f"Source: {doc['source']}\nQuestion: {doc['question']}\nAnswer: {doc['answer']}\n\n")

Source: http://www.cutexnails.com/nail-care-center/nail-facts
Question: Which finger nail grows the fastest?
Answer: On average, fingernails grow about one tenth of an inch in a month. The fastest growing nail is the one on your middle finger. The slowest growing nail is your thumb nail. Freshly cut nails grow faster than nails that aren't cut regularly.


Source: http://www.todayifoundout.com/index.php/2010/09/the-origin-of-wedding-rings-and-why-theyre-worn-on-the-4th-finger-of-the-left-hand/
Question: Why is the wedding ring worn on the ring finger?
Answer: According to a tradition believed to have been derived from the Romans, the wedding ring is worn on the left hand ring finger because there was thought to be a vein in the finger, referred to as the 'Vena Amoris' or the 'Vein of Love' said to be directly connected to the heart.


Source: http://en.m.wikipedia.org/wiki/Ewok
Question: Which Star Wars has the Ewoks?
Answer: They first appeared in the film Star Wars Episode VI: Return

In [46]:
from openai import OpenAI

# No credentials are passed here; presumably the client picks them up from the
# environment (e.g. OPENAI_API_KEY) — confirm for your setup.
client = OpenAI()

# Smoke test: send a single user message with no retrieved context and print
# the text of the first choice.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the formula for Energy?"}]
)
print(response.choices[0].message.content)

The term "energy" can refer to various forms of energy in physics, and the formula used to calculate energy typically depends on the type of energy being considered. Here are some of the key formulas for different types of energy:

1. **Kinetic Energy (KE):**
   \[
   KE = \frac{1}{2}mv^2
   \]
   where \( m \) is the mass of the object and \( v \) is its velocity.

2. **Potential Energy (Gravitational Potential Energy) (PE):**
   \[
   PE = mgh
   \]
   where \( m \) is the mass, \( g \) is the acceleration due to gravity, and \( h \) is the height above a reference point.

3. **Elastic Potential Energy:**
   \[
   PE_{elastic} = \frac{1}{2}kx^2
   \]
   where \( k \) is the spring constant and \( x \) is the displacement from the equilibrium position.

4. **Thermal Energy (Internal Energy):**
   \[
   Q = mc\Delta T
   \]
   where \( Q \) is the thermal energy, \( m \) is the mass, \( c \) is the specific heat capacity, and \( \Delta T \) is the change in temperature.

5. **Electroma

In [69]:
# Retrieve the documents for the current question and flatten them into one
# text block to paste into the prompt.
context_docs = retrieve_documents(user_question)

context = ""

for doc in context_docs:
    # One "Source / Question / Answer" entry per document, blank-line separated.
    doc_str = f"Source: {doc['source']}\nQuestion: {doc['question']}\nAnswer: {doc['answer']}\n\n"
    context += doc_str

# Drop the trailing blank lines left by the last entry.
context = context.strip()
print(context)

Source: http://www.cutexnails.com/nail-care-center/nail-facts
Question: Which finger nail grows the fastest?
Answer: On average, fingernails grow about one tenth of an inch in a month. The fastest growing nail is the one on your middle finger. The slowest growing nail is your thumb nail. Freshly cut nails grow faster than nails that aren't cut regularly.

Source: http://www.todayifoundout.com/index.php/2010/09/the-origin-of-wedding-rings-and-why-theyre-worn-on-the-4th-finger-of-the-left-hand/
Question: Why is the wedding ring worn on the ring finger?
Answer: According to a tradition believed to have been derived from the Romans, the wedding ring is worn on the left hand ring finger because there was thought to be a vein in the finger, referred to as the 'Vena Amoris' or the 'Vein of Love' said to be directly connected to the heart.

Source: http://en.m.wikipedia.org/wiki/Ewok
Question: Which Star Wars has the Ewoks?
Answer: They first appeared in the film Star Wars Episode VI: Return o

In [73]:
# Prompt template: instructs the model to answer only from the retrieved
# CONTEXT and to return "NONE" when the context has no answer.
# NOTE(review): "contan" in the template is a typo for "contain"; it is part of
# the runtime prompt, so fixing it changes what is sent to the model.
prompt = f"""
You're a course teaching assistant. Answer the user QUESTION based on CONTEXT - the documents retrieved from our Inquisitive database. 
Only use the facts from the CONTEXT. If the CONTEXT doesn't contan the answer, return "NONE"

QUESTION: {user_question}

CONTEXT:

{context}
"""

In [74]:
# Show the fully rendered prompt string.
prompt

'\nYou\'re a course teaching assistant. Answer the user QUESTION based on CONTEXT - the documents retrieved from our Inquisitive database. \nOnly use the facts from the CONTEXT. If the CONTEXT doesn\'t contan the answer, return "NONE"\n\nQUESTION: Which finger nail grows the fastest?\n\nCONTEXT:\n\nSource: http://www.cutexnails.com/nail-care-center/nail-facts\nQuestion: Which finger nail grows the fastest?\nAnswer: On average, fingernails grow about one tenth of an inch in a month. The fastest growing nail is the one on your middle finger. The slowest growing nail is your thumb nail. Freshly cut nails grow faster than nails that aren\'t cut regularly.\n\nSource: http://www.todayifoundout.com/index.php/2010/09/the-origin-of-wedding-rings-and-why-theyre-worn-on-the-4th-finger-of-the-left-hand/\nQuestion: Why is the wedding ring worn on the ring finger?\nAnswer: According to a tradition believed to have been derived from the Romans, the wedding ring is worn on the left hand ring finger be

In [77]:
# Send the context-grounded prompt as a single user message.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": prompt}]
)
answer = response.choices[0].message.content
# Collapse the answer onto one line for compact display.
reply = answer.replace("\n", " ")
reply

'The fastest growing fingernail is the one on your middle finger.   (Source: http://www.cutexnails.com/nail-care-center/nail-facts)'

In [78]:
def build_context(documents):
    """Render documents as one text block for inclusion in a prompt.

    Each document contributes a "Source / Question / Answer" entry; entries
    are separated by a blank line, and leading/trailing whitespace of the
    combined text is stripped.

    Args:
        documents: Iterable of dicts with 'source', 'question' and 'answer' keys.

    Returns:
        The combined context string ("" when there are no documents).
    """
    entries = [
        f"Source: {doc['source']}\nQuestion: {doc['question']}\nAnswer: {doc['answer']}\n\n"
        for doc in documents
    ]
    return "".join(entries).strip()


def build_prompt(user_question, documents):
    """Build the LLM prompt for a question from its retrieved documents.

    Args:
        user_question: The question to be answered.
        documents: Documents retrieved for this question; each is rendered
            into the CONTEXT section via build_context.

    Returns:
        The prompt string, with surrounding whitespace stripped.
    """
    # Ground the prompt in the documents retrieved for THIS question. The
    # previous code passed the global `curious_data` here, which put the whole
    # corpus into every prompt and ignored the `documents` argument.
    context = build_context(documents)
    return f"""
You're a course teaching assistant.
Answer the user QUESTION based on CONTEXT - the documents retrieved from our INQUISITIVE MINDS database.
Don't use other information outside of the provided CONTEXT.  

QUESTION: {user_question}

CONTEXT:

{context}
""".strip()

def ask_openai(prompt, model="gpt-4o"):
    """Send a prompt to the chat-completions API and return a one-line reply.

    Uses the module-level ``client``.

    Args:
        prompt: Text sent as a single user message.
        model: Name of the chat model to call.

    Returns:
        The content of the first choice, with newlines replaced by spaces.
    """
    response = client.chat.completions.create(
        # Honour the caller's choice; this was previously hard-coded to
        # "gpt-4o", which silently ignored the `model` argument.
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    answer = response.choices[0].message.content
    return answer.replace("\n", " ")

def qa_bot(user_question):
    """Answer a question end to end: retrieve, build the prompt, ask the LLM.

    Args:
        user_question: The question to answer.

    Returns:
        The reply string produced by ask_openai.
    """
    retrieved = retrieve_documents(user_question)
    return ask_openai(build_prompt(user_question, retrieved))

In [None]:
# NOTE(review): in a notebook cell only the last expression's value is
# displayed, so the first two answers below are computed and then discarded.
# These questions also look unrelated to the indexed trivia dataset — confirm
# whether they are intentional out-of-domain probes or leftovers.
qa_bot("I'm getting invalid reference format: repository name must be lowercase")

qa_bot("I can't connect to postgres port 5432, my password doesn't work")

qa_bot("how can I run kafka?")

In [79]:
# Run the full pipeline on a single question and display the reply.
qa_bot("I'm getting invalid reference format: repository name must be lowercase")

'The error "invalid reference format: repository name must be lowercase" likely arises because repository names in version control systems like GitHub are case-sensitive and must adhere to certain format rules. Specifically, they typically need to be in lowercase.  To resolve this issue, ensure that the repository name you are using is completely in lowercase. For example, if your repository name is "MyRepo", change it to "myrepo".  Example: Invalid: `docker pull USERNAME/MyRepo:latest` Valid: `docker pull username/myrepo:latest`'