In [4]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
import os
import json
from langchain.schema import Document

In [7]:
def load_json(data_path):
    documents = []
    for filename in os.listdir(data_path):
        if filename.endswith(".json"):
            file_path = os.path.join(data_path, filename)
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)

                # If the JSON is a list, loop through each item
                if isinstance(data, list):
                    for entry in data:
                        text = entry.get("content") or entry.get("text") or json.dumps(entry)
                        documents.append(Document(page_content=text, metadata={"source": filename}))
                # If it's a single dictionary
                elif isinstance(data, dict):
                    text = data.get("content") or data.get("text") or json.dumps(data)
                    documents.append(Document(page_content=text, metadata={"source": filename}))
                else:
                    # If it's something else, just store as string
                    documents.append(Document(page_content=str(data), metadata={"source": filename}))

    return documents

In [8]:
extracted_data = load_json("data/")
print(f"Loaded {len(extracted_data)} documents")

Loaded 163 documents


In [9]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

text_chunks = text_split(extracted_data)
print(f"Number of chunks: {len(text_chunks)}")

Number of chunks: 7957


In [10]:
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

embeddings = download_hugging_face_embeddings()
query_result = embeddings.embed_query("Hello world")
print(f"Query Embedding Length: {len(query_result)}")

  from .autonotebook import tqdm as notebook_tqdm


Query Embedding Length: 384


In [11]:
docsearch = FAISS.from_texts([t.page_content for t in text_chunks], embeddings)

query = "What are allergies?"
docs = docsearch.similarity_search(query, k=3)
print("Top 3 Documents for the Query:", docs)

Top 3 Documents for the Query: [Document(id='6bee5273-4adf-4050-b688-17ce85407406', metadata={}, page_content='most travellers. Protect yourself from sandfly bites, which typically occur after sunset in rural and forested areas and in some urban centres. There is no vaccine or medication to protect against\\u00a0leishmaniasis. Lymphatic filariasis , also known as elephantiasis, is caused by filariae (tiny worms) spread to humans through the bite of an infected mosquito. It can cause a range of illnesses. Risk is generally low for most travellers. Protect yourself from mosquito bites. There is no vaccine'), Document(id='ef5c9a2c-d73a-4fe6-a5bb-6842ebf6da61', metadata={}, page_content='diseases they can spread. To protect yourself from bites: use an approved bug spray (insect repellent) on exposed skin wear light-coloured, loose clothing made of tightly woven materials like nylon or polyester wear socks and closed-toe shoes sleep under mosquito netting if you\\u2019re outdoors or staying

In [12]:
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": PROMPT}

In [16]:
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config={'max_new_tokens': 512, 'temperature': 0.8}
)

In [17]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [18]:
while True:
    try:
        user_input = input(f"\nInput your question (type 'exit' to stop): ")
        if user_input.lower() == 'exit':
            print("\nExiting chat... Goodbye!")
            break

        result = qa({"query": user_input})
        response = result["result"]

        print(f"\nUser Question: {user_input}")
        print(f"Bot Response: {response}\n")

    except KeyboardInterrupt:
        print("\nExiting chat...")
        break


Input your question (type 'exit' to stop):  What are the best self-defense techniques for women walking alone at night?


  result = qa({"query": user_input})
Number of tokens (513) exceeded maximum context length (512).
Number of tokens (514) exceeded maximum context length (512).
Number of tokens (515) exceeded maximum context length (512).
Number of tokens (516) exceeded maximum context length (512).
Number of tokens (517) exceeded maximum context length (512).
Number of tokens (518) exceeded maximum context length (512).
Number of tokens (519) exceeded maximum context length (512).
Number of tokens (520) exceeded maximum context length (512).
Number of tokens (521) exceeded maximum context length (512).
Number of tokens (522) exceeded maximum context length (512).
Number of tokens (523) exceeded maximum context length (512).
Number of tokens (524) exceeded maximum context length (512).
Number of tokens (525) exceeded maximum context length (512).
Number of tokens (526) exceeded maximum context length (512).
Number of tokens (527) exceeded maximum context length (512).
Number of tokens (528) exceeded m


User Question: What are the best self-defense techniques for women walking alone at night?
Bot Response: Staying aware of your surroundings is key to avoiding dangerous situations. Here are some tips that may be helpful for women walking alone at night:

1. Keep your head up and be mindful of what's happening around you.
2. Trust your instincts and avoid areas that feel unsafe or uncomfortable.
3. Use your voice to scare off potential threats, yell loudly and assertively if someone approaches you in a threatening manner.
4. Keep your phone nearby and accessible in case of an emergency, program the important numbers into your phone such as 911 or a trusted friend/family member.
5. Consider carrying a small personal alarm that can be activated with a single button press to scare off potential threats.

Remember, self-defense is key elementary defense is essential to avoidance is about stay alert and safety is the most importantlysafence is all defense is not be aware and personal safety


Input your question (type 'exit' to stop):  How can I keep my belongings secure on a crowded bus or metro?



User Question: How can I keep my belongings secure on a crowded bus or metro?
Bot Response: To keep your belongings secure on a crowded bus or metro, follow these tips: Keep your belongings close to you at all times, avoid leaving them unattended even for a brief moment. Utilize secure compartments such as anti-theft backpacks with slash-proof fabric, RFID-blocking wallets, and TSA-approved locks for luggage. Additionally, be mindful of your surroundings and keep an eye on your belongings at all times.




Input your question (type 'exit' to stop):  How do I identify and avoid risky areas when walking through a city?


Number of tokens (513) exceeded maximum context length (512).
Number of tokens (514) exceeded maximum context length (512).
Number of tokens (515) exceeded maximum context length (512).
Number of tokens (516) exceeded maximum context length (512).
Number of tokens (517) exceeded maximum context length (512).
Number of tokens (518) exceeded maximum context length (512).
Number of tokens (519) exceeded maximum context length (512).
Number of tokens (520) exceeded maximum context length (512).
Number of tokens (521) exceeded maximum context length (512).
Number of tokens (522) exceeded maximum context length (512).
Number of tokens (523) exceeded maximum context length (512).
Number of tokens (524) exceeded maximum context length (512).
Number of tokens (525) exceeded maximum context length (512).
Number of tokens (526) exceeded maximum context length (512).
Number of tokens (527) exceeded maximum context length (512).
Number of tokens (528) exceeded maximum context length (512).
Number o


User Question: How do I identify and avoid risky areas when walking through a city?
Bot Response: To identify and avoid risky areas while walking through a city, follow these steps:

1. Research the area beforehand: Use online resources or consult with locals to learn about potential danger zones in the city.
2. Keep your head on a swivel: Always be aware of your surroundings, noticing any suspicious activity or areas where crime tends to happen.
3. Trust your instincts: If you feel uncomfortable or uneasy in an area, trust your instincts and avoid it.
4. Use crowds as protection: Sticking with large groups of people can help deter potential criminals.
5. Avoid walking alone at night: If possible, try to avoid walking through urban areas alone during late hours.
6. Keep valuables secure: Carry your belongings close and be mindful of pickpocketing in crowded areas.
7. Stay informed: Keep up-to-date with local news and events to stay aware of any potential safety concerns or potential d


Input your question (type 'exit' to stop):  exit



Exiting chat... Goodbye!


In [19]:
while True:
    try:
        user_input = input(f"\nInput your question (type 'exit' to stop): ")
        if user_input.lower() == 'exit':
            print("\nExiting chat... Goodbye!")
            break

        result = qa({"query": user_input})
        response = result["result"]

        print(f"\nUser Question: {user_input}")
        print(f"Bot Response: {response}\n")

    except KeyboardInterrupt:
        print("\nExiting chat...")
        break


Input your question (type 'exit' to stop):  What are some key legal rights citizens should know to protect themselves?



User Question: What are some key legal rights citizens should know to protect themselves?
Bot Response: Citizens should be aware of their right to equality before the law and protection against arbitrary use of power by authorities. They should also be aware of their right to a fair trial, freedom of speech and expression, and the right to practice their religion without interference from the state. Additionally, citizens should be informed of their rights in times of war or emergency, such as the right to be protected from discrimination and the right to due process.




Input your question (type 'exit' to stop):  What are the most effective safety measures to secure workplaces?


Number of tokens (513) exceeded maximum context length (512).



User Question: What are the most effective safety measures to secure workplaces?
Bot Response: Some of the most effective safety measures to secure workplaces include:
\u201cProviding workers with a safe work environment\u201d, such as proper ventilation, lighting, and emergency exit routes. \u201cDemonstrating that work is never so urgent that we cannot take time to do it safely\u201d, by taking regular breaks and ensuring employees are not overworked or exhausted. \u201cEnsuring accountability at all levels of supervision for safety\u201d, through active participation in safety meetings and training programs. \u201cAligning the organization's safety mission with words and actions\u201d, by setting clear expectations and holding employees and management accountable for meeting those expectations.

Please let me know if there is any additional information you need to provide a more detailed answer.




Input your question (type 'exit' to stop):  exit



Exiting chat... Goodbye!
