In [None]:
!pip install -q langchain qdrant-client sentence-transformers groq
!pip install -q langchain-qdrant langchain-groq python-dotenv



### RAG

In [None]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_qdrant import Qdrant
from langchain_groq import ChatGroq


import os
from dotenv import load_dotenv

# Pull credentials from a local .env file (if present) into the process
# environment so that no secret is hardcoded in the notebook.
load_dotenv()

# Connect to Qdrant. Both the endpoint and the API key are read from the
# environment (QDRANT_URL / QDRANT_API_KEY); os.getenv yields None for a
# variable that is not set.
qdrant_client = QdrantClient(
    os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY"),
)

# Sentence-embedding model used to vectorise the knowledge-base questions.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Name of the Qdrant collection that stores the knowledge-base vectors.
collection_name = "chatbot_embeddings_test"

# Create the collection only when it is missing, so re-running this cell
# never touches an existing collection. `create_collection` is used instead
# of the deprecated `recreate_collection` (which deletes and then creates):
# inside this guard there is nothing to delete.
if not qdrant_client.collection_exists(collection_name):
    qdrant_client.create_collection(
        collection_name=collection_name,
        # 384 matches the embedding width of the all-MiniLM-L6-v2 model;
        # the size must equal the length of the vectors that get upserted.
        vectors_config={"size": 384, "distance": "Cosine"},
    )

In [None]:
# Knowledge base: a list of question/answer records.
# Every record must expose exactly two keys, "question" and "answer".
# The answers are built from adjacent string literals (implicit
# concatenation) so that no source-code newlines or indentation leak into
# the stored text.
knowledge_base = [
    {
        "question": "What is hypertension?",
        "answer": (
            "Hypertension is a condition in which the force of the blood "
            "against the artery walls is too high. It is also known as high "
            "blood pressure and can lead to serious health complications if "
            "left untreated."
        ),
    },
    {
        "question": "What is diabetes?",
        "answer": (
            "Diabetes is a chronic condition characterized by high levels of "
            "sugar in the blood. Common symptoms include excessive thirst, "
            "frequent urination, extreme fatigue, and blurred vision."
        ),
    },
    {
        "question": "What is heart disease?",
        "answer": (
            "Heart disease refers to a range of conditions that affect the "
            "heart, including coronary artery disease, heart rhythm problems, "
            "and heart defects. Major causes include high cholesterol, high "
            "blood pressure, smoking, and obesity."
        ),
    },
    {
        "question": "What is asthma?",
        "answer": (
            "Asthma is a respiratory condition that causes the airways to "
            "become inflamed and narrowed. Treatment typically includes the "
            "use of inhalers that contain bronchodilators and corticosteroids "
            "to reduce inflammation and open the airways."
        ),
    },
]

# Embed each knowledge-base question, one at a time. The model returns a
# tensor (convert_to_tensor=True), which is turned into a plain Python list
# so it can be sent to Qdrant as a vector.
embeddings = [
    sentence_model.encode(question, convert_to_tensor=True).tolist()
    for question in (entry["question"] for entry in knowledge_base)
]

# Build one Qdrant point per knowledge-base record. The list position is
# used as the point id, so re-running the cell overwrites the same ids
# rather than adding duplicates.
points = [
    dict(
        id=point_id,
        vector=vector,
        # The payload carries both the question and its answer.
        payload=dict(question=entry["question"], answer=entry["answer"]),
    )
    for point_id, (vector, entry) in enumerate(zip(embeddings, knowledge_base))
]

# Write (insert or overwrite) the points into the collection.
qdrant_client.upsert(points=points, collection_name=collection_name)

UpdateResult(operation_id=7, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
# Chat model served by Groq, used as the answer generator of the RAG chain.
# The API key is read from the GROQ_API_KEY environment variable.
# NOTE(review): confirm that the "llama3-8b-8192" model id is still served by
# Groq before relying on this cell.
groq_llm = ChatGroq(
    model="llama3-8b-8192",
    api_key=os.getenv("GROQ_API_KEY"),
    temperature=0,
)

In [None]:
# Wrap the Qdrant collection in LangChain's Qdrant vector store.
# Despite its name, `qdrant_retriever` is the vector store itself; the actual
# retriever is obtained from it via `.as_retriever()`.
qdrant_retriever = Qdrant(
    client=qdrant_client,
    collection_name=collection_name,
    # The SentenceTransformer encode function embeds the incoming query.
    # NOTE(review): LangChain prefers an `Embeddings` instance here and may
    # warn when given a bare callable -- confirm against the installed version.
    embeddings=sentence_model.encode,
    # The points in this collection store their text under the "answer"
    # payload key, not LangChain's default "page_content". Without this
    # setting the retrieved documents carry no text, and the LLM answers
    # without any context from the knowledge base.
    content_payload_key="answer",
)

# Prompt sent to the LLM: the retrieved context first, then the user's
# question, ending with an "Answer:" cue for the model to complete.
prompt_template = PromptTemplate(
    template="Context: {context}\n\nQuestion: {question}\nAnswer:",
    input_variables=["context", "question"],
)

# Assemble the retrieval-augmented QA chain: documents fetched from Qdrant
# are "stuffed" into the prompt's {context} slot and passed to the Groq LLM.
retrieval_qa_chain = RetrievalQA.from_chain_type(
    retriever=qdrant_retriever.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt_template},
    llm=groq_llm,
)



In [None]:
def answer_question(query):
    """Run the retrieval QA chain for a single user query.

    Args:
        query: The question to answer.

    Returns:
        Whatever ``retrieval_qa_chain.invoke`` returns for the query; the
        example usage below reads the generated text from its ``'result'``
        entry.
    """
    # The chain expects its input under the "query" key.
    chain_input = {"query": query}
    return retrieval_qa_chain.invoke(chain_input)

# Demo: ask one question and print the text generated by the chain.
user_query = "What are the causes of diabetes?"
answer = answer_question(user_query)
generated_text = answer["result"]
print("Generated Answer:", generated_text)

Generated Answer: There are several causes of diabetes, including:

1. **Genetics**: Having a family history of diabetes increases the risk of developing the condition.
2. **Lack of insulin**: When the pancreas is unable to produce enough insulin, or when the body's cells become resistant to insulin, blood sugar levels can become too high.
3. **Insulin resistance**: When the body's cells become resistant to insulin, it can lead to high blood sugar levels.
4. **Pancreatic damage**: Damage to the pancreas, such as from pancreatitis or pancreatic surgery, can lead to diabetes.
5. **Infections**: Certain infections, such as pancreatitis or mononucleosis, can cause diabetes.
6. **Obesity**: Being overweight or obese is a major risk factor for developing type 2 diabetes.
7. **Physical inactivity**: A lack of physical activity can contribute to the development of type 2 diabetes.
8. **Age**: The risk of developing diabetes increases with age, particularly after the age of 45.
9. **Ethnicity**