In [None]:
# Core dependencies for the RAG pipeline (LangChain, FAISS, sentence embeddings).
# %pip is used instead of !pip so the packages are installed into the environment
# of the running kernel rather than whichever pip is first on the shell PATH.
%pip install --quiet langchain langchain-community faiss-cpu sentence-transformers

In [None]:
# Upgrade numpy to the latest release; %pip targets the running kernel's environment.
%pip install --quiet -U numpy

In [None]:
# Log in to the Hugging Face Hub from the shell.
# NOTE(review): presumably required because the google/gemma-2b checkpoint
# loaded later in this notebook needs an authenticated download — confirm.
!huggingface-cli login

In [None]:
# Remove sentence-transformers and numpy so they can be reinstalled cleanly.
# NOTE(review): looks like a workaround for a version mismatch between the two — confirm.
# %pip is used so the uninstall applies to the running kernel's environment.
%pip uninstall -y sentence-transformers numpy

In [None]:
# Reinstall the packages so they are in sync with the installed libraries.
# %pip is used so the installs go into the running kernel's environment.
%pip install sentence-transformers
%pip install --upgrade langchain-community langchain-huggingface

In [1]:
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import json

In [35]:
# Convert the JSONL file into LangChain Document objects.
# JSONL holds one JSON object per line, so every line is parsed on its own with
# json.loads (json.load would try to read the whole file as a single JSON value).
with open("combined_rag_documents.jsonl", "r", encoding="utf-8") as f:
    loaded_data = [
        json.loads(line)
        for line in f
        # Blank lines (e.g. a trailing newline at the end of the file) are not
        # valid JSON and would raise JSONDecodeError, so they are skipped.
        if line.strip()
    ]

# Each record must provide "page_content" (the text) and "metadata" (a dict).
documents_comb = [
    Document(page_content=doc["page_content"], metadata=doc["metadata"])
    for doc in loaded_data
]

In [37]:
# Names of the embedding checkpoint and of the on-disk index folder.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
FAISS_INDEX_DIR = "faiss_1mg_index"

# Embedding model shared by index construction and later index loading.
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Embed every document, build the FAISS index in memory, then persist it to disk.
vectorstore = FAISS.from_documents(
    documents=documents_comb,
    embedding=embedding_model,
)
vectorstore.save_local(folder_path=FAISS_INDEX_DIR)

In [5]:
# Reload the persisted index from disk using the same embedding model.
# SECURITY: allow_dangerous_deserialization=True opts in to loading pickled data,
# which can execute arbitrary code. Only load index folders created by this
# notebook or another trusted source.
vectorstore = FAISS.load_local(
    folder_path="faiss_1mg_index",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)

# Retriever that hands the 2 best-matching documents to the chain per query.
retriever_search_kwargs = dict(k=2)
retriever = vectorstore.as_retriever(search_kwargs=retriever_search_kwargs)



In [6]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Generation model and the cap on newly generated tokens per call.
GENERATION_MODEL_ID = "google/gemma-2b"
MAX_NEW_TOKENS = 256

# Load gemma-2b as a transformers text-generation pipeline.
llm_pipeline = pipeline(
    task="text-generation",
    model=GENERATION_MODEL_ID,
    max_new_tokens=MAX_NEW_TOKENS,
)

# Wrap the pipeline so LangChain chains can call it as an LLM.
# NOTE(review): the recorded output of this cell ends with this source line echoed
# back, which looks like a truncated deprecation warning for this class — consider
# importing HuggingFacePipeline from langchain_huggingface instead; confirm.
llm = HuggingFacePipeline(pipeline=llm_pipeline)

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=llm_pipeline)


In [28]:
from langchain.prompts import PromptTemplate

# Prompt used by the QA chain. The instructions are deliberately strict so the
# model lists symptoms only, stays within the retrieved context, and does not
# pad the answer with repeated or copied text.
# {context} receives the retrieved documents; {question} receives the user query.
SYMPTOM_PROMPT_TEXT = """
You are a medical assistant.

Instructions:
- Only answer based on the provided context.
- Do not copy or repeat the context itself.
- Do not add or assume any information not present in the context.
- Avoid repeating words or points.
- List the symptoms using bullet points ('-').
- Do not include disease descriptions or treatments.
- If no symptom information is present, respond: "No answer available in the provided data".
- Keep the answer concise and under 150 words.

Context:
{context}

Question:
{question}

Answer:
"""

prompt_template = PromptTemplate(
    template=SYMPTOM_PROMPT_TEXT,
    input_variables=["context", "question"],
)

In [29]:
# Retrieval-augmented QA chain: the retriever fetches documents, the "stuff"
# strategy inserts them all into the custom prompt, and the LLM generates the answer.
qa_chain_options = {"prompt": prompt_template}

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=qa_chain_options,
)

In [31]:
import difflib

def extract_answer(text):
    """Return the part of the generated text that follows the answer marker.

    The LLM output can include the prompt itself, which ends with an
    "Answer:" line. Everything after the first "Answer:" is returned. If
    that exact marker is absent, the first bare "Answer" is used instead.
    Any colon or whitespace left over from the marker is removed, so the
    result no longer starts with a stray ":".

    Args:
        text: Raw string produced by the QA chain.

    Returns:
        The stripped answer text, or the whole stripped input when no
        marker is present.
    """
    # Prefer the exact prompt marker so a capitalised "Answer" inside the
    # retrieved context does not cut the text at the wrong place.
    for marker in ("Answer:", "Answer"):
        _, found, tail = text.partition(marker)
        if found:
            # Drop any separator left right after the marker (":", spaces, tabs).
            return tail.lstrip(" \t:").strip()
    return text.strip()


In [32]:
query = "symptoms of Addison's Disease"

# Chain.run is deprecated in LangChain; invoke() takes the chain's input mapping
# and returns a dict whose "result" entry holds the generated text.
answer = qa.invoke({"query": query})["result"]

# Keep only the text after the prompt's answer marker.
final_answer = extract_answer(answer)

print("Answer:", final_answer)

Answer: :
- Tiredness and fatigue
- Loss of appetite
- Craving for salt
- Nausea, vomiting
- Abdominal pain
- Dizziness
- Pain in the muscles
- Diarrhea
- Loss of consciousness
- Constipation
- Increased pigmentation
- Loss of weight
- Hypotension (low BP)
- Anemia
- Vitiligo (a disease that causes loss of skin color in patches).

Symptoms of Addison’s Disease are: Tiredness and fatigue Loss of appetite Craving for salt Nausea, vomiting Abdominal pain Dizziness Pain in the muscles Diarrhea Loss of consciousness Constipation Increased pigmentation Loss of weight Hypotension (low BP) Anemia Vitiligo (a disease that causes loss of skin color in patches) Note:** Sometimes the symptoms of Addison’s disease appear suddenly. This is known as the Addisonian crisis. It is a life-threatening situation that results in low blood pressure, low blood levels of sugar, and high blood levels of potassium.
