# Install necessary libraries

In [9]:
!pip install langchain
!pip install rarfile
!pip install faiss-cpu
!pip install langchain_community

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


# RAG Flow in LangChain
##Load → 2. Split → 3. Embed → 4. Store → 5. Retrieve → 6. Prompt + LLM → 7. Answer


In [3]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
import torch
import json
from pathlib import Path
from langchain_core.documents import Document


## Data Loading


In [2]:
import rarfile
import os

with rarfile.RarFile("diagnostic_kg.rar") as rf:
    rf.extractall("diagnostic_kg")


In [4]:
def extract_knowledge_text(data_dir):
    all_knowledge_texts = []
    for root, _, files in os.walk(data_dir):
        for file in files:
            if file.endswith(".json"):
                file_path = os.path.join(root, file)
                with open(file_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
                    knowledge_section = data.get("knowledge", {})

                    text_chunks = []
                    for key, value in knowledge_section.items():
                        if isinstance(value, dict):
                            for sub_key, sub_val in value.items():
                                text = f"{key} - {sub_key}: {sub_val}"
                                text_chunks.append(text)
                        else:
                            text = f"{key}: {value}"
                            text_chunks.append(text)

                    combined_text = "\n".join(text_chunks)
                    all_knowledge_texts.append(combined_text)

    return all_knowledge_texts

#Load and flatten your data
docs = extract_knowledge_text("diagnostic_kg/Diagnosis_flowchart")
documents = [Document(page_content=doc) for doc in docs]

# Document splitting:

In [6]:
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=30)
split_docs = splitter.split_documents(documents)

# Embeddings and Vector Store

In [7]:
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
db = FAISS.from_documents(split_docs, embedding)

# Retrieval and Question Answering

In [11]:
retriever = db.as_retriever()

In [12]:
# Load the model locally using transformers
flan_pipeline = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    tokenizer="google/flan-t5-large",
    device=0 if torch.cuda.is_available() else -1,
    max_length=512,
    temperature=0.5
)

# Wrap it with LangChain's HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=flan_pipeline)

'''
we used LangChain’s RetrievalQA,it:

Embeds the query using the embedding model you gave to your FAISS retriever.

Searches the most similar chunks using the retriever (retriever.similarity_search(query_embedding)).

Feeds the retrieved docs as context to the LLM (Flan-T5 in your case).

Generates an answer from the LLM.

'''
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,  # Your FAISS retriever(context)
    return_source_documents=True
)

# Ask a question
query = "What are the risk factors of ACS?"
result = qa_chain.invoke(query)  # .invoke() is more consistent in LangChain v0.1+

# Print answer
print("\nAnswer:")
print(result["result"])

print("\nRelevant Sources:")
for doc in result["source_documents"]:
    print("-", doc.page_content[:200], "...\n")


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=flan_pipeline)



Answer:
Hyperlipidemia, hypertension, smoking, diabetes, infection, hyperthyroidism, severe arrhythmia, anemia, hypoxemia; etc

Relevant Sources:
- Suspected ACS - Risk Factors: Hyperlipidemia, hypertension, smoking, diabetes, infection, hyperthyroidism, severe arrhythmia, anemia, hypoxemia; etc. ...

- Suspected ACS - Symptoms: Chest pain, sweating, nausea, vomiting, palpitations, dyspnea, arrhythmia with weakness, dizziness or syncope, hypotensive shock, acute left heart failure; etc. ...

- Suspected COPD - Risk Factors: Long-term exposure to harmful particles or gases (tobacco smoke, occupational dust and chemicals, air pollution) / Genetic predisposition (e.g., alpha-1 antitrypsin defi ...

- Suspected Adrenal Insufficiency - Risk Factors: Autoimmune diseases; Genetic predisposition; Infections (e.g., tuberculosis, HIV); Adrenal hemorrhage; Anticoagulant therapy; Chronic use of glucocortic ...






🔍 Summary of Retrieved Information:
Know the risk factors for ACS. Know the symptoms of ACS. Know the risk factors for COPD. Know the risk factors for Adrenal Insufficiency.


# Summarization

In [35]:
summarizer_pipeline = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    tokenizer="facebook/bart-large-cnn",
    device=0 if torch.cuda.is_available() else -1,
    max_length=40,
)
summarizer = HuggingFacePipeline(pipeline=summarizer_pipeline)
retrieved_text = "\n\n".join([doc.page_content for doc in result["source_documents"]])

# Define a summarization prompt
summarization_prompt = f"""You are a clinical assistant. Given the following user query and medical Model result, summarize it into a clear, detailed explanation in 3-5 sentences.
User Query: {query}
Model Answer:
{result["result"]}
"""

summary = summarizer.invoke(summarization_prompt)
print("\n🔍 Summary of Retrieved Information:")
print(summary)

Device set to use cpu
Your min_length=56 must be inferior than your max_length=40.



🔍 Summary of Retrieved Information:
Hyperlipidemia, hypertension, smoking, diabetes, infection, hyperthyroidism, severe arrhythmia, anemia, hypoxemia; etc are all risk factors.
