In [24]:
from sentence_transformers import SentenceTransformer

# Instantiate the MiniLM sentence-embedding model via its fully qualified hub id.
model = SentenceTransformer(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2")


Installations


In [None]:
!pip install gradio


In [None]:
!pip install faiss-cpu

In [None]:
import json
from sentence_transformers import SentenceTransformer
import numpy as np
import pickle
import faiss
from transformers import pipeline
import gradio as gr
from gradio.themes.base import Base

In [25]:
# Step 1: Load the content from JSON lines
chunks = []
with open('cleaned_health_data1.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline at EOF) are not valid JSON.
            continue
        obj = json.loads(line)
        # `or ""` also covers keys that are present but null, which would
        # otherwise raise TypeError on string concatenation.
        title = obj.get("title") or ""
        content = obj.get("content") or ""
        if not (title or content):
            # A record with no text would be embedded as a bare "-".
            continue
        text = title + " - " + content
        chunks.append(text.strip())

if not chunks:
    # Fail here with a clear message instead of later on `shape[1]`.
    raise ValueError("No usable records found in cleaned_health_data1.jsonl")

# Step 2: Load the embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 3: Generate embeddings (one vector per entry of `chunks`, same order)
chunk_embeddings = model.encode(chunks, show_progress_bar=True)

# Step 4: Save the embeddings and metadata
np.save('chunk_embeddings.npy', chunk_embeddings)

# The pickled list is the row -> text lookup for the saved embedding matrix.
with open('chunk_metadata.pkl', 'wb') as f:
    pickle.dump(chunks, f)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [26]:
# Bring the stored chunk vectors back into memory
chunk_embeddings = np.load("chunk_embeddings.npy")

# Create a flat L2 index as wide as one embedding vector, then add every row
dimension = chunk_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(chunk_embeddings)

# Write the populated index to disk
faiss.write_index(index, "faiss_index.index")


In [27]:
# Read back the pickled chunk texts and the saved embedding matrix
with open("chunk_metadata.pkl", "rb") as f:
    metadata = pickle.load(f)
embeddings = np.load("chunk_embeddings.npy")

# L2-normalise the rows in place so that inner product equals cosine similarity
faiss.normalize_L2(embeddings)

# Flat inner-product index over the normalised vectors
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)

# Persist the index together with a copy of its metadata
faiss.write_index(index, "health_faiss_index.index")
with open("health_index_metadata.pkl", "wb") as f:
    pickle.dump(metadata, f)


In [28]:
# Sentence-embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Chunk texts pickled to disk
with open("chunk_metadata.pkl", "rb") as f:
    chunks = pickle.load(f)

# FAISS index stored on disk
index = faiss.read_index("faiss_index.index")


In [29]:
# Text-to-text generation pipeline backed by the flan-t5-base checkpoint
qa_pipeline = pipeline(task="text2text-generation", model="google/flan-t5-base")

def answer_question(question, top_k=3):
    """Answer ``question`` by retrieving context chunks and generating text.

    The question is embedded with the module-level ``model``, the ``top_k``
    nearest entries are looked up in the module-level FAISS ``index``, and the
    matching ``chunks`` plus the question are sent to ``qa_pipeline``.

    Args:
        question: The user's question as a string.
        top_k: Number of nearest chunks to request from the index.

    Returns:
        The ``generated_text`` string returned by ``qa_pipeline``, or a short
        hint asking for input when ``question`` is empty or only whitespace.
    """
    if not question or not question.strip():
        return "Please enter a question."

    # Step 1: Embed the question
    question_embedding = model.encode([question])

    # Step 2: Search in the FAISS index
    _, indices = index.search(np.array(question_embedding).astype("float32"), top_k)

    # Step 3: Get the top-k relevant chunks
    # FAISS pads the labels with -1 when the index holds fewer than top_k
    # vectors; chunks[-1] would silently pull in the last chunk, so drop any
    # label that is not a valid position in `chunks`.
    retrieved_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    context = "\n".join(retrieved_chunks)

    # Step 4: Concatenate context + question
    prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"

    # Step 5: Generate answer
    answer = qa_pipeline(prompt, max_length=200)[0]['generated_text']
    return answer


Device set to use cpu


In [32]:
# Smoke-test the pipeline with one question and show the Q/A pair
question = "what's the capital of Egypt?"
response = answer_question(question)
for label, text in (("Q:", question), ("A:", response)):
    print(label, text)


Q: what's the capital of Egypt?
A: Cairo


In [40]:
import gradio as gr
import faiss
import numpy as np
import pickle
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from gradio.themes.base import Base

# Retrieval artefacts: the FAISS index and the chunk texts pickled to disk
with open("chunk_metadata.pkl", "rb") as f:
    chunks = pickle.load(f)
index = faiss.read_index("faiss_index.index")

# Models: sentence embedder for questions, flan-t5 for answer generation
model = SentenceTransformer("all-MiniLM-L6-v2")
qa_pipeline = pipeline(task="text2text-generation", model="google/flan-t5-base")

# Define answer generation function
def answer_question(question, top_k=3):
    """Answer ``question`` by retrieving context chunks and generating text.

    The question is embedded with the module-level ``model``, the ``top_k``
    nearest entries are looked up in the module-level FAISS ``index``, and the
    matching ``chunks`` plus the question are sent to ``qa_pipeline``.

    Args:
        question: The user's question as a string.
        top_k: Number of nearest chunks to request from the index.

    Returns:
        The ``generated_text`` string returned by ``qa_pipeline``, or a short
        hint asking for input when ``question`` is empty or only whitespace.
    """
    if not question or not question.strip():
        return "Please enter a question."

    question_embedding = model.encode([question])
    _, indices = index.search(np.array(question_embedding).astype("float32"), top_k)

    # FAISS pads the labels with -1 when the index holds fewer than top_k
    # vectors; chunks[-1] would silently pull in the last chunk, so drop any
    # label that is not a valid position in `chunks`.
    retrieved_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    context = "\n".join(retrieved_chunks)

    prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"
    result = qa_pipeline(prompt, max_length=200)[0]["generated_text"]
    return result

# Emerald/slate colour scheme built on Gradio's Base theme
custom_theme = Base(neutral_hue="slate", primary_hue="emerald")

# Two-line textbox that feeds the question to answer_question
question_box = gr.Textbox(lines=2, placeholder="Ask about UAE health insurance...")

# Gradio Interface: one text input, one plain-text output
iface = gr.Interface(
    fn=answer_question,
    inputs=question_box,
    outputs="text",
    theme=custom_theme,
    title="UAE Health Insurance Assistant",
    description="Ask any question related to UAE health insurance policies. This assistant retrieves relevant context and generates an answer using a language model.",
)

iface.launch()


Device set to use cpu


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9a3bdba982307b3725.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


