In [None]:
!pip install pinecone transformers sentence-transformers -q

In [None]:
!pip install transformers sentence-transformers


In [None]:
!pip install pdfplumber

In [None]:
!pip install pymupdf


In [None]:
import fitz  # PyMuPDF

def extract_text_from_pdf(path):
    """Return the text of every page of a PDF as a single string.

    Each page's text is preceded by a "--- Page N ---" marker on its own
    line (N is 1-based), so downstream code can still tell pages apart.

    Args:
        path: Location of the PDF, passed unchanged to ``fitz.open``.

    Returns:
        str: The page markers and page texts concatenated in page order.
    """
    parts = []
    # Using the document as a context manager closes it (and releases the
    # underlying file handle) even if extracting a page raises.
    with fitz.open(path) as doc:
        for page_number, page in enumerate(doc, start=1):
            parts.append(f"\n--- Page {page_number} ---\n")
            parts.append(page.get_text())
    # Join once at the end instead of growing a string page by page.
    return "".join(parts)

# Extract the full text of "MIMS.pdf" and print the first 2000 characters
# as a quick sanity check of the extraction.
text_output = extract_text_from_pdf("MIMS.pdf")
print(text_output[:2000])


In [None]:
def chunk_text(text, chunk_size=300, overlap=50):
    """Split ``text`` into overlapping windows of whitespace-separated words.

    Consecutive windows start ``chunk_size - overlap`` words apart, so each
    window repeats the last ``overlap`` words of the previous one. The final
    window may contain fewer than ``chunk_size`` words.

    Args:
        text: The string to split; words are obtained with ``str.split()``.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared by consecutive chunks. Must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        list[str]: The chunks, each a single-space-joined run of words.
        Empty when ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # overlap >= chunk_size would make the step zero or negative, which
    # previously caused an endless loop; a negative overlap would skip words.
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), step)
    ]

# Split the extracted PDF text into overlapping word chunks using
# chunk_text's default chunk_size and overlap.
chunks = chunk_text(text_output)


In [None]:
MAX_BYTES = 4 * 1024 * 1024  # 4MB
def split_payload(payload_list, encoder_fn, max_bytes=MAX_BYTES):
    """Yield consecutive batches of items whose encoded size fits ``max_bytes``.

    Items are taken in order; a new batch is started whenever adding the next
    item would push the running encoded size over ``max_bytes``. An item that
    is larger than ``max_bytes`` on its own is emitted alone in its own batch.
    Empty batches are never yielded.

    Args:
        payload_list: Iterable of items to batch.
        encoder_fn: Callable returning the encoded form of one item, either
            ``str`` (measured as UTF-8 bytes) or ``bytes``/``bytearray``.
        max_bytes: Size budget per batch in bytes. Defaults to ``MAX_BYTES``.

    Yields:
        list: The next batch of original (un-encoded) items.
    """
    current_batch = []
    current_size = 0
    for item in payload_list:
        encoded = encoder_fn(item)
        # Accept already-encoded bytes as well as text; text is measured by
        # its UTF-8 length, not its character count.
        if isinstance(encoded, (bytes, bytearray)):
            size = len(encoded)
        else:
            size = len(encoded.encode("utf-8"))
        # Only flush a non-empty batch: otherwise an oversized first item
        # would cause an empty list to be yielded.
        if current_batch and current_size + size > max_bytes:
            yield current_batch
            current_batch = []
            current_size = 0
        current_batch.append(item)
        current_size += size
    if current_batch:
        yield current_batch


In [None]:
import os

from sentence_transformers import SentenceTransformer
import pinecone

# Read the Pinecone API key from the environment instead of hardcoding it in
# the notebook, and fail fast before the (slow) embedding step if it is missing.
# NOTE(review): any key that was previously hardcoded in this notebook should
# be treated as leaked and rotated.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not pinecone_api_key:
    raise RuntimeError(
        "Set the PINECONE_API_KEY environment variable before running this cell."
    )

# Chunk text (assuming this is already done and stored in 'chunks')
# chunks = chunk_text(text_output)

# Load the SentenceTransformer embedding model on the CUDA device
embedder = SentenceTransformer("BAAI/bge-large-en", device='cuda')

# Ensure chunks is a list of strings (dict chunks contribute their 'text' value)
texts = [chunk['text'] if isinstance(chunk, dict) else chunk for chunk in chunks]

# Embed the chunks
embeddings = embedder.encode(texts, show_progress_bar=True, batch_size=16)

# Initialize Pinecone client. The class is referenced through the module
# because only `import pinecone` is in scope; a bare `Pinecone(...)` raises
# NameError.
pc = pinecone.Pinecone(api_key=pinecone_api_key, environment="us-east-1")


# Connect to index
index = pc.Index("medicalbooks")

# Prepare vectors for upsert as (id, values, metadata) tuples; the chunk text
# is stored as metadata so it can be returned at query time.
vectors = [
    (f"chunk-{i}", embedding.tolist(), {"text": text})
    for i, (embedding, text) in enumerate(zip(embeddings, texts))
]

# Batch upsert (recommended by Pinecone to avoid size limit errors)
batch_size = 100
for i in range(0, len(vectors), batch_size):
    batch = vectors[i:i + batch_size]
    index.upsert(vectors=batch)




In [None]:
from IPython.display import HTML, display

# Enable text wrapping in all outputs: inject a <style> rule so that <pre>
# elements inside `.output` wrap long lines instead of overflowing.
# NOTE(review): presumably `.output` matches the output area of the notebook
# frontend in use — confirm, since the selector is frontend-specific.
display(HTML('''
<style>
.output pre {
    white-space: pre-wrap;   /* Wrap long lines */
    word-wrap: break-word;   /* Break very long words if needed */
}
</style>
'''))


### To test, run the following cells and change the question at the end

In [None]:
!pip install pinecone transformers sentence-transformers -q

In [None]:
from sentence_transformers import SentenceTransformer
# Load the "BAAI/bge-large-en" embedding model on the CUDA device; this is the
# `embedder` that retrieve_context uses to encode queries.
embedder = SentenceTransformer("BAAI/bge-large-en", device='cuda')

In [None]:
import os

import pinecone

# Read the Pinecone API key from the environment instead of hardcoding it in
# the notebook.
# NOTE(review): any key that was previously hardcoded in this notebook should
# be treated as leaked and rotated.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not pinecone_api_key:
    raise RuntimeError(
        "Set the PINECONE_API_KEY environment variable before running this cell."
    )

pc = pinecone.Pinecone(api_key=pinecone_api_key, environment="us-east-1")
# Handle to the index queried by retrieve_context.
index = pc.Index("medicalbooks")

In [None]:
def retrieve_context(query, top_k=10):
    """Return the stored text of the ``top_k`` index matches for ``query``.

    The query is embedded with the module-level ``embedder`` and sent to the
    module-level ``index``; the ``"text"`` metadata field of each returned
    match is collected in the order the matches are returned.

    Args:
        query: The search string to embed.
        top_k: Number of matches to request from the index.

    Returns:
        list: One ``metadata["text"]`` value per match.
    """
    encoded = embedder.encode([query])
    query_vector = encoded[0].tolist()
    response = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )
    passages = []
    for hit in response["matches"]:
        passages.append(hit["metadata"]["text"])
    return passages


In [None]:
import os

from openai import OpenAI

# Read the Groq API key from the environment instead of hardcoding it in the
# notebook.
# NOTE(review): any key that was previously hardcoded in this notebook should
# be treated as leaked and rotated.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "Set the GROQ_API_KEY environment variable before running this cell."
    )

# OpenAI-compatible client pointed at Groq's endpoint via base_url.
client = OpenAI(
    api_key=groq_api_key,
    base_url="https://api.groq.com/openai/v1"
)


def generate_rag_response(user_query):
    """Answer ``user_query`` with the chat model, grounded in retrieved context.

    The passages returned by ``retrieve_context`` are joined with newlines and
    embedded in a prompt that asks for a THOUGHT / FINAL ANSWER formatted
    reply. The prompt is sent as a single user message through the
    module-level ``client``.

    Args:
        user_query: The user's question.

    Returns:
        The message content of the first completion choice.
    """
    context = "\n".join(retrieve_context(user_query))

    prompt = f"""
You are a medical assistant RAG system. You must reason step-by-step.
Provide your response in this format:

THOUGHT: <detailed reasoning>
FINAL ANSWER: <concise answer to the user>

Use ONLY the context below:
{context}

User Question: {user_query}
    """
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=messages,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


In [None]:
# Example query: edit `question` and re-run this cell to ask something else.
# The answer is generated by generate_rag_response and printed with a "Bot:" prefix.
question = "what percentage of patients with T2DM will eventually die from CV complications?"
response = generate_rag_response(question)
print("Bot:", response)
