In [28]:
import os
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path

In [None]:
%pip install pypdf

Note: you may need to restart the kernel to use updated packages.


In [29]:
def process_all_pdfs(pdf_directory):
    """Process all PDF files in a directory"""
    all_documents = []
    pdf_dir = Path(pdf_directory)
    
    pdf_files = list(pdf_dir.glob("**/*.pdf"))
    
    print(f"Found {len(pdf_files)} PDF files to process")
    
    for pdf_file in pdf_files:
        print(f"\nProcessing: {pdf_file.name}")
        try:
            loader = PyPDFLoader(str(pdf_file))
            documents = loader.load()
            
            for doc in documents:
                doc.metadata['source_file'] = pdf_file.name
                doc.metadata['file_type'] = 'pdf'
            
            all_documents.extend(documents)
            print(f"{len(documents)} pages are loaded")
            
        except Exception as e:
            print(f"Error: {e}")
    
    print(f"\nTotal documents loaded: {len(all_documents)}")
    return all_documents

all_pdf_documents = process_all_pdfs("../data/pdf")

Found 3 PDF files to process

Processing: 17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf
5 pages are loaded

Processing: 2024-20903.pdf
3 pages are loaded

Processing: kemupublications,+Journal+editor,+Annals-2 (1).pdf
5 pages are loaded

Total documents loaded: 13


In [30]:
all_pdf_documents

[Document(metadata={'producer': 'Corel PDF Engine Version 17.6.0.1021', 'creator': 'CorelDRAW X7', 'creationdate': '2025-05-02T14:53:59+05:00', 'moddate': '2025-05-02T14:53:59+05:00', 'author': 'Taaha', 'title': 'Complete Journal vol 21 issue 01` 2025.cdr', 'source': '..\\data\\pdf\\17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1', 'source_file': '17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf', 'file_type': 'pdf'}, page_content='Introduction\nThe phenomenon of skilled professionals leaving \ntheir home country to seek financial stability in \nanother country is known as the "Brain Drain". \nSpecifically, the migration of physicians is referred to \n1\nas Medical Brain Drain.  The lack of incentives for \nacademic credentials and experience can lead to \nfeelings of disillusionment, prompting individuals to \nmigrate to developed countries. Common factors \ncontributing to brain drain include limited career \nopportun

In [31]:
def split_documents(documents,chunk_size=1000,chunk_overlap=200):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", " ", ""]
    )
    split_docs = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(split_docs)} chunks")
    
    if split_docs:
        print(f"\nExample chunk:")
        print(f"Content: {split_docs[0].page_content[:200]}...")
        print(f"Metadata: {split_docs[0].metadata}")
    
    return split_docs

chunks=split_documents(all_pdf_documents)
chunks

Split 13 documents into 76 chunks

Example chunk:
Content: Introduction
The phenomenon of skilled professionals leaving 
their home country to seek financial stability in 
another country is known as the "Brain Drain". 
Specifically, the migration of physicia...
Metadata: {'producer': 'Corel PDF Engine Version 17.6.0.1021', 'creator': 'CorelDRAW X7', 'creationdate': '2025-05-02T14:53:59+05:00', 'moddate': '2025-05-02T14:53:59+05:00', 'author': 'Taaha', 'title': 'Complete Journal vol 21 issue 01` 2025.cdr', 'source': '..\\data\\pdf\\17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1', 'source_file': '17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf', 'file_type': 'pdf'}


[Document(metadata={'producer': 'Corel PDF Engine Version 17.6.0.1021', 'creator': 'CorelDRAW X7', 'creationdate': '2025-05-02T14:53:59+05:00', 'moddate': '2025-05-02T14:53:59+05:00', 'author': 'Taaha', 'title': 'Complete Journal vol 21 issue 01` 2025.cdr', 'source': '..\\data\\pdf\\17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1', 'source_file': '17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf', 'file_type': 'pdf'}, page_content='Introduction\nThe phenomenon of skilled professionals leaving \ntheir home country to seek financial stability in \nanother country is known as the "Brain Drain". \nSpecifically, the migration of physicians is referred to \n1\nas Medical Brain Drain.  The lack of incentives for \nacademic credentials and experience can lead to \nfeelings of disillusionment, prompting individuals to \nmigrate to developed countries. Common factors \ncontributing to brain drain include limited career \nopportun

In [None]:
%pip install sentence-transformers

Note: you may need to restart the kernel to use updated packages.


In [None]:
%pip install faiss-cpu

Note: you may need to restart the kernel to use updated packages.


In [None]:
%pip install chromadb

Note: you may need to restart the kernel to use updated packages.


In [32]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity

In [33]:
class EmbeddingManager:
    """Handles document embedding generation using SentenceTransformer"""
    
    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """
        Initialize the embedding manager
        
        Args:
            model_name: HuggingFace model name for sentence embeddings
        """
        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the SentenceTransformer model"""
        try:
            print(f"Loading embedding model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name)
            print(f"Model loaded successfully. Embedding dimension: {self.model.get_sentence_embedding_dimension()}")
        except Exception as e:
            print(f"Error loading model {self.model_name}: {e}")
            raise

    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for a list of texts
        
        Args:
            texts: List of text strings to embed
            
        Returns:
            numpy array of embeddings with shape (len(texts), embedding_dim)
        """
        if not self.model:
            raise ValueError("Model not loaded")
        
        print(f"Generating embeddings for {len(texts)} texts...")
        embeddings = self.model.encode(texts, show_progress_bar=True)
        print(f"Generated embeddings with shape: {embeddings.shape}")
        return embeddings


## initialize the embedding manager

embedding_manager=EmbeddingManager()
embedding_manager

Loading embedding model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimension: 384


<__main__.EmbeddingManager at 0x233550c00d0>

In [34]:
class VectorStore:
    """Manages document embeddings in a ChromaDB vector store"""
    
    def __init__(self, collection_name: str = "pdf_documents", persist_directory: str = "../data/vector_store"):
        """
        Initialize the vector store
        
        Args:
            collection_name: Name of the ChromaDB collection
            persist_directory: Directory to persist the vector store
        """
        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()

    def _initialize_store(self):
        """Initialize ChromaDB client and collection"""
        try:
            # Create persistent ChromaDB client
            os.makedirs(self.persist_directory, exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persist_directory)
            
            # Get or create collection
            self.collection = self.client.get_or_create_collection(
                name=self.collection_name,
                metadata={"description": "PDF document embeddings for RAG"}
            )
            print(f"Vector store initialized. Collection: {self.collection_name}")
            print(f"Existing documents in collection: {self.collection.count()}")
            
        except Exception as e:
            print(f"Error initializing vector store: {e}")
            raise

    def add_documents(self, documents: List[Any], embeddings: np.ndarray):
        """
        Add documents and their embeddings to the vector store
        
        Args:
            documents: List of LangChain documents
            embeddings: Corresponding embeddings for the documents
        """
        if len(documents) != len(embeddings):
            raise ValueError("Number of documents must match number of embeddings")
        
        print(f"Adding {len(documents)} documents to vector store...")
        
        # Prepare data for ChromaDB
        ids = []
        metadatas = []
        documents_text = []
        embeddings_list = []
        
        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            # Generate unique ID
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)
            
            # Prepare metadata
            metadata = dict(doc.metadata)
            metadata['doc_index'] = i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)
            
            # Document content
            documents_text.append(doc.page_content)
            
            # Embedding
            embeddings_list.append(embedding.tolist())
        
        # Add to collection
        try:
            self.collection.add(
                ids=ids,
                embeddings=embeddings_list,
                metadatas=metadatas,
                documents=documents_text
            )
            print(f"Successfully added {len(documents)} documents to vector store")
            print(f"Total documents in collection: {self.collection.count()}")
            
        except Exception as e:
            print(f"Error adding documents to vector store: {e}")
            raise

vectorstore=VectorStore()
vectorstore

Vector store initialized. Collection: pdf_documents
Existing documents in collection: 848


<__main__.VectorStore at 0x23355052610>

In [35]:
chunks

[Document(metadata={'producer': 'Corel PDF Engine Version 17.6.0.1021', 'creator': 'CorelDRAW X7', 'creationdate': '2025-05-02T14:53:59+05:00', 'moddate': '2025-05-02T14:53:59+05:00', 'author': 'Taaha', 'title': 'Complete Journal vol 21 issue 01` 2025.cdr', 'source': '..\\data\\pdf\\17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1', 'source_file': '17+Original+Article+1204+Irum+Qureshi+etal+page+92-96.pdf', 'file_type': 'pdf'}, page_content='Introduction\nThe phenomenon of skilled professionals leaving \ntheir home country to seek financial stability in \nanother country is known as the "Brain Drain". \nSpecifically, the migration of physicians is referred to \n1\nas Medical Brain Drain.  The lack of incentives for \nacademic credentials and experience can lead to \nfeelings of disillusionment, prompting individuals to \nmigrate to developed countries. Common factors \ncontributing to brain drain include limited career \nopportun

In [36]:
### Convert the text to embeddings
texts=[doc.page_content for doc in chunks]

## Generate the Embeddings

embeddings=embedding_manager.generate_embeddings(texts)

##store int he vector dtaabase
vectorstore.add_documents(chunks,embeddings)

Generating embeddings for 76 texts...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Generated embeddings with shape: (76, 384)
Adding 76 documents to vector store...
Successfully added 76 documents to vector store
Total documents in collection: 924


In [37]:
class RAGRetriever:
    """Handles query-based retrieval from the vector store"""
    
    def __init__(self, vector_store: VectorStore, embedding_manager: EmbeddingManager):
        """
        Initialize the retriever
        
        Args:
            vector_store: Vector store containing document embeddings
            embedding_manager: Manager for generating query embeddings
        """
        self.vector_store = vector_store
        self.embedding_manager = embedding_manager

    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
        """
        Retrieve relevant documents for a query
        
        Args:
            query: The search query
            top_k: Number of top results to return
            score_threshold: Minimum similarity score threshold
            
        Returns:
            List of dictionaries containing retrieved documents and metadata
        """
        print(f"Retrieving documents for query: '{query}'")
        print(f"Top K: {top_k}, Score threshold: {score_threshold}")
        
        # Generate query embedding
        query_embedding = self.embedding_manager.generate_embeddings([query])[0]
        
        # Search in vector store
        try:
            results = self.vector_store.collection.query(
                query_embeddings=[query_embedding.tolist()],
                n_results=top_k
            )
            
            # Process results
            retrieved_docs = []
            
            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results['metadatas'][0]
                distances = results['distances'][0]
                ids = results['ids'][0]
                
                for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas, distances)):
                    # Convert distance to similarity score (ChromaDB uses cosine distance)
                    similarity_score = 1 - distance
                    
                    if similarity_score >= score_threshold:
                        retrieved_docs.append({
                            'id': doc_id,
                            'content': document,
                            'metadata': metadata,
                            'similarity_score': similarity_score,
                            'distance': distance,
                            'rank': i + 1
                        })
                
                print(f"Retrieved {len(retrieved_docs)} documents (after filtering)")
            else:
                print("No documents found")
            
            return retrieved_docs
            
        except Exception as e:
            print(f"Error during retrieval: {e}")
            return []

rag_retriever=RAGRetriever(vectorstore,embedding_manager)

In [38]:
rag_retriever

<__main__.RAGRetriever at 0x23355043250>

In [39]:
rag_retriever.retrieve("What are the factors influencing medical students' decisions to pursue their Post Graduate Medical Education (PGME) in Pakistan or abroad, and to examine the differences in preferences between students from government and private medical colleges")

Retrieving documents for query: 'What are the factors influencing medical students' decisions to pursue their Post Graduate Medical Education (PGME) in Pakistan or abroad, and to examine the differences in preferences between students from government and private medical colleges'
Top K: 5, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 5 documents (after filtering)


[{'id': 'doc_2a089494_1',
  'content': "Employment, Pakistan lost around one million \nAptitude of Medical Students Towards their Postgraduate Medical Education at \nMultan; Brain Drain in Pakistan\n1 2 3 4\nIrum Qureshi,  Muhammad Ahmed Khokhar,  Zaitoon Sarfaraz,  Hafiz Muhammad Yar\nAbstract \nObjective: To investigate the factors influencing medical students' decisions to pursue their Post Graduate \nMedical Education (PGME) in Pakistan or abroad, and to examine the differences in preferences between \nstudents from government and private medical colleges.\nMaterial and Methods: A descriptive cross-sectional study was conducted from January to July 2024 among \n300 medical students in their 4th and final year of MBBS from four different medical colleges in Multan. A \nGoogle form was used to collect data, with two sections: one for pursuing PGME in Pakistan and the other for \ngoing abroad. Data analysis was performed using SPSS 27.",
  'metadata': {'moddate': '2025-05-02T14:53:59+

In [40]:
rag_retriever.retrieve("Conclusion of Imposter Syndrome among Pakistani Medical Students")

Retrieving documents for query: 'Conclusion of Imposter Syndrome among Pakistani Medical Students'
Top K: 5, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 5 documents (after filtering)


[{'id': 'doc_490698fb_63',
  'content': 'IMPOSTER SYNDROME AMONG PAKISTANI MEDICAL STUDENTS \nANNALS VOL 23,   ISSUE 2,   APR. – JUN. 2017 109 \nter syndrome or imposter phenomenon was initially \nconsidered as to be predominant in high achieving \nwomen. But the subsequent studies showed that this \nphenomenon is also quite frequent  among males, and \nmany studies showed no significant difference in the \nfrequencies among genders. But most studies showed \nfemales as relatively common sufferers, 10 especially \nfrom younger age group. In our medical colleges, now \na days, the females students  are two third or close to \nthis of the total student population. This adds to the \nsignificance of impacts of Imposter syndrome on \nmedical education and trainings. \n It is also found out that progress into senior classes \nor senior training years  does not ha ve any effect on \nimposter syndrome so if a student suffers, he/she will \ncontinue suffering this syndrome despit e increase in

In [1]:
%pip install langchain-groq

Collecting langchain-groq
  Using cached langchain_groq-1.1.1-py3-none-any.whl.metadata (2.4 kB)
Using cached langchain_groq-1.1.1-py3-none-any.whl (19 kB)
Installing collected packages: langchain-groq
Successfully installed langchain-groq-1.1.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
%pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [41]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv
load_dotenv()

### Initialize the Groq LLM (set your GROQ_API_KEY in environment)
groq_api_key = os.getenv("GROQ_API_KEY")

llm=ChatGroq(groq_api_key=groq_api_key,model_name="llama-3.1-8b-instant",temperature=0.1,max_tokens=1024)

## 2. Simple RAG function: retrieve context + generate response
def rag_simple(query,retriever,llm,top_k=3):
    ## retriever the context
    results=retriever.retrieve(query,top_k=top_k)
    context="\n\n".join([doc['content'] for doc in results]) if results else ""
    if not context:
        return "No relevant context found to answer the question."
    
    ## generate the answwer using GROQ LLM
    prompt=f"""Use the following context to answer the question concisely.
        Context:
        {context}

        Question: {query}

        Answer:"""
    
    response=llm.invoke([prompt.format(context=context,query=query)])
    return response.content

In [42]:
answer = rag_simple(
    "What results are found in Imposter Syndrome among Pakistani Medical Students?",
    rag_retriever,
    llm
)
print(answer)


Retrieving documents for query: 'What results are found in Imposter Syndrome among Pakistani Medical Students?'
Top K: 3, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
According to the given context, the results found in Imposter Syndrome among Pakistani Medical Students are:

1. Imposter Syndrome is more common among females, especially from the younger age group.
2. Females make up two-thirds of the total student population in medical colleges.
3. Progress into senior classes or training years does not affect Imposter Syndrome, and students will continue to suffer from it unless treated specially.
4. There is no significant difference in the frequency of Imposter Syndrome among genders, although it was initially considered predominant in high-achieving women.


In [43]:
answer = rag_simple(
    "When was Imposter Syndrome first described?",
    rag_retriever,
    llm
)
print(answer)


Retrieving documents for query: 'When was Imposter Syndrome first described?'
Top K: 3, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Imposter Syndrome was first described by psychological researchers Pauline Clance and Suzanne Imes in 1978.


In [44]:
def rag_advanced(query, retriever, llm, top_k=5, min_score=0.2, return_context=False):
    """
    RAG pipeline with extra features:
    - Returns answer, sources, confidence score, and optionally full context.
    """
    results = retriever.retrieve(query, top_k=top_k, score_threshold=min_score)
    if not results:
        return {'answer': 'No relevant context found.', 'sources': [], 'confidence': 0.0, 'context': ''}
    
    # Prepare context and sources
    context = "\n\n".join([doc['content'] for doc in results])
    sources = [{
        'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
        'page': doc['metadata'].get('page', 'unknown'),
        'score': doc['similarity_score'],
        'preview': doc['content'][:300] + '...'
    } for doc in results]
    confidence = max([doc['similarity_score'] for doc in results])
    
    # Generate answer
    prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"""
    response = llm.invoke([prompt.format(context=context, query=query)])
    
    output = {
        'answer': response.content,
        'sources': sources,
        'confidence': confidence
    }
    if return_context:
        output['context'] = context
    return output

result = rag_advanced("What is the main reason for joining the medical profession: Passion or parental pressure?", rag_retriever, llm, top_k=3, min_score=0.1, return_context=True)
print("Answer:", result['answer'])
print("Sources:", result['sources'])
print("Confidence:", result['confidence'])
print("Context Preview:", result['context'][:300])

Retrieving documents for query: 'What is the main reason for joining the medical profession: Passion or parental pressure?'
Top K: 3, Score threshold: 0.1
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Answer: The main reason for joining the medical profession is parental pressure, accounting for 69.5% of the participants.
Sources: [{'source': '2024-20903.pdf', 'page': 1, 'score': 0.5252103805541992, 'preview': 'The reason for such a large number of students joining\nmedicine just because of parental pressure is an indication\nof cultural forces at play. Children are generally not given\nthe freedom by their parents in Pakistan to choose the\nprofession of their interest. An earlier study conducted in\nRawalpind...'}, {'source': '2024-20903.pdf', 'page': 1, 'score': 0.5252103805541992, 'preview': 'The reason for such a large number of students joining\nmedicine just because of parental pressure is an indication\nof cultural forces at play. Children are generally not given\nthe freedom by their parents in Pakistan to choose the\nprofession of their interest. An earlier study conducted in\nRawalpind...'},

In [45]:
from typing import List, Dict, Any
import time

class AdvancedRAGPipeline:
    def __init__(self, retriever, llm):
        self.retriever = retriever
        self.llm = llm
        self.history = []  # Store query history

    def query(self, question: str, top_k: int = 5, min_score: float = 0.2, stream: bool = False, summarize: bool = False) -> Dict[str, Any]:
        # Retrieve relevant documents
        results = self.retriever.retrieve(question, top_k=top_k, score_threshold=min_score)
        if not results:
            answer = "No relevant context found."
            sources = []
            context = ""
        else:
            context = "\n\n".join([doc['content'] for doc in results])
            sources = [{
                'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
                'page': doc['metadata'].get('page', 'unknown'),
                'score': doc['similarity_score'],
                'preview': doc['content'][:120] + '...'
            } for doc in results]
            # Streaming answer simulation
            prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"""
            if stream:
                print("Streaming answer:")
                for i in range(0, len(prompt), 80):
                    print(prompt[i:i+80], end='', flush=True)
                    time.sleep(0.05)
                print()
            response = self.llm.invoke([prompt.format(context=context, question=question)])
            answer = response.content

        # Add citations to answer
        citations = [f"[{i+1}] {src['source']} (page {src['page']})" for i, src in enumerate(sources)]
        answer_with_citations = answer + "\n\nCitations:\n" + "\n".join(citations) if citations else answer

        # Optionally summarize answer
        summary = None
        if summarize and answer:
            summary_prompt = f"Summarize the following answer in 2 sentences:\n{answer}"
            summary_resp = self.llm.invoke([summary_prompt])
            summary = summary_resp.content

        # Store query history
        self.history.append({
            'question': question,
            'answer': answer,
            'sources': sources,
            'summary': summary
        })

        return {
            'question': question,
            'answer': answer_with_citations,
            'sources': sources,
            'summary': summary,
            'history': self.history
        }
# Example usage:
adv_rag = AdvancedRAGPipeline(rag_retriever, llm)
result = adv_rag.query("what is the conclusion of Aptitude of Medical Students Towards their Postgraduate Medical Education at Multan", top_k=3, min_score=0.1, stream=True, summarize=True)
print("\nFinal Answer:", result['answer'])
print("Summary:", result['summary'])
print("History:", result['history'][-1])

Retrieving documents for query: 'what is the conclusion of Aptitude of Medical Students Towards their Postgraduate Medical Education at Multan'
Top K: 3, Score threshold: 0.1
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Streaming answer:
Use the following context to answer the question concisely.
Context:
abroad, it is essential to understand the factors 
driving this trend and its impact on healthcare 
systems in both home and host countries.
Pakistan faces a severe shortage of doctors due to its 
large population, making it essential to understand 
the reasons behind medical graduates' choices. This 
study was conducted to provide quantitative data on 
the number of students inclined towards pursuing 
PGME abroad or staying in Pakistan, as well as the 
factors influencing their decisions. The primary 
objectives of this study were to determine the 
aptitude of medical students towards PGME and the 
factors contributing to their choice of different career 
pathways.
Material and Methods
An online Google survey was conducted from 
January to July 2024 at four medical colleges in 
Multan, comprising two government and two