**Install Dependencies and Load API Keys**

In [None]:
!pip install llama-index openai langchain langchain_community langchain-openai faiss-cpu arxiv serpapi google-search-results requests beautifulsoup4 streamlit

In [None]:
from google.colab import userdata
# API keys are looked up by secret name through google.colab's `userdata`
# helper rather than being written into the notebook.
openai_api_key = userdata.get('OPENAI_API_KEY')
serpapi_key = userdata.get('SERPAPI_API_KEY')

**Fetching Research Papers**

1. Fetching Papers from arXiv

In [None]:
import arxiv

def fetch_arxiv_papers(query, max_results=5):
    """Search arXiv for ``query`` and return the hits as plain dicts.

    The search is sorted by submission date.

    Args:
        query: Search string handed to ``arxiv.Search``.
        max_results: Maximum number of results requested from arXiv.

    Returns:
        A list of dicts with the keys "title", "summary" and "source";
        "source" is always the string "arXiv".
    """
    by_submission_date = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )
    return [
        {"title": entry.title, "summary": entry.summary, "source": "arXiv"}
        for entry in arxiv.Client().results(by_submission_date)
    ]


2. Fetching Papers from PubMed

In [None]:
import requests
from bs4 import BeautifulSoup

def get_pubmed_abstract(pmid, timeout=10):
    """Fetch abstract from PubMed using PubMed ID (PMID).

    Args:
        pmid: PubMed identifier; appended to the pubmed.ncbi.nlm.nih.gov URL.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error.

    Returns:
        The stripped text of the page's ``div.abstract-content`` element, or
        the string "Abstract not found" when the page has no such element.
    """
    url = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}"
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    abstract_section = soup.find("div", class_="abstract-content")
    if abstract_section:
        return abstract_section.text.strip()
    return "Abstract not found"

def fetch_pubmed_papers(query, max_results=5):
    """Fetch medical research papers from PubMed.

    Runs an ``esearch`` query against the NCBI E-utilities endpoint to get
    matching PMIDs, then fetches the abstract of each one with
    ``get_pubmed_abstract``.

    Args:
        query: Search term sent as the ``term`` parameter.
        max_results: Maximum number of PMIDs requested (``retmax``).

    Returns:
        A list of dicts with the keys "title", "summary" and "source";
        "source" is always "PubMed". The list is empty when the response
        carries no ID list.

    Raises:
        requests.HTTPError: If the search endpoint answers with an error status.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    search_url = base_url + "esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": max_results
    }

    # A timeout keeps a stalled connection from hanging the notebook, and the
    # status check surfaces HTTP errors instead of a confusing JSON/Key error.
    response = requests.get(search_url, params=params, timeout=10)
    response.raise_for_status()

    # Tolerate payloads that lack the expected keys (e.g. an error document).
    pmid_list = response.json().get("esearchresult", {}).get("idlist", [])

    # Fetch abstract for each pmid
    return [
        {
            "title": f"PubMed article {pmid}",
            "summary": get_pubmed_abstract(pmid),
            "source": "PubMed",
        }
        for pmid in pmid_list
    ]

3. Fetching Papers from Google Scholar

In [None]:
from serpapi import GoogleSearch
import requests
from bs4 import BeautifulSoup
def get_scholar_details(scholar_url, timeout=10):
    """Extracts title and abstract from a Google Scholar article page.

    The page is searched for an ``h3`` with class ``gs_rt`` (title) and a
    ``div`` with class ``gs_rs`` (abstract).

    Args:
        scholar_url: URL of the page to download.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error.

    Returns:
        A dict with the keys "title" and "summary". A missing element is
        reported as "Title not found" / "Abstract not found" respectively.
    """
    # Without a timeout a slow or unresponsive site would block indefinitely.
    response = requests.get(
        scholar_url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, 'html.parser')

    title_element = soup.find("h3", class_="gs_rt")
    abstract_element = soup.find("div", class_="gs_rs")

    return {
        "title": title_element.text.strip() if title_element else "Title not found",
        "summary": abstract_element.text.strip() if abstract_element else "Abstract not found"
    }


def search_google_scholar(query, max_results=5):
    """Fetch research papers from Google Scholar based on query.

    The search goes through SerpAPI's ``google_scholar`` engine. For each
    organic result that has a link, ``get_scholar_details`` tries to scrape a
    title and abstract from it; whatever cannot be scraped falls back to the
    metadata SerpAPI returned for that result.

    Args:
        query: Search string sent as ``q``.
        max_results: Number of results requested (``num``).

    Returns:
        A list of dicts with the keys "title", "summary" and "source";
        "source" is always "Google Scholar".
    """
    params = {
        "q": query,
        "api_key": serpapi_key,
        "engine": "google_scholar",
        "num": max_results
    }
    results = GoogleSearch(params).get_dict().get("organic_results", [])

    papers = []
    for result in results:
        url = result.get("link")
        details = {}
        if url:
            try:
                details = get_scholar_details(url)
            except requests.RequestException as exc:
                # One unreachable site should not abort the whole search;
                # the SerpAPI metadata below is used instead.
                print(f"Could not fetch {url}: {exc}")

        title = details.get("title", "Title not found")
        # The fallback previously stored the text under "abstract" while the
        # code below read "summary", raising KeyError for link-less results.
        summary = details.get("summary", "No abstract available")

        # Prefer SerpAPI's own fields over the scraper's placeholder strings.
        if title == "Title not found" and result.get("title"):
            title = result["title"]
        if summary in ("Abstract not found", "No abstract available") and result.get("snippet"):
            summary = result["snippet"]

        papers.append({
            "title": title,
            "summary": summary,
            "source": "Google Scholar"
        })

    return papers

**Process Papers for Summarization**

In [None]:
#from llama_index.core import Document
from langchain.schema import Document


def process_papers(papers, max_words=100):
    """Turn paper dicts into LangChain ``Document`` objects.

    Each paper's title, abstract and source are combined into one text, which
    is split on whitespace, cut to the first ``max_words`` words and re-joined
    with single spaces (so the original line breaks are not kept).

    Args:
        papers: Iterable of dicts providing "title", "summary" and "source".
        max_words: Maximum number of whitespace-separated words kept per document.

    Returns:
        A list of ``Document`` objects whose ``page_content`` is the truncated
        text and whose metadata holds the paper's "title" and "source".
    """
    def _to_document(paper):
        full_text = (
            f"Title: {paper['title']}\n"
            f"Abstract: {paper['summary']}\n"
            f"Source: {paper['source']}"
        )
        clipped = " ".join(full_text.split()[:max_words])
        # LangChain's Document takes its text as `page_content`.
        return Document(
            page_content=clipped,
            metadata={"title": paper["title"], "source": paper["source"]},
        )

    return [_to_document(paper) for paper in papers]


**Summarization Using LLM**

In [None]:
from openai import OpenAI
import time
def summarize_paper(paper_summary, max_retries=3):
    """Generate a short summary with the OpenAI ``gpt-4o`` chat model.

    Rate-limit errors are retried after a 10 second pause; any other error
    ends the call immediately.

    Args:
        paper_summary: Text to summarize; inserted verbatim into the prompt.
        max_retries: Maximum number of attempts when rate-limited.

    Returns:
        The model's reply text on success, "Error in summarization" when a
        non-rate-limit error occurs, or "Failed after multiple retries" when
        every attempt was rate-limited.
    """
    # Imported here because the cell only imports the `OpenAI` class, so the
    # bare `openai` module name is not available for the except clause.
    from openai import RateLimitError

    # The function previously used an undefined global `client`.
    client = OpenAI(api_key=openai_api_key)
    prompt = f"You are a specialized assistant for summarizing medical and healthcare papers.Summarize the following text in maximum 3 sentence:\n\n{paper_summary}"

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=50  # Adjust token limit
            )
            return response.choices[0].message.content  # Extract the generated summary

        except RateLimitError:
            print(f"Rate limit exceeded. Attempt {attempt+1}/{max_retries}. Retrying in 10 seconds...")
            time.sleep(10)  # Wait before retrying

        except Exception as e:
            print(f"Error: {e}")
            return "Error in summarization"

    return "Failed after multiple retries"

**Build RAG**

In [None]:
from llama_index.core import VectorStoreIndex, ServiceContext
#from llama_index.embedding.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding

def build_rag_pipeline(documents):
    """Build a LlamaIndex vector index over ``documents`` with OpenAI embeddings.

    Args:
        documents: Documents to embed and index.
            NOTE(review): ``process_papers`` in this notebook produces
            LangChain ``Document`` objects; presumably LlamaIndex documents
            are required here — confirm before wiring the two together.

    Returns:
        The ``VectorStoreIndex`` built from ``documents``.
    """
    # The embedding model is passed straight to the index: ServiceContext is
    # deprecated in current llama-index releases and the install is unpinned.
    embed_model = OpenAIEmbedding(embed_batch_size=10, api_key=openai_api_key)
    return VectorStoreIndex.from_documents(documents, embed_model=embed_model)

**Create Chatbot with LangChain**

In [None]:
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain_community.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory

def create_chatbot(documents):
    """Assemble a conversational retrieval chain over ``documents``.

    The documents are embedded with ``OpenAIEmbeddings`` into a FAISS vector
    store, whose retriever is combined with a ``gpt-4o`` chat model
    (temperature 0.0) and a summary-buffer memory stored under
    "chat_history".

    Args:
        documents: LangChain documents to index for retrieval.

    Returns:
        The configured ``ConversationalRetrievalChain``.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    store = FAISS.from_documents(documents, embeddings)

    chat_model = ChatOpenAI(temperature=0.0, model_name="gpt-4o", openai_api_key=openai_api_key)

    # NOTE(review): max_token_limit=10 looks very small for a conversation
    # buffer — confirm this is the intended value.
    conversation_memory = ConversationSummaryBufferMemory(
        llm=chat_model,
        max_token_limit=10,
        input_key='question',
        memory_key="chat_history",
        return_docs=False,
        return_messages=True,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=store.as_retriever(),
        memory=conversation_memory,
    )

**Inference in Colab**

In [None]:
query = "Sleep role in memory consolidation"
# One paper per source, gathered in the order arXiv, PubMed, Google Scholar.
papers = [
    *fetch_arxiv_papers(query, max_results=1),
    *fetch_pubmed_papers(query, max_results=1),
    *search_google_scholar(query, max_results=1),
]
documents = process_papers(papers)
chatbot = create_chatbot(documents)

In [None]:
q = "How brain memory consolidates during sleep?"
chat_history = []
# Interactive question/answer loop; typing "exit" (in any letter case) ends it.
while True:
    user_input = input("\n🗨️ Ask a question about these papers (or type 'exit' to quit): ")
    if user_input.lower() != "exit":
        response = chatbot.invoke({"question": user_input, "chat_history": chat_history})
        # Keep a local (question, answer) transcript of the session.
        chat_history.append((user_input, response["answer"]))
        print("\n🤖 Chatbot Response:\n", response)
        continue

    print("\n👋 Exiting chatbot. Have a great day!")
    break


🗨️ Ask a question about these papers (or type 'exit' to quit): How brain memory consolidates during sleep?

🤖 Chatbot Response:
 {'question': 'How brain memory consolidates during sleep?', 'chat_history': [], 'answer': 'Memory consolidation during sleep involves the strengthening and stabilization of memories that were acquired during wakefulness. This process is believed to occur through the reactivation and reorganization of neural circuits. During sleep, particularly during rapid-eye-movement (REM) sleep, specific brain regions, such as the hippocampus and cortex, are active in replaying and processing information. This activity helps to integrate new memories with existing knowledge, making them more stable and less susceptible to interference. Additionally, certain neurotransmitters and hormones, like oxytocin, may play a role in facilitating memory consolidation, as suggested by research on the prelimbic cortex and its involvement in social memory consolidation.'}

🗨️ Ask a ques

KeyboardInterrupt: Interrupted by user

**Deployment in Streamlit**