In [3]:
%pip install haystack-ai
%pip install pymed

Collecting haystack-ai
  Downloading haystack_ai-2.0.0b7-py3-none-any.whl.metadata (19 kB)
Collecting boilerpy3 (from haystack-ai)
  Downloading boilerpy3-1.0.7-py3-none-any.whl.metadata (5.8 kB)
Collecting haystack-bm25 (from haystack-ai)
  Downloading haystack_bm25-1.0.2-py2.py3-none-any.whl.metadata (3.9 kB)
Collecting lazy-imports (from haystack-ai)
  Downloading lazy_imports-0.3.1-py3-none-any.whl (12 kB)
Collecting more-itertools (from haystack-ai)
  Downloading more_itertools-10.2.0-py3-none-any.whl.metadata (34 kB)
Collecting openai>=1.1.0 (from haystack-ai)
  Downloading openai-1.12.0-py3-none-any.whl.metadata (18 kB)
Collecting posthog (from haystack-ai)
  Downloading posthog-3.4.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting distro<2,>=1.7.0 (from openai>=1.1.0->haystack-ai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai>=1.1.0->haystack-ai)
  Downloading httpx-0.26.0-py3-none-any.whl.metadata (7.6 kB)
Collecting m

In [4]:
import os
from dotenv import load_dotenv

load_dotenv()

huggingfacehub_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

In [5]:
from pymed import PubMed
from typing import List
from haystack import component
from haystack import Document

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
pubmed = PubMed(tool="Haystack2.0Prototype", email="srijanshovit@gmail.com")


In [7]:
def documentize(article):
  return Document(content=article.abstract, meta={'title': article.title, 'keywords': article.keywords})

@component
class PubMedFetcher():

  @component.output_types(articles=List[Document])
  def run(self, queries: list[str]):
    cleaned_queries = queries[0].strip().split('\n')

    articles = []
    try:
      for query in cleaned_queries:
        response = pubmed.query(query, max_results = 1)
        documents = [documentize(article) for article in response]
        articles.extend(documents)
    except Exception as e:
        print(e)
        print(f"Couldn't fetch articles for queries: {queries}" )
    results = {'articles': articles}
    return results

In [14]:
from haystack.components.generators import HuggingFaceTGIGenerator
from haystack.utils import Secret


keyword_llm = HuggingFaceTGIGenerator("mistralai/Mistral-7B-Instruct-v0.2")
keyword_llm.warm_up()

llm = HuggingFaceTGIGenerator("mistralai/Mistral-7B-Instruct-v0.2")
llm.warm_up()

tokenizer_config.json: 100%|██████████| 1.46k/1.46k [00:00<?, ?B/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
tokenizer.model: 100%|██████████| 493k/493k [00:00<00:00, 3.25MB/s]
tokenizer.json: 100%|██████████| 1.80M/1.80M [00:01<00:00, 1.76MB/s]
special_tokens_map.json: 100%|██████████| 72.0/72.0 [00:00<?, ?B/s]


In [15]:
from haystack import Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder

keyword_prompt_template = """
Your task is to convert the following question into 3 keywords that can be used to find relevant medical research papers on PubMed.
Here is an examples:
question: "What are the latest treatments for major depressive disorder?"
keywords:
Antidepressive Agents
Depressive Disorder, Major
Treatment-Resistant depression
---
question: {{ question }}
keywords:
"""

prompt_template = """
Answer the question truthfully based on the given documents.
If the documents don't contain an answer, use your existing knowledge base.

q: {{ question }}
Articles:
{% for article in articles %}
  {{article.content}}
  keywords: {{article.meta['keywords']}}
  title: {{article.meta['title']}}
{% endfor %}

"""
keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)

prompt_builder = PromptBuilder(template=prompt_template)
fetcher = PubMedFetcher()

pipe = Pipeline()

pipe.add_component("keyword_prompt_builder", keyword_prompt_builder)
pipe.add_component("keyword_llm", keyword_llm)
pipe.add_component("pubmed_fetcher", fetcher)
pipe.add_component("prompt_builder", prompt_builder)
pipe.add_component("llm", llm)

pipe.connect("keyword_prompt_builder.prompt", "keyword_llm.prompt")
pipe.connect("keyword_llm.replies", "pubmed_fetcher.queries")

pipe.connect("pubmed_fetcher.articles", "prompt_builder.articles")
pipe.connect("prompt_builder.prompt", "llm.prompt")


<haystack.pipeline.Pipeline at 0x17c46f57e10>

In [16]:
from IPython.display import display, HTML

def ask(question):
  output = pipe.run(data={"keyword_prompt_builder":{"question":question},
                          "prompt_builder":{"question": question},
                          "llm":{"generation_kwargs": {"max_new_tokens": 500}}})
  print(question)
  print(output['llm']['replies'][0])
  # display(HTML(f'<div style="color: blue">{output["llm"]['replies'][0]}</div>'))


In [17]:
ask("How are mRNA vaccines being used for cancer treatment?")

How are mRNA vaccines being used for cancer treatment?
a: mRNA vaccines are being used for cancer treatment by delivering mRNA encoding tumor antigens to stimulate an immune response against cancer cells. The documents do not provide specific information on how mRNA vaccines are being used for cancer treatment, but they do mention the use of mRNA for therapeutic interventions and the development of polymeric architectures for mRNA delivery. The documents do not mention cancer treatment specifically in relation to these polymeric architectures, but they do suggest that they can be tailored for mRNA protection, loading efficacy, and targeted release, which could be useful for cancer treatment applications. Overall, while the documents do not directly answer the question, they do provide some context and background information on the use of mRNA for therapeutic applications, including cancer treatment.


In [30]:
%pip install mermaid
%pip install pygraphviz


Collecting mermaid
  Downloading mermaid-0.3.2-py3-none-any.whl (1.2 MB)
     ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
     ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
     ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
     -- ------------------------------------- 0.1/1.2 MB 544.7 kB/s eta 0:00:03
     ---------------------- ----------------- 0.7/1.2 MB 4.6 MB/s eta 0:00:01
     ---------------------------------------  1.2/1.2 MB 7.5 MB/s eta 0:00:01
     ---------------------------------------- 1.2/1.2 MB 5.8 MB/s eta 0:00:00
Collecting jupyter (from mermaid)
  Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting pynrrd (from mermaid)
  Downloading pynrrd-1.0.0-py2.py3-none-any.whl (19 kB)
Collecting future (from mermaid)
  Downloading future-0.18.3.tar.gz (840 kB)
     ---------------------------------------- 0.0/840.9 kB ? eta -:--:--
     ------------------------------------  839.7/840.9 kB 51.8 MB/s e

DEPRECATION: mermaid 0.3.2 has a non-standard dependency specifier torch>=1.7torchvision. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of mermaid or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


Collecting pygraphvizNote: you may need to restart the kernel to use updated packages.

  Using cached pygraphviz-1.12.tar.gz (104 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: pygraphviz
  Building wheel for pygraphviz (pyproject.toml): started
  Building wheel for pygraphviz (pyproject.toml): finished with status 'error'
Failed to build pygraphviz


  error: subprocess-exited-with-error
  
  × Building wheel for pygraphviz (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [49 lines of output]
      running bdist_wheel
      running build
      running build_py
      creating build
      creating build\lib.win-amd64-cpython-311
      creating build\lib.win-amd64-cpython-311\pygraphviz
      copying pygraphviz\agraph.py -> build\lib.win-amd64-cpython-311\pygraphviz
      copying pygraphviz\graphviz.py -> build\lib.win-amd64-cpython-311\pygraphviz
      copying pygraphviz\scraper.py -> build\lib.win-amd64-cpython-311\pygraphviz
      copying pygraphviz\testing.py -> build\lib.win-amd64-cpython-311\pygraphviz
      copying pygraphviz\__init__.py -> build\lib.win-amd64-cpython-311\pygraphviz
      creating build\lib.win-amd64-cpython-311\pygraphviz\tests
      copying pygraphviz\tests\test_attribute_defaults.py -> build\lib.win-amd64-cpython-311\pygraphviz\tests
      copying pygraphviz\tests\test_clear.py -> build\lib.

In [34]:
pipe.draw("img.jpg")