# Medical AI agent

### Install required dependencies

In [3]:
!pip install llama-index --quiet
!pip install llama-index-llms-groq --quiet
!pip install llama-index-core --quiet
!pip install llama-index-readers-file --quiet
!pip install llama-index-tools-wolfram-alpha --quiet
!pip install llama-index-embeddings-huggingface --quiet
!pip install 'crewai[tools]' --quiet

##### Sign up at Groq to obtain an API key

In [12]:
from llama_index.llms.groq import Groq

In [13]:
# Never embed the API key in the notebook: take it from the GROQ_API_KEY
# environment variable, prompting once (without echo) when it is not set.
import os
from getpass import getpass

if not os.environ.get("GROQ_API_KEY"):
    # Stored in the process environment so later cells can reuse it without re-prompting.
    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")

llm = Groq(model="llama3-70b-8192", api_key=os.environ["GROQ_API_KEY"])

In [None]:
# Quick smoke test of the LLM client: send one completion prompt and show the reply.
response = llm.complete(
    "What are AI agents"
)
print(response)

In [None]:
# Second completion prompt, this time on a medical-domain topic.
response = llm.complete(
    "Describe medical chatbots"
)
print(response)

In [14]:
import os
from getpass import getpass

from langchain_openai import ChatOpenAI  # CrewAI requires a chat-based model for binding

# Never embed the API key in the notebook: take it from the GROQ_API_KEY
# environment variable, prompting once (without echo) when it is not set.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")
groq_api_key = os.environ["GROQ_API_KEY"]

# ChatOpenAI is pointed at Groq's OpenAI-style endpoint instead of the default OpenAI base URL.
chat_llm = ChatOpenAI(
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=groq_api_key,
    model="llama3-70b-8192",
    temperature=0,
    max_tokens=1000,
)

### Download data

In [25]:
# Download the arXiv PDF and save it locally as ndc.pdf, the file parsed in the next section.
# NOTE(review): the parsed text shown further down discusses an LLM-selection algorithm
# rather than medical content — confirm this is the intended source document.
!wget "https://arxiv.org/pdf/2408.08545" -O ndc.pdf

--2024-09-14 18:59:04--  https://arxiv.org/pdf/2408.08545
Resolving arxiv.org (arxiv.org)... 151.101.195.42, 151.101.3.42, 151.101.67.42, ...
Connecting to arxiv.org (arxiv.org)|151.101.195.42|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1245286 (1.2M) [application/pdf]
Saving to: ‘ndc.pdf’


2024-09-14 18:59:04 (69.4 MB/s) - ‘ndc.pdf’ saved [1245286/1245286]



#### Parse the data

In [16]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.openai import OpenAI
import os
from langchain_openai import ChatOpenAI

In [26]:
# Point the reader at the downloaded PDF only.
reader = SimpleDirectoryReader(input_files=["ndc.pdf"])
# Parse the file into a list of llama-index Document objects.
docs = reader.load_data()
# Display the second loaded Document as a quick check of the parsed text and metadata.
docs[1]

Document(id_='c3949ad0-4d5a-4cf4-bfa3-7e3d07764d33', embedding=None, metadata={'page_label': '2', 'file_name': 'ndc.pdf', 'file_path': 'ndc.pdf', 'file_type': 'application/pdf', 'file_size': 1245286, 'creation_date': '2024-09-14', 'last_modified_date': '2024-08-19'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='even all) models in the pool does not necessarily re-\nsult in the best scores. Thus, selectively abstaining\nfrom querying unsuitable LLMs for a given task\nmay help improve the overall response quality\nof such ensembles. Additionally, such an approach\nwould implicitly save computation by accessing\nfewer models per query.\nIn this paper, we propose the SELECT LLM algo-\nrithm to explore this idea. Our approach first learns\nthe model-specif

#### Embed the data

In [27]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [28]:
# Load the BAAI/bge-small-en-v1.5 embedding model through the HuggingFace integration.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Build Index

In [29]:
# Build the vector index from the parsed documents, embedding them with `embed_model`.
index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)

### Develop Query Engine

In [30]:
# Expose the index as a query engine that uses `llm`, configured with similarity_top_k=5.
query_engine = index.as_query_engine(
    llm=llm,
    similarity_top_k=5,
)

In [None]:
# Check which SQLAlchemy version (if any) is currently installed before pinning it below.
!pip show SQLAlchemy

In [None]:
!pip install --upgrade SQLAlchemy==1.4.18

In [None]:
!pip install crewai_tools embedchain SQLAlchemy 

In [41]:
from crewai_tools import LlamaIndexTool
# Wrap the llama-index query engine as a CrewAI tool so an agent can call it.
# The name and description are the text the agent sees when choosing a tool.
query_tool = LlamaIndexTool.from_query_engine(
    query_engine,
    description="Use this tool to lookup the presented document",
    name="Ai medical Assist Tool",
)

In [42]:
# Inspect the tool's argument schema (per the output below: one required string field, `query`).
query_tool.args_schema.schema()

{'title': 'QueryToolSchema',
 'description': 'Schema for query tool.',
 'type': 'object',
 'properties': {'query': {'title': 'Query',
   'description': 'Search query for the query tool.',
   'type': 'string'}},
 'required': ['query']}

#### Define agents

In [43]:
import os
from crewai import Agent, Task, Crew, Process

In [44]:
# First agent: the only one given the document query tool.
researcher = Agent(
    role="Medical Assistant",
    goal="Provide medical support and information.",
    backstory="""You work at a healthcare facility.
    Your goal is to assist healthcare professionals and patients with accurate medical information.""",
    llm=chat_llm,
    tools=[query_tool],
    allow_delegation=False,
    verbose=True,
)

# Second agent: configured without tools; shares the same chat model as the researcher.
writer = Agent(
    role="Tech Content Strategist",
    goal="Craft compelling content on tech advancements",
    backstory="""You are a renowned Content Strategist, known for your insightful and engaging articles.
    You transform complex concepts into compelling narratives.""",
    llm=chat_llm,
    allow_delegation=False,
    verbose=True,
)


#### Define tasks

In [45]:
# Research task, assigned to the `researcher` agent.
task1 = Task(
    agent=researcher,
    description="""Conduct a comprehensive analysis of the latest advancements in AI for early disease detection, focusing on its applications and benefits in healthcare settings.""",
    expected_output="Full analysis report in bullet points",
)

# Writing task, assigned to the `writer` agent.
task2 = Task(
    agent=writer,
    description="""Using the insights provided, develop an engaging blog post that highlights the impact of AI in early disease detection. Your post should be informative yet accessible, catering to a general audience. Aim to make the topic understandable and compelling, avoiding overly complex language.""",
    expected_output="Full blog post of at least 4 paragraphs",
)


In [46]:
import logging

# Configure root logging at DEBUG level (alternatives: logging.INFO, logging.WARNING).
logging.basicConfig(level=logging.DEBUG)

# Assemble the crew from both agents and both tasks, with verbose tracing on.
crew = Crew(
    tasks=[task1, task2],
    agents=[researcher, writer],
    verbose=True,
)


#### AI agent workflow

In [47]:
# Start the crew run and keep its return value.
result = crew.kickoff()
# Separator line between the verbose run trace and the final result.
# (The original had a stray extra quote here, which made the whole cell a SyntaxError.)
print("*********************************")
print(result)

[1m[95m [2024-09-14 19:05:53][DEBUG]: == Working Agent: Medical Assistant[00m
[1m[95m [2024-09-14 19:05:53][INFO]: == Starting Task: Conduct a comprehensive analysis of the latest advancements in AI for early disease detection, focusing on its applications and benefits in healthcare settings.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: I need to gather information on the latest advancements in AI for early disease detection and its applications and benefits in healthcare settings.

Action: Ai medical Assist Tool
Action Input: {"query": "Latest advancements in AI for early disease detection"}[0m[95m 

The latest advancements in AI for early disease detection are not explicitly mentioned in the provided context. However, the context highlights the development of a novel algorithm called SELECT LLM, which efficiently selects the most suitable subset of Large Language Models (LLMs) from a large pool to answer input queries. This algorithm has the p