In [10]:
import nest_asyncio
nest_asyncio.apply()
import asyncio  # Add this import





import os
import agentops
from dotenv import load_dotenv
import json
import autogen
import tempfile
from autogen.coding import LocalCommandLineCodeExecutor
from autogen import GroupChat
import weave
from APIs.combinedapi import PubMedProcessor


# Load environment variables from .env file

load_dotenv()

# PubMedProcessor (imported above from APIs.combinedapi) is used for the literature search.


# The OpenAI key is read from the environment (load_dotenv() above fills it
# from the local .env file when one is present).
openai_api_key = os.getenv("OPENAI_API_KEY")

# LLM settings for the general-purpose agents, defined inline.
llm_config = dict(
    model="gpt-4o-mini",
    temperature=0.3,  # sampling randomness; lower is more deterministic
    api_key=openai_api_key,
    top_p=1.0,  # nucleus sampling: probability mass kept; 1.0 keeps everything
)

# SAR and RAR get their own dict so it can be tuned separately later; for now
# it holds exactly the same values as llm_config.
SAR_config = dict(llm_config)
#        



# see https://www.youtube.com/watch?v=-BBulGM6xF0
 
# Stand-in for the human user in the group chat; human_input_mode="NEVER"
# means it is never configured to ask for human input.
user_proxy = autogen.UserProxyAgent(name="User_Proxy", human_input_mode="NEVER")

# Moderator: the coordinating agent. It uses the shared llm_config, and its
# system prompt tells it to brief SAR first, then compile the final report and
# present it to the user proxy.
# NOTE: the system_message text below is sent to the model verbatim; its
# wording and whitespace are part of the runtime behaviour.
Moderator = autogen.AssistantAgent(
    name="Moderator",
    llm_config=llm_config,
    human_input_mode="TERMINATE",
    system_message="""You are 'Moderator', overseeing the drug discovery process. 
                    You speak to SAR first then once the report is complete, speak to user_proxy to present the final output.
                    ensure all aspects of the prompt are addressed, and synthesize a comprehensive final report - essay length. You will:
                    Your role is to coordinate the discussion between specialized agents,  ensuring all aspects of the prompt are addressed.

                    1. Analyze the given prompt and break it down into subtasks for each agent.
                    2. Assign tasks to appropriate agents and manage their interactions.
                    3. Ensure all required sections (scientific rationale, target assessment strategy, and safety assessment) are thoroughly covered.
                    4. Compile and summarize the inputs from all agents into a cohesive final report.
                    5. Ensure all claims are properly referenced and the report follows the specified format.
                    6. Present the final output, including a complete list of references."""                    ) # dominic lukes? AI competency centre same voice throughout the conversation


# If the system prompt is too long, the agent becomes confused and won't complete its tasks.

# SAR: the agent that writes the scientific rationale. It runs on SAR_config
# and is the registered caller of the pubmed_api tool further down.
# The system_message is runtime text sent to the model as-is.
scientific_rationale = autogen.AssistantAgent(
    name="SAR",
    llm_config=SAR_config,
    
    system_message=""" You are 'SAR', an expert in target discovery and assessment at a pharmaceutical company, you get commission when you assess and discovery a new target. 
                        You provide scientific rationales to support the selection of drug targets for a specific disease.
                        You are meticulous in your analysis and provide evidence-based assessments to guide drug discovery efforts.
                        Your role is to:
                        1. Your primary role is to develop comprehensive scientific rationales for given targets in specified diseases using available literature. """
                        )


# safety_officer = autogen.AssistantAgent(
#     name="SAFEA",
#     system_message="""You are 'SAFEA' an expert in safety when it comes to target discovery. Your role is to conduct a thorough safety assessment of the proposed drug target. You will:

#                     Evaluate target expression patterns and tissue specificity.
#                     Analyze potential species differences between humans and animal models.
#                     Assess on-target and off-target safety concerns, including peripheral risks.
#                     Evaluate the risk of exaggerated pharmacology and immunogenicity.
#                     Consider the impact of genetic polymorphisms on target function.
#                     Provide a comprehensive safety profile addressing all points in the prompt's safety assessment section""",
#     llm_config=llm_config,
# )
# target_assessment = autogen.AssistantAgent(
#     name="TAG",
#     llm_config=llm_config,
#     system_message="""You are 'TAG' a specialist in assessing targets for drug discovery. Your role is to develop and outline a comprehensive 1-year Target Assessment (TA) to Lead Identification (LI) plan for the proposed drug target. Your responsibilities include:

#                     Analyze and highlight key inflection points that will inform the project's feasibility.
#                     Assess the status of in-vitro platforms and translational in vivo models.
#                     Identify and describe what needs to be established in terms of platforms and models.
#                     Elaborate on the target's tractability and major challenges for advancement in a drug discovery portfolio.
#                     Discuss and propose potential biomarkers and readouts for efficacy and target engagement.
#                     Outline a high-level TA-LI plan, focusing on critical milestones and decision points.
#                     Evaluate the commercial viability and desirability of the proposed mode of action on the target in a clinical setting.
#                     Assess the advantages and disadvantages of different therapeutic modalities for tackling the target.
#                     Use the pubmed_api function to find relevant target assessment data and literature.
#                     Provide a concise yet comprehensive target assessment strategy addressing all points in the prompt's target assessment section, within a 500-word limit.

#                     Your assessment should be evidence-based, drawing from the latest research and industry best practices in target assessment. 
                    
#                     Be prepared to interact with other specialists to ensure a well-rounded evaluation of the target""",
#                     )

# Literature_Agent: registered below as the executor of the pubmed_api tool.
# max_consecutive_auto_reply=1 caps it at a single automatic reply in a row.
# The system_message is runtime text sent to the model as-is.
literature_agent = autogen.AssistantAgent(
    name="Literature_Agent",
    llm_config=llm_config,
    max_consecutive_auto_reply=1,
    system_message="""You are the Literature_Agent. Your role is to provide relevant scientific literature and references.
    When asked to search for literature, ALWAYS use the pubmed_api function as follows:
    pubmed_api("Your search query here")
    After receiving the results in json format, pass them to the next speaker for analysis.
    Do not skip this step or pretend to search without using the function. Analyze the literature and provide a summary of the key points to the next speaker.""",
)

# RAR: reviewer agent that critiques SAR's rationale. Its prompt asks it to
# start its message with 'IMPROVEMENTS NEEDED:', which is the marker that
# state_transition_function looks for to route the conversation back to SAR.
# The system_message (including its markdown headings) is runtime text sent to
# the model as-is.
reflection_agent = autogen.AssistantAgent(
    name="RAR",
    llm_config=SAR_config,
    max_consecutive_auto_reply=1,
    system_message="""
    # RAR: Reflection and Analysis Agent for Scientific Rationale

You are 'RAR', an expert in critically analyzing scientific rationales and providing constructive feedback. Your role is to review the output of the Scientific Assessment Rationale (SAR) Agent and offer insights to improve its comprehensiveness, accuracy, and relevance.

## Role
Your primary function is to reflect on and analyze the scientific rationale provided by the SAR Agent, identifying strengths, weaknesses, and areas for improvement.

## Analysis Structure
When presented with the SAR Agent's output, begin your message with 'IMPROVEMENTS NEEDED:' conduct a thorough review and provide feedback in the following areas:

### 1. Completeness of Analysis
- Assess whether all required sections have been adequately addressed.
- Identify any missing or underdeveloped points in the rationale.

### 2. Quality of Scientific Evidence
- Evaluate the strength and relevance of the scientific evidence presented.
- Assess the appropriateness of the Harvard-style citations.
- Identify any claims that require additional supporting evidence.

### 3. Clarity and Coherence
- Evaluate the overall structure and flow of the rationale.
- Assess the clarity of explanations and arguments.
- Suggest improvements for better organization or presentation of ideas.

### 4. Critical Thinking and Depth
- Analyze the depth of reasoning in each section.
- Identify areas where more critical analysis or in-depth exploration is needed.

### 5. Relevance to Target and Disease
- Assess how well the rationale addresses the specific target and disease in question.
- Identify any irrelevant information or missed opportunities for disease-specific insights.

### 6. Potential Gaps or Oversights
- Highlight any potential blind spots or overlooked aspects in the rationale.
- Suggest additional areas of investigation that could strengthen the overall assessment.

### 7. Balance and Objectivity
- Evaluate the balance between positive and negative aspects of the target.
- Assess the objectivity of the analysis, identifying any potential biases.

### 8. Practical Implications
- Reflect on the practical implications of the rationale for drug discovery and development.
- Suggest ways to enhance the actionable insights provided in the assessment.

## Feedback Format
For each of the above areas, provide:
1. A brief summary of your observations.
2. Specific examples from the SAR Agent's output to illustrate your points.
3. Constructive suggestions for improvement or further exploration.

## Conclusion
Summarize your overall assessment of the scientific rationale, 
highlighting key strengths and priority areas for improvement. 
Provide actionable recommendations for enhancing the quality and utility of the SAR Agent's output.
      """
)

# relevance_agent = autogen.AssistantAgent(
#     name="relevance_agent",
#     llm_config=llm_config,
#     max_consecutive_auto_reply=2,
#     system_message=""" As the 'relevance_agent' your role is to extract relevant text to the help answer the question and pass them to SAR for use. 
#                     Do not skip any papers. provide direct quotes and summaries you think will be helpful. Provide a usefulness score out of 100. 
#                     Follow this layout:

#                     (paper title and DOI):
#                     (quote):
#                     (summary):  
#                     (usefulness score): 
                
                    
                    
#                     """,)

def state_transition_function(last_speaker, groupchat):
    """Choose the next speaker from the previous speaker and its last message.

    Routing table:
      * User_Proxy        -> Moderator
      * Moderator         -> SAR
      * Literature_Agent  -> SAR
      * SAR               -> RAR if the message contains the "FINAL REPORT 1"
                             marker, Moderator if it contains "FINAL REPORT 2",
                             otherwise Literature_Agent
      * RAR               -> SAR if the message contains "IMPROVEMENTS NEEDED",
                             otherwise Moderator (final presentation)
      * anyone else       -> Moderator

    The report markers are matched with either spaces or underscores
    ("FINAL REPORT 1" / "FINAL_REPORT_1"), because the two markers were
    originally spelled inconsistently.

    Args:
        last_speaker: the agent that produced the most recent message.
        groupchat: object exposing ``messages``, a list of message dicts.

    Returns:
        The agent that should speak next.
    """
    messages = groupchat.messages

    # The last message can have no text at all: tool-call messages carry
    # content=None, and the history may still be empty. Treat both as "".
    last_content = messages[-1].get("content") if messages else ""
    if last_content is None:
        last_content = ""
    elif not isinstance(last_content, str):
        last_content = str(last_content)

    # Underscore and space spellings of the report markers are equivalent.
    normalized = last_content.replace("_", " ")

    if last_speaker == user_proxy:
        return Moderator
    if last_speaker == Moderator:
        return scientific_rationale
    if last_speaker == literature_agent:
        return scientific_rationale
    if last_speaker == scientific_rationale:
        if "FINAL REPORT 1" in normalized:
            return reflection_agent
        if "FINAL REPORT 2" in normalized:
            return Moderator
        # No report marker yet: hand over to the literature agent.
        return literature_agent
    if last_speaker == reflection_agent:
        if "IMPROVEMENTS NEEDED" in last_content:
            return scientific_rationale
        # If no improvements needed, go back to Moderator for final presentation
        return Moderator
    return Moderator

# Update the GroupChat instantiation
# Group chat whose next speaker is chosen by state_transition_function.
# NOTE(review): state_transition_function can also return literature_agent and
# reflection_agent, which are not listed in `agents` below — confirm whether
# they should be members before raising max_round.
constrained_groupchat = GroupChat(
    agents=[user_proxy, Moderator, scientific_rationale],#, safety_officer, target_assessment,
    messages=[],
    max_round=3,  # NOTE(review): earlier comment said "increased to allow for more interactions", but 3 is small — confirm the intended value
    speaker_selection_method=state_transition_function,
    send_introductions=True,
)

# Short role descriptions for the two agents (presumably what
# send_introductions shares with the group — TODO confirm).
Moderator.description = "overseeing the process and provides guidance to other agents."
scientific_rationale.description = "provides a comprehensive scientific rationale for the given target in the specified disease which will be reviewed by agent RAR."
# literature_agent.description = "provides relevant literature and references related to the prompt."
# abstract_agent.description = "summarises the key points from the provided list of abstracts and references."
# reflection_agent.description = "RAR reviews the scientific rationale provided by the SAR Agent and offers insights to improve its comprehensiveness, accuracy, and relevance."

# allowed_transitions = {
#     user_proxy: [Moderator],
#     Moderator: [scientific_rationale],
#     scientific_rationale: [literature_agent, Moderator],  # Allow SAR to report back to Moderator
#     literature_agent: [abstract_agent],
#     abstract_agent: [scientific_rationale],
#     reflection_agent: [scientific_rationale],
# }


# Manager that runs constrained_groupchat; it uses the shared llm_config.
manager = autogen.GroupChatManager(llm_config=llm_config, groupchat=constrained_groupchat)


# Async entry point for the PubMed / PaperQA search; the blocking work runs in a worker thread.
async def pubmed_paperqa(query: str) -> str:
    """Run the PubMed search pipeline for ``query`` and return the results as JSON text.

    The same string is passed to ``PubMedProcessor.full_process`` twice: once
    as the PubMed search query and once as the document query. The number of
    results is fixed at 10.

    Args:
        query: free-text search string.

    Returns:
        The dictionary returned by ``full_process``, serialised with
        ``json.dumps(..., indent=4)``.
    """
    max_results: int = 10
    # Contact e-mail handed to PubMedProcessor. It can be overridden with the
    # PUBMED_EMAIL environment variable; the previous hard-coded address stays
    # as the default so existing runs behave the same.
    email = os.getenv("PUBMED_EMAIL") or "sanazkazemi@hotmail.com"
    print(f"pubmed_paperqa called with query: {query}, max_results: {max_results}")
    pubmed_instance = PubMedProcessor(email)
    # full_process is a blocking call, so it is pushed to a worker thread to
    # keep the event loop free.
    results_dict = await asyncio.to_thread(
        pubmed_instance.full_process, query, query, max_results
    )
    return json.dumps(results_dict, indent=4)


# def pubmed_api(query: str) -> str:
#     max_results: int = 10
#     print(f"pubmed_api called with query: {query}, max_results: {max_results}")
#     pubmed_instance = Pubmed_API(email="your_email@example.com")  # Replace with your actual email
#     papers = pubmed_instance.query(query, max_results)
#     return pubmed_instance.format_results(papers)

# def paperqa_


# Not registered for the Moderator, because it interferes with the other specialist agents by suggesting tool calls.
# A few options - 

# Register the pubmed_api tool: SAR is the caller and Literature_Agent is the executor.

def pubmed_api_wrapper(query: str) -> str:
    """Synchronous entry point for ``pubmed_paperqa``, for use as a tool function.

    Args:
        query: free-text search string forwarded to ``pubmed_paperqa``.

    Returns:
        The JSON text produced by ``pubmed_paperqa``.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread (plain script or worker thread).
        # asyncio.get_event_loop() is deprecated for this case and raises in
        # non-main threads, so start a fresh loop instead.
        return asyncio.run(pubmed_paperqa(query))
    # A loop is already running (e.g. inside Jupyter). Re-entering it only
    # works because nest_asyncio.apply() was called at the top of the file.
    return loop.run_until_complete(pubmed_paperqa(query))

# Expose pubmed_api_wrapper to the agents under the tool name "pubmed_api":
# SAR (scientific_rationale) is the caller and Literature_Agent the executor.
# The description below is runtime text that is passed to register_function.
autogen.agentchat.register_function(
    f=pubmed_api_wrapper,
    caller=scientific_rationale,
    executor=literature_agent,
    name="pubmed_api",
    description="""
    Search PubMed for literature related to the query.
    This function should be used when you need to retrieve peer-reviewed scientific information from literature:
    - Finding evidence to support scientific claims
    - Gathering information on recent advancements in a specific area 
    - Identify key papers in a particular field
    - Check the current state of knowledge on a specific topic
    - You want to cite specific papers to support your arguments
    - You need to explore the current research landscape on a topic
    
    Returns: a json dictionary in the following format:
    {
        "summary: [Summary text here]": {
                    "original_text": "[Full extract text here]",
                    "source": {
                    "chunk_id": "[Identifier for the document chunk]",
                    "full_citation": "[Full citation of the source]"
                    },
                    "relevance_score": [Numeric score]
                }
            }
    """
)

# Scoring prompt.





In [11]:
@weave.op()
def initiate_chat(message: str) -> "autogen.ChatResult":
    """Start one group-chat run for ``message`` and return its result.

    The user proxy opens the conversation with the group-chat manager. The
    message is sent with a leading "." prepended, and any previous chat
    history is cleared first.

    Args:
        message: the task prompt. Do not use abbreviations in the target
            disease.

    Returns:
        Whatever ``user_proxy.initiate_chat`` returns (a chat-result object,
        not a plain string).
    """
    chat_result = user_proxy.initiate_chat(
        manager,
        message=f""".{message}""",
        clear_history=True,
    )
    return chat_result
    
    # Extract the full conversation from the groupchat messages
# Initialise Weave under the 'Scientific_Rationale_Dev' project name before
# the traced initiate_chat op is called.
weave.init('Scientific_Rationale_Dev')

# Run the group chat on the NK3R / menopausal vasomotor symptoms task.
# The markdown prompt below is runtime text sent to the agents as-is.
conversation = initiate_chat("""# AI Assistant for Drug Discovery

## Target Information
- **Given target:** Neurokinin 3 receptor
- **Given disease:** vasomotor symptoms due to menopause
- **Given mode of action:** Antagonism of the neurokinin 3 receptor (NK3R) GPCR

## Context
Preclinical research has strongly implicated heightened signalling of neurokinin B together with its receptor (neurokinin 3 receptor [NK3R]) in causing menopausal vasomotor symptoms, via the hypothalamic median pre-optic nucleus and autonomic thermoregulatory pathway. 
Subsequent phase 2b trials of NK3R antagonists showed that pharmacological blockade of NK3R attenuated vasomotor symptoms almost as effectively as hormone treatment, without the need for oestrogen exposure.

## Task 1: Develop a Scientific Rationale

Develop a scientific rationale for Neurokinin 3 receptor (NK3R) GPCR in Vasomotor symptoms due to menopause.

Highlight the working hypothesis for the clinical target rationale and human biology evidence by minimum 2000 words.

Describe as much as possible the evidence in humans or in human tissue that link the target, target space or approach to the pathogenesis of interest. If known, also describe here the wanted mode of action with regards to desired clinical outcome. Please avoid including only pre-clinical data in this section.

### Structure

#### Working Hypothesis
- Create a detailed description of the following idea: Develop an inhibitor of the neurokinin 3 receptor (NK3R) GPCR for the treatment of vasomotor symptoms due to menopause.
- Is there a significant unmet medical need?
- Is it suitable for combination therapy?
- Which predictive biomarkers exist for the target related to the disease?
  - Provide a detailed description of existing clinically relevant biomarkers.

#### Clinical Target Rationale
- How relevant is the target location to the disease biology?
- How is the target expression altered in human disease?
- How is the target involved in the physiological process relevant to the disease?
- Which phenotypes and genotypes were identified for the target?
- How is the genetic link between the target and the disease?
- Describe the evidence provided in clinics or by tools acting on the pathway where the target is involved.
- Which kind of target modulation is required to treat the disease?

#### Challenges for the Drug Discovery Program
- Check the following idea for details on small molecule compounds: Develop an inhibitor of the neurokinin 3 receptor (NK3R) GPCR for the treatment of vasomotor symptoms due to menopause.
- Is an 'information driven approach' (IDA) strategy based on available small molecules possible?
  - Which small molecular modulators of the target are known?
  - Which inhibitors, antagonists, agonists, negative allosteric modulators (NAM), positive allosteric modulators (PAM) are required for target modulation in the given disease? 
- Which patients would respond to the therapy?
- Is the proposed mode of action on the target desirable and commercially viable in a clinical setting?
- What are advantages and disadvantages of different therapeutic modalities (antibodies, small molecules, antisense oligonucleotides, PROTACs, molecular glue, peptide macrocycles, and so on) for tackling the target?

#### Alternative Indications
- Describe alternative indications for modulators of the target and explain why."""
)

# full texts? - look into this. PDF agent 


[33mUser_Proxy[0m (to chat_manager):

.# AI Assistant for Drug Discovery

## Target Information
- **Given target:** Neurokinin 3 receptor
- **Given disease:** vasomotor symptoms due to menopause
- **Given mode of action:** Antagonism of the neurokinin 3 receptor (NK3R) GPCR

## Context
Preclinical research has strongly implicated heightened signalling of neurokinin B together with its receptor (neurokinin 3 receptor [NK3R]) in causing menopausal vasomotor symptoms, via the hypothalamic median pre-optic nucleus and autonomic thermoregulatory pathway. 
Subsequent phase 2b trials of NK3R antagonists showed that pharmacological blockade of NK3R attenuated vasomotor symptoms almost as effectively as hormone treatment, without the need for oestrogen exposure.

## Task 1: Develop a Scientific Rationale

Develop a scientific rationale for Neurokinin 3 receptor (NK3R) GPCR in Vasomotor symptoms due to menopause.

Highlight the working hypothesis for the clinical target rationale and human bio

[runtime logging] log_function_use: autogen logger is None


[33mLiterature_Agent[0m (to chat_manager):

[33mLiterature_Agent[0m (to chat_manager):

[32m***** Response from calling tool (call_0dyxxqnrWZDDRefbRdbXMe7N) *****[0m
{
    "summary: The excerpt indicates that between \"45 and 97% of menopausal women\" experience vasomotor symptoms such as hot flushes/flashes (HFs) and night sweats, significantly impacting quality of life. Neurokinin 3 receptor antagonists (NK3Ras) have shown promising efficacy compared to serotonin norepinephrine reuptake inhibitors (SNRIs) in clinical trials. NK3Ra trials reported \"a statistically significant reduction in HF frequency by 62\u201393% from baseline,\" whereas SNRIs showed a reduction of \"48\u201367%.\" Additionally, NK3Ras demonstrated better tolerability with \"good overall tolerability,\" while SNRIs were linked to poor tolerability, especially concerning nausea.\n\n9": {
        "original_text": "Title: Neurokinin 3 Receptor Antagonists Compared With Serotonin Norepinephrine Reuptake Inhibito