In [1]:
import nest_asyncio
nest_asyncio.apply()
import asyncio  # Add this import

import os
from dotenv import load_dotenv
import json
import autogen
import tempfile
from autogen.coding import LocalCommandLineCodeExecutor
from autogen import GroupChat
import weave
from APIs.combinedapi import PubMedProcessor


# Load environment variables from .env file

load_dotenv()

 # Import the PubMedAPI class for literature search


openai_api_key = os.getenv("OPENAI_API_KEY")

# Load the config list from JSON
llm_config ={
        "model": "gpt-4o-mini",
        "temperature": 0.3, # temperature controls the randomness of the output in sampling
        "api_key": openai_api_key, 
        "top_p": 1.0, # exceeds predefined probability threshold also known as nucleus sampling
        #"top_k": 40, # controls the size of the model's vocabulary
    }

SAR_config = {
        "model": "gpt-4o-mini",
        "temperature": 0.3, # temperature controls the randomness of the output in sampling
        "api_key": openai_api_key,
        "top_p": 1.0, # exceeds predefined probability threshold also known as nucleus sampling
        }


In [2]:
from autogen import AssistantAgent

scientific_rationale = AssistantAgent(
    "scientific_rationale",
    llm_config = llm_config,
    human_input_mode='NEVER',
    system_message=""" You are 'SAR', an expert in target discovery and assessment at a pharmaceutical company, you get commission when you assess and discovery a new target. 
                        You provide scientific rationales to support the selection of drug targets for a specific disease.
                        You are meticulous in your analysis and provide evidence-based assessments to guide drug discovery efforts.
                        Your role is to:
                        1. Your primary role is to develop comprehensive scientific rationales for given targets in specified diseases using available literature. """
                        )

    

In [3]:
prompt = """# IMPORTANT:
Try to answer this question/instruction with step-by-step thoughts and make the answer more structural.
Use `\n\n` to split the answer into several paragraphs.
Just respond to the instruction directly. DO NOT add additional explanations or introducement in the answer unless you are asked to.
'''Develop a scientific rationale for the following:
                             
                             You are a AI assistant with a background in drug discovery.

    Given target: Gamma secretase
    Given disease: Alzheimer's disease
    Given mode of action: Gamma secretase is a multi-subunit protease complex that cleaves type I transmembrane proteins, including the amyloid precursor protein (APP) leading to the generation of amyloid-beta (Aβ) peptides.

    Context:
     Aβ is a family of secreted peptides generated from the sequential cleavages of the type 1 membrane protein APP by beta-secretase (BACE) and gamma-secretase (GSEC), respectively. 
      BACE cleaves APP in the luminal domain, releasing the N-terminal soluble APPβ domain and leaving the C-terminal fragment, APP-CTF, which remains in the membrane. 
      Subsequently, the APP-CTF is recruited to GSEC, a complex comprising four subunits, including PS, which harbors the active site. GSEC first cuts APP-CTF at the epsilon-cleavage site located close to the inner leaflet of the membrane. 
      This cleavage event produces either Aβ48 or Aβ49 and the APP intracellular domain (AICD). The membrane-retained Aβ48 or Aβ49 is then further processed by GSEC in a continuous cascade of proteolytical events at every third of fourth amino acid, where the N-terminal product of each reaction becomes the substrate for the next GSEC cleavage event.
       Accordingly, GSEC processes APP-CTF along two main product lines, Aβ49 → 46 → 43 → 40 → 37… and Aβ48 → 45 → 42 → 38…, respectively (Takami et al., 2009; Matsumura et al., 2014; Olsson et al., 2014). During this processing cascade, Aβ43 and shorter Aβ peptides stochastically escape further processing by GSEC and are released into the extracellular space. 
      As a result, Aβ peptides varying from 30 to 43 amino acids in length are secreted into the extracellular space. Among all secreted Aβ, Aβ40 is the most abundant in human CSF, followed by Aβ38, Aβ42, and Aβ37 (Liu et al., 2022). In cognitively normal individuals, Aβ42 and Aβ43 represent a smaller portion of the total secreted Aβ (Liu et al., 2022).
         These longer forms of Aβ seed the formation of Aβ-amyloid aggregates, a key step in the formation of amyloid plaques (Veugelen et al., 2016), as illustrated in Figure 1. Aβ42, which is produced in higher amounts than Aβ43, is the most abundant Aβ in amyloid plaques (Welander et al., 2009).


    Task 1: Develop a scientific rationale for Gamma secretase in Alzheimer's disease.

    Highlight the working hypothesis for the clinical target rationale and human biology evidence by minimum 2000 words.

    Describe as much as possible the evidence in humans or in human tissue that link the target, target space or approach to the pathogenesis of interest.
    If known, also describe here the wanted mode of action with regards to desired clinical outcome.
    Please avoid including only pre-clinical data in this section.

    Use the following structure and provide a detailed description for each point:
    - Working hypothesis:
    - Create a detailed description of the following idea: Develop an inhibitor of the neurokinin 3 receptor (NK3R) GPCR for the treatment of vasomotor symptoms due to menopause.
    - Is there are significant unmet medical need?
    - Is it suitable for combination therapy?
    - Which predictive biomarkers exist for the target related to the disease?
        - Provide a detailed description of existing clinical relevant biomarkers.

    - Clinical target rationale:
    - How relevant is the target location to the disease biology?
    - How it the target expression altered in human disease?
    - How is the target involved in the physiological process relevant to the disease?
    - Which phenotypes and genotypes were identified for the target?
    - How is the genetic link between the target and the disease?
    - Describe the evidence provided in clinics or by tools acting on the pathway where the target is involved.
    - Which kind of target modulation is required to treat the disease?

    - Challenges for the drug discovery program related to the target.
    - Check the following idea for details on small molecule compounds: Develop an inhibitor of the neurokinin 3 receptor (NK3R) GPCR for the treatment of vasomotor symptoms due to menopause..
    - Is a 'information driven approach' (IDA) strategy based on available small molecules possible?
        - Which small molecular modulators of the target known?
        - Which inhibitors, antagonists, agonists, negative allosteric modulators (NAM), positive allosteric modulators (PAM) are required for target modulation in the given disease? 
    - Which patients would respond the therapy?
    - Is the proposed mode of action on the target desirable and commercially viable in a clinical setting?
    - What are advantages and disadvantages of different therapeutic modalities (antibodies, small molecules, antisense oligonucleotides, PROTACs, molecular glue, peptide macrocycles, and so on) for tackling the target?

    - Alternative indications:
    - Describe alternative indication for modulators of the target and explain why."""

In [4]:
from autogen import UserProxyAgent

user_proxy = UserProxyAgent("user_proxy", 
            code_execution_config=False,
            )


In [5]:
@weave.op()
def initiate_chat(message: str) -> str  : 
    # Start the chat

  chat_result = user_proxy.initiate_chat(
      recipient=scientific_rationale,
        message=f""".{message}""",
        clear_history=True
        
    )
  
  return chat_result.summary


    

In [6]:
results = initiate_chat(prompt)

[33muser_proxy[0m (to scientific_rationale):

.# IMPORTANT:
Try to answer this question/instruction with step-by-step thoughts and make the answer more structural.
Use `

` to split the answer into several paragraphs.
Just respond to the instruction directly. DO NOT add additional explanations or introducement in the answer unless you are asked to.
'''Develop a scientific rationale for the following:
                             
                             You are a AI assistant with a background in drug discovery.

    Given target: Gamma secretase
    Given disease: Alzheimer's disease
    Given mode of action: Gamma secretase is a multi-subunit protease complex that cleaves type I transmembrane proteins, including the amyloid precursor protein (APP) leading to the generation of amyloid-beta (Aβ) peptides.

    Context:
     Aβ is a family of secreted peptides generated from the sequential cleavages of the type 1 membrane protein APP by beta-secretase (BACE) and gamma-secretase (

In [7]:
print(results)

Understood. If you have any further questions or need assistance, feel free to ask.


In [8]:

async def run_conversation(prompt):
    chat_result = await user_proxy.a_initiate_chat(
        scientific_rationale,
        message=prompt,
    )
    return chat_result.summary

async def run_multiple_conversations(prompt, num_conversations=3):
    results = []
    for i in range(num_conversations):
        result = await run_conversation(prompt)
        results.append(f"Conversation {i+1} result:\n{result}")
    return "\n\n".join(results)

# Run the conversations
import asyncio

prompt = prompt
results = asyncio.run(run_multiple_conversations(prompt))

# Print or save the results
print(results)

# Optionally, save to a file
with open("conversation_results.txt", "w") as f:
    f.write(results)

[33muser_proxy[0m (to scientific_rationale):

# IMPORTANT:
Try to answer this question/instruction with step-by-step thoughts and make the answer more structural.
Use `

` to split the answer into several paragraphs.
Just respond to the instruction directly. DO NOT add additional explanations or introducement in the answer unless you are asked to.
'''Develop a scientific rationale for the following:
                             
                             You are a AI assistant with a background in drug discovery.

    Given target: Gamma secretase
    Given disease: Alzheimer's disease
    Given mode of action: Gamma secretase is a multi-subunit protease complex that cleaves type I transmembrane proteins, including the amyloid precursor protein (APP) leading to the generation of amyloid-beta (Aβ) peptides.

    Context:
     Aβ is a family of secreted peptides generated from the sequential cleavages of the type 1 membrane protein APP by beta-secretase (BACE) and gamma-secretase (G