In [8]:
from typing import Literal
from tqdm import tqdm
from langchain.agents import initialize_agent, load_tools, AgentType
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain_experimental.plan_and_execute import (
    load_chat_planner, load_agent_executor, PlanAndExecute
)

import pandas as pd

In [9]:
# load CSV
csv_dir = "../target_csv/"
csv_name = "train_data_smi_unique_target_name"
df = pd.read_csv(csv_dir + csv_name +".csv")

In [10]:
ReasoningStrategies = Literal["zero-shot-react", "plan-and-solve"]

def load_agent(
    tool_names: list[str],
    strategy: ReasoningStrategies = "zero-shot-react"
) -> LLMChain:
    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
    tools = load_tools(
        tool_names=tool_names,
        llm=llm
    )
    
    if strategy == "plan-and-solve":
        planner = load_chat_planner(llm)
        executor = load_agent_executor(llm, tools, verbose=True)
        return PlanAndExecute(planner=planner, executor=executor, verbose=True)

    return initialize_agent(
        tools=tools, llm=llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True
    )

In [11]:
# 에이전트 로드
tool_names = ["wikipedia",'pubmed']
agent_chain = load_agent(tool_names, strategy="plan-and-solve")

In [12]:
# Description 컬럼을 생성하여 각 protein_name에 대한 설명을 추가 및 저장
if "description" not in df.columns:
    df["description"] = ""

In [13]:
save_threshold = 10  # 열 개마다 저장
save_count = 0

In [15]:
for idx, protein_name in tqdm(enumerate(df["Target Name"]), desc="Fetching descriptions", total=len(df)):
    if not df.at[idx, "description"]:  # 이미 설명이 채워진 경우 건너뜀
        query = f"Please provide a brief description of {protein_name}."
        try:
            description = agent_chain.invoke(query, handle_parsing_errors=True)
        except ValueError as e:
            description = f"Error: {str(e)}"
        
        df.at[idx, "description"] = description
        save_count += 1
        hex_count = 'A'

        # 열 개의 설명이 추가되면 저장
        if save_count >= save_threshold:
            df.to_csv(csv_name +" cnt,"+ hex_count + save_count + ".csv", index=False)
            save_count = 0  # 저장 후 카운트 초기화
            hex_count += 1

Fetching descriptions:   0%|          | 0/61 [00:00<?, ?it/s]



[1m> Entering new PlanAndExecute chain...[0m
steps=[Step(value='Define what Tyrosine-protein kinase receptor RET is, including its classification and general function.'), Step(value='Describe the role of RET in cellular signaling and its importance in development and physiology.'), Step(value='Mention any known associations with diseases or conditions related to RET.'), Step(value="Summarize the key points in a concise manner.\nGiven the above steps taken, please respond to the user's original question. \n")]

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to gather information about Tyrosine-protein kinase receptor RET, including its classification and general function. This will likely involve looking up relevant scientific and biological information. 

Action:
```
{
  "action": "wikipedia",
  "action_input": "Tyrosine-protein kinase receptor RET"
}
```
[0m
Observation: [36;1m[1;3mPage: Receptor tyrosine kinase
Summary: Receptor tyrosine kinases (RT



  lis = BeautifulSoup(html).find_all('li')



Observation: [36;1m[1;3mPage: RET proto-oncogene
Summary: The RET proto-oncogene encodes a receptor tyrosine kinase for members of the glial cell line-derived neurotrophic factor (GDNF) family of extracellular signalling molecules.
RET loss of function mutations are associated with the development of Hirschsprung's disease, while gain of function mutations are associated with the development of various types of human cancer, including medullary thyroid carcinoma, multiple endocrine neoplasias type 2A and 2B, pheochromocytoma and parathyroid hyperplasia.

Page: Hirschsprung's disease
Summary: Hirschsprung's disease (HD or HSCR) is a birth defect in which nerves are missing from parts of the intestine. The most prominent symptom is constipation. Other symptoms may include vomiting, abdominal pain, diarrhea and slow growth. Most children develop signs and symptoms shortly after birth. However, others may be diagnosed later in infancy or early childhood. About half of all children with 

Fetching descriptions:   2%|▏         | 1/61 [02:51<2:51:01, 171.03s/it]

[32;1m[1;3mThought: I need to summarize the key points regarding the Tyrosine-protein kinase receptor RET based on the previous steps. 

Action:
```
{
  "action": "Final Answer",
  "action_input": "The Tyrosine-protein kinase receptor RET is a receptor tyrosine kinase encoded by the RET proto-oncogene, crucial for signaling in the glial cell line-derived neurotrophic factor (GDNF) family. It is essential for the development of the enteric nervous system and regulates gastrointestinal motility. Mutations in RET can lead to developmental disorders like Hirschsprung disease and are associated with cancers such as medullary thyroid carcinoma and multiple endocrine neoplasias."
}
```
[0m

[1m> Finished chain.[0m
*****

Step: Summarize the key points in a concise manner.
Given the above steps taken, please respond to the user's original question. 


Response: The Tyrosine-protein kinase receptor RET is a receptor tyrosine kinase encoded by the RET proto-oncogene, crucial for signaling i

Fetching descriptions:   3%|▎         | 2/61 [04:57<2:22:11, 144.60s/it]

[32;1m[1;3mThought: I need to summarize the key points regarding the Tyrosine-protein kinase receptor RET and the Macrophage-stimulating protein (MSP) receptor based on the previous steps. 

Action:
```
{
  "action": "Final Answer",
  "action_input": "The Tyrosine-protein kinase receptor RET is a receptor tyrosine kinase essential for signaling in the GDNF family, crucial for the development of the enteric nervous system and gastrointestinal motility. Mutations in RET are linked to Hirschsprung disease and various cancers. The Macrophage-stimulating protein (MSP) receptor, also known as RON, regulates inflammation and immune responses by inhibiting nitric oxide synthesis in macrophages, playing a role in chronic inflammation, autoimmune diseases, and potentially facilitating immune escape in tumors."
}
```
[0m

[1m> Finished chain.[0m
*****

Step: Summarize the key points in a brief description.

Given the above steps taken, please respond to the user's original question. 


Respo

Fetching descriptions:   5%|▍         | 3/61 [05:24<1:28:01, 91.06s/it] 

[32;1m[1;3mThought: I need to summarize the key points regarding the Tyrosine-protein kinase receptor RET, the Macrophage-stimulating protein (MSP) receptor, and Cyclin-dependent kinase 7 (CDK7) based on the previous steps taken. 

Action:
```
{
  "action": "Final Answer",
  "action_input": "The Tyrosine-protein kinase receptor RET is essential for signaling in the GDNF family, crucial for the development of the enteric nervous system and gastrointestinal motility, with mutations linked to Hirschsprung disease and various cancers. The Macrophage-stimulating protein (MSP) receptor, also known as RON, regulates inflammation and immune responses, playing a role in chronic inflammation, autoimmune diseases, and potentially facilitating immune escape in tumors. Cyclin-dependent kinase 7 (CDK7) is a key regulator of the cell cycle and transcription, implicated in various cancers and emerging as a promising therapeutic target."
}
```
[0m

[1m> Finished chain.[0m
*****

Step: Summarize th

Fetching descriptions:   7%|▋         | 4/61 [05:57<1:04:34, 67.96s/it]

[32;1m[1;3mThought: I need to summarize the key points from the previous steps regarding the RET receptor, MSP receptor, CDK7, and ATM, and provide a concise response to the user's original question.

Action:
```
{
  "action": "Final Answer",
  "action_input": "The Tyrosine-protein kinase receptor RET is essential for signaling in the GDNF family, crucial for the development of the enteric nervous system and gastrointestinal motility, with mutations linked to Hirschsprung disease and various cancers. The Macrophage-stimulating protein (MSP) receptor, also known as RON, regulates inflammation and immune responses, playing a role in chronic inflammation, autoimmune diseases, and potentially facilitating immune escape in tumors. Cyclin-dependent kinase 7 (CDK7) is a key regulator of the cell cycle and transcription, implicated in various cancers and emerging as a promising therapeutic target. Serine-protein kinase ATM, or Ataxia-telangiectasia mutated, is vital for responding to DNA dam

Fetching descriptions:   8%|▊         | 5/61 [07:42<1:16:09, 81.61s/it]

[32;1m[1;3mThought: I need to summarize the information regarding the various proteins and their roles in the body, including RET, MSP receptor, CDK7, ATM, and Plasma kallikrein, based on the previous steps taken.

Action:
```
{
  "action": "Final Answer",
  "action_input": "The Tyrosine-protein kinase receptor RET is essential for signaling in the GDNF family, crucial for the development of the enteric nervous system and gastrointestinal motility, with mutations linked to Hirschsprung disease and various cancers. The Macrophage-stimulating protein (MSP) receptor, also known as RON, regulates inflammation and immune responses, playing a role in chronic inflammation, autoimmune diseases, and potentially facilitating immune escape in tumors. Cyclin-dependent kinase 7 (CDK7) is a key regulator of the cell cycle and transcription, implicated in various cancers and emerging as a promising therapeutic target. Serine-protein kinase ATM, or Ataxia-telangiectasia mutated, is vital for respond

Fetching descriptions:  10%|▉         | 6/61 [08:27<1:03:20, 69.10s/it]

[32;1m[1;3mThought: I need to summarize the key points regarding the various proteins and their roles in cellular processes and diseases based on the previous steps taken.

Action:
```
{
  "action": "Final Answer",
  "action_input": "The Tyrosine-protein kinase receptor RET is essential for signaling in the GDNF family, crucial for the development of the enteric nervous system and gastrointestinal motility, with mutations linked to Hirschsprung disease and various cancers. The Macrophage-stimulating protein (MSP) receptor, also known as RON, regulates inflammation and immune responses, playing a role in chronic inflammation, autoimmune diseases, and potentially facilitating immune escape in tumors. Cyclin-dependent kinase 7 (CDK7) is a key regulator of the cell cycle and transcription, implicated in various cancers and emerging as a promising therapeutic target. Serine-protein kinase ATM, or Ataxia-telangiectasia mutated, is vital for responding to DNA damage, maintaining genomic sta

Fetching descriptions:  11%|█▏        | 7/61 [09:00<51:31, 57.25s/it]  

[32;1m[1;3mThought: I need to summarize the key points about STAT3 based on the previous steps taken. 

Action:
```
{
  "action": "Final Answer",
  "action_input": "Signal transducer and activator of transcription 3 (STAT3) is a transcription factor that plays a crucial role in the JAK-STAT signaling pathway, which regulates various cellular processes such as immunity, cell division, and survival. Activated by cytokines and growth factors, STAT3 undergoes phosphorylation, leading to its dimerization and translocation to the nucleus, where it regulates the transcription of target genes that promote cell proliferation and survival. Dysregulation of STAT3 is significantly associated with various diseases, particularly cancers, where it can contribute to tumorigenesis by promoting cell growth and survival. Additionally, STAT3 influences immune responses, affecting inflammation and the function of immune cells, and its persistent activation is linked to chronic inflammation and immune dis

Fetching descriptions:  13%|█▎        | 8/61 [09:25<41:34, 47.06s/it]

[32;1m[1;3mThought: I need to summarize the information about Rho-associated protein kinase 2 (ROCK2) based on the previous steps taken. 

Action:
```
{
  "action": "Final Answer",
  "action_input": "Rho-associated protein kinase 2 (ROCK2) is a serine-threonine kinase that regulates the shape and movement of cells by acting on the cytoskeleton. It is involved in processes such as smooth muscle contraction, cell migration, and maintaining cell viability and morphology. ROCK2 is activated by the small GTPase RhoA and plays a crucial role in cytoskeletal dynamics, influencing cell shape and motility. Dysregulation of ROCK2 is implicated in various diseases, including cardiovascular diseases, cancer, and neurodegenerative disorders, making it a potential therapeutic target."
}
```
[0m

[1m> Finished chain.[0m
*****

Step: Summarize the information to provide a concise description of ROCK2.

Given the above steps taken, please respond to the user's original question. 


Response: Rho-a

Fetching descriptions:  15%|█▍        | 9/61 [09:54<35:47, 41.29s/it]

[32;1m[1;3mThought: I need to summarize the importance of JAK2 in medical research and its potential as a therapeutic target based on the previous steps taken.

Action:
```json
{
  "action": "Final Answer",
  "action_input": "JAK2 (Janus kinase 2) is a crucial non-receptor tyrosine kinase involved in signaling pathways for various cytokine receptors, playing a significant role in hematopoiesis and immune response. Its dysregulation, particularly through mutations like JAK2 V617F, is associated with myeloproliferative neoplasms (MPNs) such as essential thrombocythemia and polycythemia vera. This makes JAK2 a significant target for therapeutic intervention, as inhibiting its activity can help manage these conditions and potentially prevent the progression to acute myeloid leukemia (AML). JAK2's role in cell proliferation and differentiation underscores its importance in medical research, particularly in developing targeted therapies for hematological malignancies."
}
```
[0m

[1m> Fi

Fetching descriptions:  15%|█▍        | 9/61 [10:43<1:01:58, 71.51s/it]

[32;1m[1;3mThought: I need to summarize the information provided in the previous steps to respond to the user's original question effectively. The summary should include key points about each protein or receptor discussed, highlighting their functions, roles in diseases, and significance in health.

Action:
```json
{
  "action": "Final Answer",
  "action_input": "The Tyrosine-protein kinase receptor RET is essential for signaling in the GDNF family, crucial for the development of the enteric nervous system and gastrointestinal motility, with mutations linked to Hirschsprung disease and various cancers. The Macrophage-stimulating protein (MSP) receptor, also known as RON, regulates inflammation and immune responses, playing a role in chronic inflammation, autoimmune diseases, and potentially facilitating immune escape in tumors. Cyclin-dependent kinase 7 (CDK7) is a key regulator of the cell cycle and transcription, implicated in various cancers and emerging as a promising therapeutic




TypeError: can only concatenate str (not "int") to str

In [None]:
# 모든 작업이 끝난 후 마지막으로 저장 (남아있는 데이터 저장)
if save_count > 0:
    df.to_csv(csv_name + "_descriptions.csv", index=False)

In [None]:
# # 예시 도구 이름 리스트
# tool_names = ["wikipedia"]

# # 에이전트 로드 (제로샷 모델 사용)
# agent_chain = load_agent(tool_names, strategy="zero-shot-react")

# # 에이전트 실행
# result = agent_chain.run("Explain about T1-Faker")

# print("Result:\n", result)