In [1]:
from typing import Literal
from tqdm import tqdm
from langchain.agents import initialize_agent, load_tools, AgentType
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain_experimental.plan_and_execute import (
    load_chat_planner, load_agent_executor, PlanAndExecute
)

import pandas as pd

In [2]:
# Read the input table. csv_name holds the file stem (no extension); the
# ".csv" suffix is appended here when building the path to read.
csv_name = "train_data_smi_check_target_protein"
df = pd.read_csv(f"{csv_name}.csv")

In [3]:
ReasoningStrategies = Literal["zero-shot-react", "plan-and-solve"]

def load_agent(
    tool_names: list[str],
    strategy: ReasoningStrategies = "zero-shot-react"
) -> LLMChain:
    """Build an agent over the named tools using the chosen reasoning strategy.

    Args:
        tool_names: Tool identifiers handed to ``load_tools``.
        strategy: ``"plan-and-solve"`` builds a ``PlanAndExecute`` chain from a
            chat planner and an agent executor. Any other value yields the
            zero-shot ReAct agent created by ``initialize_agent``.

    Returns:
        The constructed agent object.
        NOTE(review): the signature is annotated ``LLMChain``, but the body
        returns either a ``PlanAndExecute`` instance or whatever
        ``initialize_agent`` returns -- confirm and tighten the annotation.
    """
    # One chat model (temperature 0) is shared by the tools, planner and executor.
    chat_model = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
    toolset = load_tools(tool_names=tool_names, llm=chat_model)

    # Guard clause: everything except plan-and-solve uses the zero-shot agent.
    if strategy != "plan-and-solve":
        return initialize_agent(
            tools=toolset,
            llm=chat_model,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )

    return PlanAndExecute(
        planner=load_chat_planner(chat_model),
        executor=load_agent_executor(chat_model, toolset, verbose=True),
        verbose=True,
    )

In [4]:
# Load the agent: Wikipedia is the only tool, driven by the plan-and-solve strategy.
tool_names = ["wikipedia"]
agent_chain = load_agent(tool_names=tool_names, strategy="plan-and-solve")

In [5]:
# Ensure a "description" column exists to hold the text generated for each
# protein_name. A column that is already present is left untouched.
if "description" not in df:
    df["description"] = ""

In [6]:
# Checkpoint bookkeeping: save_threshold is how many newly added descriptions
# trigger a save (every ten); save_count tracks additions since the last save.
save_threshold, save_count = 10, 0

In [7]:
def _is_blank(value) -> bool:
    """Return True when a description cell holds no usable text.

    Empty cells read back by ``pd.read_csv`` are NaN, and NaN is truthy, so a
    plain ``not value`` test would treat unfilled rows of a reloaded CSV as
    already done.
    """
    return pd.isna(value) or not str(value).strip()


def _checkpoint_name(sequence: int, pending: int) -> str:
    """Build a checkpoint file name from the hex sequence number and row count."""
    return f"{csv_name} cnt,{sequence:X}{pending}.csv"


# Sequence number embedded (as uppercase hex) in checkpoint file names. It is
# an int initialised once, outside the loop, starting at 0xA to mirror the
# original initial value "A". Re-running this cell restarts the sequence; each
# checkpoint is a full snapshot of df, so overwriting an older one loses nothing.
hex_count = 0xA

# Keep the column as object dtype so text can be written into it even when it
# was loaded from disk as an all-NaN float column.
df["description"] = df["description"].astype(object)

try:
    # Iterate over (index label, value) pairs: df.at is label-based, so the
    # positional counter from enumerate() is only correct for a default index.
    for idx, protein_name in tqdm(
        df["protein_name"].items(), desc="Fetching descriptions", total=len(df)
    ):
        # Skip rows that already have a description.
        if not _is_blank(df.at[idx, "description"]):
            continue

        query = f"Please provide a brief description of {protein_name}."
        try:
            # NOTE(review): handle_parsing_errors is passed to invoke() as in
            # the original; it is presumably an executor-level option -- confirm
            # it has any effect here.
            result = agent_chain.invoke(query, handle_parsing_errors=True)
        except ValueError as e:
            description = f"Error: {str(e)}"
        else:
            # Presumably invoke() returns a mapping whose "output" entry is the
            # final answer -- verify against the LangChain version in use.
            # Store only that text rather than the whole result object.
            if isinstance(result, dict):
                description = str(result.get("output", result))
            else:
                description = str(result)

        df.at[idx, "description"] = description
        save_count += 1

        # Save a checkpoint every save_threshold newly added descriptions.
        if save_count >= save_threshold:
            df.to_csv(_checkpoint_name(hex_count, save_count), index=False)
            save_count = 0  # reset the counter after saving
            hex_count += 1
except Exception:
    # Failures other than ValueError (e.g. a network error raised by a tool)
    # still abort the run, but descriptions gathered since the last checkpoint
    # are written out first so that work is not lost.
    if save_count > 0:
        df.to_csv(_checkpoint_name(hex_count, save_count), index=False)
    raise

Fetching descriptions:   0%|                                                        | 0/61 [00:00<?, ?it/s]



[1m> Entering new PlanAndExecute chain...[0m
steps=[Step(value='Define what Tyrosine-protein kinase receptor RET is, including its classification and general function.'), Step(value='Describe the role of RET in cellular signaling and its importance in development and physiology.'), Step(value='Mention any known associations with diseases or conditions related to RET.'), Step(value="Summarize the key points in a concise manner.\n\nGiven the above steps taken, please respond to the user's original question. \n")]

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to look up information about Tyrosine-protein kinase receptor RET, including its classification and general function. This will help me provide a comprehensive definition. 

Action:
```
{
  "action": "wikipedia",
  "action_input": "Tyrosine-protein kinase receptor RET"
}
```
[0m
Observation: [36;1m[1;3mPage: Receptor tyrosine kinase
Summary: Receptor tyrosine kinases (RTKs) are the high-affinity c



  lis = BeautifulSoup(html).find_all('li')



Observation: [36;1m[1;3mPage: RET proto-oncogene
Summary: The RET proto-oncogene encodes a receptor tyrosine kinase for members of the glial cell line-derived neurotrophic factor (GDNF) family of extracellular signalling molecules.
RET loss of function mutations are associated with the development of Hirschsprung's disease, while gain of function mutations are associated with the development of various types of human cancer, including medullary thyroid carcinoma, multiple endocrine neoplasias type 2A and 2B, pheochromocytoma and parathyroid hyperplasia.

Page: Hirschsprung's disease
Summary: Hirschsprung's disease (HD or HSCR) is a birth defect in which nerves are missing from parts of the intestine. The most prominent symptom is constipation. Other symptoms may include vomiting, abdominal pain, diarrhea and slow growth. Most children develop signs and symptoms shortly after birth. However, others may be diagnosed later in infancy or early childhood. About half of all children with 

Fetching descriptions:   2%|▊                                               | 1/61 [00:36<36:09, 36.17s/it]

[32;1m[1;3mThought: I need to summarize the key points regarding the Tyrosine-protein kinase receptor RET based on the previous steps. 

Action:
```
{
  "action": "Final Answer",
  "action_input": "The Tyrosine-protein kinase receptor RET is a receptor tyrosine kinase encoded by the RET proto-oncogene, crucial for signaling in the glial cell line-derived neurotrophic factor (GDNF) family. It plays a vital role in cellular processes such as proliferation, differentiation, and migration, particularly in the development of the nervous system and kidneys. Mutations in RET are associated with Hirschsprung's disease and various cancers, including medullary thyroid carcinoma and multiple endocrine neoplasias. RET is essential for normal development and physiological homeostasis."
}
```
[0m

[1m> Finished chain.[0m
*****

Step: Summarize the key points in a concise manner.

Given the above steps taken, please respond to the user's original question. 


Response: The Tyrosine-protein kinas

Fetching descriptions:   2%|▋                                            | 1/61 [03:47<3:47:29, 227.49s/it]


ConnectionError: HTTPSConnectionPool(host='en.wikipedia.org', port=443): Max retries exceeded with url: /w/api.php?prop=extracts&explaintext=&exintro=&titles=MST1&format=json&action=query (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f7eb9938050>: Failed to establish a new connection: [Errno 101] Network is unreachable'))

In [None]:
# Final save after all work is done. This is written unconditionally: the
# periodic checkpoints go to differently named files and reset save_count to 0,
# so guarding on save_count > 0 would skip producing "<csv_name>_descriptions.csv"
# whenever the number of new descriptions is an exact multiple of save_threshold
# (or when every row was already filled).
df.to_csv(csv_name + "_descriptions.csv", index=False)

In [None]:
# # Example list of tool names
# tool_names = ["wikipedia"]

# # Load the agent (using the zero-shot strategy)
# agent_chain = load_agent(tool_names, strategy="zero-shot-react")

# # Run the agent
# result = agent_chain.run("Explain about T1-Faker")

# print("Result:\n", result)