In [1]:
import getpass
import os
# SECURITY: never write the LangSmith key into the notebook, not even inside a
# comment — notebooks are shared and committed with their full source. Any key
# that has ever been pasted here must be treated as leaked and rotated.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# Ask for the key interactively only when the environment does not already
# provide it, so "Restart & Run All" works unattended once the variable is set.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

import torch

# 4-bit NF4 quantisation settings (with double quantisation) that are handed to
# from_pretrained below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_name = "yanolja/EEVE-Korean-Instruct-10.8B-v1.0"  # replace with the actual model path if needed
tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE(review): torch_dtype is float16 while the quantised compute dtype above is
# bfloat16 — confirm the mismatch is intentional.
# NOTE(review): trust_remote_code=True allows the checkpoint repository to run
# its own Python code at load time — confirm this model actually requires it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto',
    torch_dtype=torch.float16,
    trust_remote_code=True
)

tokenizer_config.json:   0%|          | 0.00/2.04k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.18M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/557 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/704 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/5 [00:00<?, ?it/s]

model-00001-of-00005.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00002-of-00005.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00003-of-00005.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00005.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00005-of-00005.safetensors:   0%|          | 0.00/1.88G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [7]:
from langchain_huggingface.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


In [8]:
class HuggingFaceToolModel:
    """Small wrapper that either calls a bound tool or generates text.

    A prompt containing ``use_tool: <name>: <input>`` is routed to the bound
    tool called ``<name>``; every other prompt is sent to the wrapped model.
    """

    # Marker that introduces a tool request inside a prompt.
    TOOL_MARKER = "use_tool:"

    def __init__(self, model, tokenizer):
        """Store the already-loaded Hugging Face ``model`` and ``tokenizer``."""
        self.tokenizer = tokenizer
        self.model = model
        self.tools = None  # Set by bind_tools(); None means "nothing bound".

    def bind_tools(self, tools):
        """Bind ``tools`` and return ``self`` so calls can be chained.

        ``tools`` may be a dict mapping tool name -> tool, or any iterable of
        tool objects that expose a ``name`` attribute (e.g. a plain list).
        """
        self.tools = tools
        return self

    def generate_response(self, prompt, max_new_tokens=200):
        """Generate text for ``prompt`` and return the decoded first sequence.

        ``max_new_tokens`` bounds only the generated continuation, so a long
        prompt cannot use up the whole budget (which a total-length limit
        would allow). For decoder-only models the decoded text normally still
        starts with the prompt itself.
        """
        # Follow the model's own device when it reports one; fall back to the
        # CUDA-if-available guess otherwise.
        device = getattr(self.model, "device", None)
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        inputs = self.tokenizer(prompt, return_tensors="pt").to(device)
        output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        return self.tokenizer.decode(output[0], skip_special_tokens=True)

    def _find_tool(self, tool_name):
        """Return the bound tool named ``tool_name``, or None when absent."""
        tools = self.tools
        if not tools:
            return None
        if isinstance(tools, dict):
            return tools.get(tool_name)
        # List/tuple of tool objects: match on their ``name`` attribute.
        for tool in tools:
            if getattr(tool, "name", None) == tool_name:
                return tool
        return None

    def call_tool(self, tool_name, tool_input):
        """Run the bound tool ``tool_name`` on ``tool_input``.

        Raises:
            ValueError: if no tool with that name is bound.
        """
        tool = self._find_tool(tool_name)
        if tool is None:
            raise ValueError(f"Tool '{tool_name}' not found or not bound.")
        return tool.run(tool_input)

    def run(self, prompt):
        """Dispatch ``prompt`` to a tool when it carries the tool marker,
        otherwise generate a model response."""
        # Test for the full marker (with the colon) so that a prompt merely
        # mentioning the word "use_tool" is still answered by the model.
        if self.TOOL_MARKER in prompt:
            tool_name, tool_input = self.parse_tool_request(prompt)
            return self.call_tool(tool_name, tool_input)
        return self.generate_response(prompt)

    def parse_tool_request(self, prompt):
        """Extract ``(tool_name, tool_input)`` from a prompt shaped like
        ``... use_tool: <name>: <input>``.

        Only the first colon after the tool name separates name from input, so
        the input itself may contain colons.

        Raises:
            ValueError: if the marker, the tool name, or the name/input
                separator is missing.
        """
        _, marker, request = prompt.partition(self.TOOL_MARKER)
        tool_name, separator, tool_input = request.partition(":")
        tool_name = tool_name.strip()
        if not marker or not separator or not tool_name:
            raise ValueError("Invalid tool request format in prompt")
        return tool_name, tool_input.strip()
    


In [9]:
# Wrap the already-loaded model/tokenizer in a transformers text-generation
# pipeline (max_new_tokens=500) and expose it to LangChain as `hf`.
# NOTE(review): the recorded outputs in this notebook repeat the prompt before
# the answer; presumably the pipeline returns prompt + completion by default —
# confirm, and strip the prompt in one consistent place (a later cell already
# binds skip_prompt=True for a single chain).
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=500)
hf = HuggingFacePipeline(pipeline=pipe)

In [11]:
from langchain_core.prompts import PromptTemplate

# Prompt that frames the input as a question and primes a step-by-step answer.
template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

# Pipe the filled-in prompt into the local LLM wrapper.
chain = prompt | hf

question = "What is PUFA?"

print(chain.invoke({"question": question}))

Question: What is PUFA?

Answer: Let's think step by step.

Step 1: Understanding the abbreviation
PUFA stands for Polyunsaturated Fatty Acids.

Step 2: Defining the term
Polyunsaturated Fatty Acids (PUFAs) are a type of fat that is essential for human health. They are considered "essential" because the body cannot produce them on its own, so they must be obtained through the diet.

Step 3: Importance of PUFAs
Polyunsaturated Fatty Acids play a crucial role in various bodily functions, including:

1. Supporting brain development and function: PUFAs are a major component of brain cell membranes and are essential for cognitive function, mood regulation, and memory.
2. Reducing inflammation: PUFAs have anti-inflammatory properties that help to reduce inflammation in the body, which can contribute to chronic diseases like heart disease, diabetes, and certain types of cancer.
3. Improving heart health: PUFAs can help to lower LDL (bad) cholesterol levels and increase HDL (good) cholesterol 

In [7]:
# Same prompt, but with skip_prompt=True bound onto the LLM call; the recorded
# output below starts at the answer instead of repeating the question.
chain = prompt | hf.bind(skip_prompt=True)

question = "What is electroencephalography?"

print(chain.invoke({"question": question}))



Step 1: Understanding the terms
- "Electroencephalography" is a term that consists of two parts: "electro" and "encephalography."
- "Electro" refers to electricity or electrical signals.
- "Encephalography" refers to the study of the brain.

Step 2: Combining the terms
By combining the terms, we can understand that electroencephalography (EEG


In [87]:
from langchain_core.messages import HumanMessage

# Instruction-style prompt (Korean section headers) with a single {content} slot.
# This rebinds `template`, `prompt` and `chain` from the earlier cells.
# NOTE(review): later cells invoke this chain with a list of message objects,
# and the recorded outputs show the list's repr pasted into {content} rather
# than the message text — consider a chat prompt with a messages placeholder.
template = """
### 지시사항:
You are a helpful assistant. Answer all questions to the best of your ability.

### 입력:
{content}


"""
prompt = PromptTemplate.from_template(template)

chain = prompt | hf


# print(chain.invoke([HumanMessage(content="")]))

In [88]:
from langchain_core.messages import AIMessage

# First turn: send one human message ("Hi, my name is Dongjae") through the chain
# and keep the returned value for the follow-up turn.
ai_message = chain.invoke(
    [
        HumanMessage(content="안녕 내이름은 동재야"),
    ]
)

In [53]:
# Display the value returned by the chain.
# NOTE(review): the recorded output does not match the current prompt template
# (it starts with "content:" and contains a "Role:" section), so it appears to
# come from an earlier run — re-execute on a fresh kernel to refresh it.
ai_message

"content: [HumanMessage(content='안녕 내이름은 동재야', additional_kwargs={}, response_metadata={})]\n\nRole: 너는 상냥한 비서야. 내가 질문하는거에 잘 대답해 주면되.\n\n동재: 안녕! 나는 동재야. 네 질문에 도움을 줄 준비가 되어 있어. 궁금한 게 있으면 편하게 물어봐. 최선을 다해 도와줄게."

In [82]:
# Second turn: replay the first exchange by hand and ask "What is my name?".
# NOTE(review): the recorded AssertionError carries execution count 82, lower
# than the cell that currently defines `chain` (87), so it was presumably raised
# by a different chain object — re-run in order to see whether it still occurs.
chain.invoke(
    [
        HumanMessage(content="안녕 내이름은 동재야"),
        AIMessage(content=ai_message),
        HumanMessage(content="내이름은 뭐지?"),
    ]
)

AssertionError: The input to RunnablePassthrough.assign() must be a dict.

In [64]:
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory

# Chat histories created so far, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for ``session_id``.

    The first request for an unknown id creates an empty in-memory history,
    records it in ``store`` and returns it.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = InMemoryChatMessageHistory()
        return history


# Wrap the chain together with the per-session history lookup.
with_message_history = RunnableWithMessageHistory(chain, get_session_history)

In [67]:
# Run config carrying the session id passed to the history-wrapped chain.
config = {"configurable": {"session_id": "abc24"}}

In [68]:
# First turn of the session; the trailing bare `response` displays the result.
response = with_message_history.invoke(
    [HumanMessage(content="Hi! I'm Bob")],
    config=config,
)

response

'\n### 지시사항:\nYou are a helpful assistant. Answer all questions to the best of your ability.\n\n### 입력:\n[HumanMessage(content="Hi! I\'m Bob", additional_kwargs={}, response_metadata={})]\n\n\n### 출력:\n안녕하세요! 저는 Bob입니다. 어떻게 도와드릴까요? 궁금한 점이나 해결하고 싶은 문제가 있으신가요?'

In [89]:
# Follow-up turn in the same session.
# NOTE(review): the recorded output shows earlier prompts nested inside the
# stored AI messages, so the prompt grows with every turn — the chain's output
# includes the prompt it was given and that whole text is saved as the reply.
response = with_message_history.invoke(
    [HumanMessage(content="What's my name?")],
    config=config,
)

response

'\n### 지시사항:\nYou are a helpful assistant. Answer all questions to the best of your ability.\n\n### 입력:\n[HumanMessage(content="Hi! I\'m Bob", additional_kwargs={}, response_metadata={}), AIMessage(content=\'\\n### 지시사항:\\nYou are a helpful assistant. Answer all questions to the best of your ability.\\n\\n### 입력:\\n[HumanMessage(content="Hi! I\\\'m Bob", additional_kwargs={}, response_metadata={})]\\n\\n\\n### 출력:\\n안녕하세요! 저는 Bob입니다. 어떻게 도와드릴까요? 궁금한 점이나 해결하고 싶은 문제가 있으신가요?\', additional_kwargs={}, response_metadata={}), HumanMessage(content="What\'s my name?", additional_kwargs={}, response_metadata={}), AIMessage(content=\'\\n### 지시사항:\\nYou are a helpful assistant. Answer all questions to the best of your ability.\\n\\n### 입력:\\n[HumanMessage(content="Hi! I\\\'m Bob", additional_kwargs={}, response_metadata={}), AIMessage(content=\\\'\\\\n### 지시사항:\\\\nYou are a helpful assistant. Answer all questions to the best of your ability.\\\\n\\\\n### 입력:\\\\n[HumanMessage(content="Hi! I\\\\\\

In [91]:
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory

# Fresh, empty mapping of session id -> chat history. This cell repeats the
# earlier history setup, so running it discards previously stored sessions.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for ``session_id``.

    The first request for an unknown id creates an empty in-memory history,
    records it in ``store`` and returns it.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = InMemoryChatMessageHistory()
        return history


# Wrap the chain together with the per-session history lookup.
with_message_history = RunnableWithMessageHistory(chain, get_session_history)

In [92]:
# New session id, so the turns below start from an empty history.
config = {"configurable": {"session_id": "abc233"}}

In [93]:
# First turn of the new session; the trailing bare `response` displays the result.
response = with_message_history.invoke(
    [HumanMessage(content="Hi! I'm Bob")],
    config=config,
)

response

'\n### 지시사항:\nYou are a helpful assistant. Answer all questions to the best of your ability.\n\n### 입력:\n[HumanMessage(content="Hi! I\'m Bob", additional_kwargs={}, response_metadata={})]\n\n\n### 출력:\n안녕하세요! 저는 Bob입니다. 어떻게 도와드릴까요? 궁금한 점이나 해결하고 싶은 문제가 있으신가요?'

In [94]:
# Follow-up turn: the recorded output ends with the model stating the name given
# in the first turn, after a copy of the whole prompt (history included).
response = with_message_history.invoke(
    [HumanMessage(content="What's my name?")],
    config=config,
)

response

'\n### 지시사항:\nYou are a helpful assistant. Answer all questions to the best of your ability.\n\n### 입력:\n[HumanMessage(content="Hi! I\'m Bob", additional_kwargs={}, response_metadata={}), AIMessage(content=\'\\n### 지시사항:\\nYou are a helpful assistant. Answer all questions to the best of your ability.\\n\\n### 입력:\\n[HumanMessage(content="Hi! I\\\'m Bob", additional_kwargs={}, response_metadata={})]\\n\\n\\n### 출력:\\n안녕하세요! 저는 Bob입니다. 어떻게 도와드릴까요? 궁금한 점이나 해결하고 싶은 문제가 있으신가요?\', additional_kwargs={}, response_metadata={}), HumanMessage(content="What\'s my name?", additional_kwargs={}, response_metadata={})]\n\n\n### 출력:\n안녕하세요! 저는 Bob입니다. 어떻게 도와드릴까요? 궁금한 점이나 해결하고 싶은 문제가 있으신가요?\n\n당신의 이름은 Bob입니다.'

In [98]:
pip install -qU langchain-openai

[0mNote: you may need to restart the kernel to use updated packages.


In [99]:
import bs4
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Load the blog post that will be chunked and indexed to create a retriever.
# Parsing is restricted to elements whose CSS class is post-content,
# post-title or post-header.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

In [105]:
from sentence_transformers import SentenceTransformer

class LangchainSentenceTransformer:
    """Adapter exposing a SentenceTransformer through the ``embed_documents`` /
    ``embed_query`` methods that the vector store calls."""

    def __init__(self, model_name):
        """Load the sentence-transformers model identified by ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def _encode_to_list(self, payload):
        """Encode ``payload`` without tensor conversion and return plain lists."""
        encoded = self.model.encode(payload, convert_to_tensor=False)
        return encoded.tolist()

    def embed_documents(self, texts):
        """Embed several texts at once; the result is converted with ``tolist()``."""
        return self._encode_to_list(texts)

    def embed_query(self, query):
        """Embed a single query; the result is converted with ``tolist()``."""
        return self._encode_to_list(query)

# Instantiate the wrapper around the all-MiniLM-L6-v2 sentence-transformers model.
embedding_model = LangchainSentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")



In [106]:


# Split the loaded documents (chunk_size=1000, chunk_overlap=200), index the
# chunks in Chroma using the local embedding wrapper, and expose the index as
# a retriever.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
retriever = vectorstore.as_retriever()

In [107]:
# System instructions for the question-answering chain. The adjacent string
# literals are concatenated into one text that ends with a {context} slot.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

In [108]:
# Chat prompt: the system instructions followed by the user's {input}.
# Note that this rebinds `prompt`, which earlier cells used for a PromptTemplate.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [112]:
# Build the document-stuffing answer chain from the local LLM and the chat prompt.
question_answer_chain = create_stuff_documents_chain(hf, prompt)

In [113]:
# Combine the retriever with the answer chain into a single retrieval chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [114]:
# Ask one question and show the "answer" entry of the result.
# NOTE(review): the recorded answer begins with the system prompt and the
# retrieved context, i.e. the prompt is echoed in front of the model's reply.
response = rag_chain.invoke({"input": "What is Task Decomposition?"})
response["answer"]

'System: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don\'t know the answer, say that you don\'t know. Use three sentences maximum and keep the answer concise.\n\nFig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\n\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem 

In [115]:
# Print the same answer so that its newlines are rendered instead of escaped.
print(response["answer"])

System: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.

Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.

Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multip

In [118]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# Instructions asking the model to rewrite the latest user question as a
# standalone question, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt layout: system instructions, the prior messages, then the new {input}.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
        
    ]
)
# Retriever built from the local LLM, the base retriever and the prompt above.
history_aware_retriever = create_history_aware_retriever(
    hf, retriever, contextualize_q_prompt
)

In [143]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Answering prompt: system instructions, the chat history, then the new {input}.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [144]:
# Rebuild the answer chain with the history-aware prompt and pair it with the
# history-aware retriever; this rebinds `question_answer_chain` and `rag_chain`.
question_answer_chain = create_stuff_documents_chain(hf, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [121]:
from langchain_core.messages import AIMessage, HumanMessage

# Chat history is kept by hand as a list of messages for this demo.
chat_history = []

question = "What is Task Decomposition?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})
# Record the first exchange so that the follow-up question can refer to it.
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)

second_question = "What are common ways of doing it?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

# NOTE(review): the recorded output starts with the system prompt and the
# retrieved context, so the stored AI message also contains that echoed prompt.
print(ai_msg_2["answer"])

System: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.

Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.

Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multip

In [146]:
from langchain.tools.retriever import create_retriever_tool

# Expose the retriever as a named tool with a description, and collect it in
# the list handed to the agent constructor.
tool = create_retriever_tool(
    retriever,
    "blog_post_retriever",
    "Searches and returns excerpts from the Autonomous Agents blog post.",
)
tools = [tool]

In [152]:
# Agent prompt: system instructions, the user's {input}, and a placeholder for
# {agent_scratchpad}. This rebinds `prompt` once more.
# NOTE(review): system_prompt still contains a {context} slot — confirm that
# the agent supplies it, otherwise formatting this prompt will presumably fail.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}")
    ]
)

In [153]:
from langchain.agents import create_tool_calling_agent

# NOTE(review): the recorded output is a ValueError stating that the LLM must
# implement .bind_tools, which `hf` evidently does not; an LLM wrapper that
# provides bind_tools is needed before this cell can succeed.
agent = create_tool_calling_agent(hf, tools, prompt)

ValueError: This function requires a .bind_tools method be implemented on the LLM.