# Wrapping the Fine-Tuned Model into an Agentic pipeline (LangChain)

## Dependencies

### Modules 

In [None]:
%pip install langchain

In [None]:
%pip install langchain_community

In [None]:
%pip install langchain-huggingface

In [None]:
%pip install -U langchain-huggingface transformers

In [None]:
%pip install llama_index

In [None]:
%pip install llama-index-embeddings-huggingface

In [None]:
%pip install langchain-openai

In [None]:
%pip install python-dotenv

### Imports

In [None]:
import langchain, langchain_core, langchain_community
print(langchain.__version__, langchain_core.__version__, langchain_community.__version__)


In [22]:
from langchain.tools import tool
from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_huggingface import HuggingFacePipeline 
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
import torch
from langchain.memory import ConversationBufferWindowMemory

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from pydantic import BaseModel, Field

import os
from dotenv import load_dotenv

In [17]:
use_gpu = torch.cuda.is_available()
device = 0 if use_gpu else -1

## Agent

### Loading models

In [18]:
load_dotenv()

True

In [19]:
open_ai_key = os.getenv("OPEN_AI_KEY") 

In [49]:
gen_model_path = "gpt-4o-mini"
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.2,
    api_key=open_ai_key
)

kaggle

In [None]:
classifier_path = "/kaggle/input/classifier-model/bert-spam-ham-classifier-full_dataset/checkpoint-13086"

local

In [26]:
classifier_path = "bert-spam-ham-classifier-full_dataset/checkpoint-13086"

In [27]:
cls_model = AutoModelForSequenceClassification.from_pretrained(classifier_path)
cls_tokenizer = AutoTokenizer.from_pretrained(classifier_path)

In [28]:
clf = pipeline("text-classification", 
               model=cls_model, 
               tokenizer=cls_tokenizer,
               device=device
              )

Device set to use cpu


In [29]:
clf("hello click here")

[{'label': 'LABEL_1', 'score': 0.6220691204071045}]

### Tools

#### Classifier Tool

In [30]:
class ClassifyArgs(BaseModel):
    text: str = Field(description="text needed for classification")

@tool("classify_spam_ham", args_schema=ClassifyArgs)
def classify_spam_ham(text: str) -> str:
    """
    This tool classifies the given message as spam or ham.
    """
    print(f"Searching for query: {text}")
    pred = clf(text)[0]["label"]
    return "spam" if pred == "LABEL_1" else "ham"

#### Info retrieval

In [31]:
Settings.embed_model = HuggingFaceEmbedding("sentence-transformers/all-MiniLM-L6-v2")

In [32]:
Settings.node_parser = SentenceSplitter(chunk_size=50, chunk_overlap=10)

kaggle

In [None]:
file_path = "/kaggle/input/student-bio/student_bio.txt"

local

In [33]:
file_path = "student_bio.txt"

In [34]:
docs = SimpleDirectoryReader(input_files=[file_path]).load_data()
docs

[Document(id_='3b9df5ce-7948-481e-8efb-f1e82028537f', embedding=None, metadata={'file_path': 'student_bio.txt', 'file_name': 'student_bio.txt', 'file_type': 'text/plain', 'file_size': 451, 'creation_date': '2025-08-12', 'last_modified_date': '2025-08-12'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='My name is Mykhailo Ivasiuk. \nI am from Zarichia, the Ivano-Frankivsk region of Ukraine. \nI was born in Ibiza, Spain. My dad is Mykhailo, and my mother is Nadiya.\nI am currently studying at the Ukrainian Catholic University in the Faculty of Applied Sciences, specializing in Computer Science.\nI am passionate about developing problem-sol

In [35]:
index = VectorStoreIndex.from_documents(docs)

Metadata length (6) is close to chunk size (50). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [36]:
retriever = index.as_retriever(similarity_top_k=5)

In [37]:
nodes = retriever.retrieve("What does Mykhailo Ivasiuk study")
context = ".".join(n.node.get_content() for n in nodes)

In [38]:
for n in nodes:
    print(n)

Node ID: 83b2af3d-96fe-488b-9c01-9db0e47f243c
Text: My name is Mykhailo Ivasiuk.  I am from Zarichia, the Ivano-
Frankivsk region of Ukraine.  I was born in Ibiza, Spain.
Score:  0.497

Node ID: a7e4ecb0-84d0-4577-9262-0ef3990c74dc
Text: I was born in Ibiza, Spain. My dad is Mykhailo, and my mother is
Nadiya.
Score:  0.277

Node ID: 226205e2-fec8-451c-9309-a7b501e0bfc9
Text: I am currently studying at the Ukrainian Catholic University in
the Faculty of Applied Sciences, specializing in Computer Science. I
am passionate about developing problem-solving skills.
Score:  0.170

Node ID: 5f0be92a-de46-4561-a9d6-b9ee872155bb
Text: I am passionate about developing problem-solving skills. I am
currently studying a new field for Machine Learning, which I really
enjoy. I like going to gym.
Score:  0.132



In [39]:
context

'My name is Mykhailo Ivasiuk. \nI am from Zarichia, the Ivano-Frankivsk region of Ukraine. \nI was born in Ibiza, Spain..I was born in Ibiza, Spain. My dad is Mykhailo, and my mother is Nadiya..I am currently studying at the Ukrainian Catholic University in the Faculty of Applied Sciences, specializing in Computer Science.\nI am passionate about developing problem-solving skills..I am passionate about developing problem-solving skills.\nI am currently studying a new field for Machine Learning, which I really enjoy.\nI like going to gym.'

In [40]:
class SearchInfoArgs(BaseModel):
    query: str = Field(description="a query of user for looking down the information about Mykhailo")

@tool("search_info_about_Mykhailo_Ivasiuk", args_schema=SearchInfoArgs)
def search_info(query: str) -> str:
    """
    This tool searches for the biography of Mykhailo Ivasiuk and returns relevant information.
    Returns concatenated text of all matching documents, or an empty string if no results found.
    """
    print(f"Searching for query: {query}")
    nodes = retriever.retrieve(query)
    if not nodes:
        return "No information found."
    context = ".".join(n.node.get_content() for n in nodes)
    return context

In [41]:
tools = [classify_spam_ham, search_info]

### Implementing agent

In [None]:
system = """
You are a helpful terminal assistant.

RULES:
- Call the "classify_spam_ham" tool if the user EXPLICITLY asks to classify spam or ham,
    OR they use trigger phrases like: "classify", "is this spam", "spam or ham",
    "label this", "check for spam", or the input starts with "Classify this:" or "Classify:".
    When you do call this tool, after it returns, output EXACTLY the tool's result
    in lowercase ("spam" or "ham"), with some extra words saying its a spam or ham
- If the user asks anything related to Mykhailo Ivasiuk's biography, for example queries like:
    "Who is Mykhailo Ivasiuk?", "Tell me about Mykhailo Ivasiuk", "What does Mykhailo study?",
    call the "search_info_about_Mykhailo_Ivasiuk" tool to with the given query to fetch his biography details,
    process them to look more natural and human, and return the result.
- If the user sends a greeting or small talk (e.g., "hello", "hi"), DO NOT call any tool.
- If a user asks a general question that doesn't match either tool, provide a neutral response.
"""

In [43]:
prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    MessagesPlaceholder("chat_history"),      
    ("human", "{input}"),                     
    MessagesPlaceholder("agent_scratchpad"),
])

In [50]:
agent = create_tool_calling_agent(llm, tools, prompt)

In [51]:
memory = ConversationBufferWindowMemory(
    k=20,
    return_messages=True,
    memory_key="chat_history",
    output_key="output"
)

debug

In [46]:
from langchain.globals import set_verbose, set_debug

on = False
set_debug(on)
set_verbose(on)

In [52]:
agent_exec = AgentExecutor(agent=agent, 
                           tools=tools,
                           memory=memory,
                           verbose=True,
                           memory_key="chat_history",
                           return_intermediate_steps=True)

In [53]:
res = agent_exec.invoke({"input": "Hello"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mHi there! How can I assist you today?[0m

[1m> Finished chain.[0m
Hi there! How can I assist you today?


In [54]:
res = agent_exec.invoke({"input": "Classify: hello click here"})
res



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `classify_spam_ham` with `{'text': 'hello click here'}`


[0mSearching for query: hello click here
[36;1m[1;3mspam[0m[32;1m[1;3mThe message is classified as spam.[0m

[1m> Finished chain.[0m


{'input': 'Classify: hello click here',
 'chat_history': [HumanMessage(content='Hello', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hi there! How can I assist you today?', additional_kwargs={}, response_metadata={})],
 'output': 'The message is classified as spam.',
 'intermediate_steps': [(ToolAgentAction(tool='classify_spam_ham', tool_input={'text': 'hello click here'}, log="\nInvoking: `classify_spam_ham` with `{'text': 'hello click here'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_Ai8gVwOe8PZO1SypRLHlTy8P', 'function': {'arguments': '{"text":"hello click here"}', 'name': 'classify_spam_ham'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c', 'service_tier': 'default'}, id='run--364b92df-84ef-4286-9955-2cae9b26d93d', tool_calls=[{'name': 'classify_spam_ham', 'args': {'text': 'hello click here'}, '

In [55]:
res = agent_exec.invoke({"input": "Who is Michael Jordan?"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mMichael Jordan is a former professional basketball player widely regarded as one of the greatest basketball players of all time. He played the majority of his career for the Chicago Bulls in the NBA, where he won six championships and earned numerous accolades, including five Most Valuable Player (MVP) awards. Known for his scoring ability, athleticism, and competitive spirit, Jordan became a global icon and helped popularize the NBA worldwide. After retiring from basketball, he became a successful businessman and is the principal owner of the Charlotte Hornets, an NBA team.[0m

[1m> Finished chain.[0m
Michael Jordan is a former professional basketball player widely regarded as one of the greatest basketball players of all time. He played the majority of his career for the Chicago Bulls in the NBA, where he won six championships and earned numerous accolades, including five Most Valuable Player (MVP) awards. Known for his 

In [56]:
res = agent_exec.invoke({"input": "What's my name?"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI'm sorry, but I don't have access to your name or personal information. If you'd like to share your name, feel free to do so![0m

[1m> Finished chain.[0m
I'm sorry, but I don't have access to your name or personal information. If you'd like to share your name, feel free to do so!


In [57]:
res = agent_exec.invoke({"input": "my name is Bob"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mNice to meet you, Bob! How can I assist you today?[0m

[1m> Finished chain.[0m
Nice to meet you, Bob! How can I assist you today?


In [58]:
res = agent_exec.invoke({"input": "What's my name?"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour name is Bob. How can I help you today, Bob?[0m

[1m> Finished chain.[0m
Your name is Bob. How can I help you today, Bob?


In [59]:
res = agent_exec.invoke({"input": "What's my favorite color?"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI don't know your favorite color, Bob. If you'd like to share it, feel free![0m

[1m> Finished chain.[0m
I don't know your favorite color, Bob. If you'd like to share it, feel free!


In [60]:
res = agent_exec.invoke({"input": "My favorite color is yellow"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThat's great, Bob! Yellow is a bright and cheerful color. How can I assist you further today?[0m

[1m> Finished chain.[0m
That's great, Bob! Yellow is a bright and cheerful color. How can I assist you further today?


In [61]:
res = agent_exec.invoke({"input": "What's my favorite color?"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour favorite color is yellow. Would you like to talk about anything else, Bob?[0m

[1m> Finished chain.[0m
Your favorite color is yellow. Would you like to talk about anything else, Bob?


In [62]:
res = agent_exec.invoke({"input": "Provide information about where Mykhailo was born"})
print(res["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_info_about_Mykhailo_Ivasiuk` with `{'query': 'Where was Mykhailo Ivasiuk born?'}`


[0mSearching for query: Where was Mykhailo Ivasiuk born?
[33;1m[1;3mMy name is Mykhailo Ivasiuk. 
I am from Zarichia, the Ivano-Frankivsk region of Ukraine. 
I was born in Ibiza, Spain..I was born in Ibiza, Spain. My dad is Mykhailo, and my mother is Nadiya..I am currently studying at the Ukrainian Catholic University in the Faculty of Applied Sciences, specializing in Computer Science.
I am passionate about developing problem-solving skills..I am passionate about developing problem-solving skills.
I am currently studying a new field for Machine Learning, which I really enjoy.
I like going to gym.[0m[32;1m[1;3mMykhailo Ivasiuk was born in Ibiza, Spain, but he is originally from Zarichia, located in the Ivano-Frankivsk region of Ukraine. If you have any more questions about him or anything else, feel free to ask![0m



In [63]:
res

{'input': 'Provide information about where Mykhailo was born',
 'chat_history': [HumanMessage(content='Hello', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hi there! How can I assist you today?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Classify: hello click here', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The message is classified as spam.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Who is Michael Jordan?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Michael Jordan is a former professional basketball player widely regarded as one of the greatest basketball players of all time. He played the majority of his career for the Chicago Bulls in the NBA, where he won six championships and earned numerous accolades, including five Most Valuable Player (MVP) awards. Known for his scoring ability, athleticism, and competitive spirit, Jordan became a global icon and helped pop