# ***Install Required Libraries***
* The following packages are essential for working with LangChain and Hugging Face models
* langchain: Main framework for building AI applications
* langchain_community: Community-maintained components
* huggingface_hub: Interface for Hugging Face's model repository

In [55]:
# Import necessary modules from the installed packages
import langchain
import langchain_community
import huggingface_hub

In [None]:
import os

In [3]:
import os

 ### ***Set Up API Authentication***

In [4]:
# Set Hugging Face API Token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""

In [5]:
from langchain import HuggingFaceHub

### ***Initialize Language Model***

In [6]:
# Create a instance of Hugging Face's FLAN-T5 model
# - repo_id: Specifies which model to use
# - temperature: Controls randomness (0 = deterministic outputs)
# - max_length: Maximum length of generated responses
llm = HuggingFaceHub(repo_id="google/flan-t5-large", model_kwargs={"temperature": 0, "max_length": 64})

  llm = HuggingFaceHub(repo_id="google/flan-t5-large", model_kwargs={"temperature": 0, "max_length": 64})


### ***Basic Model Interaction Examples***

In [7]:
# Example 1: Translation task
translation = llm("translate English to German: How old are you?")
print(translation)

Wie alte sind Sie?


  translation = llm("translate English to German: How old are you?")


In [8]:
# Example 2: Restaurant name generation
restaurant_name = llm.predict("I want to open a restaurant for Indian food. Suggest a fancy name for this.")
print(restaurant_name)

  restaurant_name = llm.predict("I want to open a restaurant for Indian food. Suggest a fancy name for this.")


Indian restaurant


In [9]:
# Example 3: Company name suggestion
company_name = llm.predict("What would be a good company name for a company that makes colorful socks?")
print(company_name)

sock mania


### ***Prompt Templates***

In [10]:
# Create reusable templates for consistent prompting
from langchain.prompts import PromptTemplate

# Template for generating restaurant names
prompt_template_name = PromptTemplate(
    input_variables =['cuisine'],
    template = "I want to open a restaurant for {cuisine} food. Suggest a fency name for this."
)

# Usage example
p = prompt_template_name.format(cuisine="indian")
print(p)

I want to open a restaurant for indian food. Suggest a fency name for this.


In [11]:
# Create chain using our template and model
prompt = PromptTemplate.from_template("What is a good name for a company that makes {product}")

prompt.format(product="colorful socks")

'What is a good name for a company that makes colorful socks'

### ***Chains - Connecting Components***

In [12]:
from langchain.chains import LLMChain

In [13]:
from langchain.prompts import PromptTemplate

In [14]:
prompt = PromptTemplate.from_template("What is a good name for a company that makes {product}?")
print(prompt.format(product="colorful socks"))

What is a good name for a company that makes colorful socks?


In [15]:
# Use our FLAN‑T5-based LLM defined earlier.
chain = LLMChain(llm=llm, prompt=prompt)
response = chain.run("colorful socks")
print(response)

sock mania


  chain = LLMChain(llm=llm, prompt=prompt)
  response = chain.run("colorful socks")


In [16]:
prompt_template_restaurant = PromptTemplate(
    input_variables=['cuisine'],
    template="I want to open a restaurant for {cuisine} food. Suggest a fancy name for this."
)

In [17]:
chain_restaurant = LLMChain(llm=llm, prompt=prompt_template_restaurant)
response = chain_restaurant.run("Mexican")
print(response)

Mexican restaurant


In [18]:
chain_restaurant_verbose = LLMChain(llm=llm, prompt=prompt_template_restaurant, verbose=True)
response = chain_restaurant_verbose.run("Mexican")
print(response)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mI want to open a restaurant for Mexican food. Suggest a fancy name for this.[0m

[1m> Finished chain.[0m
Mexican restaurant


### ***Simple Sequential Chain***

In [None]:
# To combine the Chain and  to set a sequence for that we use SimpleSequentialChain
# The output from the first PromptTemplate is passed to the next PromptTemplate as input**

In [19]:
from langchain.chains import SimpleSequentialChain

In [20]:
prompt_template_name = PromptTemplate(
    input_variables=['cuisine'],
    template="I want to open a restaurant for {cuisine} food. Suggest a fancy name for this."
)

In [21]:
# Chain to generate a restaurant name.
name_chain = LLMChain(llm=llm, prompt=prompt_template_name)

In [22]:
# Chain to generate menu items based on the restaurant name.
prompt_template_items = PromptTemplate(
    input_variables=['restaurant_name'],
    template="Suggest some menu items for {restaurant_name}."
)

In [23]:
food_items_chain = LLMChain(llm=llm, prompt=prompt_template_items)

In [24]:
chain_seq = SimpleSequentialChain(chains=[name_chain, food_items_chain])
content = chain_seq.run("american")
print(content)

Chicken fried steak


In [25]:
chain_seq = SimpleSequentialChain(chains=[name_chain, food_items_chain])
content = chain_seq.run("china")
print(content)

steamed pork with egg


In [26]:
chain_seq = SimpleSequentialChain(chains=[name_chain, food_items_chain])
content = chain_seq.run("bangali")
print(content)

samosas


### ***Sequential chain***

In [27]:
prompt_template_name = PromptTemplate(
    input_variables =['cuisine'],
    template = "I want to open a restaurant for {cuisine} food. Suggest a fency name for this."
)

In [28]:
name_chain =LLMChain(llm=llm, prompt=prompt_template_name, output_key="restaurant_name")

In [29]:
prompt_template_items = PromptTemplate(
    input_variables = ['restaurant_name'],
    template="Suggest some menu items for {restaurant_name}."
)

In [30]:
food_items_chain =LLMChain(llm=llm, prompt=prompt_template_items, output_key="menu_items")

In [31]:
from langchain.chains import SequentialChain

chain = SequentialChain(
    chains = [name_chain, food_items_chain],
    input_variables = ['cuisine'],
    output_variables = ['restaurant_name', "menu_items"]
)

In [32]:
print(chain({"cuisine": "indian"}))

{'cuisine': 'indian', 'restaurant_name': 'Indian restaurant', 'menu_items': 'tandoori chicken'}


  print(chain({"cuisine": "indian"}))


### ***Agents and Tools***

In [36]:
from langchain.llms import HuggingFaceHub
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentExecutor
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from langchain.prompts import PromptTemplate
from langchain.agents import AgentType

Agents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done.


When used correctly agents can be extremely powerful. In order to load agents, you should understand the following concepts:

- Tool: A function that performs a specific duty. This can be things like: Google Search, Database lookup, Python REPL, other chains.
- LLM: The language model powering the agent.
- Agent: The agent to use.

In [37]:
# Hugging Face API Token
huggingfacehub_api_token = ""

In [38]:
# Initialize LLM
llm = HuggingFaceHub(
    repo_id="google/flan-t5-base",
    huggingfacehub_api_token=huggingfacehub_api_token,
    model_kwargs={"temperature": 0.5, "max_length": 100}
)

In [39]:
# Wikipedia Tool
wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

# Tool Definition
tools = [
    Tool(
        name="Wikipedia",
        func=wiki.run,
        description="Useful for answering general knowledge questions by searching Wikipedia."
    )
]

In [40]:
# Custom Prompt Template for ReAct Format
template = """
You are a helpful assistant. Answer the following questions using the tools provided.
Use the following format:

Question: {input}
Thought: First, think about how to answer the question. Which tool should I use?
Action: Choose the appropriate tool (e.g., Wikipedia).
Action Input: Provide the query for the tool.
Observation: The result returned by the tool.
Final Answer: Provide a final answer based on the observation.

Begin!

Question: {input}
"""

In [41]:
prompt = PromptTemplate(input_variables=["input"], template=template)

# Agent Initialization with Parsing Error Handling
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True  # Handles unexpected output formats
)

# Ask a Question
response = agent.run("Who is Messi?")
print(response)

  agent = initialize_agent(




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLionel Messi is a footballer.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3mLionel Messi is a footballer.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3mLionel Messi is a footballer.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3mLionel Messi is a footballer. Observation: Invalid Format: Missing 'Action:' after 'Thought: Thought: Lionel Messi is a footballer. Observation: Invalid Format: Missing 'Action:' after 'Thought: Thought: Lionel Messi is a footballer. Observation: Invalid Format: Missing 'Action[0m
Observation: Invalid Format: Missing 'Action Input:' after 'Action:'
Thought:[32;1m[1;3mInvalid Format: Missing 'Action:' after 'Thought: Thought: Lionel Messi is a footballer. Observation: Invalid Format: Missing 'Action:' after 'Thought: Thought: Lionel Messi is a footballer. Observation: Invalid 

### ***Memory***

In [42]:
from langchain.prompts import PromptTemplate

prompt_template_name = PromptTemplate(
    input_variables =['cuisine'],
    template = "I want to open a restaurant for {cuisine} food. Suggest a fency name for this."
)

In [44]:
from langchain.chains import LLMChain

chain = LLMChain(llm=llm,prompt=prompt_template_name)
name = chain.run("Mexican")
print(name)

el teo


In [45]:
name = chain.run("Indian")
print(name)

saada


In [46]:
chain.memory

In [47]:
type(chain.memory)

NoneType

### ***ConversationBufferMemory***

In [48]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()

chain = LLMChain(llm=llm, prompt=prompt_template_name, memory=memory)
name = chain.run("Mexican")
print(name)

el teo


  memory = ConversationBufferMemory()


In [49]:
name = chain.run("Arabic")
print(name)

ahmed ahmed


In [50]:
print(chain.memory.buffer)

Human: Mexican
AI: el teo
Human: Arabic
AI: ahmed ahmed


### ***ConversationChain***

In [60]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.llms import HuggingFaceHub

In [61]:
# Hugging Face API Token
huggingfacehub_api_token = ""

In [64]:
# Initialize the LLM (Using FLAN-T5 model from Hugging Face)
llm = HuggingFaceHub(
    repo_id="google/flan-t5-base",
    huggingfacehub_api_token=huggingfacehub_api_token,
    model_kwargs={"temperature": 0.7, "max_length": 100}
)

In [65]:
# Initialize Memory (Conversation Buffer)
memory = ConversationBufferMemory()

In [67]:
# Create the Conversational Chain
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=True  # Shows intermediate steps (useful for debugging)
)

In [68]:
# Example Conversations
print(conversation.predict(input="Hi, my favourite color is Red."))
print(conversation.predict(input="What’s my my favourite color?"))
print(conversation.predict(input="what is 5 + 5?"))
print(conversation.predict(input="Do you remember what is my favourite color was?"))



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi, my favourite color is Red.
AI: Human: What color is your favourite?
Human: What’s my my favourite color?
AI: Human: Red is my favourite color.
Human: what is 5 + 5?
AI: Human: 5 + 5 is a slang term for a number of different colors.
Human: Do you remember what is my favourite color was?
AI: Human: I remember the color red.
Human: Hi, my favourite color is Red.
AI:[0m

[1m> Finished chain.[0m
Human: Red is my favourite color.


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific detail

### **Document Loaders**

In [71]:
from langchain.document_loaders import PyPDFLoader

In [73]:
loader = PyPDFLoader("/content/BHM-503T.pdf")
pages = loader.load()

In [74]:
pages

[Document(metadata={'source': '/content/BHM-503T.pdf', 'page': 0, 'page_label': '1'}, page_content=' \nResearching for Hospitality and Tourism Management                               BHM-503T \nUttarakhand Open University  1 \n \nUNIT: 01  \nRESEARCH: MEANING, TYPES, SCOPE AND \nSIGNIFICANCE  \nStructure  \n 1.1 Introduction  \n 1.2 Objectives  \n 1.3 Meaning of Research  \n 1.4 Definition of Research  \n 1.5 Characteristics of Research  \n 1.6 Types of Research  \n 1.7 Methodology of Research \n 1.8 Formulation of Research Problem \n 1.9 Research Design  \n   1.9.1 Meaning of Research Design \n   1.9.2 Characteristics of Research Design \n   1.9.3 Steps in Research Design \n 1.10 Concept of Hypotheses \n 1.11 Summary \n 1.12 Glossary \n 1.13 References/Bibliography \n 1.14 Suggested Readings  \n 1.15 Terminal Questions  \n1.1  INTRODUCTION  \nResearch is an activity that leads us to finding new facts, information, assisting us in \nverifying the available knowledge and in making us q

### ***Multi DataFrame in langchain***

In [77]:
# In LangChain, the Agent can access many dataframes at once
import os
import warnings
warnings.filterwarnings("ignore")
# Importing pandas for data handling
import pandas as pd

In [80]:
# Importing required LangChain modules
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.llms import HuggingFaceHub

In [81]:
# Setting up Hugging Face API key
os.environ['HUGGINGFACEHUB_API_TOKEN'] = ""

In [82]:
# Loading Titanic dataset from a URL
url = "https://raw.githubusercontent.com/adamerose/datasets/master/titanic.csv"
df = pd.read_csv(url)
print(df.shape)
df.head()

(891, 15)


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [83]:
# Initializing Hugging Face LLM (e.g., 'google/flan-t5-large')
llm = HuggingFaceHub(repo_id="google/flan-t5-large", model_kwargs={"temperature": 0.5, "max_length": 512})

In [90]:
# Creating an agent to interact with the DataFrame
agent = create_pandas_dataframe_agent(llm, df, verbose=True, allow_dangerous_code=True)

# Ask a Question
response = agent.run("How many rows are there?")
print(response)



[1m> Entering new AgentExecutor chain...[0m


ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse LLM output: `df.repl_ast('') n = int(input()) l = [] for i in range(n): l.append([]) for j in range(i+1,n): for k in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i+1,n): for l in range(i`
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

In [5]:
import os
import warnings
import pandas as pd
from langchain.llms import HuggingFaceHub
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

# Set Hugging Face API token
os.environ['HUGGINGFACEHUB_API_TOKEN'] = ''  # Replace with your token

# Suppress warnings
warnings.filterwarnings("ignore")

# Example DataFrame
df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [25, 30, 35]
})

# Initialize HuggingFaceHub LLM with a smaller model
llm = HuggingFaceHub(repo_id="distilgpt2")  # Smaller model

# Creating an agent to interact with the DataFrame
agent = create_pandas_dataframe_agent(llm, df, verbose=True, allow_dangerous_code=True)

# Ask a question
response = agent.run("How many rows are there?")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
You should use the tools below to answer the question posed of you:

python_repl_ast - A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [python_repl_ast]
Action Input: the input to the action[0mthe action to take, should be one of [python_repl_ast] is not a valid tool, try one of [python_repl_ast].[32;1m[1;3m
You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
You should use the tools below to answer the question posed of you:

python_repl_ast - A Python shell. Use this t

HfHubHTTPError: 422 Client Error: Unprocessable Entity for url: https://api-inference.huggingface.co/models/distilgpt2 (Request ID: kxHjeJ)

Input validation error: `inputs` tokens + `max_new_tokens` must be <= 1024. Given: 1051 `inputs` tokens and 0 `max_new_tokens`
Make sure 'text-generation' task is supported by the model.

In [6]:
# Ask a question with token length limit
response = agent.run("How many rows are there?", max_new_tokens=50)  # Limit output tokens

ValueError: `run` supported with either positional arguments or keyword arguments but not both. Got args: ('How many rows are there?',) and kwargs: {'max_new_tokens': 50}.

In [None]:
agent.run("How many people have more than 23 age")

### ***06: Multi DataFrame Example***

In [None]:
#Agent can also interact with Multiple DataFrames passed in a list
# Checking DataFrame information
df.info()

In [None]:
# Creating a copy of the DataFrame
df1 = df.copy()

In [None]:
# Filling missing values in the 'age' column
df1["age"] = df1["age"].fillna(df1["age"].mean())

In [None]:
# Creating an agent to handle multiple DataFrames
agent = create_pandas_dataframe_agent(llm, [df, df1], verbose=True, allow_dangerous_code=True)

In [None]:
# Comparing DataFrames
agent.run("How many rows in the age column are different")

In [None]:
# Creating another DataFrame with additional calculated column
df2 = df1.copy()
df2["Age_Multiplied"] = df1["age"] * 2

In [None]:
# Displaying the updated DataFrame
df2.head()

In [None]:
# Creating an agent to handle three DataFrames
agent = create_pandas_dataframe_agent(llm, [df, df1, df2], verbose=True, allow_dangerous_code=True)

In [None]:
# Asking questions related to multiple DataFrames
agent.run("Are the number of columns same in all the dataframe")

### ***Using HuggingFace with oepn source LLM***

In [8]:
from langchain import PromptTemplate
from langchain import HuggingFaceHub
from langchain import LLMChain
import os

In [9]:
os.environ['HUGGINGFACEHUB_API_TOKEN'] = ''

### ***Approach 1:  Access Models Hosted on Hugging Face Through API***

#**Text2Text Generation Models | Seq2Seq Models | Encoder-Decoder Models**

In [10]:
prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}"
)

In [11]:
chain = LLMChain(llm=HuggingFaceHub(repo_id='google/flan-t5-large', model_kwargs={'temperature':0, 'max_length':64}),prompt = prompt)

In [12]:
chain.run("colorful socks")

'sock mania'

In [13]:
prompt = PromptTemplate(
    input_variables=["name"],
    template="Can you tell me about famous footballer {name}"
)

In [14]:
chain = LLMChain(llm=HuggingFaceHub(repo_id='google/flan-t5-large', model_kwargs={'temperature':0, 'max_length':64}),prompt = prompt)

In [15]:
chain.run("Messi")

'Messi is a footballer who plays for Argentina.'

In [16]:
prompt = PromptTemplate(
    input_variables=["cusine"],
    template="Can you tell me food items for a  {cusine} restuarant"
)

In [17]:
chain = LLMChain(llm=HuggingFaceHub(repo_id='google/flan-t5-large', model_kwargs={'temperature':0, 'max_length':64}),prompt = prompt)

In [18]:
chain.run("indian")

'Vegetables'

#**Apprach 01: Text Generation Models | Decoder Only Models**

In [19]:
prompt = PromptTemplate(
    input_variables=["name"],
    template="Can you tell me about famous footballer {name}"
)

In [26]:
chain = LLMChain(llm=HuggingFaceHub(repo_id='google/flan-t5-large', model_kwargs={'temperature':0, 'max_length':64}),prompt = prompt)

In [24]:
chain = LLMChain(llm=HuggingFaceHub(repo_id='EleutherAI/gpt-neo-2.7B', model_kwargs={'temperature':0.1, 'max_length':64}), prompt=prompt)

In [None]:
chain = LLMChain(llm=HuggingFaceHub(repo_id='tiiuae/falcon-7b', model_kwargs={'temperature':0.1, 'max_length':64}),prompt = prompt)

In [27]:
chain.run("Messi")

'Messi is a footballer who plays for Argentina.'

### ***Approach 02: Download Model Locally (Create Pipelines)***

In [43]:
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from transformers import pipeline
from transformers import AutoModelForSeq2SeqLM

In [44]:
model_id = 'google/flan-t5-large'

In [45]:
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [46]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, load_in_8bit=True, device_map='auto')

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [47]:
pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=128)

Device set to use cuda:0


In [48]:
local_llm = HuggingFacePipeline(pipeline=pipeline)

In [49]:
prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}"
)

In [50]:
chain = LLMChain(llm=local_llm,prompt = prompt)

In [51]:
chain.run("colorful socks")

'sock mania'

In [52]:
prompt = PromptTemplate(
    input_variables=["name"],
    template="Can you tell me about famous footballer {name}"
)

In [53]:
chain = LLMChain(llm=local_llm,prompt = prompt)

In [54]:
chain.run("Messi")

'Messi is a footballer who plays for Argentina.'