### Calling Hugging Face Login

In [1]:
from huggingface_hub import login

login()   # you'll paste your HF key once


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦

### Initialize a Model from Hugging Face

In [None]:
from langchain_huggingface import HuggingFaceEndpoint,ChatHuggingFace

model = HuggingFaceEndpoint(
    repo_id="microsoft/Phi-3-mini-4k-instruct",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)

chat = ChatHuggingFace(llm=model, verbose=True)



### Invoke API

In [2]:
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

base = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-7B-Instruct",
    task="conversational",     # Qwen supports chat completions
    max_new_tokens=512,
    temperature=0.7,
)

chat = ChatHuggingFace(llm=base, verbose=True)

print(chat.invoke("Explain Qunatum Physics simple terms."))


content="Sure! Quantum physics is a branch of physics that deals with the behavior of tiny particles, like atoms and subatomic particles such as electrons and photons. It's a bit different from the classical physics we learn in school because it describes the world at a very small scale, where things can behave in very strange and unexpected ways.\n\nHere are some key concepts explained in simple terms:\n\n1. **Particles and Waves**: In quantum physics, particles can sometimes act like waves and waves can act like particles. For example, light can behave as both a particle (photon) and a wave. This is known as wave-particle duality.\n\n2. **Quantum Superposition**: Imagine a coin spinning in the air. It's not heads or tails until it lands. In quantum physics, particles can exist in multiple states at the same time until they are observed. This is called superposition. For example, an electron can be in multiple places at once until it is measured.\n\n3. **Quantum Entanglement**: When p

In [None]:
questions = [
    "What is RAG?",
    "Explain LangChain.",
    "What's the difference between LLM and embedding models?"
]

answers = chat.batch(questions)

for a in answers:
    print("\n---\n", a)



In [None]:
for chunk in chat.stream("Tell me a short story about a robot learning emotions."):
    print(chunk, end="")


### Output Parsers in Langchain

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser

parser = JsonOutputParser()

prompt = ChatPromptTemplate.from_messages([
    ("system", "Return JSON only."),
    ("user", "{q}")
])

chain = prompt | chat | parser

print(chain.invoke({"q": "Give 3 animals and 1 fact each."}))


In [None]:
import dateparser

text = "Schedule a meeting next Tuesday at 3pm"

# 1. Ask model to rewrite as a clean date expression
llm_output = chat.invoke(f"Convert this to a precise datetime: {text}")

# Extract the message content (string)
model_text = llm_output.content

# 2. Parse using dateparser
parsed_date = dateparser.parse(model_text)

print("LLM said:", model_text)
print("Parsed datetime:", parsed_date)


In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import (
    JsonOutputParser,
    CommaSeparatedListOutputParser
)
from langchain_core.runnables import RunnableParallel

# Parsers
json_parser = JsonOutputParser()
list_parser = CommaSeparatedListOutputParser()

# Prompt
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer in the requested format."),
    ("user", "{q}")
])

# Wrap the branches in RunnableParallel
parallel_chain = RunnableParallel(
    as_list = prompt | chat | list_parser,
    as_json = prompt | chat | json_parser
)

# Invoke
result = parallel_chain.invoke({
    "q": "Give me 3 animals. Format 1: comma-separated. Format 2: JSON."
})

print(result)


### New Way of Short Term Memory Langchain

In [None]:
from langchain.agents import create_agent
from langgraph.checkpoint.memory import InMemorySaver

# simple tool
def get_name():
    return "This tool doesn't do much."

agent = create_agent(
    model=chat,                 # your Qwen ChatHuggingFace model
    tools=[],
    checkpointer=InMemorySaver()
)

# first turn
agent.invoke(
    {"messages": [{"role": "user", "content": "Hi, my name is Sahana."}]},
    {"configurable": {"thread_id": "123"}},
)

# second turn (memory persists automatically)
response = agent.invoke(
    {"messages": [{"role": "user", "content": "What is my name?"}]},
    {"configurable": {"thread_id": "123"}},
)

print(response)


### RAG Flow

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


In [5]:
from langchain_core.documents import Document


dolphin_docs = [
    Document(page_content="""
    Dolphins are highly intelligent marine mammals known for their social behavior.
    They belong to the family Delphinidae and communicate using whistles and clicks.
    """),

    Document(page_content="""
    Dolphins use echolocation to navigate and hunt. They emit sound waves and
    interpret the echoes to understand their surroundings.
    """),

    Document(page_content="""
    The bottlenose dolphin is one of the most well-known species. They live in warm
    and temperate seas worldwide and can reach speeds of over 30 km/h.
    """),

    Document(page_content="""
    Dolphins sleep by shutting down one half of their brain at a time. This allows them
    to rest while still surfacing for air.
    """),

    Document(page_content="""
    Dolphins have complex social structures and often travel in pods. They show signs
    of empathy and cooperative behavior, including helping injured members.
    """),
]


In [6]:
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a dolphin expert. Use ONLY the context to answer.\n\n"
     "Context:\n{context}\n"
    ),
    ("user", "{question}")
])


In [7]:
def combine_docs(docs):
    return "\n\n".join([d.page_content for d in docs])


In [8]:
from langchain_community.vectorstores import FAISS

vectorstore = FAISS.from_documents(dolphin_docs, embeddings)
retriever = vectorstore.as_retriever()


In [9]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

rag_chain = (
    RunnableParallel(
        docs = retriever,
        question = RunnablePassthrough()
    )
    | (lambda x: {
        "context": combine_docs(x["docs"]),
        "question": x["question"],
    })
    | prompt
    | chat
)


In [10]:
answer = rag_chain.invoke("How do dolphins sleep?")
print(answer.content)


Dolphins sleep by shutting down one half of their brain at a time, allowing them to rest while still surfacing for air.


In [11]:
answer = rag_chain.invoke("How does cat meows")
print(answer.content)

The context provided is about dolphins, so it does not contain information about how cats meow. Based on general knowledge, cats meow as a form of communication. They use meows to interact with humans and other cats, often to get attention, express needs, or convey emotions. However, this information is not derived from the given context.


### Langsmith Tracing

In [19]:
from dotenv import load_dotenv
load_dotenv()


True

In [21]:
from langchain_core.runnables import RunnableLambda

def rag_function(query: str):
    return rag_chain.invoke(query).content

rag_runnable = RunnableLambda(lambda x: rag_function(x["question"]))



In [24]:
import langsmith as ls

with ls.tracing_context(project_name="dolphin-rag", enabled=True):
    rag_chain.invoke("How do dolphins sleep?")



In [25]:
with ls.tracing_context(project_name="dolphin-rag", enabled=True):
    rag_chain.invoke("What are key features of dolphins?")

### Basic Tool Calling

In [None]:
import webcolors
def closest_color(requested_color):
    min_colors = {}
    for key, name in webcolors.CSS3_HEX_TO_NAMES.items():
        r, g, b = webcolors.hex_to_rgb(key)
        rd = (r - requested_color[0]) ** 2
        gd = (g - requested_color[1]) ** 2
        bd = (b - requested_color[2]) ** 2
        min_colors[(rd + gd + bd)] = name
    return min_colors[min(min_colors.keys())]

In [None]:
from langchain.tools import tool

@tool
def color_name(r: int, g: int, b: int) -> str:
    """Given RGB values, return the closest CSS color name."""
    try:
        return webcolors.rgb_to_name((r, g, b))
    except ValueError:
        return closest_color((r, g, b))

@tool
def color_mix(r1: int, g1: int, b1: int, r2: int, g2: int, b2: int) -> dict:
    """Mix two RGB colors and return a dictionary with:
    - mixed RGB
    - closest CSS color name
    """
    mixed = (
        (r1 + r2) // 2,
        (g1 + g2) // 2,
        (b1 + b2) // 2,
    )

    try:
        name = webcolors.rgb_to_name(mixed)
    except ValueError:
        name = closest_color(mixed)

    return {"mixed_rgb": mixed, "mixed_color_name": name}



In [None]:
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

base = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-7B-Instruct",
    task="conversational",
    max_new_tokens=300,
)

tools = [color_name, color_mix]

chat = ChatHuggingFace(
    llm=base,
    tools=tools,
    verbose=True
)

In [None]:
print(chat.invoke("What color is rgb(135, 206, 250)?"))

In [None]:
print(chat.invoke(
    "Mix rgb(255, 0, 0) with rgb(0, 0, 255). What color do we get?"
))

In [None]:
print(chat.invoke("Find the name of rgb(240, 248, 255)"))