In [1]:
from langchain_core.runnables import RunnableSequence, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from llama_cpp import Llama
from langchain_community.llms import LlamaCpp
from langchain.output_parsers import PydanticOutputParser
from langchain_openai import ChatOpenAI
from operator import itemgetter
from langchain_core.prompts import ChatPromptTemplate

In [2]:
# Sanity check: report whether PyTorch can see a CUDA device in this environment.
import torch
torch.cuda.is_available()

True

In [3]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries used below.
import warnings
warnings.filterwarnings("ignore")

- Model I/O
    - Loading and working with LLMs
- Memory
    - Helping LLMs to remember
- Agents
    - Combining complex behavior with external tools
- Chains
    - Connecting methods and modules

Newer frameworks of note are:
- DSPy
- Haystack

<img src="imgs/langchain.png" alt="LangChain overview" width="400" height="100">

# Model I/O: Loading Quantized Models with LangChain

A GGUF model represents a compressed version of its original counterpart through a method called quantization.

Quantization reduces the number of bits required to represent the parameters of an LLM while attempting to maintain most of the original information. This comes with some loss in precision but often makes up for it as the model is much faster to run, requires less VRAM, and is often almost as accurate as the original.

> As a rule of thumb, look for at least 4-bit quantized models.

In [4]:
# !wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf

In [5]:
# Load the fp16 Phi-3-mini GGUF weights through LangChain's LlamaCpp wrapper.
# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="weights/Phi-3-mini-4k-instruct-fp16.gguf",
    n_gpu_layers=-1,  # presumably "offload every layer to the GPU" — confirm against the llama-cpp-python docs
    max_tokens=500,
    n_ctx=2048,  # the load log below notes this is smaller than the model's training context (4096)
    seed=42,  # fixed seed
    verbose=False
)

llama_new_context_with_model: n_batch is less than GGML_KQ_MASK_PAD - increasing to 32
llama_new_context_with_model: n_ctx_per_seq (2048) < n_ctx_train (4096) -- the full capacity of the model will not be utilized


In [6]:
# Send a raw prompt with no Phi-3 chat template applied; the recorded output below is an empty string.
llm.invoke("Hi! My name is Maarten. What is 1 + 1?")

''

As we have seen in previous chapters, Phi-3 requires a specific prompt template; without it, the call above returns an empty string.

Instead of copy-pasting this template each time we use Phi-3 in LangChain, we can use one of LangChain’s core functionalities, namely “chains.”

# Chains: Extending the Capabilities of LLMs

Although a chain can take many forms, each with a different complexity, it generally connects an LLM with some additional tool, prompt, or feature.

<img src="imgs/chains.png" alt="A single chain connecting an LLM to one modular component" width="400" height="100">

Here, a single chain connects to a single modular component, such as a prompt template or external memory.

## Prompt Template

With LangChain, we will use chains to create and use a default prompt template. We'll chain the prompt template together with the LLM to get the output we are looking for.

The template for Phi-3 consists of four main components:

```text
- <s> to indicate when the prompt starts
- <|user|> to indicate the start of the user’s prompt
- <|assistant|> to indicate the start of the model’s output
- <|end|> to indicate the end of either the prompt or the model’s output
```

In [7]:
# Phi-3 chat layout with a single "input_prompt" placeholder for the user's message
template = (
    "<|user|>\n"
    "{input_prompt}<|end|>\n"
    "<|assistant|>"
)
prompt = PromptTemplate(input_variables=["input_prompt"], template=template)

In [8]:
prompt

PromptTemplate(input_variables=['input_prompt'], template='<|user|>\n{input_prompt}<|end|>\n<|assistant|>')

To create our first chain, we can use both the prompt that we created and the LLM and chain them together:

In [9]:
# Pipe the prompt template into the LLM: the formatted prompt becomes the model input
basic_chain = prompt | llm

In [10]:
# Use the chain; the dict key must match the template's "input_prompt" variable
basic_chain.invoke(
    {
        "input_prompt": "Hi! My name is Maarten. What is 1 + 1?",
    }
)

' Hello Maarten! The answer to 1 + 1 is 2.'

### Placeholders for varying inputs

In [11]:
# Business-name prompt in the Phi-3 chat layout; "product" is the only placeholder
template = (
    "<|user|>\n"
    "Create a funny name for a business that sells {product}<|end|>\n"
    "<|assistant|>"
)
name_prompt = PromptTemplate(input_variables=["product"], template=template)

In [12]:
# Pipe the business-name prompt into the same LLM
product_chain = name_prompt | llm

In [13]:
# Use the chain; "product" fills the {product} placeholder in the template
product_chain.invoke(
    {
        "product": "Small lego pieces of zoo animals",
    }
)

' "Zoogle: The Littlest Legos for Zoo Enthusiasts"'

## Multiple Chains

Instead of asking for everything in one complex prompt, we can break the task into smaller subtasks that run sequentially. This requires multiple calls to the LLM, but each call uses a smaller prompt and produces an intermediate output.

**Example**
Let’s illustrate with an example. Assume that we want to generate a story that has three components:

- A title
- A description of the main character
- A summary of the story

<img src="imgs/story.png" alt="Story generation chain" width="500" height="200">

This first link is the only component that requires input from the user. We define the template with "summary" as the input variable and "title" as the output.

### Generate the Title From a Summary of the plot

In [14]:
# Schema handed to the JSON parser: the story summary and its generated title.
class M(BaseModel):
    summary: str
    title: str


# The parser derives the format instructions injected into the prompt below
parser = JsonOutputParser(pydantic_object=M)

# Reuse the `llm` loaded earlier in the notebook instead of loading the same
# fp16 GGUF file a second time with identical settings.

# Create a chain for the title of our story
template = """
<|user|>
Create a title for a story with the following summary: {summary}. {format_instructions}
<|end|>
<|assistant|>
"""
title_prompt = PromptTemplate.from_template(
    template=template,
    partial_variables={"format_instructions": parser.get_format_instructions()}
)
# Prompt -> LLM -> JSON parser
title_chain = title_prompt | llm | parser

llama_new_context_with_model: n_batch is less than GGML_KQ_MASK_PAD - increasing to 32
llama_new_context_with_model: n_ctx_per_seq (2048) < n_ctx_train (4096) -- the full capacity of the model will not be utilized


In [15]:
# Example usage: only "summary" is supplied; the format instructions were bound as a partial variable
summary = "a girl that lost her mother"
result = title_chain.invoke({"summary": summary})

In [16]:
result

{'summary': 'A poignant tale of loss and resilience as a young girl navigates life without her mother.',
 'title': "Whispers from the Past: A Girl's Journey Through Grief"}

### Generate the Description of the character

In [17]:
# Output schema for the second step: the two inputs echoed back plus the new description
class Desc(BaseModel):
    summary: str
    title: str
    character_description: str


parser = JsonOutputParser(pydantic_object=Desc)

# Phi-3 formatted prompt for the character description, built from the summary and title
template = (
    "<|user|>\n"
    'Describe the main character of a story about {summary} with the title {title}. '
    'Put the description of the character in the "character_description" property of the output json '
    'and repeat the summary and title in the output format. {format_instructions}<|end|> \n'
    "<|assistant|>"
)
description_prompt = PromptTemplate.from_template(
    template=template,
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [18]:
# Two-stage pipeline: the dict parsed from the title step is fed into description_prompt.
# Note that `parser` is whichever JsonOutputParser was assigned most recently, and it is used for both stages.
description_chain = title_prompt | llm | parser | description_prompt | llm | parser

In [19]:
# Example usage
summary = "a girl that lost her mother"
# NOTE(review): the call below is left disabled in the notebook; the reason is not recorded.
# result = description_chain.invoke({"summary": summary})

# Trying Llama model from Llama.cpp

In [20]:
# Load the Q4_K_M-quantized Llama 3.2 3B Instruct GGUF directly with llama_cpp (no LangChain wrapper).
model = Llama(
    model_path="weights/Llama-3.2-3B-Instruct-Q4_K_M.gguf",
    n_ctx=2048,  # Drastically reduce context window (the load log reports a training context of 131072)
    n_batch=128,  # Smaller batch size
    n_gpu_layers=-1,  # GPU layers
    n_threads=4,  # Fewer threads
    verbose=False
)

llama_new_context_with_model: n_ctx_per_seq (2048) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


In [21]:
# Plain-text "Human/Assistant" prompt; it has no placeholders, so a regular string literal suffices.
# NOTE(review): llama.cpp may prepend its own BOS token — confirm the explicit <|begin_of_text|> is not duplicated.
prompt = "<|begin_of_text|>Human: What are the main benefits of artificial intelligence?\n\nAssistant:"

# Cap the response length, stop at the next turn or the end-of-text marker, and do not echo the prompt.
output = model(prompt, max_tokens=500, stop=["Human:", "<|end_of_text|>"], echo=False)

# The generated text is under choices[0]["text"]; trim surrounding whitespace before printing.
full_response = output["choices"][0]["text"].strip()
print(full_response)

Some of the main benefits of artificial intelligence include:

*   **Increased Efficiency**: AI can automate repetitive tasks, freeing up human time and resources for more complex and creative tasks.
*   **Improved Accuracy**: AI can process large amounts of data quickly and accurately, reducing the risk of human error.
*   **Enhanced Decision-Making**: AI can analyze vast amounts of data to identify patterns and make predictions, helping organizations make better decisions.
*   **Personalization**: AI can help personalize experiences for customers, making interactions more engaging and relevant.
*   **Innovation**: AI can enable new products and services, and improve existing ones, driving business growth and competitiveness.

These are just a few examples of the many benefits of artificial intelligence. Do you have any specific areas you'd like to know more about? I'm here to help!


# LangChain Multiple Chains with LlamaCpp

## Pydantic Output Parser

In [22]:
# Define your desired data structure.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator("setup")
    def question_ends_with_question_mark(cls, field):
        # str.endswith is safe on an empty string, whereas indexing field[-1] raises
        # IndexError, which would escape as a raw exception instead of a validation error.
        if not field.endswith("?"):
            raise ValueError("Badly formed question!")
        return field


# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)

# "format_instructions" is filled once from the parser; only "query" is supplied at invoke time.
prompt = PromptTemplate(
    template="Answer the user query. Respond ONLY in valid JSON format matching this structure:\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Prompt -> LLM -> parser
chain = prompt | llm | parser

joke_output = chain.invoke({"query": joke_query})

In [23]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"setup": {"title": "Setup", "description": "question to set up a joke", "type": "string"}, "punchline": {"title": "Punchline", "description": "answer to resolve the joke", "type": "string"}}, "required": ["setup", "punchline"]}\n```'

In [24]:
joke_output

Joke(setup="Why don't scientists trust atoms?", punchline="Because even they can't make up anything!")

In [25]:
##################################
#   Phi-3
##################################
# Send the raw query to Phi-3 (no chat template applied).
# Use .invoke() rather than calling the LLM object directly: the direct-call form is
# deprecated in LangChain, and extra keyword arguments are still forwarded to the model.
joke_query = "Tell me a joke about chickens."
response = llm.invoke(joke_query, max_tokens=200, echo=False)
response

"\n<|assistant|> Why don't chickens play poker in the coop? Too many chicken cards!"

In [26]:
##################################
#   Llama 3.2
##################################
# Continue the sequence
# Same raw query sent straight to the llama_cpp model; the recorded output is the full completion dict.
joke_query = "Tell me a joke about chickens."
response = model(joke_query, max_tokens=200, echo=False)
response

{'id': 'cmpl-a47bbf41-15d9-45c1-af53-c00798e84b2a',
 'object': 'text_completion',
 'created': 1733823224,
 'model': 'weights/Llama-3.2-3B-Instruct-Q4_K_M.gguf',
 'choices': [{'text': ' here\n\nA chicken walks into a library and says to the librarian "do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?" The librarian replies, "it rings a bell, but I\'m not sure if it\'s here or not."',
   'index': 0,
   'logprobs': None,
   'finish_reason': 'stop'}],
 'usage': {'prompt_tokens': 8, 'completion_tokens': 55, 'total_tokens': 63}}

## Pydantic Output Parser With Title Generation Chained with Character Description

### Two Chains

In [27]:
# ------------------------------ #
#   Chain 1: Title Generation    #
# ------------------------------ #

# Define the schema for the title generation output
class TitleOutput(BaseModel):
    summary: str = Field(description="The summary of the story")
    title: str = Field(description="A concise, creative title for the story")

# Initialize the parser for title output
title_parser = PydanticOutputParser(pydantic_object=TitleOutput)


# Prompt for title generation
template_title = """
Create a title for a story with the following summary: {summary}. Respond ONLY in valid JSON format matching this structure:
{format_instructions}
"""
# "format_instructions" is bound from the parser; only "summary" is supplied at invoke time
title_prompt = PromptTemplate(
    template=template_title,
    input_variables=["summary"],
    partial_variables={"format_instructions": title_parser.get_format_instructions()},
)

# Create the chain for title generation
title_chain = title_prompt | llm | title_parser

# Usage: title_chain.invoke({"summary": ...}) yields a TitleOutput with .summary and .title

In [28]:
# Run the title chain; the parsed TitleOutput object is printed below.
summary = "A girl that lost her mother."
result = title_chain.invoke({"summary": summary})
print(result)

summary="A Girl's Journey Through Loss and Discovery: The Tale of a Motherless Child" title='Echoes of Love: A Story Beyond Goodbye'


In [39]:
# ---------------------------------- #
#   Chain 2: Character Description   #
# ---------------------------------- #

# Define the schema for character description output
class CharacterDescriptionOutput(BaseModel):
    summary: str = Field(description="The summary of the story")
    title: str = Field(description="The title of the story")
    character_description: str = Field(description="A detailed description of the main character")

# Initialize the parser for character description
description_parser = PydanticOutputParser(pydantic_object=CharacterDescriptionOutput)

# Prompt for character description
template_descript = """
With the input data:
- Summary: {summary}
- Title: {title}
Describe the main character (character_description) of a story about the given summary with the given title.
Respond ONLY in valid JSON format matching this structure and DO NOT add any additional text before or after the JSON:
{format_instructions}
"""
# "summary" and "title" are supplied at invoke time; the format instructions are bound from the parser
description_prompt = PromptTemplate(
    template=template_descript,
    input_variables=["summary", "title"],
    partial_variables={"format_instructions": description_parser.get_format_instructions()},
)

# Chain for character description
description_chain = description_prompt | llm | description_parser


In [35]:
# Example usage for the full pipeline (step 1: title)
summary = "A girl that lost her mother"
# Generate the title
title_result = title_chain.invoke({"summary": summary})

In [36]:
title_result.title

'Echoes of a Mother Lost'

In [37]:
summary

'A girl that lost her mother'

In [40]:
# Generate the character description from the original summary plus the generated title
description_result = description_chain.invoke({"summary": summary, "title": title_result.title})

In [41]:
description_result.dict()

{'summary': 'A girl that lost her mother',
 'title': 'Echoes of a Mother Lost',
 'character_description': "The main character is an emotive young girl who has experienced the unimaginable loss of her mother at a tender age. She embodies resilience and strength, despite grappling with deep-seated grief and longing for maternal love that once guided her life. This poignant journey explores how she navigates through the labyrinth of memories and emotions tied to her mother's absence."}

### Single Chain

### Test

In [42]:


# Two-step example: chain1 answers the first question and its text output feeds prompt2.
prompt1 = ChatPromptTemplate.from_template("what is the city {person} is from?")
prompt2 = ChatPromptTemplate.from_template(
    "what country is the city {city} in? respond in {language}"
)

# NOTE: this rebinds `model`, which previously held the llama_cpp Llama instance, to a ChatOpenAI client.
# NOTE(review): ChatOpenAI() takes no arguments here — presumably credentials come from the environment; confirm.
model = ChatOpenAI()

chain1 = prompt1 | model | StrOutputParser()

# The dict runs chain1 to produce "city" and passes "language" through from the input.
chain2 = (
    {"city": chain1, "language": itemgetter("language")}
    | prompt2
    | model
    | StrOutputParser()
)

chain2.invoke({"person": "obama", "language": "spanish"})

'La ciudad de Chicago, Illinois, está en los Estados Unidos.'

### Example

In [43]:
# ------------------------------ #
#   Chain 1: Title Generation    #
# ------------------------------ #

# Plain-text title prompt: no JSON schema here, the model is asked for the title only
template_title = (
    "\n"
    "Create a title for a story with the following summary: {summary}. Respond ONLY the title.\n"
)
title_prompt = PromptTemplate(input_variables=["summary"], template=template_title)


In [54]:
# ---------------------------------- #
#   Chain 2: Character Description   #
# ---------------------------------- #

# CharacterDescriptionOutput is already defined in the "Two Chains" section above;
# reuse it here instead of declaring an identical class a second time.
description_parser = PydanticOutputParser(pydantic_object=CharacterDescriptionOutput)

# Prompt for character description
template_descript = """
With the input data:
- Summary: {summary}
- Title: {title}
Describe the main character of a story about the given summary and title.
Respond ONLY in valid JSON of the form (replace placeholders){format_instructions}
"""
# "summary" and "title" are supplied at invoke time; the format instructions are bound from the parser
description_prompt = PromptTemplate(
    template=template_descript,
    input_variables=["summary", "title"],  # Ensure "character_description" is not here
    partial_variables={"format_instructions": description_parser.get_format_instructions()},
)

In [62]:
# ------------------------------------------- #
#   Combine Chains Using a dict + itemgetter   #
# ------------------------------------------- #

# Chain for title generation
chain_title = title_prompt | model | StrOutputParser()
# The dict passes "summary" through from the input and runs chain_title to produce "title";
# both values then fill description_prompt.
chain_description = (
    {"summary": itemgetter("summary"), "title": chain_title}
    | description_prompt
    | model
    | description_parser
)

# ------------------------------ #
#   Example Usage of Combined Pipeline   #
# ------------------------------ #
summary = "A girl that lost her mother."
result = chain_description.invoke({"summary": summary})
result.dict()

{'summary': 'A girl that lost her mother.',
 'title': 'Whispers of Loss',
 'character_description': 'The main character is a young girl named Emily who is dealing with the profound loss of her mother. She is quiet and introspective, finding solace in nature and the gentle whispers of the wind. Emily often retreats into her own world, trying to navigate the complex emotions that come with grief. Despite her sorrow, she possesses a quiet strength and resilience that helps her cope with the pain of her loss.'}

In [None]:
# ------------------------------ #
#   Debug individual chains
# ------------------------------ #
# NOTE: this rebinds `chain_description` to the description step alone, so it now
# needs both "summary" and "title" in its input.
chain_description = (
    description_prompt
    | model
    | description_parser
)
print(chain_description.invoke({"summary": summary, "title": "Whispers of the Past"}))


# NOTE: `chain_title` is rebound as well; the leading dict only passes "summary" through.
chain_title = (
    {"summary": itemgetter("summary")}
    | title_prompt
    | model
    | StrOutputParser()
)

print(chain_title.invoke({"summary": summary}))

CharacterDescriptionOutput(summary='A girl that lost her mother.', title='Whispers of the Past', character_description="The main character is a young girl named Emily who is grieving the loss of her mother. She is quiet and introspective, often retreating into her own world to cope with her emotions. Despite her sadness, Emily is incredibly resilient and carries a deep sense of strength within her. She finds solace in listening to the whispers of the past, believing that her mother's spirit is guiding her through the difficult times.")