<a href="https://colab.research.google.com/github/Prakum14/Testfiles/blob/master/M4_AST_03_LangChain_with_Open_Source_LLMs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Advanced Certification Programme in AI and MLOps
## A Program by IISc and TalentSprint
### Assignment 3: Open Source LLMs with LangChain 🦜🔗

## Learning Objectives

At the end of the experiment, you will be able to:

* use open source LLMs: **`zephyr-7b-beta`**, **`Mistral-7B-Instruct-v0.2`**,  and **`Llama2`** through HuggingFaceHub with LangChain
* understand & use the concept of Prompt template, Memory and output parsers in LangChain


### Setup Steps:

In [3]:
#@title Please enter your registration id to start: { run: "auto", display-mode: "form" }
Id = "2416218" #@param {type:"string"}

In [4]:
#@title Please enter your password (your registered phone number) to continue: { run: "auto", display-mode: "form" }
password = "8975485400" #@param {type:"string"}

In [5]:
#@title Run this cell to complete the setup for this Notebook
from IPython import get_ipython

ipython = get_ipython()

notebook= "M4_AST_03_LangChain_with_Open_Source_LLMs" #name of the notebook

def setup():
#  ipython.magic("sx pip3 install torch")

    # ipython.magic("wget https://cdn.iisc.talentsprint.com/AIandMLOps/Datasets/Acoustic_Extinguisher_Fire_Dataset.xlsx")
    from IPython.display import HTML, display
    display(HTML('<script src="https://dashboard.talentsprint.com/aiml/record_ip.html?traineeId={0}&recordId={1}"></script>'.format(getId(),submission_id)))
    print("Setup completed successfully")
    return

def submit_notebook():
    ipython.magic("notebook -e "+ notebook + ".ipynb")

    import requests, json, base64, datetime

    url = "https://dashboard.talentsprint.com/xp/app/save_notebook_attempts"
    if not submission_id:
      data = {"id" : getId(), "notebook" : notebook, "mobile" : getPassword()}
      r = requests.post(url, data = data)
      r = json.loads(r.text)

      if r["status"] == "Success":
          return r["record_id"]
      elif "err" in r:
        print(r["err"])
        return None
      else:
        print ("Something is wrong, the notebook will not be submitted for grading")
        return None

    elif getAnswer() and getComplexity() and getAdditional() and getConcepts() and getComments() and getMentorSupport():
      f = open(notebook + ".ipynb", "rb")
      file_hash = base64.b64encode(f.read())

      data = {"complexity" : Complexity, "additional" :Additional,
              "concepts" : Concepts, "record_id" : submission_id,
              "answer" : Answer, "id" : Id, "file_hash" : file_hash,
              "notebook" : notebook,
              "feedback_experiments_input" : Comments,
              "feedback_mentor_support": Mentor_support}
      r = requests.post(url, data = data)
      r = json.loads(r.text)
      if "err" in r:
        print(r["err"])
        return None
      else:
        print("Your submission is successful.")
        print("Ref Id:", submission_id)
        print("Date of submission: ", r["date"])
        print("Time of submission: ", r["time"])
        print("View your submissions: https://aimlops-iisc.talentsprint.com/notebook_submissions")
        #print("For any queries/discrepancies, please connect with mentors through the chat icon in LMS dashboard.")
        return submission_id
    else: submission_id


def getAdditional():
  try:
    if not Additional:
      raise NameError
    else:
      return Additional
  except NameError:
    print ("Please answer Additional Question")
    return None

def getComplexity():
  try:
    if not Complexity:
      raise NameError
    else:
      return Complexity
  except NameError:
    print ("Please answer Complexity Question")
    return None

def getConcepts():
  try:
    if not Concepts:
      raise NameError
    else:
      return Concepts
  except NameError:
    print ("Please answer Concepts Question")
    return None


# def getWalkthrough():
#   try:
#     if not Walkthrough:
#       raise NameError
#     else:
#       return Walkthrough
#   except NameError:
#     print ("Please answer Walkthrough Question")
#     return None

def getComments():
  try:
    if not Comments:
      raise NameError
    else:
      return Comments
  except NameError:
    print ("Please answer Comments Question")
    return None


def getMentorSupport():
  try:
    if not Mentor_support:
      raise NameError
    else:
      return Mentor_support
  except NameError:
    print ("Please answer Mentor support Question")
    return None

def getAnswer():
  try:
    if not Answer:
      raise NameError
    else:
      return Answer
  except NameError:
    print ("Please answer Question")
    return None


def getId():
  try:
    return Id if Id else None
  except NameError:
    return None

def getPassword():
  try:
    return password if password else None
  except NameError:
    return None

submission_id = None
### Setup
if getPassword() and getId():
  submission_id = submit_notebook()
  if submission_id:
    setup()
else:
  print ("Please complete Id and Password cells before running setup")



Setup completed successfully


### Install required dependencies

In [6]:
# Langchain
!pip -q install langchain

# Library to communicate with HF hub
!pip -q install --upgrade huggingface_hub

In [7]:
!pip -q install langchain_community

In [8]:
!pip -q install langchain_huggingface

### Import required packages

In [9]:
# Import the 'os' module to interact with the operating system, such as setting environment variables.
import os

# Import 'getpass' to securely prompt the user for a password or API key without displaying it on the screen.
from getpass import getpass

# Import 'HuggingFaceEndpoint' from LangChain's community module to connect and interact with Hugging Face's hosted models.
from langchain_community.llms import HuggingFaceEndpoint

# Import 'PromptTemplate' from LangChain to create structured templates for prompting language models.
from langchain.prompts import PromptTemplate

### **Provide your HuggingFace api key/access token**

In [10]:
# Securely prompt the user to enter their Hugging Face access token without displaying it on the screen.
pass_token = getpass("Enter your HuggingFace access token: ")

# Store the entered token as an environment variable named 'HF_TOKEN'.
os.environ["HF_TOKEN"] = pass_token

# Also store the token in 'HUGGINGFACEHUB_API_TOKEN', which may be required for authentication with Hugging Face's API.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = pass_token

# Delete the 'pass_token' variable from memory for security reasons after storing it in environment variables.
del pass_token

Enter your HuggingFace access token: ··········


### **Exploring Open Source LLMs hosted on HuggingFace**

>**I.** `HuggingFaceH4/zephyr-7b-beta`
>
>**II.** `mistralai/Mistral-7B-Instruct-v0.2`
>
>**III.** `LlaMa2`

[LangChain link](https://python.langchain.com/docs/integrations/chat/huggingface) for using Hugging Face LLM's as chat models.

### **I.** [**HuggingFaceH4/zephyr-7b-beta**](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)

In [11]:
# Import HuggingFace model abstraction class from langchain
from langchain_huggingface import HuggingFaceEndpoint

In [12]:
# Initialize the Hugging Face language model endpoint with specific parameters.
llm = HuggingFaceEndpoint(
    # Specify the repository ID of the model to use. In this case, it's "zephyr-7b-beta" from HuggingFaceH4.
    repo_id="HuggingFaceH4/zephyr-7b-beta",

    # Define the task type for the model, which is "text-generation" (used for generating text responses).
    task="text-generation",

    # Set the maximum number of new tokens the model can generate in response.
    max_new_tokens=512,

    # Use 'top_k' sampling to limit the number of highest-probability tokens considered at each generation step.
    top_k=30,

    # Set the temperature, which controls the randomness of the output. A lower value (e.g., 0.1) makes responses more deterministic.
    temperature=0.1,

    # Apply a repetition penalty to discourage the model from repeating the same words or phrases too frequently.
    repetition_penalty=1.03,
)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [13]:
# Invoke the Hugging Face language model with a specific prompt asking for five points on learning programming.
response = llm.invoke("How to learn programming? give 5 points")

# Print the model's generated response to the console.
print(response)





1. **Start with the Basics**:
   - Begin by understanding what programming is and why it's important.
   - Learn about variables, data types, loops, and functions. These are fundamental concepts in any programming language.
   - Codecademy, freeCodeCamp, and W3Schools offer interactive, beginner-friendly tutorials.

2. **Choose a Language**:
   - Select a programming language that interests you. Popular choices for beginners include Python, JavaScript, and Java.
   - Each language has its own syntax and use cases, so choose one that aligns with your goals (e.g., web development, data analysis, mobile app development).

3. **Practice Coding Daily**:
   - Consistency is key in learning to program. Aim to code for at least 30 minutes to an hour each day.
   - Websites like LeetCode, HackerRank, and Exercism provide coding challenges to help you improve your skills.
   - Work on personal projects or contribute to open-source projects to gain practical experience.

4. **Learn by Doing**:


#### **Prompt Template**

Prompt templates are predefined recipes for generating prompts for language models.

A template may include instructions, few-shot examples, and specific context and questions appropriate for a given task.

LangChain provides tooling to create and work with prompt templates.

To know more about Prompt template, refer [here](https://python.langchain.com/docs/modules/model_io/prompts/quick_start).

#### **Example-1**

In [14]:
# Import the PromptTemplate class from LangChain to create structured prompts.
from langchain.prompts import PromptTemplate

# Create a prompt template using placeholders for dynamic input values.
prompt_template = PromptTemplate.from_template(
    "Tell me a {adjective} joke about {content}."
)

# Format the template by replacing the placeholders with actual values.
messages = prompt_template.format(adjective="funny", content="Trump")

# Output the formatted prompt string.
messages

'Tell me a funny joke about Trump.'

In [15]:
# Import the ChatHuggingFace class from LangChain's Hugging Face integration.
from langchain_huggingface import ChatHuggingFace

# Initialize a chat model using the previously defined Hugging Face LLM.
chat_model = ChatHuggingFace(llm=llm)

In [16]:
# Invoke the chat model with the formatted prompt message.
response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

Why did Donald Trump throw a clock out the window?

Because he wanted to see time fly!

(Sentiment: irreverent humor, political satire)

(Context: This joke is a playful poke at Donald Trump's obsession with time and his famously impatient personality. The humor lies in the unexpected and absurd situation of throwing a clock out of a window to make time go faster.)


**Practice-1 :**
Create a prompt template envisioning a situation where you have to pass three parameters, and the language model responds to it.

In [17]:
# YOUR CODES HERE for above practice exercise
# Import the PromptTemplate class from LangChain to create structured prompts.
from langchain.prompts import PromptTemplate

# Create a prompt template using placeholders for dynamic input values.
prompt_template = PromptTemplate.from_template(
    "Tell me a {length}, {adjective} joke about {content}."
)

# Format the template by replacing the placeholders with actual values.
messages = prompt_template.format(length= "short",adjective="funny", content="Trump")

# Output the formatted prompt string.
messages

response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

Why did President Trump decide to move the White House to Mar-a-Lago?

Because he thinks that's where foreign leaders go to pay their respects. 

(Note: Mar-a-Lago is Trump's private estate in Florida where he has previously hosted several foreign leaders.)


#### **Example-2**

In [18]:
# Import message classes from LangChain's schema module.
from langchain.schema import (
    HumanMessage,  # Represents a message sent by the human (user) in a conversation.
    SystemMessage,  # Represents a message providing system-level instructions or context.
)

In [19]:
# Import the PromptTemplate class from LangChain to create structured prompts.
from langchain.prompts import PromptTemplate

# Create a prompt template with placeholders for dynamic values.
prompt_template = PromptTemplate.from_template(
    "Tell me {count} facts about {event_or_place}."
)

# Format the template by replacing placeholders with actual values.
user_msg = prompt_template.format(count=5, event_or_place="Tajmahal")

# Output the formatted prompt string.
user_msg

'Tell me 5 facts about Tajmahal.'

In [20]:
# Create a list of messages to structure a conversation with both system and human roles.
messages = [
    # SystemMessage provides a directive to the model about its role or behavior.
    SystemMessage(content="You're a knowledgeable historian"),

    # HumanMessage contains the prompt or query from the user, which is the formatted message.
    HumanMessage(content=user_msg),
]

In [21]:
# Import the ChatHuggingFace class from LangChain's Hugging Face integration to handle chat interactions.
from langchain_huggingface import ChatHuggingFace

In [22]:
# Initialize the ChatHuggingFace model with the previously defined Hugging Face language model (llm).
chat_model = ChatHuggingFace(llm=llm)

In [23]:
# Access the model identifier of the Hugging Face model used by the ChatHuggingFace instance.
chat_model.model_id

'HuggingFaceH4/zephyr-7b-beta'

In [24]:
# Convert the sequence of messages into a chat prompt that can be processed by the Hugging Face model.
chat_model._to_chat_prompt(messages)

"<|system|>\nYou're a knowledgeable historian</s>\n<|user|>\nTell me 5 facts about Tajmahal.</s>\n<|assistant|>\n"

In [25]:
# Convert the sequence of messages into a chat prompt and print the resulting formatted prompt.
print(chat_model._to_chat_prompt(messages))

<|system|>
You're a knowledgeable historian</s>
<|user|>
Tell me 5 facts about Tajmahal.</s>
<|assistant|>



In [26]:
# Invoke the chat model with the sequence of messages, sending it to the Hugging Face model for processing.
response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

1. The Taj Mahal is a mausoleum located in Agra, India, commissioned by the Mughal emperor Shah Jahan in memory of his wife Mumtaz Mahal.

2. Construction of the Taj Mahal began in 1631 and took around 22 years to complete, with over 20,000 workers involved in the project.

3. The white marble used to build the Taj Mahal was sourced from Makrana, a town situated in the state of Rajasthan. The marble was then transported to Agra via a network of rivers.

4. The Taj Mahal is a symbol of universal love and is an example of Mughal architecture that combines elements of Persian, Ottoman, and Indian design.

5. The Taj Mahal is not just a symbol of love and devotion but also an engineering masterpiece. The tomb is situated on a raised platform and is surrounded by four smaller tombs - one each for Shah Jahan's other wives and a son who died in infancy, as well as a highwayman who helped Shah Jahan escape from imprisonment. Additionally, the Taj Mahal is built on a complex system of canals an

**Practice-2 :**
Create a prompt template envisioning a situation where the language model behaves like a particular persona, and the user's query requires information involving three parameters.

In [27]:
# YOUR CODES HERE for above practice exercise
from langchain.prompts import PromptTemplate

# Create a prompt template with placeholders for dynamic values.
prompt_template = PromptTemplate.from_template(
    "Tell me {count} facts about {person_or_place} of {era}."
)

# Format the template by replacing placeholders with actual values.
user_msg = prompt_template.format(count=5, person_or_place="Mahatma Gandhi", era="pre-independence")

# Output the formatted prompt string.
user_msg

messages = [
    # SystemMessage provides a directive to the model about its role or behavior.
    SystemMessage(content="You're a knowledgeable historian"),

    # HumanMessage contains the prompt or query from the user, which is the formatted message.
    HumanMessage(content=user_msg),
]

response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

1. Mahatma Gandhi returned to India from South Africa in 1915, inspiring a non-violent struggle for self-rule. Before this, Gandhi had led a successful non-cooperation movement against the oppressive white minority government in South Africa.

2. Gandhi played a significant role in India's freedom struggle as early as 1917 in Protest against the Rowlatt Act, aimed at suppressing political dissent. Gandhi's satyagraha cool down campaign led by the Indian National Congress attracted national and global attention, marking a turning point in the non-violent movement for independence.

3. Gandhi led the famous Salt Satyagraha movement in 1930, starting with a salt march from his ashram in Sabarmati to the coast of Dandi, Gujarat. His disobedience of the Salt Laws, characterised by collective civil disobedience, became a watershed in Indian history, leading to international condemnation of British imperialism and growing support for India's independence struggle.

4. During the Quit India mo

#### **Example-3**

The prompt to *chat models* is a list of chat messages.

Each chat message is associated with content, and an additional parameter called `role`. For example, in the OpenAI Chat Completions API, a chat message can be associated with an AI assistant, a human or a system role.

In [28]:
# Import the ChatPromptTemplate class from LangChain's core prompts module to create structured chat prompts.
from langchain_core.prompts import ChatPromptTemplate

In [29]:
# Create a ChatPromptTemplate using a list of predefined message templates for system, human, and AI interactions.
chat_template = ChatPromptTemplate.from_messages(
    [
        # The system message, defining the behavior or persona of the AI.
        ("system", "You are a helpful A {persona}."),

        # The first human message, a simple greeting.
        ("human", "Hello, how are you doing?"),

        # The AI's response to the human's greeting.
        ("ai", "I'm doing well, thanks!"),

        # The second human message, which will be dynamically filled with user input.
        ("human", "{user_input}"),
    ]
)

In [30]:
# Define the persona for the AI, which will be used in the system message template.
persona = """trustworthy friend"""

# Define the user's query, asking for help with understanding a concept.
query = """
I am not able to understand the concept taught in class. \
Could you please suggest something? \
I need your help. Give 5 points to work on.
"""

# Format the messages in the chat template by filling in the placeholders with the defined persona and query.
messages = chat_template.format_messages(persona=persona, user_input=query)

In [31]:
messages

[SystemMessage(content='You are a helpful A trustworthy friend.', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Hello, how are you doing?', additional_kwargs={}, response_metadata={}),
 AIMessage(content="I'm doing well, thanks!", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='\nI am not able to understand the concept taught in class. Could you please suggest something? I need your help. Give 5 points to work on.\n', additional_kwargs={}, response_metadata={})]

In [32]:
# Convert the formatted messages into a chat prompt that can be processed by the Hugging Face model.
chat_model._to_chat_prompt(messages)

"<|system|>\nYou are a helpful A trustworthy friend.</s>\n<|user|>\nHello, how are you doing?</s>\n<|assistant|>\nI'm doing well, thanks!</s>\n<|user|>\n\nI am not able to understand the concept taught in class. Could you please suggest something? I need your help. Give 5 points to work on.\n</s>\n<|assistant|>\n"

In [33]:
print(chat_model._to_chat_prompt(messages))

<|system|>
You are a helpful A trustworthy friend.</s>
<|user|>
Hello, how are you doing?</s>
<|assistant|>
I'm doing well, thanks!</s>
<|user|>

I am not able to understand the concept taught in class. Could you please suggest something? I need your help. Give 5 points to work on.
</s>
<|assistant|>



In [34]:
# Invoke the chat model with the formatted messages and get the model's response.
response = chat_model.invoke(messages)

# Print the content of the response from the chat model.
print(response.content)

Sure, I'd be happy to help you! Here are five points you can work on to better understand the concept:

1. Define the concept: Make sure you have a clear understanding of what the concept is about. Write down a definition in your own words.

2. Find examples: Look for real-life examples that illustrate the concept. This will help you understand how it works in practice.

3. Practice using it: Try applying the concept to different scenarios. The more you practice, the more comfortable you'll become with it.

4. Collaborate with classmates: Discuss the concept with your classmates and see how they interpret it. This can help you gain a different perspective and clarify any misunderstandings.

5. Seek help from the teacher: Don't be afraid to ask questions in class or schedule a one-on-one session with your teacher. They can provide further clarification and guide you through the concept.

Good luck!


**Practice-3 :**
Create a prompt template that takes context and a question from the user and answers the question based on the given context.

Hint: Keep the context in the system message and the question in the human message.

**Context:** Meet Aryan Kapoor, a rising star in the entertainment industry whose talent knows no bounds. In 2023, Aryan captivated
audiences with his mesmerizing performance in the critically acclaimed film "Echoes of Eternity," earning him the prestigious Best Actor award at the National Film Awards. His versatility shone brightly in 2024 when he showcased his vocal prowess as a playback singer in the chart-topping soundtrack of the blockbuster movie "Infinite Horizon." The same year,  Aryan's captivating screen presence garnered him the coveted Filmfare Critics Award for Best Actor. As his star continued to
ascend, Aryan was honored with the International Icon of the Year award at the Global Entertainment Awards in 2025, recognizing his global impact and widespread admiration. With each role he undertakes, Aryan Kapoor cements his status as an unrivaled  talent in the world of cinema, leaving audiences eagerly anticipating his next masterpiece.

**Question:** What awards did Aryan Kapoor win for his contributions to the entertainment industry, and in which years were they received?

In [35]:
# YOUR CODES HERE for above practice exercise

In [36]:
# Create a ChatPromptTemplate using a list of predefined message templates for system and human interactions.
chat_template = ChatPromptTemplate.from_messages(
    [
        # The system message, providing the context for the assistant and setting expectations for the response.
        ("system", "You are a helpful assistant and know this context ```{context}``"),

        # The human message, containing the query, asking for a point-based answer and to stick to the provided context.
        ("human", " pls reply ```{question}``` in points based on the context provided. Strictly don't add extra facts and information ?"),
    ]
)

In [37]:
# Define the user's question about Aryan Kapoor's awards and the years they were received.
question = """What awards did Aryan Kapoor win for his contributions to the entertainment industry,
and in which years were they received?"""

# Define the context, which provides detailed information about Aryan Kapoor's achievements.
context = """
Meet Aryan Kapoor, a rising star in the entertainment industry whose talent knows no bounds. In 2023, Aryan captivated
audiences with his mesmerizing performance in the critically acclaimed film "Echoes of Eternity," earning him the
 prestigious Best Actor award at the National Film Awards. His versatility shone brightly in 2024 when he showcased his
  vocal prowess as a playback singer in the chart-topping soundtrack of the blockbuster movie "Infinite Horizon." The same year,
  Aryan's captivating screen presence garnered him the coveted Filmfare Critics Award for Best Actor. As his star continued to
  ascend, Aryan was honored with the International Icon of the Year award at the Global Entertainment Awards in 2025, recognizing
   his global impact and widespread admiration. With each role he undertakes, Aryan Kapoor cements his status as an unrivaled
   talent in the world of cinema, leaving audiences eagerly anticipating his next masterpiece.
"""

# Format the chat prompt messages by inserting the context and question into the message template.
messages = chat_template.format_messages(context=context, question=question)

In [38]:
# Convert the formatted messages into a chat prompt that can be processed by the Hugging Face model.
chat_model._to_chat_prompt(messages)

'<|system|>\nYou are a helpful assistant and know this context ```\nMeet Aryan Kapoor, a rising star in the entertainment industry whose talent knows no bounds. In 2023, Aryan captivated\naudiences with his mesmerizing performance in the critically acclaimed film "Echoes of Eternity," earning him the\n prestigious Best Actor award at the National Film Awards. His versatility shone brightly in 2024 when he showcased his\n  vocal prowess as a playback singer in the chart-topping soundtrack of the blockbuster movie "Infinite Horizon." The same year,\n  Aryan\'s captivating screen presence garnered him the coveted Filmfare Critics Award for Best Actor. As his star continued to\n  ascend, Aryan was honored with the International Icon of the Year award at the Global Entertainment Awards in 2025, recognizing\n   his global impact and widespread admiration. With each role he undertakes, Aryan Kapoor cements his status as an unrivaled\n   talent in the world of cinema, leaving audiences eagerly

In [39]:
# Send the formatted messages to the chat model for processing and obtain the model's response.
response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

1. In the year 2023, Aryan Kapoor won the prestigious Best Actor award at the National Film Awards for his outstanding performance in the movie "Echoes of Eternity".

2. In the year 2024, Aryan Kapoor showcased his vocal talent by singing playback for the chart-topping soundtrack of the blockbuster movie "Infinite Horizon".

3. In 2024, Aryan Kapoor received the coveted Filmfare Critics Award for Best Actor, acknowledging his captivating screen presence in his acting roles.

4. In 2025, Aryan Kapoor was conferred with the prestigious International Icon of the Year award at the Global Entertainment Awards, in recognition of his global impact and widespread admiration in the entertainment industry.


#### **Output Parsers**

Let's start with defining how we would like the LLM output to look like:

In [40]:
# An example output format
{
  "gift": False,
  "delivery_days": 5,
  "price_value": "pretty affordable!"
}

{'gift': False, 'delivery_days': 5, 'price_value': 'pretty affordable!'}

In [41]:
customer_review = """\
This leaf blower is pretty amazing.  It has four settings:\
candle blower, gentle breeze, windy city, and tornado. \
It arrived in two days, just in time for my wife's \
anniversary present. \
I think my wife liked it so much she was speechless. \
So far I've been the only one using it, and I've been \
using it every other morning to clear the leaves on our lawn. \
It's slightly more expensive than the other leaf blowers \
out there, but I think it's worth it for the extra features.
"""

In [42]:
review_template = """\
For the following text, extract the following information:

gift: Was the item purchased as a gift or present for someone else? \
Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product \
to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price,\
and output them as a comma separated Python list.

Format the output as JSON with the following keys:
gift
delivery_days
price_value

text: {text}
"""

In [43]:
# Import ChatPromptTemplate from LangChain Core to create structured prompts for chat models.
from langchain_core.prompts import ChatPromptTemplate

# Creating a chat prompt template using a predefined template string (review_template).
prompt_template = ChatPromptTemplate.from_template(review_template)

# Print the created prompt template to check its structure.
print(prompt_template)

input_variables=['text'] input_types={} partial_variables={} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['text'], input_types={}, partial_variables={}, template='For the following text, extract the following information:\n\ngift: Was the item purchased as a gift or present for someone else? Answer True if yes, False if not or unknown.\n\ndelivery_days: How many days did it take for the product to arrive? If this information is not found, output -1.\n\nprice_value: Extract any sentences about the value or price,and output them as a comma separated Python list.\n\nFormat the output as JSON with the following keys:\ngift\ndelivery_days\nprice_value\n\ntext: {text}\n'), additional_kwargs={})]


In [44]:
# Format the chat prompt messages by filling in the placeholder(s) in the prompt template with the actual customer review text.
messages = prompt_template.format_messages(text=customer_review)

# Send the formatted messages to the chat model for processing and obtain the model's response.
response = chat_model.invoke(messages)

# Print the generated response content from the chat model.
print(response.content)

{
  "gift": True,
  "delivery_days": 2,
  "price_value": ["It's slightly more expensive than the other leaf blowers out there, but I think it's worth it for the extra features."]
}


In [45]:
print(type(response.content))

<class 'str'>


#### **Parse the LLM output string into a structured data**:

Language models output text. But there are times where you want to get more structured information than just text back. While some model providers support [built-in ways to return structured output](https://python.langchain.com/docs/how_to/structured_output/), not all do.

Output parsers are classes that help structure language model responses.

Below we go over the main type of output parser, the `PydanticOutputParser`.



[Structured output parser](https://python.langchain.com/docs/how_to/output_parser_structured/)

In [46]:
# Import necessary modules
from langchain_core.output_parsers import PydanticOutputParser  # Parses model output into structured data
from pydantic import BaseModel, Field  # Used for defining structured data models

# Define the structured data model for extracting product-related information
class Product_Info(BaseModel):
    """Product service info."""

    # Field to check if the product was purchased as a gift
    gift: str = Field(description="Was the item purchased as a gift for someone else? "
                                  "Answer 'True' if yes, 'False' if not or unknown.")

    # Field to capture the number of days taken for delivery
    delivery_days: int = Field(description="How many days did it take for the product to arrive? "
                                           "If this information is not found, output -1.")

    # Field to extract and store sentences related to price or value as a list
    price_value: list = Field(description="Extract sentences about the value or price, "
                                          "and output them as a comma-separated Python list.")

In [47]:
# Set up a parser + inject instructions into the prompt template
parser = PydanticOutputParser(pydantic_object = Product_Info)

In [48]:
# Import PromptTemplate from LangChain to create structured prompts for the model
from langchain.prompts import PromptTemplate

# Create a prompt template to guide the model's response generation
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{text}\n",  # Template structure
    input_variables=["text"],  # Defines 'text' as a required input variable
    partial_variables={"format_instructions": parser.get_format_instructions()},  # Inserts format instructions dynamically
)

In [49]:
# Create a pipeline where the formatted prompt is passed to the language model (llm)
prompt_and_model = prompt | llm  # The "|" operator chains the prompt and the LLM for execution

# Invoke the pipeline with a user query (customer review) to generate a structured response
output = prompt_and_model.invoke({"text": customer_review})

# Parse the model's output into the predefined structured format using the parser
result = parser.invoke(output)

# Display the final structured result
result



Product_Info(gift='True', delivery_days=2, price_value=['slightly more expensive'])

In [50]:
print(result.gift)
print(result.delivery_days)
print(result.price_value)

True
2
['slightly more expensive']


**Practice-4 :** Continuing the practice-3, can you get the ouput in the below format :

{'Year': 'Award}

In [51]:
# YOUR CODES HERE for above practice exercise

#### [**Customizing Conversational Memory**](https://python.langchain.com/docs/how_to/chatbots_memory/)

LangChain can helps in building better chatbots, or have
an LLM with more effective chats by better managing
what it remembers from the conversation you've had so far.

In [52]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ]
)

chain = prompt | chat_model


In [53]:
# Import ChatMessageHistory to manage and store conversation history
from langchain_community.chat_message_histories import ChatMessageHistory

# Import RunnableWithMessageHistory to enable conversation-aware execution of chat models
from langchain_core.runnables.history import RunnableWithMessageHistory

In [54]:
# Dictionary to store chat message histories for different sessions
store = {}

# Function to retrieve or create a new chat history for a given session ID
def get_session_history(session_id: str) -> ChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()  # Initialize new history if session does not exist
    return store[session_id]  # Return the existing or new session history

# Create a chat pipeline with message history tracking
chain_with_message_history = RunnableWithMessageHistory(
    runnable=chain,  # The main conversation chain (LLM model + prompt)
    get_session_history=get_session_history,  # Function to retrieve chat history per session
    input_messages_key="input",  # Key in the input where the new user message is stored
    history_messages_key="chat_history",  # Key in the input where the previous chat history is stored
)

In [55]:
# Invoke the chat model with message history tracking
chain_with_message_history.invoke(
    {"input": "Hi, my name is James"},  # User's input message
    {"configurable": {"session_id": "user1"}},  # Configuration containing session ID
)

AIMessage(content="Hello, James! I'm glad you decided to reach out. My name is [your assistant name], and I'm here to provide helpful information and assistance whenever and wherever you need it. Feel free to ask me any questions you have, and I'll do my best to provide accurate and insightful answers. How can I help you today?", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=73, prompt_tokens=57, total_tokens=130), 'model': '', 'finish_reason': 'stop'}, id='run-5a96edf3-f480-42cb-aaef-2954faa6561a-0')

In [56]:
store

{'user1': InMemoryChatMessageHistory(messages=[HumanMessage(content='Hi, my name is James', additional_kwargs={}, response_metadata={}), AIMessage(content="Hello, James! I'm glad you decided to reach out. My name is [your assistant name], and I'm here to provide helpful information and assistance whenever and wherever you need it. Feel free to ask me any questions you have, and I'll do my best to provide accurate and insightful answers. How can I help you today?", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=73, prompt_tokens=57, total_tokens=130), 'model': '', 'finish_reason': 'stop'}, id='run-5a96edf3-f480-42cb-aaef-2954faa6561a-0')])}

In [57]:
# Invoke the chat model with message history tracking for a specific user session
chain_with_message_history.invoke(
    input={"input": "Do you remember my name?"},  # User's current input message
    config={"configurable": {"session_id": "user1"}}  # Session configuration to maintain chat history
)


AIMessage(content="I'm sorry, but I don't have access to personal information or memories about prior interactions. My training data and programming only allows me to remember information that is input into my system or that I've learned through repeated interactions. However, based on our previous conversation, you did introduce yourself as James. Is there anything else I can help you with today?", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=75, prompt_tokens=162, total_tokens=237), 'model': '', 'finish_reason': 'stop'}, id='run-cdcc6957-b27a-4942-937b-8fff14196dbd-0')

In [58]:
store

{'user1': InMemoryChatMessageHistory(messages=[HumanMessage(content='Hi, my name is James', additional_kwargs={}, response_metadata={}), AIMessage(content="Hello, James! I'm glad you decided to reach out. My name is [your assistant name], and I'm here to provide helpful information and assistance whenever and wherever you need it. Feel free to ask me any questions you have, and I'll do my best to provide accurate and insightful answers. How can I help you today?", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=73, prompt_tokens=57, total_tokens=130), 'model': '', 'finish_reason': 'stop'}, id='run-5a96edf3-f480-42cb-aaef-2954faa6561a-0'), HumanMessage(content='Do you remember my name?', additional_kwargs={}, response_metadata={}), AIMessage(content="I'm sorry, but I don't have access to personal information or memories about prior interactions. My training data and programming only allows me to remember information that is input into 

In [59]:
# Invoke the chat model with message history tracking to retrieve the user's name
chain_with_message_history.invoke(
    input={"input": "Can you tell me what is my name?"},  # User's current query asking for their name
    config={"configurable": {"session_id": "user1"}}  # Session configuration to ensure chat history for "user1"
)

AIMessage(content="I'm sorry but I don't have access to information about your name as it's not provided during our interactions. Whenever you interact with me, you introduce yourself with your name, and I remember it for that session. However, I cannot store that information outside of our ongoing communication. Would you like me to remember your name for this session, or is this simply an experiment to see if I can do so? If you'd like me to remember your name, please let me know your name once again, and I'll make a note of it for this session.", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=120, prompt_tokens=272, total_tokens=392), 'model': '', 'finish_reason': 'stop'}, id='run-e4987768-cdd6-4330-aea6-22ce997e4e2a-0')

In [60]:
store

{'user1': InMemoryChatMessageHistory(messages=[HumanMessage(content='Hi, my name is James', additional_kwargs={}, response_metadata={}), AIMessage(content="Hello, James! I'm glad you decided to reach out. My name is [your assistant name], and I'm here to provide helpful information and assistance whenever and wherever you need it. Feel free to ask me any questions you have, and I'll do my best to provide accurate and insightful answers. How can I help you today?", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=73, prompt_tokens=57, total_tokens=130), 'model': '', 'finish_reason': 'stop'}, id='run-5a96edf3-f480-42cb-aaef-2954faa6561a-0'), HumanMessage(content='Do you remember my name?', additional_kwargs={}, response_metadata={}), AIMessage(content="I'm sorry, but I don't have access to personal information or memories about prior interactions. My training data and programming only allows me to remember information that is input into 

### **II. mistralai/Mistral-7B-Instruct-v0.2**

Note that you need to ask for access before using this model. Go to https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 and click on `Agree and access repository`.


In [61]:
# Importing HuggingFaceEndpoint from langchain_huggingface to interact with Hugging Face models via an API endpoint
from langchain_huggingface import HuggingFaceEndpoint

In [62]:
question = "How to learn programing? Give 5 examples. "

In [63]:
# Set the Hugging Face model repository ID
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"  # Model identifier for Mistral-7B

# Define additional model parameters, including max token length and authentication token
model_kwargs = {
    "max_length": 128,  # Maximum token length for generated output
    "token": os.environ["HF_TOKEN"]  # Hugging Face API token for authentication, stored in environment variables
}

# Initialize the HuggingFaceEndpoint to interact with the model hosted on Hugging Face
llm = HuggingFaceEndpoint(
    repo_id=repo_id,  # Model repo ID to specify which model to use
    task="text-generation",  # Task to perform, here it's text generation (common for LLMs)
    temperature=0.5,  # Temperature parameter controls the randomness of the output (higher value = more random)
    model_kwargs=model_kwargs  # Pass the additional model parameters (max length, token)
)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [64]:
# Use the Hugging Face model endpoint to generate a response based on the input question
response = llm.invoke(question)  # Pass the user's question to the model for text generation
print(response)  # Output the generated response from the model

1. Learn a programming language: Python is a great language for beginners because it's easy to read and write, and it has a wide range of applications. You can start by learning the basics of Python, such as variables, data types, and control structures, and then move on to more advanced topics like functions, modules, and object-oriented programming. There are many online resources available to help you learn Python, such as Codecademy, W3Schools, and the official Python documentation. 2. Build a project: Once you have a basic understanding of a programming language, it's a good idea to start working on a project. This will give you practical experience and help you apply what you've learned. Some project ideas for beginners include creating a simple calculator, building a to-do list, or making a basic website. You can find many tutorials and resources online to help you get started with your project. 3. Join a coding community: There are many online communities where you can connect 



#### **Prompt Template**

**Example-1**

In [65]:
# Importing message types from langchain.schema to represent different types of messages in a conversation
from langchain.schema import (
    HumanMessage,  # Represents a message from the user (human)
    SystemMessage,  # Represents a message from the system, providing context or instructions
)

In [66]:
# Importing the ChatPromptTemplate class from langchain_core.prompts to create and format chat-based prompts
from langchain_core.prompts import ChatPromptTemplate

In [67]:
template_s = """You are a {style1}.\
Tell me  {count} facts about {event_or_place}.```
"""

In [68]:
# Creating a ChatPromptTemplate using a template string, where 'template_s' is a predefined prompt template
prompt_template = ChatPromptTemplate.from_template(template_s)

In [69]:
# Accessing the prompt of the first message in the ChatPromptTemplate
prompt_template.messages[0].prompt

PromptTemplate(input_variables=['count', 'event_or_place', 'style1'], input_types={}, partial_variables={}, template='You are a {style1}.Tell me  {count} facts about {event_or_place}.```\n')

In [70]:
# Accessing the input variables of the first message's prompt in the ChatPromptTemplate
prompt_template.messages[0].prompt.input_variables

['count', 'event_or_place', 'style1']

In [71]:
# Formatting the messages using the prompt_template with specific values for placeholders
user_messages = prompt_template.format_messages(
    style1="knowledgeable historian",  # Replacing the 'style1' placeholder with the value "knowledgeable historian"
    count=5,                           # Replacing the 'count' placeholder with the value 5
    event_or_place="Tajmahal"          # Replacing the 'event_or_place' placeholder with the value "Tajmahal"
)

In [72]:
user_messages

[HumanMessage(content='You are a knowledgeable historian.Tell me  5 facts about Tajmahal.```\n', additional_kwargs={}, response_metadata={})]

In [73]:
# Importing ChatHuggingFace from langchain_huggingface to interface with Hugging Face models
from langchain_huggingface import ChatHuggingFace

# Creating an instance of ChatHuggingFace using a previously defined language model (llm)
chat_model = ChatHuggingFace(llm=llm)

# Accessing the model ID associated with the chat_model instance
chat_model.model_id

'mistralai/Mistral-7B-Instruct-v0.2'

In [74]:
# Converting the user messages to a chat-friendly format for the chat_model
chat_model._to_chat_prompt(user_messages)

'<s> [INST] You are a knowledgeable historian.Tell me  5 facts about Tajmahal.```\n [/INST]'

In [75]:
# Invoking the chat model with user messages to generate a response
response = chat_model.invoke(user_messages)

# Printing the content of the response generated by the chat model
print(response.content)

 1. The Taj Mahal is located in Agra, India, and was completed in 1653. It was built by Mughal Emperor Shah Jahan as a mausoleum for his wife Mumtaz Mahal.
2. The Taj Mahal is considered one of the Seven Wonders of the World and is a UNESCO World Heritage Site. It is renowned for its white marble exterior and intricate craftsmanship, which includes calligraphy and inlaid semi-precious stones.
3. The Taj Mahal complex includes not only the mausoleum but also a mosque, a guest house, and a large reflecting pool. The complex is symmetrical, with the entrance to the mosque mirroring the entrance to the mausoleum across the pool.
4. Contrary to popular belief, the Taj Mahal is not a perfect symmetric structure. A careful examination reveals slight differences between the right and left sides of the building, including the size and position of some of the elements.
5. The Taj Mahal was built using an estimated 20,000 workers, who came from all over India and Central Asia. The construction to

**Example-2**

In [76]:
# Creating a list of messages with a single human message as input
messages = [HumanMessage(content="How to learn programming? give 5 points")]

In [77]:
# Converting the list of messages to the appropriate chat format for the chat model
chat_model._to_chat_prompt(messages)

'<s> [INST] How to learn programming? give 5 points [/INST]'

In [78]:
# Sending the formatted messages to the chat model and generating a response
response = chat_model.invoke(messages)

# Printing the content of the response generated by the chat model
print(response.content)

 1. Start with the Basics: Begin by learning the fundamental concepts of programming such as variables, loops, conditional statements, functions, and data structures. Pick a programming language that interests you and stick with it until you have a strong foundation. Some good options for beginners include Python, JavaScript, or C++.

2. Practice Regularly: Consistent practice is crucial when learning programming. Try to write code every day, even if it's just for a few minutes. You can find plenty of free resources online, such as coding exercises, projects, and tutorials. Each time you write code, you'll learn something new and build upon your existing knowledge.

3. Learn the Syntax and Semantics: Understand the rules and structure of the programming language you're using. Familiarize yourself with the syntax (the rules for writing code) and semantics (the meaning behind the code). This will help you read, write, and understand code more effectively.

4. Collaborate and Learn from O

### **III.** **[Llama2](https://ai.meta.com/llama/)** ***(Optional)***

**NOTE:**

>For using this model you have to click `Download models` link available in [this](https://ai.meta.com/llama/) reference which re-direct to a **form for request**. It may take 1 hour to 2 days to get the **approval** for usage of this model through HuggingFace. You will get an email for the same.

>Once the request is approved, connect to **GPU runtime** for below steps. Also, you need to provide your HF api key/access token.

Trying Llama2-2-7b model:


In [79]:
%%capture
# This is a Jupyter notebook magic command that captures the output of the cell,
# preventing it from being printed in the notebook.

!pip install -q transformers accelerate langchain xformers bitsandbytes
# Using pip to install the following Python packages:
# - transformers: A library for working with transformer models like GPT, BERT, etc.
# - accelerate: A library to speed up the training and inference of large models efficiently.
# - langchain: A framework for building applications using large language models (LLMs).
# - xformers: Tools and optimizations for efficient memory usage and performance in transformers.
# - bitsandbytes: A library for running models with low-bit precision to improve memory usage and speed.
# The '-q' flag ensures the installation process runs quietly without verbose output.

In [80]:
# Enter your HuggingFace access token when prompted

import os  # Importing the 'os' module for interacting with the operating system
from getpass import getpass  # Importing 'getpass' to securely input the HuggingFace token without displaying it

# Prompt the user to input their HuggingFace access token without showing the input
pass_token = getpass("Enter your HuggingFace access token: ")

# Setting the HuggingFace token as environment variables to be used in API requests
os.environ["HF_TOKEN"] = pass_token  # Storing the token as 'HF_TOKEN' in the environment
os.environ["HUGGINGFACEHUB_API_TOKEN"] = pass_token  # Storing the token as 'HUGGINGFACEHUB_API_TOKEN'

# Delete the pass_token variable for security reasons to ensure it doesn't remain in memory
del pass_token

Enter your HuggingFace access token: ··········


## Initializing the Hugging Face Pipeline

The first thing we need to do is initialize a `text-generation` pipeline with Hugging Face transformers. The Pipeline requires three things that we must initialize first, those are:

* A LLM, in this case it will be `meta-llama/Llama-2-7b-chat-hf`.

* The respective tokenizer for the model.

We'll explain these as we get to them, let's begin with our model.

We initialize the model and move it to our CUDA-enabled GPU. Using Colab this can take 5-10 minutes to download and initialize the model.

In [81]:
from torch import cuda, bfloat16
# Importing 'cuda' from PyTorch to interact with GPU for acceleration (e.g., checking for available CUDA devices, using the GPU for tensor operations).
# Importing 'bfloat16' from PyTorch, a 16-bit floating point format often used to save memory and speed up computation, especially in deep learning.

import transformers
# Importing the 'transformers' library from Hugging Face, which provides tools for working with transformer models like GPT, BERT, and others.

In [82]:
# Define the model ID for the pre-trained model you want to use from Hugging Face
model_id = 'meta-llama/Llama-2-7b-chat-hf'
# 'meta-llama/Llama-2-7b-chat-hf' is the ID of a pre-trained Llama-2 model hosted on Hugging Face.
# This model is fine-tuned for chat applications.

# Determine which device (GPU or CPU) to use for model inference
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
# If a CUDA-enabled GPU is available, use the first available GPU (cuda:0).
# If CUDA is not available, use the CPU ('cpu').
device  # This stores the device configuration for later use when loading the model.

'cuda:0'

In [83]:
!pip install torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 -f https://download.pytorch.org/whl/torch_stable.html
!pip install fastai

Looking in links: https://download.pytorch.org/whl/torch_stable.html
[31mERROR: Could not find a version that satisfies the requirement torch==2.5.1+cu124 (from versions: 1.13.0, 1.13.0+cpu, 1.13.0+cu116, 1.13.0+cu117, 1.13.0+cu117.with.pypi.cudnn, 1.13.1, 1.13.1+cpu, 1.13.1+cu116, 1.13.1+cu117, 1.13.1+cu117.with.pypi.cudnn, 2.0.0, 2.0.0+cpu, 2.0.0+cpu.cxx11.abi, 2.0.0+cu117, 2.0.0+cu117.with.pypi.cudnn, 2.0.0+cu118, 2.0.1, 2.0.1+cpu, 2.0.1+cpu.cxx11.abi, 2.0.1+cu117, 2.0.1+cu117.with.pypi.cudnn, 2.0.1+cu118, 2.0.1+rocm5.3, 2.0.1+rocm5.4.2, 2.1.0, 2.1.0+cpu, 2.1.0+cpu.cxx11.abi, 2.1.0+cu118, 2.1.0+cu121, 2.1.0+cu121.with.pypi.cudnn, 2.1.0+rocm5.5, 2.1.0+rocm5.6, 2.1.1, 2.1.1+cpu, 2.1.1+cpu.cxx11.abi, 2.1.1+cu118, 2.1.1+cu121, 2.1.1+cu121.with.pypi.cudnn, 2.1.1+rocm5.5, 2.1.1+rocm5.6, 2.1.2, 2.1.2+cpu, 2.1.2+cpu.cxx11.abi, 2.1.2+cu118, 2.1.2+cu121, 2.1.2+cu121.with.pypi.cudnn, 2.1.2+rocm5.5, 2.1.2+rocm5.6, 2.2.0, 2.2.0+cpu, 2.2.0+cpu.cxx11.abi, 2.2.0+cu118, 2.2.0+cu121, 2.2.0+rocm5.6, 

In [84]:
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers accelerate langchain xformers bitsandbytes --upgrade

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 2.7.18 requires torch<2.6,>=1.10, but you have torch 2.6.0 which is incompatible.
torchaudio 2.5.1+cu124 requires torch==2.5.1, but you have torch 2.6.0 which is incompatible.
torchvision 0.20.1+cu124 requires torch==2.5.1, but you have torch 2.6.0 which is incompatible.[0m[31m
[0m

In [85]:
# Load the tokenizer for the model specified by 'model_id' from Hugging Face
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
# 'AutoTokenizer' automatically selects the appropriate tokenizer based on the model provided.
# The tokenizer is responsible for converting text input into token IDs that the model can understand.

# Set up a text-generation pipeline using the specified model and tokenizer
pipeline = transformers.pipeline(
    "text-generation",  # Specifies the type of task (text generation in this case)
    model=model_id,  # The model to use for text generation (in this case, Llama-2 model)
    tokenizer=tokenizer,  # Tokenizer to convert input text into tokens
    torch_dtype=bfloat16,  # Use 'bfloat16' precision for tensor computations to reduce memory usage and speed up computations
    trust_remote_code=True,  # Trust remote code when loading model weights and configurations
    device_map="auto",  # Automatically map the model layers to available devices (CPU/GPU)
    max_length=1000,  # Maximum length of the generated text (number of tokens)
    do_sample=True,  # Enable sampling for text generation (instead of deterministic outputs)
    top_k=10,  # Limits the sampling to the top 10 most probable next tokens to ensure diversity
    num_return_sequences=1,  # Number of generated sequences to return
    eos_token_id=tokenizer.eos_token_id  # End-of-sequence token ID, so generation stops when this token is produced
)

RuntimeError: Failed to import transformers.pipelines because of the following error (look up to see its traceback):
operator torchvision::nms does not exist

In [86]:
# Pass the input prompt to the pipeline and generate text based on the given prompt
res = pipeline("How to learn programming?")
# The pipeline processes the input prompt ("How to learn programming?") and generates text.
# The result is stored in the variable 'res'. The pipeline returns a list of generated sequences.

# Print the generated text from the first sequence
print(res[0]["generated_text"])
# The generated text is accessed from the first result in the list (res[0]).
# 'generated_text' is the key that contains the model's output for the generated text.

NameError: name 'pipeline' is not defined

#### **Now implementing with LangChain**

In [87]:
# Install the LangChain library, a framework for building language model-based applications
!pip -q install langchain
# The '-q' flag suppresses unnecessary output, making the installation process less verbose.
# LangChain is useful for building chains of various language model components like prompts, tools, and memory.

# Install the langchain_community library, which contains community-supported modules for LangChain
!pip -q install langchain_community
# This library includes additional modules contributed by the LangChain community to extend its functionality.

In [90]:
from torch import cuda, bfloat16
import transformers
from langchain.llms import HuggingFacePipeline

# Define the model ID and device
model_id = 'meta-llama/Llama-2-7b-chat-hf'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

!pip uninstall -y torch torchvision torchaudio
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Load the tokenizer and create the pipeline
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    torch_dtype=bfloat16,
    trust_remote_code=True,
    device_map="auto",
    max_length=1000,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)

Found existing installation: torch 2.6.0
Uninstalling torch-2.6.0:
  Successfully uninstalled torch-2.6.0
Found existing installation: torchvision 0.20.1+cu124
Uninstalling torchvision-0.20.1+cu124:
  Successfully uninstalled torchvision-0.20.1+cu124
Found existing installation: torchaudio 2.5.1+cu124
Uninstalling torchaudio-2.5.1+cu124:
  Successfully uninstalled torchaudio-2.5.1+cu124
Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.6.0%2Bcu118-cp311-cp311-linux_x86_64.whl.metadata (27 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.21.0%2Bcu118-cp311-cp311-linux_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.6.0%2Bcu118-cp311-cp311-linux_x86_64.whl.metadata (6.6 kB)
Downloading https://download.pytorch.org/whl/cu118/torch-2.6.0%2Bcu118-cp311-cp311-linux_x86_64.whl (848.7 MB)
[2K   

RuntimeError: Failed to import transformers.pipelines because of the following error (look up to see its traceback):
partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import)

In [88]:
# Import HuggingFacePipeline from langchain.llms
from langchain.llms import HuggingFacePipeline

# Create an LLM instance by wrapping the HuggingFace pipeline inside LangChain
# The pipeline processes the input data and generates responses.
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': 0.7})
# 'pipeline': The pre-configured HuggingFace pipeline used to generate responses based on input prompts.
# 'model_kwargs': Additional keyword arguments passed to the model, in this case, setting the 'temperature' to 0.7.
# 'temperature': A parameter that controls the randomness of the output (higher values lead to more randomness).

NameError: name 'pipeline' is not defined

In [None]:
# Invoke the HuggingFace model wrapped inside the LangChain LLM with the query "How to learn programming?"
# The 'invoke' method runs the input query through the model pipeline and generates a response.
print(llm.invoke("How to learn programming?"))
# This will output the response generated by the model to the prompt "How to learn programming?".
# The model will process the query and provide an answer based on its training.

#### **Prompt Template**

In [91]:
# Importing ChatPromptTemplate from langchain.prompts
from langchain.prompts import ChatPromptTemplate
# This class allows for creating templates for chat-based prompts. You can define the structure of messages in the chat,
# and then dynamically replace parts of it based on inputs or parameters.

In [92]:
# Define a multi-line string template for a chat prompt using placeholders for style and text
template_s = """Reply the answer
like {style1}.
text: ```{text1}```
"""
# This string will be used as a template, where:
# {style1}: Placeholder for the style in which the answer should be given (e.g., "knowledgeable historian").
# {text1}: Placeholder for the text or query to which the model will respond.

In [93]:
# Create a ChatPromptTemplate instance using the previously defined template
prompt_template = ChatPromptTemplate.from_template(template_s)
# This initializes the ChatPromptTemplate with the string 'template_s' as its format.
# The template will later allow dynamic substitution of the placeholders {style1} and {text1}.

In [94]:
# Access the first message in the prompt template and retrieve its prompt string
prompt_template.messages[0].prompt
# This accesses the first message in the 'messages' list of the ChatPromptTemplate,
# and retrieves the actual prompt string (i.e., the template structure defined earlier).

PromptTemplate(input_variables=['style1', 'text1'], input_types={}, partial_variables={}, template='Reply the answer\nlike {style1}.\ntext: ```{text1}```\n')

In [95]:
# Access the input variables that are used in the prompt template
prompt_template.messages[0].prompt.input_variables
# This retrieves the list of input variables (placeholders) used in the first message's prompt template.
# In this case, the input variables are {style1} and {text1}, which are placeholders for dynamic content.

['style1', 'text1']

In [96]:
# Define the style to be used in the prompt template
style = """trustworthy friend"""
# This assigns the string "trustworthy friend" to the variable 'style'.
# This value will be used as the persona or tone in which the response should be framed in the prompt template.

In [97]:
# Define the query to be used in the prompt template
query = """
I am not able to understand the concept taught in class.
Could you please suggest something?
I need your help. Give 5 points to work on.
"""
# This variable 'query' holds the question or request that will be input to the language model.
# It's a message asking for help with understanding a concept and requesting 5 points to work on.

In [98]:
# Format the messages using the prompt template, filling in the placeholders
user_messages = prompt_template.format_messages(
    style1=style,   # Provide the style (e.g., "trustworthy friend")
    text1=query     # Provide the user query (e.g., the request for help with 5 points)
)
# This line fills the placeholders {style1} and {text1} in the prompt template with the defined values
# of 'style' (persona) and 'query' (the actual question). The resulting 'user_messages' will be used
# to generate the final input for the model.

In [99]:
# Print the first formatted message from the user_messages list
print(user_messages[0])
# This will output the first message in the 'user_messages' list after formatting.
# The output will be a string where the placeholders {style1} and {text1} have been replaced by the values of 'style' and 'query'.

content='Reply the answer\nlike trustworthy friend.\ntext: ```\nI am not able to understand the concept taught in class.\nCould you please suggest something?\nI need your help. Give 5 points to work on.\n```\n' additional_kwargs={} response_metadata={}


In [100]:
# Call the LLM to translate to the style of the customer message
llm_response = llm.invoke(user_messages)



In [101]:
print(llm_response)

Assistant: I'm here to help! Let's break down your study challenge into manageable tasks. Here are five points to focus on:

1. **Review your notes**: Start by re-reading your notes from the class. Try to identify the key points and any areas that you found confusing.

2. **Read the textbook**: Go through the relevant chapters in your textbook. Sometimes, the textbook might explain the concept in a different way that could help clarify your understanding.

3. **Online resources**: Utilize online resources like video lectures, tutorials, or websites that explain the concept. Websites like Khan Academy, MIT OpenCourseWare, or YouTube channels dedicated to your subject can be very helpful.

4. **Practice problems**: Try to solve problems related to the concept. This will help you understand the application of the concept and reinforce your learning. If you're stuck, don't hesitate to look up the solutions.

5. **Discuss with peers or the instructor**: Reach out to your classmates or your 

### Please answer the questions below to complete the experiment:




In [102]:
#@title Which of the following prompt techniques in LangChain allows flexible templated prompts that are suitable for better describing the role and content? { run: "auto", form-width: "500px", display-mode: "form" }
Answer = "Both" #@param ["", "PromptTemplate", "ChatPromptTemplate", "Both"]

In [103]:
#@title How was the experiment? { run: "auto", form-width: "500px", display-mode: "form" }
Complexity = "Good and Challenging for me" #@param ["","Too Simple, I am wasting time", "Good, But Not Challenging for me", "Good and Challenging for me", "Was Tough, but I did it", "Too Difficult for me"]

In [104]:
#@title If it was too easy, what more would you have liked to be added? If it was very difficult, what would you have liked to have been removed? { run: "auto", display-mode: "form" }
Additional = "Nothing much" #@param {type:"string"}

In [105]:
#@title Can you identify the concepts from the lecture which this experiment covered? { run: "auto", vertical-output: true, display-mode: "form" }
Concepts = "Yes" #@param ["","Yes", "No"]

In [106]:
#@title  Text and image description/explanation and code comments within the experiment: { run: "auto", vertical-output: true, display-mode: "form" }
Comments = "Very Useful" #@param ["","Very Useful", "Somewhat Useful", "Not Useful", "Didn't use"]

In [107]:
#@title Mentor Support: { run: "auto", vertical-output: true, display-mode: "form" }
Mentor_support = "Very Useful" #@param ["","Very Useful", "Somewhat Useful", "Not Useful", "Didn't use"]

In [108]:
#@title Run this cell to submit your notebook for grading { vertical-output: true }
try:
  if submission_id:
      return_id = submit_notebook()
      if return_id : submission_id = return_id
  else:
      print("Please complete the setup first.")
except NameError:
  print ("Please complete the setup first.")

Your submission is successful.
Ref Id: 5473
Date of submission:  18 Feb 2025
Time of submission:  11:53:25
View your submissions: https://aimlops-iisc.talentsprint.com/notebook_submissions
