### Comprehensive Guide to LangChain: Building Advanced LLM Applications

#### [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]()

# Initial Setup

In [5]:
!pip -q install langchain openai chromadb tiktoken unstructured wikipedia google-search-results langchain_community

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.7/2.5 MB[0m [31m20.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m
[?25h

## Import Necessary Libraries

In [2]:
import os
import json
import warnings
warnings.filterwarnings('ignore')

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.agents import load_tools, initialize_agent, AgentType


## Set Up API Key

In [4]:
OPENAI_API_KEY = "sk-proj-00000-000-00-00"  # Replace with your OpenAI API key
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
llm = OpenAI(temperature=0.7)
chat_model = ChatOpenAI(temperature=0.7)

# 1. Basic LLM Interactions

In [5]:
def test_llm_responses():
    print("Basic LLM Response:")
    response = llm.predict("What is the capital of Canada?")
    print(response)
    print("\n" + "="*50 + "\n")
    print("Chat Model Response:")
    messages = [
        SystemMessage(content="You are a helpful assistant."),
        HumanMessage(content="What is the capital of Canada?")
    ]
    response = chat_model.predict_messages(messages)
    print(response.content)

test_llm_responses()

Basic LLM Response:

The capital of Canada is Ottawa.


Chat Model Response:
The capital of Canada is Ottawa.


# 2. Working with Prompt Templates

In [9]:
def demonstrate_prompt_templates():
    basic_prompt = PromptTemplate(
        input_variables=["topic"],
        template="Write a brief summary about {topic}."
    )
    detailed_prompt = PromptTemplate(
        input_variables=["topic", "tone", "length"],
        template="Write a {length} summary about {topic} in a {tone} tone."
    )
    print("Basic Prompt Result:")
    print(llm.predict(basic_prompt.format(topic="artificial intelligence")))
    print("\n" + "="*50 + "\n")
    print("Detailed Prompt Result:")
    print(llm.predict(detailed_prompt.format(
        topic="artificial intelligence", tone="professional", length="two-paragraph"
    )))

demonstrate_prompt_templates()

Basic Prompt Result:


Artificial intelligence (AI) is a branch of computer science that focuses on creating machines that can perform tasks that typically require human intelligence, such as learning, problem-solving, and decision-making. This is achieved through the development of algorithms and computer systems that can analyze and interpret data, recognize patterns, and adapt to new situations. AI has applications in various fields, including robotics, healthcare, finance, and transportation, and has the potential to greatly improve efficiency and productivity in these areas. However, there are also concerns about the potential impact of AI on the job market and society as a whole. Ongoing research and advancements in AI technology continue to push the boundaries of what is possible and raise ethical questions about the role of machines in our daily lives. 


Detailed Prompt Result:


Artificial intelligence, or AI, is a rapidly developing field of computer science that focuses on 

# 3. Creating Advanced Chains

In [12]:

def multi_step_chain():
    # Step 1: Generate keywords for a topic
    keyword_prompt = PromptTemplate(
        input_variables=["topic"],
        template="Generate 5 keywords related to {topic}."
    )
    keyword_chain = LLMChain(llm=llm, prompt=keyword_prompt, output_key="keywords")

    # Step 2: Use keywords to create a blog title
    title_prompt = PromptTemplate(
        input_variables=["keywords"],
        template="Create a blog title using these keywords: {keywords}"
    )
    title_chain = LLMChain(llm=llm, prompt=title_prompt, output_key="blog_title")

    # Combine chains using SequentialChain
    chain = SequentialChain(
        chains=[keyword_chain, title_chain],
        input_variables=["topic"],  # Initial input to the chain
        output_variables=["blog_title"]  # Final output of the chain
    )

    # Test the chain
    result = chain.run({"topic": "climate change"})
    print("Generated Blog Title:", result)

multi_step_chain()


Generated Blog Title: 

"Combatting Global Warming: Reducing Greenhouse Gases and Our Carbon Footprint with Renewable Energy in the Face of Extreme Weather Events"


# 4. Using LangChain for Summarization Tasks


In [14]:
def summarize_document():
    # Load a sample document
    document_text = """
    Climate change refers to long-term alterations in temperature, precipitation, wind patterns, and other elements of the Earth's climate system.
    It is primarily driven by human activities, particularly the burning of fossil fuels, which increases greenhouse gas concentrations in the atmosphere.
    These gases trap heat, leading to global warming and a host of other environmental changes, including rising sea levels, melting glaciers, and disruptions to ecosystems.
    Addressing climate change requires a combination of mitigation efforts, such as reducing emissions, and adaptation strategies, such as preparing for the impacts already underway.
    """

    # Split the document into chunks
    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=20)
    chunks = text_splitter.split_text(document_text)

    # Summarize each chunk and combine
    summarization_prompt = PromptTemplate(
        input_variables=["chunk"],
        template="Summarize the following text: {chunk}"
    )
    summarization_chain = LLMChain(llm=llm, prompt=summarization_prompt)

    summary = "\n".join([summarization_chain.run({"chunk": chunk}) for chunk in chunks])
    print("Document Summary:")
    print(summary)

summarize_document()

Document Summary:


Climate change is caused by human activities, specifically the burning of fossil fuels, which leads to increased greenhouse gas concentrations in the atmosphere. This results in global warming and various environmental changes, such as rising sea levels, melting glaciers, and impacts on ecosystems. To address climate change, both mitigation efforts (reducing emissions) and adaptation strategies (preparing for current impacts) are needed.



# 5. Using LangChain for Q&A with Custom Data

## Build a Question-Answering System

In [15]:
def qa_with_custom_data():
    # Sample document content
    document_content = """
    Artificial Intelligence (AI) is a field of computer science focused on creating systems capable of performing tasks that usually require human intelligence.
    These tasks include natural language understanding, image recognition, decision-making, and more.
    Advances in AI have been driven by machine learning techniques, particularly deep learning, which involves training neural networks on large datasets.
    """

    # Split and process the document
    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=50)
    chunks = text_splitter.split_text(document_content)

    # Embed the document
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_texts(chunks, embeddings)

    # Create a retriever
    retriever = vectorstore.as_retriever()

    # Define a QA chain
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="Given the following context, answer the question.\nContext: {context}\nQuestion: {question}\nAnswer:"
    )
    qa_chain = LLMChain(llm=llm, prompt=qa_prompt)

    # Ask a question
    question = "What is Artificial Intelligence?"
    relevant_context = retriever.get_relevant_documents(question)[0].page_content
    answer = qa_chain.run({"context": relevant_context, "question": question})

    print("Question:", question)
    print("Answer:", answer)

qa_with_custom_data()



Question: What is Artificial Intelligence?
Answer:  Artificial Intelligence is a field of computer science focused on creating systems capable of performing tasks that usually require human intelligence.


# 6. Question Answering with Images

## Building an Image-Based Q&A System

In [7]:
#!pip -q install pdfminer
#!pip uninstall -y pdfminer.six unstructured
#!pip install "pdfminer.six<20221105" "unstructured==0.10.12"
!pip install pdf2image

Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Installing collected packages: pdf2image
Successfully installed pdf2image-1.17.0


In [10]:
# !apt-get update
# !apt-get install -y tesseract-ocr
!pip install pytesseract


Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [15]:
import pytesseract
from PIL import Image
from langchain import PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI

def qa_with_image():
    image_path = "/content/sign.webp"
    image = Image.open(image_path)

    # Extract text with pytesseract directly
    extracted_text = pytesseract.image_to_string(image)

    llm = ChatOpenAI(model_name="gpt-4")

    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=(
            "Given the following context extracted from an image, "
            "answer the question.\nContext: {context}\nQuestion: {question}\nAnswer:"
        )
    )
    qa_chain = LLMChain(llm=llm, prompt=qa_prompt)

    question = "What information is provided in the image?"
    answer = qa_chain.run({"context": extracted_text, "question": question})

    print("Question:", question)
    print("Answer:", answer)

qa_with_image()


Question: What information is provided in the image?
Answer: The image provides text that reads "waa" and "ry ae".


# 7. Question Answering with Videos

## Building a Video-Based Q&A System

In [None]:
def qa_with_video():
    import cv2
    from langchain.document_loaders import UnstructuredVideoLoader

    # Load a video
    video_path = "sample_video.mp4"  # Replace with your video file path

    # Extract text from the video
    video_loader = UnstructuredVideoLoader(file_path=video_path)
    extracted_text = video_loader.load()[0].page_content

    # Define a QA prompt
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="Given the following context extracted from a video, answer the question.\nContext: {context}\nQuestion: {question}\nAnswer:"
    )
    qa_chain = LLMChain(llm=llm, prompt=qa_prompt)

    # Ask a question
    question = "What is described in the video?"
    answer = qa_chain.run({"context": extracted_text, "question": question})

    print("Question:", question)
    print("Answer:", answer)

qa_with_video()