## Environment Setup

In [None]:
! pip install langchain_community langchain_mistralai langchainhub langchain tiktoken langchain-pinecone

In [None]:
import os
from getpass import getpass

# LangSmith tracing configuration (not secret, safe to set unconditionally).
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Secrets: keep any value already exported in the environment; otherwise ask
# for it interactively without echoing. This avoids overwriting real
# credentials with placeholder text and keeps keys out of the saved notebook.
for secret_name in ('LANGCHAIN_API_KEY', 'MISTRAL_API_KEY', 'HF_TOKEN', 'PINECONE_API_KEY'):
    if not os.environ.get(secret_name):
        os.environ[secret_name] = getpass(f'{secret_name}: ')

In [None]:
# Questions about Moby Dick used to exercise the step-back pipeline.
# Only the first three are active; the rest are kept (commented out) so they
# can be re-enabled when a longer run is wanted.
questions = [
    "What does Moby Dick say about humanity's struggle against nature?",
    "How does Moby Dick explore the theme of obsession through Ahab's quest?",
    "How is Captain Ahab portrayed as both a hero and a villain in Moby Dick?",
    # "What motivates Ishmael to join the Pequod, and how does he change throughout the novel?",
    # "What does the white whale symbolize in Moby Dick, and how does it relate to Ahab's obsession?",
    # "How does the novel Moby Dick use the ocean as a symbol of the unknown?",
    # "How does Melville’s narrative style in Moby Dick contribute to the sense of adventure and mystery?",
    # "How does Ishmael’s perspective shape the reader’s understanding of the story in Moby Dick?",
    # "How does Moby Dick reflect 19th-century views on fate and destiny?",
    # "What philosophical questions does Melville raise about human existence and purpose in Moby Dick?",
    # "How does Melville use imagery to depict the sea as both beautiful and terrifying in Moby Dick?",
    # "How does Moby Dick describe the vastness and danger of the open sea?",
    # "What moral dilemmas do the crew members face in Moby Dick?",
    # "How does Moby Dick present Ahab's pursuit of revenge as both justified and self-destructive?"
]

# Single question passed to the full generation chain at the end of the notebook.
question = questions[0]

## Indexing

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Fetch the Project Gutenberg plain-text file (ebook 2701) over HTTP.
# The loader can also be given multiple URLs.
loader = WebBaseLoader(web_path="https://www.gutenberg.org/cache/epub/2701/pg2701.txt")
books = loader.load()

### Splitting the text

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter built via the tiktoken-based constructor: chunks of 300 with an
# overlap of 50 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_overlap=50, chunk_size=300)

# Cut the loaded documents into chunks and display how many were produced.
splits = text_splitter.split_documents(books)
len(splits)

### Indexing to PineconeDB

In [None]:
from pinecone import Pinecone
from langchain_mistralai import MistralAIEmbeddings
# Note: PineconeVectorStore is imported from langchain_pinecone,
# not from langchain_community.vectorstores.
from langchain_pinecone import PineconeVectorStore

# Pinecone client authenticated with the key from the environment, plus a
# handle to the "book-passages" index.
# NOTE(review): `splits` is never written to the index in this notebook —
# presumably "book-passages" was populated beforehand; confirm.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("book-passages")

# LangChain vector store over that index; queries are embedded with Mistral
# and the passage text is read from the "text" field.
vectorstore = PineconeVectorStore(
    text_key="text",
    embedding=MistralAIEmbeddings(),
    index=index,
)

## Retrieval

### Init Retriever

In [None]:
# Retriever over the vector store, configured with k=1 (one result per query).
retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))

### Step-Back Prompt

In [None]:
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

# Few-shot pairs: a specific question ("input") and its broader,
# step-back rewrite ("output").
generification_play = [
    dict(
        input="What are the economic impacts of tourism in Venice?",
        output="How does tourism affect local economies in popular cities?",
    ),
    dict(
        input="What are the main causes of air pollution in New Delhi?",
        output="What are the common causes of air pollution in large cities?",
    ),
]

# Each example is rendered as a human message followed by the AI reply.
play_prompt_template = ChatPromptTemplate.from_messages([("human", "{input}"), ("ai", "{output}")])
play_prompt = FewShotChatMessagePromptTemplate(
    examples=generification_play,
    example_prompt=play_prompt_template,
)

# Full step-back prompt: system instructions, then the few-shot examples,
# then the actual question to generalize.
stepback_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
"""
You are a knowledgeable assistant.
Your task is to transform specific questions into broader,
more general questions that are easier to answer and provide a wider perspective.
This approach is known as creating 'step-back' questions.
Here are a few examples:
""",
    ),
    play_prompt,
    ("user", "{question}"),
])




### Output original questions and their step-back questions

In [None]:
from langchain_mistralai import ChatMistralAI
# StrOutputParser lives in langchain_core; the langchain.schema path is a legacy alias.
from langchain_core.output_parsers import StrOutputParser

# Step-back chain: prompt -> Mistral chat model (temperature 0) -> plain string.
stepback_chain = stepback_prompt | ChatMistralAI(temperature=0) | StrOutputParser()

# The loop variable is deliberately not named `question`: reusing that name
# would overwrite the global `question` chosen earlier, and the final
# generation cell would then answer the last question instead of that one.
for original_question in questions:
    print("-------------")
    print("Original Question:\n\t"+original_question)
    stepback_question = stepback_chain.invoke({"question": original_question})
    print("Generic question:\n\t"+stepback_question)


## Generation

In [None]:
from langchain_core.runnables import RunnableLambda


# Answer prompt: receives the context retrieved for the original question,
# the context retrieved for its step-back version, and the question itself.
response_prompt_template = """
You are a world knowledge expert.
Answer the following question thoroughly.
Use relevant information from the provided context but disregard any irrelevant details.

Context:
{normal_context}
{step_back_context}

Question: {question}
Answer:"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

# Pipeline: build the three prompt inputs from {"question": ...}, fill the
# prompt, call the Mistral chat model, and return the reply as a string.
chain = (
    {
        # Context looked up with the question exactly as asked
        "normal_context": RunnableLambda(lambda inputs: inputs["question"]) | retriever,
        # Context looked up with the generated step-back question
        "step_back_context": stepback_chain | retriever,
        # The question itself, forwarded unchanged
        "question": lambda inputs: inputs["question"],
    }
    | response_prompt
    | ChatMistralAI(temperature=0)
    | StrOutputParser()
)

chain.invoke({"question": question})