In [None]:
%pip install langchain langchain_openai faiss-cpu langchainhub --upgrade

In [None]:
import os
from getpass import getpass

# Do not hardcode the key in the notebook. Reuse a key that is already exported
# in the environment; otherwise ask for it interactively (input is not echoed,
# so the secret never lands in the saved notebook or its outputs).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
from langchain import hub

In [None]:
# Pull four example prompts from the LangChain Hub, in this order:
# question/answer pairs, synthetic training data, text-to-SQL, and RAG.
prompt, prompt_two, prompt_three, rag_prompt = (
    hub.pull(handle)
    for handle in (
        "homanp/question-answer-pair",
        "gitmaxd/synthetic-training-data",
        "rlm/text-to-sql",
        "rlm/rag-prompt",
    )
)

In [None]:
# Display the template pulled from "homanp/question-answer-pair" so its
# input variables and instructions can be inspected.
prompt

ChatPromptTemplate(input_variables=['context', 'data_format', 'number_of_pairs'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'data_format', 'number_of_pairs'], template='You are an AI assistant tasked with generating question and answer pairs for the given context using the given format. Only answer in the format with no other text. You should create the following number of question/answer pairs: {number_of_pairs}. Return the question/answer pairs as a Python List. Each dict in the list should have the full context provided, a relevant question to the context and an answer to the question.\n\nFormat:\n{data_format}\n\nContext:\n{context}\n'))])

In [None]:
# Display the template pulled from "gitmaxd/synthetic-training-data".
prompt_two

PromptTemplate(input_variables=['EXAMPLE', 'NUMBER', 'PERSPECTIVE', 'SEED_CONTENT'], template='Utilize Natural Language Processing techniques and Generative AI to create new Question/Answer pair textual training data for OpenAI LLMs by drawing inspiration from the given seed content: {SEED_CONTENT} \n\nHere are the steps to follow:\n\n1. Examine the provided seed content to identify significant and important topics, entities, relationships, and themes. You should use each important topic, entity, relationship, and theme you recognize. You can employ methods such as named entity recognition, content summarization, keyword/keyphrase extraction, and semantic analysis to comprehend the content deeply.\n\n2. Based on the analysis conducted in the first step, employ a generative language model to generate fresh, new synthetic text samples. These samples should cover the same topic, entities, relationships, and themes present in the seed data. Aim to generate {NUMBER} high-quality variations 

## RAG Prompt

In [None]:
# Display the template pulled from "rlm/rag-prompt"; the repr lists the
# input variables the prompt expects.
rag_prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [None]:
# Print only the list of message templates that make up the RAG prompt.
print(rag_prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]


In [None]:
# End-to-end RAG example: load a web page, split it into chunks, index the
# chunks in FAISS, and answer a question with RetrievalQA using the hub's
# RAG prompt.
from langchain import hub
from langchain.chains import RetrievalQA
# NOTE(review): recent LangChain releases serve the loader and FAISS from
# `langchain_community`; switch these two paths if the installed version
# warns about them — confirm against the pinned release.
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

# Load docs
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Split into 500-character chunks with no overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

# Store splits: embed every chunk with OpenAI embeddings (taken from the same
# `langchain_openai` package as the chat model) and index them in FAISS
vectorstore = FAISS.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

# RAG prompt
# NOTE: this rebinds `prompt`; after this cell it refers to the RAG prompt.
prompt = hub.pull("rlm/rag-prompt")

# LLM (temperature=0 is passed through to the chat model)
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# RetrievalQA: retrieve chunks from the vector store and pass them to the LLM
# through the RAG prompt
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)

question = "What are the approaches to Task Decomposition?"
result = qa_chain.invoke({"query": question})

# Last expression of the cell, so the answer text is rendered as its output
result["result"]

'The approaches to task decomposition include using LLM with simple prompting, task-specific instructions, and human inputs.'