In [171]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import DocArrayInMemorySearch

from langchain.document_loaders import PyPDFDirectoryLoader
# from langchain.indexes import VectorstoreIndexCreator

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import LLMChain
from langchain.chains import SequentialChain
from langchain.chains import TransformChain
from langchain.prompts import PromptTemplate

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.schema import SystemMessage, AIMessage, HumanMessage
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import SystemMessagePromptTemplate
from langchain.prompts import HumanMessagePromptTemplate
from langchain.prompts import AIMessagePromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector

from langchain.output_parsers import CommaSeparatedListOutputParser


from IPython.display import display, Markdown

In [172]:
import langchain
# Global LangChain debug flag, left switched off for this notebook.
langchain.debug = False

In [173]:
import os

# The key lives in a local text file instead of being hard-coded in the notebook.
# Strip surrounding whitespace: most editors append a trailing newline, and
# exporting it verbatim would put that newline inside the key.
with open('openai_api_key.txt', 'r') as f:
    os.environ["OPENAI_API_KEY"] = f.read().strip()
os.environ["LANGCHAIN_TRACING_V2"] = "true"


In [174]:
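# NOTE(review): `query_db` further down calls `db.similarity_search`, and the
# commented-out lines below are the only visible place where `db` is built.
# On a fresh kernel they must be re-enabled (or `db` supplied some other way)
# before the chain is run, otherwise the lookup fails with a NameError.
# file = './resources'
# loader = PyPDFDirectoryLoader(file)
# pages = loader.load_and_split()

# embeddings = HuggingFaceEmbeddings()
# db = DocArrayInMemorySearch.from_documents(pages, embeddings)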

In [175]:
# Zero-temperature model, used for the query-generation step.
llm = ChatOpenAI(temperature=0.0)

# Streaming model (temperature 0.5) used for the summarising and answering steps;
# the callback handler echoes output to stdout while it is generated.
llm_streaming = ChatOpenAI(
    callbacks=[StreamingStdOutCallbackHandler()],
    streaming=True,
    temperature=0.5,
)


In [176]:
# Number of search queries the LLM is asked to produce, and number of hits
# fetched from the store for each query (used as `k` in `query_db`).
n_queries = 4
n_answers = 2

# --- System message: explains the query-generation task -------------------
# The raw text gets its own name so `system_template` is never a plain string.
system_text = f'''
You are a teacher assistant in a class for teaching writers how to write. You are really
skilled with using technology to do your job for you. When you are asked a question you generate a number of queries to use against 
a database containing books on good writing. Make sure that your queries answer the essence of the question rather
than quering some less relevant information that is contained in the question. That's especially important for long questions
with that have some setup before the actual question is asked.
You are not supposed to answer the question yourself, but rather to find the best queries that answer the question for you.
Queries answers will be chosen based on semantic similarity to the question so make sure the queries stay relevant and sensible. 
Answer with {n_queries} queries separated by commas as this is the format the search engine accepts, do not use numbered list,
do not put the queries in quotes.
'''
system_template = SystemMessagePromptTemplate.from_template(system_text)

# --- Human message: the user's question, passed through unchanged ---------
question = HumanMessagePromptTemplate.from_template('{question}')

# --- AI message: primes the model with its own "plan" and output format ---
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()

# The primer previously read "between <n> short queries", a range with no upper
# bound. `query_db` asserts that exactly `n_queries` come back, so say "exactly".
answer_text = 'I must list exactly ' + str(n_queries) + ''' short queries. They need to be relevant to the question
 but also different from each other to cover wide array of search results.\n
{format_instructions}
'''
answer_prompt = PromptTemplate(
    template=answer_text,
    input_variables=[],
    partial_variables={"format_instructions": format_instructions},
)
answer = AIMessagePromptTemplate(prompt=answer_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_template, question, answer])

In [177]:
# Step 1 of the pipeline: the model's reply to the query-generation prompt
# is stored under the 'queries' key.
first_chain = LLMChain(
    prompt=chat_prompt,
    llm=llm,
    output_key='queries',
)

In [178]:
def query_db(d: dict) -> dict:
    """Run every generated query against `db` and attach the hits to the state.

    Parameters
    ----------
    d : dict
        Chain state. ``d['queries']`` holds the raw text produced by the
        query-generation step; it is split with ``output_parser``.

    Returns
    -------
    dict
        The same ``d`` object, updated in place: ``'context'`` becomes a
        numbered text block built from each hit's ``page_content`` and
        ``'queries'`` is replaced by a numbered list of the parsed queries.

    Raises
    ------
    AssertionError
        If the number of parsed queries differs from ``n_queries``, or the
        total number of hits differs from ``n_answers * n_queries``.
    """
    queries = output_parser.parse(d['queries'])
    assert len(queries) == n_queries, f'Expected {n_queries} queries, got {len(queries)}'

    results = []
    for q in queries:
        results.extend(db.similarity_search(q, k=n_answers))
    expected_hits = n_answers * n_queries
    assert len(results) == expected_hits, f'Expected {expected_hits} search results, got {len(results)}'

    d['context'] = '\n\n'.join(
        f'{i}.\n{r.page_content}' for i, r in enumerate(results, start=1)
    )
    # Upper-case only the first character. str.capitalize() would also
    # lower-case the rest of the query, mangling acronyms and proper nouns.
    d['queries'] = [
        f'{i}. {q[:1].upper()}{q[1:]}' for i, q in enumerate(queries, start=1)
    ]
    return d

In [179]:
# Step 2 of the pipeline: wraps `query_db`, which reads 'queries' and adds 'context'.
transform_chain = TransformChain(transform=query_db, input_variables=['queries'], output_variables=['context'])

In [198]:
# Instructions for the summarising step. Built as an f-string so the expected
# number of contexts is interpolated directly; the doubled braces keep
# {question} and {context} as template placeholders for LangChain to fill.
summary_instructions = f'''
Given a question and a bunch of contexts returned by the search engine, you need to summarize each context.
If some of the {n_queries * n_answers} contexts is not understadable or irrlevant, you can skip it. Otherwise, you need to summarize it in 1-2 sentences.
You don't need to summarize the whole context, just extract the most important information in relation to the question.
Numerate the resulting summaries. If you skip a context, just leave the number empty. 

Question given:
{{question}}

Contexts given:
{{context}}
'''
system_template = SystemMessagePromptTemplate.from_template(summary_instructions)
chat_prompt = ChatPromptTemplate.from_messages([system_template])

# Step 3 of the pipeline: the summaries are stored under 'context_summaries'.
summ_chain = LLMChain(
    prompt=chat_prompt,
    llm=llm_streaming,
    output_key='context_summaries',
)

In [199]:
# Persona and ground rules for the answering step.
tutor_instructions = '''
You are a renown writer and are tutoring some students. You need to help them by answering their questions, helping them develop their skills
and giving them a valuable feedback on their work. You also have a bunch of helpful materials and books that help you answering their questions.
You are not supposed to give suggestions that are irrelevant to the question. Try to answer the specific question as best as you can.
If some materials given to you are irrelevant then disregard them rather than trying to include them in your answer.
Do not repeat yourself. Your answer should be shorter than the contexts given to you so do not try to paraphrase them. 
'''
system_template = SystemMessagePromptTemplate.from_template(tutor_instructions)

# The student's question, passed through unchanged.
question = HumanMessagePromptTemplate.from_template('{question}')

# Second system message carrying the summaries produced by the previous step.
sources_text = '''
Your assistant provided you some summaries from various sources that might help you answer the question.
Take a look at them and use them to answer the question based on the information they provide and your own deep knowledge.
Sources: 
{context_summaries}
\n
'''
system_help = SystemMessagePromptTemplate.from_template(sources_text)

final_messages = [system_template, question, system_help]
chat_prompt = ChatPromptTemplate.from_messages(final_messages)

# Step 4 of the pipeline: the final answer is stored under 'result'.
final_chain = LLMChain(
    prompt=chat_prompt,
    llm=llm_streaming,
    output_key='result',
)
# messages = chat_prompt.format_prompt(question='What is the difference between a good and a bad writer?').to_messages()

In [200]:
# Full pipeline: generate queries -> search -> summarise hits -> answer.
pipeline_steps = [first_chain, transform_chain, summ_chain, final_chain]
chain_whole = SequentialChain(
    chains=pipeline_steps,
    input_variables=['question'],
    output_variables=['queries', 'context_summaries', 'result'],
)

In [201]:
# Longer sample question with a lot of setup before the actual ask. It used to
# be assigned to `question` and overwritten on the very next statement, so it
# never reached the chain; it is kept under its own name for experiments.
question_long = '''
I read a book in which the characters have to make a journey through desolate and dangerous desert which at night turns into 
a dark ocean. The book kept me on edge consantly. There was danger and challenge in traversing the desert during day. 
There was unease and fear with a feeling of dark foreboding when characters hid from the rushing ocean at night.
The ocean was ominous with many hidden monstrous leviathans lurking beneath the surface.

I want to create a similar feeling in my own writing. How can I do that? What setup would work best? I want my work to be original
but also to be inspired by the book I read, mimicking the feeling of unease and danger.
'''

# The question actually sent through the pipeline.
question = '''What's the best method to start self-publishing?'''
response = chain_whole({'question': question})

1. A checklist for self-publishing includes identifying publishing goals, being prepared to devote time and energy to publicity and marketing, hiring professionals for quality, and having a marketing plan.
2. Self-publishing can be quick and easy, but if you are contemplating a career as an author, self-publishing your book may have unintended consequences on your future publishing endeavors.
3. Self-publishing has a lack of distribution, which is almost impossible for your work to reach its market without strong distribution channels.
6. If you do decide to self-publish and want to maximize sales, develop a plan and strategy for producing a readable, attractive book and getting it out there. 
7. Some online companies offer an excellent package for self-publishing, including print-on-demand and electronic formats, with royalties of 20% or 50%.
8. Independent publishers can get books out sooner than larger houses and must make every book count, which can mean a lot to you over time.Star

In [202]:
# Render the final answer with Markdown formatting.
final_answer = Markdown(response['result'])
display(final_answer)

Starting self-publishing can be a quick and easy process, but it is important to keep in mind your publishing goals and to be prepared to devote time and energy to publicity and marketing. If you are contemplating a career as an author, self-publishing your book may have unintended consequences on your future publishing endeavors. One of the biggest challenges of self-publishing is the lack of distribution. Without strong distribution channels, it can be almost impossible for your work to reach its market. However, some online companies offer excellent packages for self-publishing, including print-on-demand and electronic formats, with royalties of 20% or 50%. If you do decide to self-publish and want to maximize sales, develop a plan and strategy for producing a readable, attractive book and getting it out there. Additionally, hiring professionals for quality and having a marketing plan can be beneficial. Independent publishers can get books out sooner than larger houses and must make every book count, which can mean a lot to you over time.

In [194]:
# Echo the whole output dict of the pipeline run (question, result, summaries, ...).
response

{'question': "What's the best method to start self-publishing?",
 'result': "The best method to start self-publishing is to develop a plan and strategy for producing a readable, attractive book and getting it out there. It's important to hire a quality editor, proofreader, book designer, and publicist. Print-on-demand (POD) companies can print and ship books as they receive orders and list the book with online bookstores and certain chain stores. Independent publishers can get books out sooner and buy books that will backlist, but staying in print for a long time may not happen with larger New York houses. However, it's important to keep in mind that self-publishing requires extensive time and energy for publicity and marketing, and it may have unintended consequences on future publishing endeavors. Therefore, it's crucial to have a marketing and publicity plan in place and to be highly effective at publicity and marketing.",
 'context_summaries': "Self-publishing can be a good option 

In [185]:
# Switches LangChain's global debug flag off again (same assignment as near the top).
langchain.debug = False