In [57]:
#import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import google.generativeai as palm
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.vectorstores import chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
import os
from langchain.prompts import PromptTemplate

In [58]:
os.environ['GOOGLE_API_KEY'] =  ''

In [59]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("buildings-12-01787-v3.pdf")
pages = loader.load_and_split()

In [60]:
llm = GooglePalm(temperature=0.1)

In [61]:
from langchain.chains.summarize import load_summarize_chain
import textwrap

chain = load_summarize_chain(llm, 
                             chain_type="map_reduce")


output_summary = chain.run(pages)
wrapped_text = textwrap.fill(output_summary, width=100)
print(wrapped_text)

Deep forest-DQN is a deep reinforcement learning method that uses a deep forest classifier to reduce
the action space and speed up convergence. DF-DQN has a good energy-saving effect in engineering
applications but is not suitable for systems lacking historical data or expert experience.


In [25]:
prompt_template = """Write a concise bullet point summary of the following:


{text}


CONSCISE SUMMARY IN BULLET POINTS:"""

BULLET_POINT_PROMPT = PromptTemplate(template=prompt_template, 
                        input_variables=["text"])

chain = load_summarize_chain(llm, 
                             chain_type="map_reduce",
                             map_prompt=BULLET_POINT_PROMPT, 
                             combine_prompt=BULLET_POINT_PROMPT)

# chain.llm_chain.prompt= BULLET_POINT_PROMPT
# chain.combine_document_chain.llm_chain.prompt= BULLET_POINT_PROMPT

output_summary = chain.run(pages)
wrapped_text = textwrap.fill(output_summary, 
                             width=100,
                             break_long_words=False,
                             replace_whitespace=False)

print(wrapped_text)

- DF-DQN is a deep reinforcement learning method for energy-saving control of cooling water system
in HVAC.
- DF-DQN uses a deep forest classifier to map the action space to a smaller one.
- DF-DQN
converges much faster than DQN.
- DF-DQN performs slightly worse than the model-based control method
in saving energy, but it does not require any complete system model.
- DF-DQN has a good energy-
saving effect in engineering applications, but it is not suitable for systems lacking historical
data or expert experience.


# chat with pdf

In [26]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS 

In [27]:
doc_reader = PdfReader('impromptu-rh.pdf')

raw_text = ''
for i, page in enumerate(doc_reader.pages):
    text = page.extract_text()
    if text:
        raw_text += text
        
# Splitting up the text into smaller chunks for indexing
text_splitter = CharacterTextSplitter(        
    separator = "\n",
    chunk_size = 1000,
    chunk_overlap  = 200, #striding over the text
    length_function = len,
)
texts = text_splitter.split_text(raw_text)

In [37]:
embeddings = GooglePalmEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)

In [38]:
query = "how does GPT-4 change social media?"
docs = docsearch.similarity_search(query)

In [43]:
print(docs[0].page_content)

when I started thinking about GPT-4’s potential impact on 
social media was to spend an hour or so feeding GPT-4 various 
prompts to generate titles for potential MrBeast videos. While 
GPT-4 produced fairly generic responses much of the time, its 106Impromptu: Amplifying Our Humanity Through AI
extreme generativity means its success rate doesn’t have to be 
high. Here’s a few that I felt were its best responses:
Reid: Create twenty-five titles for funny MrBeast videos 
that feature a Lamborghini versus something else.
GPT-4:  Lamborghini vs 10,000 Balloons: Can It Float?
Reid: Create twenty-five titles for hilarious and creative 
MrBeast videos involving hot sauce.
GPT-4:  I Filled a Pool with Hot Sauce and Dared My 
Friends to Swim in It
Reid: Create the headline for a MrBeast video if MrBeast 
were a radical Marxist.
GPT-4:  I Bought an Entire Factory and Gave It to 
the Workers
Reid: Create twenty titles for wacky and inventive 
MrBeast videos that involve the world’s largest 3D


In [44]:
from langchain.chains.question_answering import load_qa_chain

chain = load_qa_chain(GooglePalm(), 
                      chain_type="stuff") # we are going to stuff all the docs in at once

In [45]:
chain.llm_chain.prompt.template

"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"

In [46]:
query = "who are the authors of the book?"
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)

'Reid Hoffman, Sam Altman'

In [48]:
chain = load_qa_chain(GooglePalm(), 
                      chain_type="map_rerank",
                      return_intermediate_steps=True
                      ) 

query = "who are openai?"
docs = docsearch.similarity_search(query,k=10)
results = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
results



{'intermediate_steps': [{'answer': 'This document does not answer the question',
   'score': '0'},
  {'answer': 'a company that developed a chatbot', 'score': '50'},
  {'answer': 'research organization founded in 2015 with a mission to give millions of people direct, hands-on access to powerful new AI tools',
   'score': '80'},
  {'answer': 'an organization founded in 2015 to develop technologies that put the power of AI directly into the hands of millions of people',
   'score': '80'},
  {'answer': 'This document does not answer the question', 'score': '0'},
  {'answer': 'This document does not answer the question', 'score': '0'},
  {'answer': 'This document does not answer the question', 'score': '0'},
  {'answer': 'gence tools for the benefit of humanity', 'score': '85'},
  {'answer': 'non-profit AI research company', 'score': '80'},
  {'answer': 'a non-profit research organization founded in 2015 with a mission to give millions of people direct, hands-on access to powerful new AI t

In [49]:
results['output_text']

'a non-profit research organization founded in 2015 with a mission to give millions of people direct, hands-on access to powerful new AI tools.'

In [50]:
# check the prompt
chain.llm_chain.prompt.template

"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nIn addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:\n\nQuestion: [question here]\nHelpful Answer: [answer here]\nScore: [score between 0 and 100]\n\nHow to determine the score:\n- Higher is a better answer\n- Better responds fully to the asked question, with sufficient level of detail\n- If you do not know the answer based on the context, that should be a score of 0\n- Don't be overconfident!\n\nExample #1\n\nContext:\n---------\nApples are red\n---------\nQuestion: what color are apples?\nHelpful Answer: red\nScore: 100\n\nExample #2\n\nContext:\n---------\nit was night and the witness forgot his glasses. he was not sure if it was a sports car or an suv\n---------\nQuestion: what type was the car?\nHelpful Answer: a sports car or an su

# RetrievalQA
RetrievalQA chain uses load_qa_chain and combines it with the a retriever (in our case the FAISS index)

In [51]:
from langchain.chains import RetrievalQA

# set up FAISS as a generic retriever 
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":4})

# create the chain to answer questions 
rqa = RetrievalQA.from_chain_type(llm=GooglePalm(), 
                                  chain_type="stuff", 
                                  retriever=retriever, 
                                  return_source_documents=True)

In [52]:
rqa("What is OpenAI?")

{'query': 'What is OpenAI?',
 'result': 'OpenAI is a research organization founded in 2015 with a mission to give millions of people direct, hands-on access to powerful new AI tools.',
 'source_documents': [Document(page_content='Stable Diffusion, a new kind of opt-in, user-driven, and very \nvisible AI usage suddenly exists. Users share their outputs, \ntechniques, experiences, and opinions on Twitter, YouTube, \nGithub, Discord, and more. Diverse viewpoints from around \nthe world, informed by hands-on usage, shape this discourse, \nwhich is always spirited, often fractious, and, to my mind, \nhighly productive.\nMillions of people, including many whose main goal is to find \nflaws in these systems, are getting a shot to shape the further \nevolution of AI through their usage, feedback, and critiques. As 233Conclusion: At the Crossroads of the 21st Century\nOpenAI co-founder and CEO Sam Altman exclaimed in a recent \npost on OpenAI’s website, “We currently believe the best way to \ns

In [54]:
query = "what have the last 20 years been like for American journalism?"
rqa(query)['result']

'The last 20 years have been mostly bad news for the American journalism industry.'