In [None]:
!pip install langchain
!pip install unstructured
!pip install unstructured[local-inference]
!apt-get install poppler-utils 

In [None]:
!pip install openai

In [None]:
# !apt install tesseract-ocr
# !apt install libtesseract-dev

In [None]:
# !pip install chromadb

In [None]:
!pip install pinecone-client

In [None]:
!pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"

In [54]:
import os
import json

import langchain
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

import pinecone

In [40]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings

In [44]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [52]:
def load_api_keys(credentials_file_name: str = 'credentials.json') -> tuple:
    '''Load API keys from a JSON credentials file

    Arguments:
        credentials_file_name: path of the JSON file containing the keys
            OPENAI_API_KEY, PINECONE_API_KEY and PINECONE_API_ENV

    Returns:
        A tuple containing OpenAI API Key, Pinecone API key and Pinecone API
        environment name

    Raises:
        FileNotFoundError: if the credentials file does not exist
        json.JSONDecodeError: if the file does not contain valid JSON
        KeyError: if one of the expected keys is missing from the file

    '''

    # Fail loudly instead of returning an error string: callers unpack the
    # result into three names, so a returned message would only surface as
    # an unrelated unpacking error far from the real cause.
    if not os.path.exists(credentials_file_name):
        raise FileNotFoundError(
            f'No file {credentials_file_name} or file corrupted'
        )

    with open(credentials_file_name) as f:
        content = json.load(f)

    return (
        content['OPENAI_API_KEY'],
        content['PINECONE_API_KEY'],
        content['PINECONE_API_ENV'],
    )


In [69]:
# Unpack the three values returned by load_api_keys into notebook-level names.
OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_API_ENV = load_api_keys('credentials.json')

In [71]:
# Export the OpenAI key as an environment variable.
# NOTE(review): presumably so that clients created without an explicit key
# can pick it up from the environment — confirm.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

### Book load

In [24]:
# Create a loader for the book PDF.
# NOTE(review): the path is absolute (looks like a Colab location); adjust it
# when running in another environment.
loader = UnstructuredPDFLoader('/content/input/book.pdf')

In [27]:
# Load the PDF through the loader; the result is indexable and its items
# expose `page_content` (see the cells that follow).
book = loader.load()

In [31]:
# Number of items the loader returned.
len(book)

1

In [32]:
# Preview only the beginning of the extracted text; displaying the whole
# book floods the notebook output.
book[0].page_content[:1000]



In [33]:
# Split the loaded book into chunks (chunk size 1000, no overlap).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(book)

In [35]:
# Number of chunks produced by the splitter.
len(texts)

391

In [37]:
# Inspect a single chunk to sanity-check the split
# (index 12 is presumably just an arbitrary sample).
texts[12]

Document(page_content='("Logotherapy in a Nutshell") boils down, as it were, to the lesson one may distill from the first part, the autobiographical account ("Experiences in a Concen- tration Camp"), whereas Part One serves as the exis- tential validation of my theories. Thus, both parts mutually support their credibility. I had none of this in mind when I wrote the book in 1945. And I did so within nine successive days and with the firm determination that the book would be published anonymously. In fact, the first printing of the original German version does not show my name on the cover, though at the last moment, just before the book\'s initial publication, I did finally give in to my friends who had urged me to let it be published with my name at least on the title page. At first, however, it had been written with the absolute conviction that, as an anonymous opus, it could never earn its author literary fame. I had wanted simply to convey to the reader by way of a concrete example

### Create embeddings

In [48]:
# Create the OpenAI embeddings object, passing the API key explicitly.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [65]:
# Name of the Pinecone index used by the rest of the notebook.
index_name = 'langchain'

# Initialise the Pinecone client with the API key and environment name.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)

In [None]:
# Build the vector store from the raw text of every chunk; only
# `page_content` is passed, so chunk metadata is not sent along.
docsearch = Pinecone.from_texts(
    [chunk.page_content for chunk in texts],
    embeddings,
    index_name=index_name,
)

In [None]:
# Question to ask, and the documents the vector store returns for it.
query = 'What was the life of prisoners of concentration camps like?'
docs = docsearch.similarity_search(
    query,
    include_metadata=True,
)

In [None]:
# LLM with temperature 0, wrapped in a question-answering chain of type 'stuff'.
llm = OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0)
chain = load_qa_chain(llm, chain_type='stuff')

In [None]:
# Run the QA chain on the retrieved documents with the same query.
chain.run(input_documents = docs, question = query)

---

In [72]:
# Rebinds `llm` (created earlier with temperature 0) to a new OpenAI instance
# with temperature 0.9.
# NOTE(review): no key is passed here; presumably it is read from the
# OPENAI_API_KEY environment variable set earlier — confirm.
llm = OpenAI(temperature=0.9)

In [73]:
# Ask the model directly; only the question string is passed, with no
# retrieved book passages.
llm('What is the meaning of life according to Viktor Frankl?')

"\n\nViktor Frankl believed that the meaning of life is found in every individual's unique search for meaning, which is largely determined by the person's attitude and individual interpretations of a set of circumstances. He argued that humans have a natural instinct to search for meaning and purpose, and that this was the primary motivation in life. He believed that in order to find meaning, we must invest our time and energy in something greater than ourselves, such as a meaningful job, a relationship with God, or a meaningful cause. He also believed that meaning can be found even in difficult circumstances and that each person has the potential to use the worst circumstances to fashion the best possible outcome."