### Imports


In [4]:
import os
import openai
import tiktoken
import wandb
from pprint import pprint
from getpass import getpass
from wandb.integration.openai import autolog

from pathlib import Path
from pprint import pprint

from rich.markdown import Markdown
import pandas as pd
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential, # for exponential backoff
)

In [5]:
# Take the OpenAI key from the environment; an unset variable yields an empty string
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [6]:
# W&B tracing of LangChain and the W&B project are both configured through
# environment variables, see:
# https://docs.wandb.ai/guides/track/advanced/environment-variables
os.environ.update(
    {
        # this single flag is what switches on LangChain tracing with W&B
        "LANGCHAIN_WANDB_TRACING": "true",
        # name of the W&B project used by this notebook
        "WANDB_PROJECT": "p3m_app",
    }
)

In [7]:
# Fail fast if the W&B project environment variable does not hold the expected name
assert (
    os.environ.get("WANDB_PROJECT", "") == "p3m_app"
), "This doesn't look like a valid W&B project"

In [8]:
# Model name; used further down to pick the matching tiktoken encoding
MODEL_NAME = "text-davinci-003"

In [9]:
# # load in pdf document using langchain
# import langchain
# lc = langchain()
# lc.add_pdf("Data/guide-to-se-and-p3m-processes.pdf")

In [10]:
from langchain.document_loaders import PyPDFLoader

In [11]:
# Point the PDF loader at the guide; the path is relative to the notebook's working directory
loader = PyPDFLoader("../Data/guide-to-se-and-p3m-processes.pdf")

In [13]:
# loader

<langchain.document_loaders.pdf.PyPDFLoader at 0x7fa39f77f310>

In [12]:
# Load the PDF and split it into a list of document chunks.
# NOTE(review): no splitter is passed, so the library's default splitting is used — confirm which one
pages = loader.load_and_split()

In [23]:
# type(pages)

list

In [13]:
# Token counting needs the tokenizer that corresponds to the chosen model
tokenizer = tiktoken.encoding_for_model(model_name=MODEL_NAME)

In [14]:
# tokenizer

In [20]:
def count_tokens(documents, encoding=None):
    """Return the number of tokens in each document.

    Parameters
    ----------
    documents : iterable
        Objects that expose their text through a ``page_content`` attribute.
    encoding : optional
        Object with an ``encode(text)`` method whose result has a length.
        When omitted, the notebook-level ``tokenizer`` is used, which keeps
        existing ``count_tokens(pages)`` calls working unchanged.

    Returns
    -------
    list of int
        One token count per document, in input order.
    """
    if encoding is None:
        # fall back to the tokenizer created earlier in the notebook
        encoding = tokenizer
    return [len(encoding.encode(document.page_content)) for document in documents]

In [22]:
# One token count per chunk in pages
count_tokens(pages)

[727,
 484,
 541,
 497,
 718,
 654,
 525,
 375,
 181,
 500,
 425,
 466,
 421,
 390,
 536,
 521,
 56,
 639,
 235,
 1039,
 559,
 371,
 219,
 389,
 783,
 822,
 63,
 818,
 114,
 555,
 843,
 592]

### Notes
* what is the maximum number of tokens that can be inputted to the model?
* what is going on here (above)? how did we split the pdf into pages? how will this then be passed to the model?
* how do you inspect your output when doc splitting? (add to design pattern as an unknown)
* interested to know how `pages = loader.load_and_split()` works

In [31]:
# pages[0].page_content


In [32]:
# Render the text of the second chunk to inspect what the split produced
Markdown(pages[1].page_content)

### Embeddings
* this code uses embeddings with a vector database retriever to find relevant documents for a query.
* why vector dbs? why not just numpy?

In [15]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# OpenAIEmbeddings turns the text into vectors; Chroma stores those vectors
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(documents=pages, embedding=embeddings)

### Notes
* look up what is going on with retrievers and db stores (add to design pattern as an unknown)
* here is a question: what is stored in vector dbs: just the embeddings? or the tokens too?

We can now create a retriever from the db, passing the `k` parameter to control how many of the most relevant sections the similarity search returns.

In [16]:
# k=3: ask the similarity search for the three most relevant chunks
retriever = db.as_retriever(search_kwargs={"k": 3})

In [38]:
# retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], metadata=None, vectorstore=<langchain.vectorstores.chroma.Chroma object at 0x7fa3815ec9d0>, search_type='similarity', search_kwargs={'k': 3})

In [50]:
# Ask the retriever for the chunks most relevant to the question
# (how many come back is set by the retriever's search_kwargs)
query = "What are the principles of P3M?"
docs = retriever.get_relevant_documents(query)

In [40]:
# Let's see the results: print the source path recorded in each retrieved chunk's metadata
for doc in docs:
    print(doc.metadata["source"])

../Data/guide-to-se-and-p3m-processes.pdf
../Data/guide-to-se-and-p3m-processes.pdf
../Data/guide-to-se-and-p3m-processes.pdf


### Notes
* look into this: I guess the docs go into the db as embeddings but retain their link to the source doc? So it's interesting to understand how the db works in this regard, and what metadata and regular data are available

In [51]:
# Print every retrieved document, each followed by a separator line.
# The separator uses the newline escape '\n' so it is surrounded by blank lines
# (a forward slash, '/n', would be printed literally).
for doc in docs:
    print(doc)
    print('\n-------------------\n')

page_content='Guide to SE and P3M Processes :  Issue  1. Page  3 \n \nTerminology  \nThe term P3 is used to denote Project, Programmes and Portfolios (as defined within the APM Body of \nKnowledge), and P3M denotes Project, Programme and Portfolio Management.  Where the discussion only \nrefers to Projects and Programmes, the term PPM will be used.  SE is used to denote Systems Engineering \napproaches, models, processes and ways of thinking.  \nAcknowledgements and r eferences  \nUnless otherwise referenced, information is taken fr om the INCOSE System Engineering Handbook  (SEHBK) \nv4 (201 5)3 or the APM Body of Knowledge  (PMBOK) 6th Edition (2012)4.  Key references will also include \ninformation from the ISO S tandard ISO15288:20 15 Systems and software engineering – System life cycle \nprocesses  and the ISO standard ( ISO21500:2012)  Guidance on Project Management .  Use has also been \nmade of the information contained within the Guide to the SE Body of Knowledge  (currently a

In [52]:
# Show the metadata kept with each retrieved chunk (page number and source path)
for doc in docs:
    print(doc.metadata)

{'page': 5, 'source': '../Data/guide-to-se-and-p3m-processes.pdf'}
{'page': 8, 'source': '../Data/guide-to-se-and-p3m-processes.pdf'}
{'page': 27, 'source': '../Data/guide-to-se-and-p3m-processes.pdf'}


## Stuff Prompt

We'll now take the content of the retrieved documents, stuff it into a prompt template along with the query, and pass it into an LLM to obtain the answer.

In [47]:
from langchain.prompts import PromptTemplate

# Template with two placeholders: the retrieved context and the user's question
prompt_template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Concatenate the text of all retrieved chunks, separated by blank lines,
# then fill both placeholders to obtain the final prompt string
context = "\n\n".join(doc.page_content for doc in docs)
prompt = PROMPT.format(question=query, context=context)

Use LangChain to call the OpenAI completions API with the formatted prompt

In [53]:
from langchain.llms import OpenAI

# No arguments are passed, so the LLM wrapper is created with the library defaults
llm = OpenAI()
# Send the fully formatted prompt (context + question) and render the reply as Markdown
response = llm.predict(prompt)
Markdown(response)

### Notes
* How does the prompt generated here differ from the prompt generated using the chain?

In [17]:
# Same question as in the manual retrieval example, this time for the chain
query = "What are the principles of P3M?"

In [20]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Create the LLM once and hand that same instance to the chain
# (previously `llm` was assigned but a second OpenAI() was built inline).
llm = OpenAI()
# chain_type="stuff" is the chain counterpart of the manual "stuff prompt" above:
# retrieval plus prompting are handled by the chain instead of by hand.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
result = qa.run(query)

Markdown(result)

[34m[1mwandb[0m: Streaming LangChain activity to W&B at https://wandb.ai/dan-h/p3m_app/runs/dd54lkro
[34m[1mwandb[0m: `WandbTracer` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `langchain`.


### Notes
* need to look at prompt template more thoroughly
* understand the `retrieval qa chain` more thoroughly

## Gradio app

In [21]:
def retrieval_response(message, history):
    """Answer a chat message with the retrieval QA chain.

    A new "stuff" RetrievalQA chain is built over the notebook-level
    ``retriever`` on every call and run on ``message``.

    ``history`` is accepted because this function is used as the chat
    callback, but it is not used when producing the answer.
    """
    return RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        retriever=retriever,
    ).run(message)

In [2]:
import random

def random_response(message, history):
    """Return "Yes" or "No" at random; both arguments are ignored."""
    answers = ["Yes", "No"]
    return random.choice(answers)

In [26]:
import gradio as gr

# Chat UI that answers each message through retrieval_response
gr.ChatInterface(
    retrieval_response,
    title="Ask about P3M",
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask a question about P3M", container=False, scale=7),
    # description="Ask Yes Man any question",
    theme="soft",
    examples=["What are the key principles of P3M?", "What are the limitations of P3M?"],
    # with caching enabled, the example questions are answered once when the app is built
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()




Caching examples at: '/Users/drh/Documents/Repos/LLM_Project/Notebooks/gradio_cached_examples/86'
Caching example 1/2
Caching example 2/2
Caching complete

Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.


