## Imports

In [25]:
import getpass
import glob
import os
import textwrap

from huggingface_hub import hf_hub_download
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [26]:
# Never commit API keys to a notebook. The key that used to be hard-coded in
# this cell has been exposed and must be revoked.
# Use the key already present in the environment; only when it is missing,
# ask for it interactively (getpass does not echo the input).
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

## Constants

In [27]:
# Hugging Face Hub token, read from the environment instead of being hard-coded.
# The token that used to be written here has been exposed and must be revoked.
# Falls back to an empty string when HUGGINGFACEHUB_API_TOKEN is not set.
HUGGING_FACE_API_KEY = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "")

In [28]:
# Disabled alternative: build `model` from the Hugging Face Hub repo
# "decapoda-research/llama-7b-hf" instead of OpenAI. Everything below is
# commented out, so this cell currently does nothing when run.
# model = HuggingFaceHub(
#     repo_id="decapoda-research/llama-7b-hf",
#     model_kwargs={
#         "temperature": 0.9,
#         "max_length": 512
#     },
#     huggingfacehub_api_token=HUGGING_FACE_API_KEY
# )


In [29]:
# OpenAI LLM used by every chain built later in this notebook.
# NOTE(review): "text-davinci-003" may have been retired by OpenAI - confirm it
# is still served before re-running.
model = OpenAI(
    model_name="text-davinci-003",
)

In [30]:
# Prompt that poses a single question and asks for a very short answer.
template = (
    "Question : {question}\n"
    "\n"
    "Answer the question in less than 5 words"
)

prompt = PromptTemplate(
    input_variables=['question'],
    template=template,
)

In [31]:
# Smoke test: send one question through the short-answer prompt and print
# whatever the model replies.
question = "Where is Cameroon found?"
test_chain = LLMChain(llm=model, prompt=prompt)

answer = test_chain.run(question)
print(answer)

: Central Africa


## Loading and splitting the PDFs

In [32]:
# Load the first PDF as one Document per page, then cut the pages into chunks
# with a single splitter (chunk_size=700, chunk_overlap=0).
# `load()` is used instead of `load_and_split()`: the latter already runs a
# default text splitter with its own chunk size and overlap, so splitting its
# result a second time would chunk every page twice with different settings.
loader = PyPDFLoader("../data/12_rules.pdf")
pages = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=0)
texts = splitter.split_documents(pages)

In [33]:
# Peek at the first five chunks to check that the split looks sensible.
texts[:5]

[Document(page_content='Jordan B. Peterson\n12 RULES FOR LIFE\nAn Antidote for Chaos\nForeword by Norman Doidge\nIllustrations by Ethan Van Scriver', metadata={'source': '../data/12_rules.pdf', 'page': 2}),
 Document(page_content='Table of Contents\nForeword by Norman Doidge\nOverture\nRULE 1\n / Stand up straight with your shoulders back\nRULE 2\n / Treat yourself like someone you are responsible for helping\nRULE 3\n / Make friends with people who want the best for you\nRULE 4\n / Compare yourself to who you were yesterday, not to who someone else is today\nRULE 5\n / Do not let your children do anything that makes you dislike them\nRULE 6\n / Set your house in perfect order before you criticize the world\nRULE 7\n / Pursue what is meaningful (not what is expedient)\nRULE 8\n / Tell the truth—or, at least, don’t lie\nRULE 9\n / Assume that the person you are listening to might know something you don’t\nRULE 10', metadata={'source': '../data/12_rules.pdf', 'page': 3}),
 Document(page_

In [36]:
# Load the second PDF as one Document per page and chunk it with the same
# `splitter` that was configured for the first PDF.
# `load()` is used instead of `load_and_split()`: the latter already runs a
# default text splitter with its own chunk size and overlap, so splitting its
# result a second time would chunk every page twice with different settings.
ewp_loader = PyPDFLoader("../data/ewp_22.2.pdf")
ewp_pages = ewp_loader.load()

ewp_texts = splitter.split_documents(ewp_pages)

In [37]:
# Peek at the first five chunks of the second PDF.
ewp_texts[:5]

[Document(page_content='Admin User Guide\nUser Guide\n4/1553-CSH 109 698/4 Uen J', metadata={'source': '../data/ewp_22.2.pdf', 'page': 0}),
 Document(page_content='Copyright\n© Ericsson AB 2012–2022. All rights reserved. No part of this document may be\nreproduced in any form without the written permission of the copyright owner.\nDisclaimer\nThe contents of this document are subject to revision without notice due to\ncontinued progress in methodology, design and manufacturing. Ericsson shall\nhave no liability for any error or damage of any kind resulting from the use of this\ndocument.\n4/1553-CSH 109 698/4 Uen J | 2022-02-23', metadata={'source': '../data/ewp_22.2.pdf', 'page': 1}),
 Document(page_content='Contents\n1 Admin User Guide Introduction 1\n2 Persistent Storage Configuration 2\n3 Notification  Service 3\n3.1 Notification  Service Overview 3\n3.2 Notification  Service Client Configuration 4\n3.3 Notification  Service Channel Configuration 5\n3.4 Disable Notification  Servic

## Embedding and similarity search

In [9]:
# Embed the chunks in `texts` with a sentence-transformers model and index them
# in Chroma.
# The collection name is set explicitly: without it every index built in this
# kernel uses Chroma's default collection name, and on Chroma versions that
# share in-memory state between clients the documents of different PDFs would
# end up in the same collection.
hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
chroma_index = Chroma.from_documents(
    texts,
    hf_embeddings,
    collection_name='twelve_rules',
)
print('Done')

Using embedded DuckDB without persistence: data will be transient


Done


In [38]:
# Embed the chunks in `ewp_texts` with a sentence-transformers model and index
# them in Chroma.
# The collection name is set explicitly: without it every index built in this
# kernel uses Chroma's default collection name, and on Chroma versions that
# share in-memory state between clients the documents of different PDFs would
# end up in the same collection.
ewp_hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
ewp_chroma_index = Chroma.from_documents(
    ewp_texts,
    ewp_hf_embeddings,
    collection_name='ewp_admin_guide',
)
print('Done')

Using embedded DuckDB without persistence: data will be transient


Done


In [49]:
# Retrieve the two chunks most similar to the query and print each one as
# "<page>: <text>" followed by a blank line.
results = ewp_chroma_index.similarity_search("What is the internal rating function?", k=2)

# The loop variable is deliberately not called `result`: that notebook-level
# name holds the QA chain output in other cells, and a loop variable leaks into
# the kernel namespace and would overwrite it with a Document.
for hit in results:
    print(f'{hit.metadata["page"]}:', hit.page_content + "\n")

146: External Rating User Profile . The example profile  is configured  to use
external rating for fee, commissioning, loyalty, discounts, and promotions. The
profile  configuration  is done in the Administration Tools  GUI, see General User
Profile  Information in the Administration Tools GUI  on page 218
To enable external rating for fee, commissioning and loyalty:External Rating Configuration
4/1553-CSH 109 698/4 Uen J | 2022-02-23 141

132: The Internal (Ericsson Rule Engine) commission rating engine is used with
this rating, see General User Profile  Information in the Administration Tools
GUI  on page 218.
— External rating for commission, see External Rating Configuration  on page
141.
8.1 Commission Distribution
The commission generated for an operation, such as registration, or a
transaction, such as cash-out, can be distributed, or split, between agents in a
hierarchy. The split is defined  as the percentage of the commission an agent
share with the agent on the lower level i

## Question answering

In [None]:
# Question answering over `chroma_index`: fetch the chunks closest to the
# query, then hand them to a 'refine' QA-with-sources chain.
query = "What is chaos?"
documents = chroma_index.similarity_search(query)

chain = load_qa_with_sources_chain(model, chain_type='refine')
result = chain(dict(input_documents=documents, question=query))

In [17]:
# Show the raw value returned by the chain for the "What is chaos?" query.
result

{'input_documents': [Document(page_content='Chaos is the domain of ignorance itself. It’s \nunexplored territory\n. Chaos is\nwhat extends, eternally and without limit, beyond the boundaries of all states,\nall ideas, and all disciplines. It’s the foreigner, the stranger, the member of\nanother gang, the rustle in the bushes in the night-time, the monster under the\nbed, the hidden anger of your mother, and the sickness of your child. Chaos is\nthe despair and horror you feel when you have been profoundly betrayed. It’s\nthe place you end up when things fall apart; when your dreams die, your\ncareer collapses, or your marriage ends. It’s the underworld of fairytale and\nmyth, where the dragon and the gold it guards eternally co-exist. Chaos is', metadata={'source': '../data/12_rules.pdf', 'page': 63}),
  Document(page_content='discover your partner’s infidelity. Chaos is the experience of reeling unbound\nand unsupported through space when your guiding routines and traditions\ncollapse

In [42]:
# Same retrieval + 'refine' QA-with-sources flow over `chroma_index`, this time
# asking for the title of the book.
query = "what's the title of the book?"
documents = chroma_index.similarity_search(query)

chain = load_qa_with_sources_chain(model, chain_type='refine')
result = chain(dict(input_documents=documents, question=query))

In [43]:
# Show the raw value returned by the chain for the book-title query.
result

{'input_documents': [Document(page_content='Jung, some of Freud, much of the great works of Nietzsche, Dostoevsky,\nSolzhenitsyn, Eliade, Neumann, Piaget, Frye and Frankl, \nMaps of Meaning\n,\npublished nearly two decades ago, shows Jordan’s wide-ranging approach to\nunderstanding how human beings and the human brain deal with the\narchetypal situation that arises whenever we, in our daily lives, must face\nsomething we do not understand. The brilliance of the book is in his', metadata={'source': '../data/12_rules.pdf', 'page': 9}),
  Document(page_content='demonstration of how rooted this situation is in evolution, our DNA, our\nbrains and our most ancient \nstories. And he shows that these stories have\nsurvived because they still provide guidance in dealing with uncertainty, and\nthe unavoidable unknown.\nOne of the many virtues of the book you are reading now is that it provides\nan entry point into \nMaps of Meaning,\n which is a highly complex work\nbecause Jordan was working ou

In [46]:
# Question answering over `ewp_chroma_index`: fetch the chunks closest to the
# query and pass them to a 'stuff' QA-with-sources chain.
chain = load_qa_with_sources_chain(model, chain_type='stuff')
# Spelling fixed ("permisssions" -> "permissions"): the query text is used for
# the similarity search and is also passed to the chain as the question.
query = "What permissions are necessary to view accountholder information?"
documents = ewp_chroma_index.similarity_search(query)
result = chain({"input_documents": documents, "question": query})

In [47]:
# Show the raw value returned by the chain for the permissions query.
result

{'input_documents': [Document(page_content='View account holder details. It is possible to view\nusing either individual permissions or access\nprofiles.If  both are configured,  individual\npermissionseffects permissions inherited using\naccess profiles.View Account Holder Information by Access Profiles\nAccess to account holder profile  is needed,\nconfigured  using Access Profiles , see Access Profiles\nin the Administration Tools GUI  on page 253.\nAdd account for account holder ROLE_VIEW_ANY_ACCOUNT\nAccess to account holder profile  is needed,\nconfigured  using Access Profiles , see Access Profiles\nin the Administration Tools GUI  on page 253.\nROLE_GET_PROFILES\nROLE_ADD_ANY_MOBILE_MONEY_ACCOUNT\nROLE_VIEW_ANY_ACCOUNTHOLDER_INFO', metadata={'source': '../data/ewp_22.2.pdf', 'page': 356}),
  Document(page_content='•On Behalf of Permission , see On Behalf of Permissions in the\nAdministration Tools GUI  on page 265.\nTo allow a user profile  to execute specific  operations on be

In [None]:
# Plain LLM chain (no document retrieval) that asks for a one- or two-sentence
# answer to a question.
prompt_template = (
    "Answer the following in 1 or 2 sentences:\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer:"
)

short_answer_prompt = PromptTemplate.from_template(prompt_template)
llm_chain = LLMChain(prompt=short_answer_prompt, llm=model)

In [None]:
# Ask the plain LLM chain the same "What is chaos?" question that was put to
# the retrieval-based chain earlier in the notebook.
llm_chain("What is chaos?")

In [15]:
# Profit estimate: total revenue minus three percentage-based charges and one
# flat registration fee.
total_revenue = 15_000_000
registration = 350_000

# Each of these is a fixed percentage of total revenue.
vat = (19.25 / 100) * total_revenue
enchird = (10 / 100) * total_revenue
production_costs = (60 / 100) * total_revenue

# Subtract the charges one after another, in this order.
deductions = (vat, enchird, production_costs, registration)
profit = total_revenue
for amount in deductions:
    profit -= amount

profit

1262500.0

In [9]:
# Show the `enchird` amount (10% of total revenue) computed in the previous cell.
enchird

1500000.0