### Setup

In [14]:
import openai
import os
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader


# use reddis as a backend
from langchain.vectorstores.redis import Redis
redis_url = "redis://localhost:6379"

# read local .env file
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) 


# setup azure openai api
openai.api_type = "azure"
openai.api_base = os.getenv("AZURE_OAI_ENDPOINT")
openai.api_version = "2023-03-15-preview"
openai.api_key  = os.getenv('AZURE_OAI_KEY')

llm_name = "gpt-3.5-turbo"
deployment_name_gpt = os.getenv("AZURE_OAI_MODEL_GPT_3")
deployment_name_ada = os.getenv("AZURE_OAI_MODEL_ADA")

os.environ["OPENAI_API_TYPE"] = openai.api_type
os.environ["OPENAI_API_BASE"] = openai.api_base
os.environ["OPENAI_API_KEY"] = openai.api_key
os.environ["OPENAI_API_VERSION"] = openai.api_version

### Load PDF Files

In [15]:
loader = PyPDFLoader("../data/dummy-file.pdf")
pages = loader.load()

In [16]:
len(pages)

19

In [17]:
page= pages[2]
print(page.page_content[:500])

 
 3     
 COSMETIC OR MEDICINAL?
 
 
Another factor that will affect how claims are regulated is whether 
they are considered cosmetic or medicinal.   
A cosmetic product is defined as: A medicinal product is defined as: 
“Any substance or preparation intended 
to be placed in contact with the various 
external parts of the human body 
(epidermis, hair system, nails, lips and 
external genital organs) or with the 
teeth and the mucous membranes of 
the oral cavity with a view exclusively 
or ma


In [18]:
page.metadata

{'source': '../data/dummy-file.pdf', 'page': 2}

### Document splitting

In [19]:
chunk_size = 1000
chunk_overlap = 150
separator = "\n"

In [20]:
r_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
c_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator=separator)

In [21]:
docs = r_splitter.split_documents(pages)

In [22]:
print(docs[1].page_content)
print(docs[1].metadata)
len(docs)

Claims made on-air or in marketing literature that accompanies products will 
be regulated by the Advertising Standards Authority (ASA). Not only 
do the ASA investigate consumer complaints, they also have a monitoring team reviewing the media, including teleshopping channels, to ensure claims adhere to their rules. Consequently, 
claims that are commonplace in the high street are not found in 
advertising, because non-removable point of sale advertising does not fall within the ASA’s remit.   
Any direct claim, or implication, that features on-air will fall within 
the ASA remit, regardless of how it is made – on-air graphic, claims on the product itself, press, testimonials, opinions, personal experience, beliefs etc are all assessed against the BCAP Code.   
Most other claims will be regulated by Trading Standards. While 
they do not enforce the regulations as vigilantly as the ASA, the requirement to substantiate claims is similar.   Type of 
claim Regulatory Body Most relevant
{'s

40

In [23]:
docs = c_splitter.split_documents(pages)

In [24]:
print(docs[1].page_content)
print(docs[1].metadata)
len(docs)

Claims made on-air or in marketing literature that accompanies products will 
be regulated by the Advertising Standards Authority (ASA). Not only 
do the ASA investigate consumer complaints, they also have a monitoring team reviewing the media, including teleshopping channels, to ensure claims adhere to their rules. Consequently, 
claims that are commonplace in the high street are not found in 
advertising, because non-removable point of sale advertising does not fall within the ASA’s remit.   
Any direct claim, or implication, that features on-air will fall within 
the ASA remit, regardless of how it is made – on-air graphic, claims on the product itself, press, testimonials, opinions, personal experience, beliefs etc are all assessed against the BCAP Code.   
Most other claims will be regulated by Trading Standards. While 
they do not enforce the regulations as vigilantly as the ASA, the requirement to substantiate claims is similar.   Type of 
claim Regulatory Body Most relevant
{'s

40

### Embedding

### spin up local redis to store vector embeddings
````docker run --rm -it -p 6379:6379 redis/redis-stack-server:latest````

In [25]:
#for embedding we need to use text-embedding-ada-002 model in needs to be deployed in Azure OAI service
embeddings = OpenAIEmbeddings(
    model=deployment_name_ada, 
    deployment=deployment_name_ada,
    chunk_size=1
    )

In [26]:
redis = Redis.from_documents(
    docs,
    embeddings,
    redis_url=redis_url,
    index_name="dummy-file"
    )

### Retreival

In [27]:
def search(query):
    results = redis.similarity_search_limit_score(query, k=1)
    return results[0] if results else None

In [28]:
query = "What are the typical cosmetic claims?"
result = search(query)

if (result):
    print(result.page_content)
    print(result.metadata)
else:
    print("Sorry, I couldn't find a relevant answer for your question.")

So diseases include conditions such as  
spots, sun burn and even razor burn 
 
More info1 More info  
TYPICAL COSMETIC CLAIMS: 
Clean, Protect, Cover, Mask, Perfume TYPICAL MEDICINAL CLAIMS: 
Treat, Restore, Cure, Rejuvenate, 
Repair, Lift, Prevent, Fix  
Please note: context is important in the above given examples 
COSMETIC CLAIMS CAN FEATURE 
ON QVC IF SUBSTANTIATED  TO MAKE MEDICINAL CLAIMS 
WITHOUT A PRODUCT LICENCE IS 
A CRIMINAL OFFENCE 
 
                                                
1 If you viewing a hard copy of this document the links are as follows: 
Cosmetics: http://www.ctpa.org.uk/page.asp?section=3&page=1 
Medicines: http://www.mhra.gov.uk/home/idcplg?IdcService=SS_GET_PAGE&nodeId=91
{'source': '../data/dummy-file.pdf', 'page': 2, 'id': 'doc:dummy-file:741b504762514defa17ba70c465bb02e'}


In [29]:
query = "What are the protection claims?"
result = search(query)

if (result):
    print(result.page_content)
    print(result.metadata)
else:
    print("Sorry, I couldn't find a relevant answer for your question.")

pollution by reinforcing the skin’s natural barrier”  Summary of protection claims:
 
 
 
Rejected Claims: Accepted Claims: 
Sun block 
 Sunscreen 
Improves the skin’s 
natural barrier Reinforces the skin’s natural 
barrier  
Protects skin (if no SPF 
contained) 
 
 Protects skin (if SPF 
contained) 
 
Help protect against further 
age spots 
 
 
Temporary delays 
premature ageing (if 
<SPF15 contained) Temporary delays premature 
ageing (if >SPF15 contained) 
Protect skin against 
sunburn (medicinal 
claim) Protect skin against 
pollution/the cold etc 
Reduces free radicals Helps to prevent the 
formation of free radicals 
caused by UV exposure
{'source': '../data/dummy-file.pdf', 'page': 4, 'id': 'doc:dummy-file:46d3c343de5c4d9ebf0f59a1abb4ed82'}


### QAChain

In [30]:
llm = ChatOpenAI(model_name=llm_name, temperature=0, engine=deployment_name_gpt)

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


In [31]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=redis.as_retriever()
)

In [32]:
question = "What are the typical cosmetic claims?"
result = qa_chain({"query": question})
result["result"]

'Typical cosmetic claims include clean, protect, cover, mask, and perfume.'

In [33]:
question = "What are the typical medicinal claims?"
result = qa_chain({"query": question})
result["result"]

'Typical medicinal claims include treat, restore, cure, rejuvenate, repair, lift, prevent, and fix.'

### PromptTemplates

In [34]:
# Build prompt
template = """Use the following pieces of context to answer the question at the end. \
    If you don't know the answer, just say that you don't know, don't try to make up an answer. \
    Use three sentences maximum. Keep the answer as concise as possible. \
    Always say "thanks for asking!" at the end of the answer. \
    {context}
    Question: {question}
    Helpful Answer:"""
    
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [35]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=redis.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [36]:
question = "What are the typical cosmetic claims?"
result = qa_chain({"query": question})

In [37]:
result["result"]

'Typical cosmetic claims include clean, protect, cover, mask, and perfume. Thanks for asking!'

In [38]:
result["source_documents"]

[Document(page_content='So diseases include conditions such as  \nspots, sun burn and even razor burn \n \nMore info1 More info  \nTYPICAL COSMETIC CLAIMS: \nClean, Protect, Cover, Mask, Perfume TYPICAL MEDICINAL CLAIMS: \nTreat, Restore, Cure, Rejuvenate, \nRepair, Lift, Prevent, Fix  \nPlease note: context is important in the above given examples \nCOSMETIC CLAIMS CAN FEATURE \nON QVC IF SUBSTANTIATED  TO MAKE MEDICINAL CLAIMS \nWITHOUT A PRODUCT LICENCE IS \nA CRIMINAL OFFENCE \n \n                                                \n1 If you viewing a hard copy of this document the links are as follows: \nCosmetics: http://www.ctpa.org.uk/page.asp?section=3&page=1 \nMedicines: http://www.mhra.gov.uk/home/idcplg?IdcService=SS_GET_PAGE&nodeId=91', metadata={'source': '../data/dummy-file.pdf', 'page': 2, 'id': 'doc:dummy-file:741b504762514defa17ba70c465bb02e'}),
 Document(page_content='to cosmetic beauty, \nrelaxation or ‘feel good’ \nclaims  \n  \nFURTHER CLAIMS\n \n As previously state

In [39]:
question = "What is blah blah blah?"
result = qa_chain({"query": question})
result["result"]

"I'm sorry, there is no specific question provided for me to answer. Please provide a question for me to assist you better. Thanks for asking!"