### Setup

In [None]:
import openai
import os
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate


# use Redis as the vector-store backend
from langchain.vectorstores.redis import Redis
redis_url = "redis://localhost:6379"

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

from langchain.document_loaders import PyPDFLoader


# Point the openai package at the Azure OpenAI resource used by this notebook.
openai.api_type = "azure"
openai.api_base = "https://openaiserviceworkshop.openai.azure.com/"
openai.api_version = "2023-03-15-preview"
openai.api_key = os.getenv("AZURE_OAI_KEY")

# os.getenv returns None when the variable is absent, and writing None into
# os.environ below would fail with an opaque "str expected, not NoneType".
# Stop here instead, with a message that says what to fix.
if not openai.api_key:
    raise RuntimeError(
        "AZURE_OAI_KEY is not set; export it or add it to the local .env file."
    )

# Model name and Azure deployment name; both are passed to the chat model
# when it is created further down.
llm_name = "gpt-3.5-turbo"
deployment_name = "openai-workshop-deployment-environment"

# Mirror the same settings into the process environment — presumably so the
# LangChain OpenAI wrappers created later pick them up (verify if changed).
os.environ["OPENAI_API_TYPE"] = openai.api_type
os.environ["OPENAI_API_BASE"] = openai.api_base
os.environ["OPENAI_API_KEY"] = openai.api_key
os.environ["OPENAI_API_VERSION"] = openai.api_version

### Load PDF Files

In [None]:
# Read the PDF from the working directory; `pages` is the list of loaded
# documents (presumably one per PDF page — confirm against PyPDFLoader docs).
loader = PyPDFLoader("dummy-file.pdf")
pages = loader.load()

In [None]:
# Number of documents returned by the loader.
len(pages)

In [None]:
# Peek at the third loaded document: only its first 500 characters are shown.
# Indexing with 2 requires at least three entries in `pages`.
page = pages[2]
preview = page.page_content[:500]
print(preview)

In [None]:
# Metadata attached to the document selected above.
page.metadata

### Document splitting

In [None]:
# Settings shared by both text splitters built in the next cell.
# Sizes are measured by the splitter's length function
# (characters by default — TODO confirm).
chunk_size = 1_000    # upper bound for one chunk
chunk_overlap = 150   # amount repeated between neighbouring chunks
separator = "\n"      # split point handed to CharacterTextSplitter only

In [None]:
# Two splitters with identical size settings, so their output can be compared.
# The recursive splitter keeps its own default separators.
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
# The plain character splitter cuts on the single explicit separator.
c_splitter = CharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    separator=separator,
)

In [None]:
# Chunk the loaded pages with the recursive splitter.
docs = r_splitter.split_documents(pages)

In [None]:
# Show the second chunk (text, then metadata) and the total chunk count.
# docs[1] requires at least two chunks.
print(docs[1].page_content)
print(docs[1].metadata)
len(docs)

In [None]:
# Re-chunk the same pages with the character splitter. This rebinds `docs`,
# so when cells run top to bottom these are the chunks that get embedded
# and indexed later.
docs = c_splitter.split_documents(pages)

In [None]:
# Same inspection as for the recursive splitter: second chunk's text and
# metadata, then the chunk count. docs[1] requires at least two chunks.
print(docs[1].page_content)
print(docs[1].metadata)
len(docs)

### Embedding

### Spin up a local Redis instance to store the vector embeddings
`docker run --rm -it -p 6379:6379 redis/redis-stack-server:latest`

In [None]:
# Embedding client for the ada-002 model, addressed through the
# "mak-ada-deployment" deployment.
# NOTE(review): chunk_size=1 presumably limits each request to a single text
# because of an endpoint batch limit — confirm before raising it.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    deployment="mak-ada-deployment",
    chunk_size=1,
)

In [None]:
# Build the vector store from the current `docs` using `embeddings`, on the
# Redis server at `redis_url`, under the index name "dummy-file".
# The resulting `redis` object is what the search and QA cells query.
redis = Redis.from_documents(
    docs, embeddings, redis_url=redis_url, index_name="dummy-file"
)

### Retrieval

In [None]:
def search(query):
    """Return the single best match for *query* from the Redis store.

    Asks the module-level ``redis`` vector store for at most one result
    (``k=1``) via ``similarity_search_limit_score``.

    Returns the first result, or ``None`` when the store returns nothing.
    """
    matches = redis.similarity_search_limit_score(query, k=1)
    if not matches:
        return None
    return matches[0]

In [None]:
query = "What are the typical cosmetic claims?"
result = search(query)

# `search` gives back None when nothing was retrieved; handle that first.
if not result:
    print("Sorry, I couldn't find a relevant answer for your question.")
else:
    print(result.page_content)
    print(result.metadata)

In [None]:
query = "What are the protection claims?"
result = search(query)

# `search` gives back None when nothing was retrieved; handle that first.
if not result:
    print("Sorry, I couldn't find a relevant answer for your question.")
else:
    print(result.page_content)
    print(result.metadata)

### QAChain

In [None]:
# Chat model used by the QA chains below. temperature=0 is set for the least
# random output; the Azure deployment name is passed through as `engine`.
llm = ChatOpenAI(
    model_name=llm_name,
    temperature=0,
    engine=deployment_name,
)

In [None]:
# Retrieval-augmented QA chain over the Redis store, using the chain's
# default prompt and default chain type.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=redis.as_retriever())

In [None]:
# Run the chain; the input goes in under "query" and the answer is read
# back from the "result" key of the returned mapping.
question = "What are the typical cosmetic claims?"
result = qa_chain({"query": question})
result["result"]

In [None]:
# Same chain, different question; the answer is again under "result".
question = "What are the typical medicinal claims?"
result = qa_chain({"query": question})
result["result"]

### PromptTemplates

In [None]:
# Build prompt.
# Assembled from adjacent string literals rather than a triple-quoted string
# with backslash continuations: that form leaked the source indentation into
# the prompt and glued {context} onto the end of the instruction sentence.
# Here the instructions are one paragraph, and the context, the question and
# the answer cue each start on their own line.
template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer. "
    "Use three sentences maximum. Keep the answer as concise as possible. "
    'Always say "thanks for asking!" at the end of the answer.\n'
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)
    
# Wrap the raw template string as a PromptTemplate; it is handed to the
# QA chain through chain_type_kwargs further down.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [None]:
# Rebuild the QA chain with the custom prompt, and ask it to hand back the
# retrieved source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=redis.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)

In [None]:
# Ask the prompt-customised chain; the output is inspected in the next cells.
question = "What are the typical cosmetic claims?"
result = qa_chain({"query": question})

In [None]:
# The generated answer text.
result["result"]

In [None]:
# The retrieved documents; this key is read from a chain that was built
# with return_source_documents=True.
result["source_documents"]

In [None]:
# Nonsense question, presumably to exercise the prompt's "say that you
# don't know" instruction.
question = "What is blah blah blah?"
result = qa_chain({"query": question})
result["result"]