In [1]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
from langchain.retrievers import AzureCognitiveSearchRetriever
from langchain.llms import AzureOpenAI
from langchain.chains import LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
import magic
import os
import nltk

# --- Azure OpenAI / Azure Cognitive Search configuration --------------------
# Non-secret settings are set here so the notebook is self-describing.
os.environ['OPENAI_API_TYPE'] = 'azure'
os.environ['OPENAI_API_VERSION'] = '2023-03-15-preview'
os.environ['OPENAI_API_BASE'] = 'https://openai-nois-intern.openai.azure.com/'

os.environ["AZURE_COGNITIVE_SEARCH_SERVICE_NAME"] = "search-service01"
os.environ["AZURE_COGNITIVE_SEARCH_INDEX_NAME"] = "public-data-index"

# Secrets are NEVER written in the notebook: they must already be present in
# the process environment (shell export, .env loader, secrets manager, ...).
# SECURITY: a search API key used to be committed in this cell and in its saved
# output; treat that key as compromised and rotate it.
_missing_secrets = [
    name
    for name in ('OPENAI_API_KEY', 'AZURE_COGNITIVE_SEARCH_API_KEY')
    if not os.getenv(name)
]
if _missing_secrets:
    # Fail early with an actionable message instead of an opaque error later.
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_secrets)
    )

retriever_public = AzureCognitiveSearchRetriever(content_key='content')
# Do not print the retriever object itself: its repr includes the API key,
# which would then be stored in the notebook output.
print(
    f"retriever_public ready: service={retriever_public.service_name!r}, "
    f"index={retriever_public.index_name!r}, "
    f"content_key={retriever_public.content_key!r}"
)

# os.environ["AZURE_COGNITIVE_SEARCH_INDEX_NAME"] = "public-index-ver2"
# retriever_private = AzureCognitiveSearchRetriever(content_key='content')
# print(retriever_private)

service_name='search-service01' index_name='public-data-index' api_key='<redacted>' api_version='2020-06-30' aiosession=None content_key='content'


In [2]:
# Conversation turns recorded so far; each entry is a dict with 'user' and 'AI' keys.
history = []

# Parser that turns the comma-separated keyword output into a Python list.
output_parser = CommaSeparatedListOutputParser()

# Completion model shared by every chain in this notebook.
llm = AzureOpenAI(
    temperature=0.5,
    max_tokens=300,
    model_name="text-davinci-003",
    deployment_name="text-davinci-003",
)

def get_history_as_txt():
    """Render the module-level ``history`` list as one transcript string.

    Every recorded turn contributes a newline, a "Human: " line holding the
    user message, and an "AI:" line holding the model reply (no separator is
    inserted after "AI:"). Returns an empty string when there are no turns.
    """
    return "".join(
        f"\nHuman: {turn['user']}\nAI:{turn['AI']}" for turn in history
    )

def add_to_history(user_msg, ai_msg):
    """Append one question/answer turn to the module-level ``history`` list.

    Args:
        user_msg: The text the user sent.
        ai_msg: The reply produced for that message.
    """
    turn = {'user': user_msg, 'AI': ai_msg}
    history.append(turn)

In [3]:
# Few-shot prompt for bilingual (English/Vietnamese) keyword extraction.
# It takes a single `{question}` placeholder.
# NOTE(review): no cell visible in this notebook references `keyword_template`;
# the keyword chain is built from `keyword_templ` instead — confirm whether this
# variant is still needed.
keyword_template = """Given a sentence, extract maximum 2 most important sets of keywords and translate them to Vietnamese if they're in English and vice versa.
Your output will be in both languages if possible, separated by commas.
######
SENTENCE: What are New Ocean company's products ?
OUTPUT: New Ocean products, New Ocean sản phẩm
SENTENCE: Was Nestle a customer of New Ocean ?
OUTPUT: Nestle
SENTENCE: When was New Ocean founded ?
OUTPUT: New Ocean founded, New Ocean thành lập
SENTENCE: Các sản phẩm của công ty New Ocean là gì ?
OUTPUT: New Ocean sản phẩm, New Ocean products
SENTENCE: Ai chưa đóng tiền nước ?
OUTPUT: tiền nước, water bill
SENTENCE: {question}
OUTPUT:"""

# One-shot prompt for bilingual keyword extraction with a single `{output}`
# placeholder for the sentence. The model is asked for a comma-separated list,
# which is the format the comma-separated output parser expects.
keyword_templ = """Given a sentence, extract keywords and translate them to Vietnamese if they're in English and vice versa.
Your output will be in both languages if possible, separated by commas.
DON'T duplicate keywords.
######
EXAMPLE
SENTENCE: Ai chưa đóng tiền nước?
OUTPUT: tiền nước, water bill
######
SENTENCE: {output}
OUTPUT:"""

# Answering prompt. Placeholders:
#   {summaries} - filled by the QA chain (presumably with the retrieved
#                 document text — verify against the chain implementation)
#   {context}   - chat-history transcript; it is placed directly after the
#                 "######" separator with no newline in between
#   {question}  - the current user question
template = """Given a list of documents from a company called New Ocean, answer the question.
When asked a question not related to New Ocean, say "Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?".
If the user greets you, respond accordingly. If you don't know the answer, just say that you don't know. Don't try to make up an answer. DON'T use other sources than the ones provided.
######
{summaries}
######{context}
Human: {question}
AI:"""

# Prompt object wrapping `template`; the declared variables must match the
# placeholders used in the string above.
templ = PromptTemplate(
    input_variables = ['summaries', 'question', 'context'],
    template = template
)

In [4]:
# Chain that extracts (bilingual) search keywords from a user sentence.
keyword_getter = LLMChain(
    prompt=PromptTemplate.from_template(keyword_templ),
    llm=llm,
)

# "stuff" question-answering chain driven by the custom answering prompt.
qaChain = load_qa_with_sources_chain(
    llm=llm,
    chain_type="stuff",
    prompt=templ,
)

In [24]:
# Scratch check: show the raw keyword string and its parsed list form.
# NOTE(review): `query` is only assigned in a later cell of this notebook, so
# this cell fails on a fresh kernel (Restart & Run All) — it was executed out
# of order.
keywords = keyword_getter(query)['text']
keywords_lst = output_parser.parse(keywords)
print(keywords, keywords_lst, sep="\n")

 Tổng thống Mỹ, President of the United States
['Tổng thống Mỹ', 'President of the United States']


In [5]:
def get_docs_using_keyword_list(keywords, ret, max_docs=4):
    """Retrieve documents keyword by keyword, sharing one overall document budget.

    The comma-separated ``keywords`` string is split with the module-level
    ``output_parser``; each non-blank keyword is searched separately and the
    first few hits of every search are concatenated.

    Args:
        keywords: Comma-separated keyword string (e.g. raw LLM output).
        ret: Retriever exposing ``get_relevant_documents(query)``.
        max_docs: Upper bound on the number of documents returned in total.
            Defaults to 4, the budget that was previously hard-coded.

    Returns:
        A list of at most ``max_docs`` documents, in keyword order. Empty when
        there is no usable keyword or ``max_docs`` is not positive.
    """
    # Drop blank entries (e.g. from an empty model answer or a trailing comma)
    # so that no empty search is sent to the retriever.
    keywords_lst = [kw.strip() for kw in output_parser.parse(keywords) if kw.strip()]
    if not keywords_lst or max_docs <= 0:
        return []

    # Integer share per keyword, but never 0: with more keywords than budget the
    # previous `int(4 / len(...))` evaluated to 0 and nothing was retrieved.
    per_keyword = max(1, max_docs // len(keywords_lst))

    doc = []
    for kw in keywords_lst:
        remaining = max_docs - len(doc)
        if remaining <= 0:
            # Budget exhausted: skip the remaining searches entirely.
            break
        doc.extend(ret.get_relevant_documents(kw)[:min(per_keyword, remaining)])

    return doc

def get_docs_using_keyword_string(keywords, ret):
    """Run one search with the whole keyword string and keep the first five hits.

    Args:
        keywords: Query text passed unchanged to the retriever.
        ret: Retriever exposing ``get_relevant_documents(query)``.

    Returns:
        The first (at most) five documents returned by the retriever.
    """
    hits = ret.get_relevant_documents(keywords)
    top_hits = hits[:5]
    return top_hits

In [21]:
query = "Tổng thống Mỹ là ai ?"
keywords = keyword_getter(query)['text']

doc = get_docs_using_keyword_string(keywords, retriever_public)

# Give every retrieved document a short, stable source label ("doc-1", ...).
# Presumably the with-sources QA chain reads `metadata['source']` — confirm
# against the chain's document prompt.
for num, x in enumerate(doc, start=1):
    x.metadata['source'] = f"doc-{num}"

# Number of most recent turns kept when the history has to be shortened.
MAX_HISTORY_TURNS = 5


def _run_qa():
    """Call the QA chain with the current documents, question and chat history."""
    return qaChain(
        {'input_documents': doc, 'question': query, 'context': get_history_as_txt()},
        return_only_outputs=False,
    )


try:
    result = _run_qa()
except Exception:
    # Presumably the failure is a prompt that grew too long because of the chat
    # history. If there is nothing to trim, retrying cannot help: surface the
    # real error instead of hiding it (the old code went on to read a missing
    # or stale `result`).
    if len(history) <= MAX_HISTORY_TURNS:
        raise
    # Trim in place so every reference to `history` sees the shortened list,
    # then retry once with the smaller context.
    del history[:-MAX_HISTORY_TURNS]
    result = _run_qa()

# Only reached when a fresh answer exists for this query.
add_to_history(query, result['output_text'])

In [22]:
# Display the answer text produced by the QA chain for the last query.
result['output_text']

' Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?'

In [23]:
# Debug aid: dump the retrieved documents that were passed to the QA chain.
print(doc)

[Document(page_content='AUTOMATION SOLUTIONS\r\nMANUFACTURING EXECUTION SYSTEM\r\nHệ thống điều hành sản xuất - MES Trong quá trình chuyển đổi từ sản xuất truyền thống sang mô hình công nghiệp 4.0, việc sử dụng công nghệ số trong vận hành nhà máy không còn là điều xa...\r\nView more\r\nMACHINE MANAGEMENT SYSTEM\r\nAre you running a factory and having difficulty in managing machines, looking for a solution to optimize your production line operations?\r\nView more\r\nBATCH CONTROL SYSTEM\r\nBatch Control System is an efficient and reliable mass production process management system. During mass production, the precise and uniform mixing of ingredients...\r\nView more\r\nENERGY MANAGEMENT SOLUTIONS\r\nEnergy Management Solutions Energy plays an important role in economic growth, is an important input for many manufacturing industries and one of the most important commodities...\r\nView more\r\nSYSTEM INTEGRATION\r\nIn current Industry 4.0 Manufacturing, one of the most essential factors of

In [27]:
# Reset the chat history so the next question is asked without prior turns.
history = []

In [19]:
# Debug aid: rebuild the instruction + documents + history part of the prompt
# by hand and measure how many tokens it occupies.
doc_txt = "".join(document.page_content for document in doc)

context = get_history_as_txt()

msg = f"""Given a list of documents from a company called New Ocean, answer the question. When asked a question not related to New Ocean, say "Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?".
If the user greets you, respond accordingly. If you don't know the answer, just say that you don't know. Don't try to make up an answer. DON'T use other sources than the ones provided.
######
{doc_txt}
######{context}"""

print(msg)

prompt_tokens = llm.get_num_tokens(msg)
print(prompt_tokens)

# Token share of the chat history alone.
history_tokens = llm.get_num_tokens(context)
print(f"Length of chat history: {history_tokens}")

Given a list of documents from a company called New Ocean, answer the question. When asked a question not related to New Ocean, say "Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?".
If the user greets you, respond accordingly. If you don't know the answer, just say that you don't know. Don't try to make up an answer. DON'T use other sources than the ones provided.
######
Contact
HomeContact
// contact details
GET IN TOUCH
Please feel free to get in touch with us via any convenient means (phone, email or submit a contact form). We will be glad to answer your question as soon as possible
Our Address:
Headquarter: 4th Floor, 51 Hoang Viet, Tan Binh, Ho Chi Minh, VN
Branch: 10th Floor, 34 Hoang Viet, Tan Binh, Ho Chi Minh, VN
Branch: 5th Floor, 51 Hoang Viet, Tan Binh, Ho Chi Minh, VN
Our Mailbox:
info[@]nois[.]vn
sales[@]nois[.]vn
Our Phone:
(+84) 28 6681 0782 (Hotline)
(+84) 985 868 872 (Sales Department)
Ready to G