In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers import TFIDFRetriever, AzureCognitiveSearchRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import AzureOpenAI
from langchain.chains import LLMChain
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
import magic
import os
import nltk

def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        EnvironmentError: if the variable is unset or empty, so a missing
            secret fails here with a clear message instead of surfacing
            later as an opaque error.
    """
    value = os.getenv(name)
    if not value:
        raise EnvironmentError(
            f"Environment variable {name!r} must be set before running this notebook."
        )
    return value


# Azure OpenAI connection settings.
os.environ['OPENAI_API_TYPE'] = 'azure'
os.environ['OPENAI_API_VERSION'] = '2023-03-15-preview'
os.environ['OPENAI_API_BASE'] = 'https://openai-nois-intern.openai.azure.com/'
# os.environ only accepts str values, so assigning os.getenv(...) directly
# raises TypeError when the key is missing; validate it explicitly instead.
os.environ['OPENAI_API_KEY'] = _require_env('OPENAI_API_KEY')

# Name of the Azure Cognitive Search index queried by this notebook.
index_type = 'public-index-ver3'

# Azure Cognitive Search connection settings.
os.environ["AZURE_COGNITIVE_SEARCH_SERVICE_NAME"] = "search-service01"
os.environ["AZURE_COGNITIVE_SEARCH_INDEX_NAME"] = index_type
# SECURITY: the search API key must never be committed to the notebook.
# Supply it through the environment; any key that was previously stored in
# source control should be treated as compromised and rotated.
os.environ["AZURE_COGNITIVE_SEARCH_API_KEY"] = _require_env("AZURE_COGNITIVE_SEARCH_API_KEY")

In [2]:
# Retriever backed by Azure Cognitive Search; the text of each hit is read
# from the 'content' field of the index.
# NOTE(review): no connection arguments are passed here, so the service name,
# index and key are presumably taken from the AZURE_COGNITIVE_SEARCH_*
# environment variables -- confirm against the LangChain docs.
retriever = AzureCognitiveSearchRetriever(
    content_key='content',
)

In [3]:
# Completion model used by every chain in this notebook.
llm_settings = {
    "deployment_name": "text-davinci-003",
    "model_name": "text-davinci-003",
    "max_tokens": 300,   # upper bound on the length of each completion
    "temperature": 0.5,
}
llm = AzureOpenAI(**llm_settings)

### Sources

In [9]:
# Debug cell: render the final prompt by hand and report its token count.
# NOTE(review): `doc` and `query` are only assigned in a later cell of this
# notebook as shown, so this cell depends on that cell having been run first.
doc_txt = "".join(x.page_content for x in doc)

msg = f"""Given a list of documents from a company called New Ocean, answer the question. If the user greets you, respond accordingly.
If you don't know the answer, just say that you don't know. Don't try to make up an answer. DON'T use other sources than the ones provided.
When asked a question not related to New Ocean or the provided documents, say "Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?"
------
{doc_txt}
------
QUESTION: {query}
FINAL ANSWER:"""

# Show the prompt text first, then how many tokens the LLM counts for it.
print(msg)
print(llm.get_num_tokens(msg))

Given a list of documents from a company called New Ocean, answer the question. If the user greets you, respond accordingly.
If you don't know the answer, just say that you don't know. Don't try to make up an answer. DON'T use other sources than the ones provided.
When asked a question not related to New Ocean or the provided documents, say "Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?"
------
, ink, staples .. units by month, by year will have a very high cost. When a manufacturing defect occurs, it is difficult to trace. Management, reporting through many levels, time consuming and sometimes not really effective.
Smart Form. Souce: nois.vn
So now and further into the future, what goals do businesses and executives aim for in management? Developing with modern technical technology is an inevitable and smart step. Staying out of this trend means you are pushing your business into “backward”. It’s time to get rid of unnecessa

In [4]:
# Few-shot prompt that asks the LLM to extract keywords from a sentence and
# return them in both English and Vietnamese, comma-separated.
# `{question}` is the only placeholder.
keyword_template = """Given a sentence, extract keywords and translate them to Vietnamese if they're in English and vice versa.
Your output will include the keywords in both languages, separated by commas.
######
SENTENCE: What are New Ocean company's products ?
OUTPUT: New Ocean,products,sản phẩm
SENTENCE: When was New Ocean founded ?
OUTPUT: New Ocean,founded,thành lập
SENTENCE: Các sản phẩm của công ty New Ocean là gì ?
OUTPUT: New Ocean,sản phẩm,products
SENTENCE: {question}
OUTPUT:"""

# Question-answering prompt: `{summaries}` receives the retrieved document
# text and `{question}` the user's question. The instructions restrict the
# model to the supplied documents and to New Ocean related questions.
template = """Given a list of documents from a company called New Ocean, answer the question. If the user greets you, respond accordingly.
If you don't know the answer, just say that you don't know. Don't try to make up an answer. DON'T use other sources than the ones provided.
When asked a question not related to New Ocean or the provided documents, say "Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?".
######
{summaries}
######
QUESTION: {question}
FINAL ANSWER:"""

# PromptTemplate wrapper around `template`; the declared input variables
# match the two placeholders used in the string above.
templ = PromptTemplate(
    input_variables = ['summaries', 'question'],
    template = template
)

In [5]:
# QA chain of type "stuff", driven by the custom answering prompt `templ`.
qaChain = load_qa_with_sources_chain(
    llm=llm,
    chain_type="stuff",
    prompt=templ,
)

# Single-prompt chain that turns a user question into bilingual search keywords.
keyword_prompt = PromptTemplate.from_template(keyword_template)
keyword_getter = LLMChain(llm=llm, prompt=keyword_prompt)

In [9]:
# Run the keyword-extraction chain on the current question and display the
# raw 'text' output.
# NOTE(review): within the visible notebook `query` is first assigned in a
# later cell, and this cell shares the execution count 9 with another cell --
# it looks like it was run out of order; confirm it works on a fresh kernel.
keywords = keyword_getter(query)['text']
keywords

' NOIS,products,sản phẩm'

In [10]:
# End-to-end run: question -> bilingual keywords -> retrieval -> answer.
query = "What is New Ocean ?"
keywords = keyword_getter(query)['text']

# Keep only the first four hits returned by the retriever.
doc = retriever.get_relevant_documents(keywords)[:4]

# Tag every retrieved document with a short, stable source label
# ("doc-1", "doc-2", ...) in its metadata.
# The previous `isinstance(doc, list)` / `else` split was removed: a slice is
# always a sequence, so the single-document branch could never run correctly
# (it would have accessed `.metadata` on the sequence itself).
for num, x in enumerate(doc, start=1):
    x.metadata['source'] = f"doc-{num}"

# return_only_outputs=False keeps the inputs in `result` alongside the answer.
result = qaChain({'input_documents': doc, 'question': query}, return_only_outputs=False)

In [11]:
# Display the answer text produced by the QA chain.
result['output_text']

' New Ocean is a company that has been operating for more than 12 years in the field of software and technology solutions. It is also the developer of Factory Smart Forms & Checklists, a digital checklist tool.'

In [12]:
# Dump the retrieved documents (page content and metadata, including the
# 'source' labels assigned earlier) for inspection.
print(doc)

[Document(page_content='is.vn\r\nBạn biết gì về biểu mẫu số\r\nThực tế ý tưởng biểu mẫu số ra đời có nguồn gốc dựa trên các biểu mẫu truyền thống. Trước đây khi công nghệ chưa phát triển, đa số thông tin dữ liệu được lưu trữ trên các biểu mẫu truyền thống dưới dạng giấy tờ, văn bản.\r\nNew Ocean Information System (NOIS)\r\nNew Ocean Information System NOIS là công ty có thâm niên hoạt động hơn 12 năm trong lĩnh vực cung cấp phần mềm, giải pháp công nghệ… không có lý do gì để NOIS bỏ qua một nguồn tài nguyên lớn như biểu mẫu số. Đó cũng chính là lý do Factory Smart Forms & Checklists ra đời.Factory Smart Forms & Checklists là điển hình của một biểu mẫu số tập trung giải quyết các khó khăn và vấn đề “lỗi thời” của\n', metadata={'@search.score': 9.238283, 'metadata_storage_path': 'aHR0cHM6Ly9hY3NjaGF0Ym90bm9pc2ludGVybi5ibG9iLmNvcmUud2luZG93cy5uZXQvcHVibGljLWRhdGEtdmVyMy8yMjItNC50eHQ1', 'source': 'doc-1'}), Document(page_content='is.vn\r\nAs mentioned above, Factory Smart Forms & Checklis