In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers import TFIDFRetriever, AzureCognitiveSearchRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import AzureOpenAI
from langchain.chains import LLMChain
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
import magic
import os
import nltk

# --- Service configuration ---------------------------------------------------
# Non-secret settings are written into the process environment here.
# Secrets are NOT stored in the notebook: they must already be present in the
# environment (shell export, .env loader, secret manager, ...) before the
# kernel starts.
#
# SECURITY: an Azure Cognitive Search API key used to be hardcoded in this cell.
# Anything that was ever committed must be treated as compromised and rotated.
os.environ['OPENAI_API_TYPE'] = 'azure'
os.environ['OPENAI_API_VERSION'] = '2023-03-15-preview'
os.environ['OPENAI_API_BASE'] = 'https://openai-nois-intern.openai.azure.com/'

# Name of the search index to query; also reused below as an environment value.
index_type = 'public-index-ver3'

os.environ["AZURE_COGNITIVE_SEARCH_SERVICE_NAME"] = "search-service01"
os.environ["AZURE_COGNITIVE_SEARCH_INDEX_NAME"] = index_type

# Fail fast with an actionable message instead of the opaque
# "TypeError: str expected, not NoneType" that assigning os.getenv(...) back
# into os.environ produces when the variable is missing.
for _secret_name in ("OPENAI_API_KEY", "AZURE_COGNITIVE_SEARCH_API_KEY"):
    if not os.environ.get(_secret_name):
        raise RuntimeError(
            f"Environment variable {_secret_name} is not set. "
            "Export it before starting the notebook kernel; do not paste the key into this file."
        )

In [2]:
# Search client used to fetch candidate documents. No connection arguments are
# passed here; presumably the service name, index name and API key come from the
# AZURE_COGNITIVE_SEARCH_* environment variables - confirm against the LangChain docs.
retriever = AzureCognitiveSearchRetriever(
    content_key="content",
)

In [3]:
# Completion model shared by every chain in this notebook.
llm = AzureOpenAI(
    model_name="text-davinci-003",
    deployment_name="text-davinci-003",
    temperature=0.5,
    max_tokens=300,
)

### Sources

In [9]:
# Debug aid: render the full prompt by hand and report how many tokens it uses.
# NOTE(review): `doc` and `query` are assigned in a cell that appears further
# down in this notebook, so this cell fails on a fresh kernel until that cell
# has been run.
doc_txt = "".join(piece.page_content for piece in doc)

msg = f"""Given a list of documents from a company called New Ocean, answer the question. If the user greets you, respond accordingly.
If you don't know the answer, just say that you don't know. Don't try to make up an answer. DON'T use other sources than the ones provided.
When asked a question not related to New Ocean or the provided documents, say "Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?"
------
{doc_txt}
------
QUESTION: {query}
FINAL ANSWER:"""

print(msg)
print(llm.get_num_tokens(msg))

Given a list of documents from a company called New Ocean, answer the question. If the user greets you, respond accordingly.
If you don't know the answer, just say that you don't know. Don't try to make up an answer. DON'T use other sources than the ones provided.
When asked a question not related to New Ocean or the provided documents, say "Sorry, I will only answer questions related to New Ocean. Could you please ask another related question?"
------
, ink, staples .. units by month, by year will have a very high cost. When a manufacturing defect occurs, it is difficult to trace. Management, reporting through many levels, time consuming and sometimes not really effective.
Smart Form. Souce: nois.vn
So now and further into the future, what goals do businesses and executives aim for in management? Developing with modern technical technology is an inevitable and smart step. Staying out of this trend means you are pushing your business into “backward”. It’s time to get rid of 

In [4]:
# Few-shot prompt for keyword extraction. Each example pairs a sentence with its
# keywords in both English and Vietnamese; `{question}` is filled in at run time.
# The Vietnamese examples were previously stored as mis-decoded bytes
# (UTF-8 read as Mac Roman), which showed the model garbage instead of real
# translations; they are restored to proper Unicode text here.
keyword_template = """Given a sentence, extract keywords and translate them to Vietnamese if they're in English and vice versa.
Your output will include the keywords in both languages, separated by commas.
######
SENTENCE: What are New Ocean company's products ?
OUTPUT: New Ocean,products,sản phẩm
SENTENCE: When was New Ocean founded ?
OUTPUT: New Ocean,founded,thành lập
SENTENCE: Các sản phẩm của công ty New Ocean là gì ?
OUTPUT: New Ocean,sản phẩm,products
SENTENCE: {question}
OUTPUT:"""

# Answering prompt: instructions, then the retrieved text in `{summaries}`,
# then the user's `{question}`. Built from adjacent literals, one source line
# per sentence; the resulting string is unchanged.
template = (
    "Given a list of documents from a company called New Ocean, answer the question. "
    "If the user greets you, respond accordingly.\n"
    "If you don't know the answer, just say that you don't know. "
    "Don't try to make up an answer. "
    "DON'T use other sources than the ones provided.\n"
    "When asked a question not related to New Ocean or the provided documents, say "
    '"Sorry, I will only answer questions related to New Ocean. '
    'Could you please ask another related question?".\n'
    "######\n"
    "{summaries}\n"
    "######\n"
    "QUESTION: {question}\n"
    "FINAL ANSWER:"
)

# Wrap the answering prompt text and declare its two placeholders.
templ = PromptTemplate(template=template, input_variables=["summaries", "question"])

In [5]:
# Chain that answers a question from the supplied documents using the custom prompt.
qaChain = load_qa_with_sources_chain(
    llm=llm,
    chain_type="stuff",
    prompt=templ,
)

# Chain that turns a question into bilingual search keywords.
keyword_prompt = PromptTemplate.from_template(keyword_template)
keyword_getter = LLMChain(llm=llm, prompt=keyword_prompt)

In [9]:
# Inspect the raw keyword string produced for the current question.
# NOTE(review): `query` is assigned in a cell further down in this notebook, so
# this cell only works after that cell has been run.
keyword_response = keyword_getter(query)
keywords = keyword_response['text']
keywords

' NOIS,products,sản phẩm'

In [10]:
# End-to-end run: question -> bilingual keywords -> document search -> answer.
MAX_DOCS = 4  # number of top search hits handed to the answering chain

query = "What is New Ocean ?"
keywords = keyword_getter(query)['text']

# Slicing the retriever's result always yields a list (possibly empty), so the
# former "single document" fallback branch could never run and has been removed.
doc = retriever.get_relevant_documents(keywords)[:MAX_DOCS]

# Overwrite each hit's `source` metadata with a short positional label: doc-1, doc-2, ...
for num, hit in enumerate(doc, start=1):
    hit.metadata['source'] = f"doc-{num}"

result = qaChain({'input_documents': doc, 'question': query}, return_only_outputs=False)

In [11]:
# Display only the generated answer text from the chain's result.
result['output_text']

' New Ocean is a company that has been operating for more than 12 years in the field of software and technology solutions. It is also the developer of Factory Smart Forms & Checklists, a digital checklist tool.'

In [12]:
# Dump the retrieved documents (content and metadata) that were passed to the chain.
print(doc)

[Document(page_content='is.vn\r\nB·∫°n bi·∫øt g√¨ v·ªÅ bi·ªÉu m·∫´u s·ªë\r\nTh·ª±c t·∫ø √Ω t∆∞·ªüng bi·ªÉu m·∫´u s·ªë ra ƒë·ªùi c√≥ ngu·ªìn g·ªëc d·ª±a tr√™n c√°c bi·ªÉu m·∫´u truy·ªÅn th·ªëng. Tr∆∞·ªõc ƒë√¢y khi c√¥ng ngh·ªá ch∆∞a ph√°t tri·ªÉn, ƒëa s·ªë th√¥ng tin d·ªØ li·ªáu ƒë∆∞·ª£c l∆∞u tr·ªØ tr√™n c√°c bi·ªÉu m·∫´u truy·ªÅn th·ªëng d∆∞·ªõi d·∫°ng gi·∫•y t·ªù, vƒÉn b·∫£n.\r\nNew Ocean Information System (NOIS)\r\nNew Ocean Information System NOIS l√† c√¥ng ty c√≥ th√¢m ni√™n ho·∫°t ƒë·ªông h∆°n 12 nƒÉm trong lƒ©nh v·ª±c cung c·∫•p ph·∫ßn m·ªÅm, gi·∫£i ph√°p c√¥ng ngh·ªá‚Ä¶ kh√¥ng c√≥ l√Ω do g√¨ ƒë·ªÉ NOIS b·ªè qua m·ªôt ngu·ªìn t√†i nguy√™n l·ªõn nh∆∞ bi·ªÉu m·∫´u s·ªë. ƒê√≥ c≈©ng ch√≠nh l√† l√Ω do Factory Smart Forms & Checklists ra ƒë·ªùi.Factory Smart Forms & Checklists l√† ƒëi·ªÉn h√¨nh c·ªßa m·ªôt bi·ªÉu m·∫´u s·ªë t·∫≠p trung gi·∫£i quy·∫øt c√°c kh√≥ khƒÉn v√† v·∫•n ƒë·ªÅ ‚Äúl·ªói th·ªùi‚Äù c·ªßa\n', metadata={'@search.score': 9.238283, 'metadata_storage_path': 'aHR0cHM6Ly9h