In [1]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.chains.llm import LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import OnlinePDFLoader
from langchain.document_loaders import SeleniumURLLoader,WebBaseLoader
from langchain.document_loaders import UnstructuredWordDocumentLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.vectorstores import OpenSearchVectorSearch

In [2]:
# Web pages whose content is loaded and indexed further down.
urls = [
    "https://www.shaip.com/blog/what-is-nlp-how-it-works-benefits-challenges-examples/",
]


In [3]:
# Loader for the pages listed in `urls`.
# Alternative kept for reference: SeleniumURLLoader(urls=urls)
loader = WebBaseLoader(urls)

In [5]:

# Load the pages and split them into chunks (one Document per chunk).
data = loader.load_and_split()


In [6]:
# Display the loaded documents to inspect the raw page text.
data

[Document(page_content='What is NLP? How it Works, Benefits, Challenges, Examples | Shaip\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n   \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n \n\n\n\n\n\n\n\n\n\n\n\n\nWhat We Do\n\n\n\n\n\n  \n\n\n\n\n\n\nWhat we do best \n\n\n\n\n\n\n\n Speciality Conversational AI Localize AI-enabled speech models with rich structured multi-lingual datasets.Generative AI Harness the power to transform complex data into actionable insight.Computer Vision Train ML models with best-in-class AI data to make sense of the visual world. \n\n\n\n\n\n\n\nAI Data ServicesData Collection Create & collect audio, images, text & video from across the globe.Data Annotation & Labeling Accurately annotate data to make AI & ML think faster & smarter.Data Transcription AI-driven, cloud-based transcription supporting 150+ languages. \n\n\n\n\n\n\n\n\n \n\n\n\n\nBuyer’s Guide: Data Annotation / Labeling \n\n\n\n

In [7]:
# Flatten newlines and tabs to single spaces so each chunk is one line of text.
# The former `item.metadata['source'] = item.metadata['source']` line was a
# self-assignment: it changed nothing and could only fail (KeyError) on a
# document without a 'source' entry, so it has been removed.
for item in data:
    item.page_content = item.page_content.replace("\n", " ").replace("\t", " ")


In [8]:
# Display the documents again, now with newlines/tabs replaced by spaces.
data

[Document(page_content='What is NLP? How it Works, Benefits, Challenges, Examples | Shaip                                                                                                 What We Do               What we do best          Speciality Conversational AI Localize AI-enabled speech models with rich structured multi-lingual datasets.Generative AI Harness the power to transform complex data into actionable insight.Computer Vision Train ML models with best-in-class AI data to make sense of the visual world.         AI Data ServicesData Collection Create & collect audio, images, text & video from across the globe.Data Annotation & Labeling Accurately annotate data to make AI & ML think faster & smarter.Data Transcription AI-driven, cloud-based transcription supporting 150+ languages.                Buyer’s Guide: Data Annotation / Labeling                Buyer’s Guide: ConversationalAI                  Off-the-shelf Data               Off-the-shelf Data Catalog & Licensing        

In [9]:
# Number of document chunks produced by the loader.
len(data)

7

In [10]:
import os

from opensearchpy import OpenSearch

# Low-level OpenSearch client for the local node (plain HTTP on port 9200).
# Credentials are read from OPENSEARCH_USER / OPENSEARCH_PASSWORD when set, so
# real secrets never need to be written into the notebook; the fallback values
# are the ones previously hard-coded here.
client = OpenSearch(
    hosts=[{"host": "localhost", "port": 9200}],
    http_auth=(
        os.environ.get("OPENSEARCH_USER", "admin"),
        os.environ.get("OPENSEARCH_PASSWORD", "admin"),
    ),
    use_ssl=False,
)

In [11]:
import os

# Only fall back to the placeholder when no key is configured. A plain
# assignment would overwrite a real OPENAI_API_KEY that is already exported in
# the environment. Replace the placeholder (or export the variable) before
# running the cells that call OpenAI.
os.environ.setdefault(
    "OPENAI_API_KEY", "please give your api key, create using gmail account"
)


In [12]:
# Name of the OpenSearch index that stores the embedded website chunks.
knowledge_index_name = "websitedata"

In [13]:
# Embed every document chunk with OpenAI embeddings and index the result into
# the local OpenSearch node under `knowledge_index_name`.
embeddings = OpenAIEmbeddings()
vectorstore = OpenSearchVectorSearch.from_documents(
    data,
    embeddings,
    opensearch_url="http://localhost:9200",
    index_name=knowledge_index_name,
)

In [14]:

# Prompt for the answer-generation step. `{summaries}` receives the retrieved
# document text and `{question}` the (condensed) user question.
combine_prompt_template = """
You are intelligent Bot. You should answer questions only about the given website data.
If you don't know the answer, just say "Sorry, I don't know." Don't try to make up an answer.

Say "Hello!, I'm intelligent Bot. How can I help you today?" 
if the user welcomes you with "hi," "hello," "good morning," "good evening," or any other greeting.
QUESTION: {question}
{summaries}
"""
COMBINE_PROMPT = PromptTemplate(
    template=combine_prompt_template, input_variables=["summaries", "question"]
)

# Model: gpt-3.5-turbo is a chat model, so it is created through ChatOpenAI.
# Passing a chat model name to the completion-style `OpenAI` class is a
# deprecated initialization path. temperature=0 keeps answers as deterministic
# as the API allows.
llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo-0613')

# CONDENSE_QUESTION_PROMPT makes the LLM rewrite the user's follow-up question,
# together with the chat history, into a single standalone question.
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

# "stuff" chain: all retrieved documents are placed into one prompt.
doc_chain = load_qa_with_sources_chain(llm, chain_type="stuff", prompt=COMBINE_PROMPT)

# Chain for chatting with the vector store index: retrieve the 3 most similar
# chunks, answer with `doc_chain`, and return the source documents as well.
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3}),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    return_source_documents=True,
)




In [15]:
# Extra options passed along with every chain invocation.
vectordbkwargs = {"search_distance": 0.8}


def generate_response(chain, question, chat_history, max_history=2):
    """Ask `chain` a question and record the exchange in `chat_history`.

    Args:
        chain: Callable taking a dict with "question", "chat_history" and
            "vectordbkwargs" keys and returning a mapping with an "answer" key.
        question: The user's question.
        chat_history: Mutable list of (question, answer) tuples. On success the
            new exchange is appended in place. May be empty or None.
        max_history: How many of the most recent exchanges are sent to the
            chain as context (default 2, the previous fixed behavior).

    Returns:
        The chain's result mapping. If anything fails, a dict of the same shape
        ({"answer": "Sorry, I don't know.", "source_documents": []}) is
        returned, so callers can always read result["answer"].
    """
    # A single slice covers the empty, one-item and many-item cases; the copy
    # keeps the list handed to the chain from being changed by the append below.
    if chat_history and max_history > 0:
        recent_history = list(chat_history[-max_history:])
    else:
        recent_history = []

    try:
        result = chain(
            {
                "question": question,
                "chat_history": recent_history,
                "vectordbkwargs": vectordbkwargs,
            }
        )
        # Add the question and answer to the chat history (if one was given).
        if chat_history is not None:
            chat_history.append((question, result["answer"]))
        return result
    except Exception as e:
        # Best-effort: report the problem and hand back a fallback answer with
        # the same shape as a successful result.
        print(f"An error occurred: {e}")
        return {"answer": "Sorry, I don't know.", "source_documents": []}
    

In [16]:
# Start a fresh conversation and ask the first question.
chat_history = list()
query = "what is  NLP"
result = generate_response(chain, query, chat_history)


In [17]:
# Show only the generated answer text from the result.
result['answer']

'NLP stands for Natural Language Processing. It is a subfield of artificial intelligence (AI) that focuses on enabling computers to analyze and understand human language. NLP allows robots and machines to comprehend and interpret human language, enabling them to perform tasks without human intervention. Some examples of NLP applications include machine translation, summarization, ticket classification, and spell check. NLP systems use machine learning algorithms to analyze large amounts of unstructured data and extract relevant information. These algorithms are trained to recognize patterns and make inferences based on those patterns.'