## Installing libraries

In [None]:
!pip install cohere
!pip install langchain
!pip install pypdf
!pip install tiktoken
!pip install chromadb
!pip install bs4

## read source file

In [9]:
def read_urls(file_path):
    '''Read URLs from a text file, one URL per line.

    Leading and trailing whitespace is stripped from every line. Lines that
    are empty after stripping are skipped, so blank lines in the file (for
    example a trailing empty line) never end up as empty-string entries in
    the result.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with the stripped, non-empty lines in file order. The list is
        empty when the file has no non-blank lines.

    Raises:
        OSError: Propagated from ``open`` when the file cannot be opened.
    '''
    with open(file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        # Consume the generator while the file is still open.
        return [url for url in stripped_lines if url]

# Source list for the rest of the notebook: the lines of ./source_url.txt.
urls = read_urls(file_path="./source_url.txt")

## load the source

In [10]:
from langchain.document_loaders import WebBaseLoader

# Collect the documents loaded from every URL into a single flat list.
all_documents = []

for url in urls:
    # One WebBaseLoader per URL; append whatever documents it loads.
    # NOTE(review): the recorded output of this cell shows a "405 Not Allowed"
    # page stored as a Document, so a rejected request apparently does not
    # raise here. Inspect the result before indexing it.
    all_documents += WebBaseLoader(url).load()

# Display the loaded documents as the cell output.
all_documents

[Document(page_content='\n405 Not Allowed\n\n405 Not Allowed\nAngie\n\n\n', metadata={'source': 'https://www.mql5.com/en/blogs/post/751762', 'title': '405 Not Allowed', 'language': 'No language found.'})]

## split the website content into smaller chunks

In [11]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import os
import cohere

from getpass import getpass

# Never store the API key in the notebook. Read it from the COHERE_API_KEY
# environment variable and fall back to an interactive, non-echoing prompt
# when the variable is unset or empty.
# NOTE: a key was previously hardcoded in this cell. Treat that key as leaked
# and rotate it.
key = os.environ.get("COHERE_API_KEY") or getpass("Cohere API key: ")
co = cohere.Client(key)

# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(all_documents)

# Export the key for this process; presumably components created later
# without an explicit key (e.g. `Cohere()`) read it from here. Verify.
os.environ["COHERE_API_KEY"] = key

## store vector

In [12]:
from langchain.embeddings import CohereEmbeddings
# Embedding model used to vectorise the chunks; authenticated with `key`.
embeddings = CohereEmbeddings(cohere_api_key=key)

# Build the Chroma vector store from the split documents.
db = Chroma.from_documents(documents=docs, embedding=embeddings)

## retrieve data and output

In [20]:
from langchain.chains import RetrievalQA
from langchain.llms import Cohere

# Obtain a retriever from the vector store `db` built earlier.
retriever = db.as_retriever()

# Assemble the RetrievalQA chain from that retriever and a Cohere LLM.
# No API key is passed to Cohere() explicitly here.
retrievalQA = RetrievalQA.from_llm(retriever=retriever, llm=Cohere())

In [21]:
# Sample question to send through the RetrievalQA chain.
query = "What is an activation?"

# The value returned by invoke() is displayed as the cell output.
retrievalQA.invoke(input=query)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': 'What is an activation?',
 'result': ' Some concepts relating to the activation you mentioned in your question involve allowance or disallowance of something happening or occuring. Without further context, I cannot decipher the exact meaning of the concept with polar opposite extremes of allowance or disallowance in this particular context. \n\nCan you please provide further information about the situation or context regarding this concept of activation so I can provide more specific information? '}

In [15]:
# A question the author labels "invalid", presumably because the indexed
# source is not expected to cover it.
invalid_query = "where can i buy icecream?"

# The value returned by invoke() is displayed as the cell output.
retrievalQA.invoke(input=invalid_query)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': 'where can i buy icecream?',
 'result': ' I apologize, but the provided context does not include any information on ice cream. Hence, I cannot answer this question. \n\nTo gather relevant suggestions for ice cream options according to your location and preferences, please specify the region and any dietary requirements or particular brands/types you might be looking for, and I will be happy to assist you to the best of my abilities. '}