In [None]:
from langchain_community.document_loaders import UnstructuredURLLoader

In [2]:
# Course-provider pages to ingest.
urls = [
    'https://learntechnique.com/',
    'https://www.elmtraining.com/resettlement-courses/',
    'https://learntechnique.com/electrician-courses/am2-assessment/',
    'https://learntechnique.com/contact/',
]

# Fetch the pages and load their contents into `data` for the later cells.
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

In [None]:
# Summarise what was loaded instead of dumping the full text of every page:
# one (source, character count) pair per loaded document. Comparing the number
# of rows with the number of entries in `urls` shows whether any page was skipped.
[(doc.metadata.get("source"), len(doc.page_content)) for doc in data]

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the loaded pages into overlapping chunks sized for embedding/retrieval.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
docs = text_splitter.split_documents(data)

# Report how many chunks the split produced.
print(f"Number of documents: {len(docs)}")

In [None]:
# Peek at the first chunk produced by the splitter as a quick sanity check.
docs[0]

In [6]:
from langchain_chroma import Chroma
# OpenAIEmbeddings is maintained in the langchain_openai partner package; the
# old `langchain.embeddings.openai` path is a deprecated re-export.
from langchain_openai import OpenAI, OpenAIEmbeddings

In [None]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# embedding and LLM clients below — confirm.
load_dotenv()

In [None]:
# Embed every chunk with OpenAI embeddings and index the vectors in Chroma.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
)

In [9]:
# Similarity-search retriever that returns the 3 closest chunks per query.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

In [None]:
# Sanity-check the retriever on a sample query before wiring it into a chain.
# `invoke` is the Runnable entry point; `get_relevant_documents` is deprecated.
retrieved_docs = retriever.invoke("What courses are available for electricians?")

# A bare `len(...)` in the middle of a cell is evaluated and discarded, so print
# the count explicitly.
print(f"Retrieved {len(retrieved_docs)} documents")

# Guard the index access: nothing is returned if the vector store is empty.
if retrieved_docs:
    print(retrieved_docs[0].page_content)


In [None]:
# Display every document the retriever returned for the sample query.
retrieved_docs

In [12]:
# OpenAI LLM client used to generate answers, configured with a sampling
# temperature of 0.4 and a 500-token limit.
llm = OpenAI(
    max_tokens=500,
    temperature=0.4,
)

In [13]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the assistant. `{context}` is a template placeholder.
# The adjacent literals below are joined at compile time into a single string.
system_prompt = (
    "You are a helpful AI assistant that helps people find information about courses from the provided context.\n"
    "If you don't know the answer, just say that you don't know. DO NOT try to make up an answer.\n"
    "Use the following pieces of context to answer the question at the end.\n"
    "{context}  \n"
    "Answer the question truthfully and as best as you can and keep it concise.\n"
)

# Two-message chat template: the system instructions, then the user's question
# substituted for `{input}`.
chat_messages = [
    ("system", system_prompt),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [15]:
# Chain that passes documents plus the prompt to the LLM to produce an answer.
question_answering_chain = create_stuff_documents_chain(llm, prompt)

# Full RAG pipeline: the retriever feeds documents to the answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answering_chain,
)

In [None]:
# Run one sample question through the full chain and print the generated text,
# which the chain returns under the "answer" key.
rag_inputs = {"input": "Can you provide information on the electrician courses available?"}
response = rag_chain.invoke(rag_inputs)

print(response['answer'])