In [1]:
import os
from urllib.request import urlretrieve
import numpy as np
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

In [2]:
from langchain_community.document_loaders import PyPDFLoader

def load_pages_of_book(file_path):
    file_path_book_1 = (file_path)
    loader = PyPDFLoader(file_path)
    pages = loader.load_and_split()
    return pages
pages=load_pages_of_book("python-basics-sample-chapters.pdf")

In [3]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 700,
    chunk_overlap  = 100,
)
docs_after_split = text_splitter.split_documents(pages)

In [4]:
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",  
    model_kwargs={'device':'cpu'}, 
    encode_kwargs={'normalize_embeddings': True}
)

  from tqdm.autonotebook import tqdm, trange


In [5]:
from langchain.vectorstores import Chroma
#Persisiting db contents in a directory
persist_directory = 'docs/chroma/'

vectordb = Chroma.from_documents(
    documents=docs_after_split,
    embedding=huggingface_embeddings,
    persist_directory=persist_directory
)

In [6]:
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.llms.ollama import Ollama
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
template = """Use the provided context to answer the question at the end. 
            If answer is not known,respond by stating you don't know, don't try to make up an answer.
            Use 2 to 3 sentences maximum to state answer. Keep the answer as concise as possible. 
            Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

# Run chain

question = "is python open source language??"
llm = Ollama(model="llama3")
retriever=vectordb.as_retriever()

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)
qa_chain = ConversationalRetrievalChain.from_llm(llm,
                                       retriever=retriever,
                                       memory=memory)

  llm = Ollama(model="llama3")
  memory = ConversationBufferMemory(


In [7]:
while True:
    line = input("please enter the query")
    if line == 'stop':
        print("Ending conversation")
        break
    elif line != '':
        result = qa_chain({"question": line})
        print(result)


please enter the query what are f-strings?


  result = qa_chain({"question": line})


{'question': 'what are f-strings?', 'chat_history': [HumanMessage(content='what are f-strings?', additional_kwargs={}, response_metadata={}), AIMessage(content='According to the provided context, f-strings are a type of string formatting known as "known as f-strings". They allow you to insert variable names surrounded by curly braces ({}), which are replaced by their corresponding values without using str().', additional_kwargs={}, response_metadata={})], 'answer': 'According to the provided context, f-strings are a type of string formatting known as "known as f-strings". They allow you to insert variable names surrounded by curly braces ({}), which are replaced by their corresponding values without using str().'}


please enter the query show an example


{'question': 'show an example', 'chat_history': [HumanMessage(content='what are f-strings?', additional_kwargs={}, response_metadata={}), AIMessage(content='According to the provided context, f-strings are a type of string formatting known as "known as f-strings". They allow you to insert variable names surrounded by curly braces ({}), which are replaced by their corresponding values without using str().', additional_kwargs={}, response_metadata={}), HumanMessage(content='show an example', additional_kwargs={}, response_metadata={}), AIMessage(content='According to the context, one example of how to use f-strings is:\n\nf"{name }has {heads }heads and {arms }arms"\n\nThis would replace the variable names with their corresponding values without using str(), as shown in the example:\n\n\'Zaphod has 2 heads and 3 arms\'\n\nAnother example given is inserting Python expressions between curly braces, such as:\n\nf"{n} times {m} is{n*m}"\n\nThis would replace the expressions with their result 

please enter the query stop


Ending conversation


In [None]:
#Removing 
!rm -rf ./docs/chroma

In [None]:
#general search based on similarity of words
query = """Is python open source language??"""  
relevant_documents = vectordb.similarity_search(query,k=4)
print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query. Display the first one:\n')
print(relevant_documents[0].page_content)
#max_marginal_relevance_search helps searching by removing duplicates
query = """is python open source language??"""  
relevant_documents = vectordb.max_marginal_relevance_search(query,k=4)
print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query. Display the first one:\n')
print(relevant_documents[0].page_content)

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

template = """Question: {question}

Answer: Let's think step by step."""

prompt = ChatPromptTemplate.from_template(template)

model = OllamaLLM(model="mistral-small")

chain = prompt | model

chain.invoke({"question": "What is LangChain?"})

In [None]:
chain.invoke({"question": "What is LangChain?"})