In [None]:
import os
from dotenv import load_dotenv
# Load settings from a local .env file (python-dotenv) before the os.getenv() lookups
# that follow in this cell read the API keys.
load_dotenv()

import warnings

# Check that the API keys (from the shell environment or the .env file) are available.
# os.environ only accepts str values, so assigning os.getenv() of a missing key raises
# TypeError; report each missing key by name instead of crashing the first cell.
for key_name in ("LANGCHAIN_API_KEY", "GROQ_API_KEY"):
    key_value = os.getenv(key_name)
    if key_value is None:
        warnings.warn(f"{key_name} is not set; add it to your environment or .env file.")
    else:
        os.environ[key_name] = key_value

# Enable LangChain tracing only when its API key is present, so a missing key does not
# switch on tracing that has no credentials to use.
if os.getenv("LANGCHAIN_API_KEY") is not None:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Document loaders


In [None]:
from langchain_community.document_loaders import TextLoader

# Read the sample text file as UTF-8 and print the loaded documents.
loader = TextLoader(
    file_path="../data/text_files/sample.txt",
    encoding="utf-8",
)
text_document = loader.load()
print(text_document)

In [None]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Fetch the blog post and keep only the elements whose CSS class is one of
# post-content / post-title / post-header; everything else on the page is not parsed.
loader = WebBaseLoader(
    web_path="https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

text_documents = loader.load()
# Display the documents loaded by THIS cell. The original displayed `text_document`,
# which still held the TextLoader output from the previous cell.
text_documents

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load the YOLO paper PDF. This rebinds `text_document`, so the cells that follow
# work on the PDF content rather than on the text file loaded earlier.
loader = PyPDFLoader(file_path="../data/pdf/yolo.pdf")
text_document = loader.load()
text_document

# Text splitters

In [None]:
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter

# Both splitters use the same size/overlap settings and the same single separator.
# NOTE(review): with "\n\n\n" as the only separator, text that lacks it presumably
# cannot be cut down to chunk_size — confirm this is the intended chunking.
chunk_settings = dict(chunk_size=100, chunk_overlap=20)
triple_newline = "\n\n\n"

# 1) Plain character splitter: show the first chunk it produces.
character_text_splitter = CharacterTextSplitter(separator=triple_newline, **chunk_settings)
splitted = character_text_splitter.split_documents(text_document)
print(splitted[:1])

# 2) Recursive splitter: `splitted` is reassigned, so later cells use these chunks.
recursive_text_splitter = RecursiveCharacterTextSplitter(separators=[triple_newline], **chunk_settings)
splitted = recursive_text_splitter.split_documents(text_document)
print(splitted[:1])

# Embeddings and Vector Store

In [None]:
!ollama pull embeddinggemma:latest

In [None]:
from langchain_community.vectorstores import Chroma
from langchain_ollama.embeddings import OllamaEmbeddings

# Embed only the first 20 chunks with the Ollama embedding model and index them in Chroma.
embedding_model = OllamaEmbeddings(model="embeddinggemma:latest")
chunks_to_index = splitted[:20]
db = Chroma.from_documents(chunks_to_index, embedding_model)

# Alternative kept for reference (disabled; FAISS is not imported in this cell) — index all chunks:
# db = FAISS.from_documents(splitted, OllamaEmbeddings(model='embeddinggemma:latest'))

In [None]:
# Sanity-check the vector store: search for the chunks most similar to the question
# and show the text of the best match. The query is embedded as written, so the
# misspelling "reserach" in the original has been corrected to "research".
query = 'Who are the authors of YOLO research paper?'

result = db.similarity_search(query)

result[0].page_content

In [None]:
from langchain_ollama.llms import OllamaLLM
# LLM served by Ollama; passed to create_stuff_documents_chain in the "Chains" section.
# The 'gemma3:1b' model must be available to the local Ollama instance.
llm = OllamaLLM(model='gemma3:1b')
llm

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Template text assembled line by line so its whitespace is explicit.
# Placeholders: {context} (supporting text) and {input} (the user's question).
prompt_template_text = (
    "Answer the following question based on the provided context. Think step by step before providing a detailed answer.\n"
    "    If there's not enough information in the context just return \"Sorry i don't have knowledge of that\"\n"
    "    <context>{context}</context>\n"
    "    \n"
    "    Question : {input}\n"
    "    "
)

prompt = ChatPromptTemplate.from_template(prompt_template_text)
prompt

# Chains

In [None]:
from langchain_classic.chains.combine_documents import create_stuff_documents_chain

# Build the document-combining chain from the LLM and the prompt defined in earlier cells.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)
document_chain

# Retriever

In [None]:
# NOTE(review): BM25Retriever is imported but not used in this cell — the retriever
# below comes from the vector store. Confirm whether this import is still needed.
from langchain_classic.retrievers import BM25Retriever

# Expose the vector store `db` through the retriever interface for use in the retrieval chain.
retriever = db.as_retriever()
retriever

# Retriever Chain

In [None]:
from langchain_classic.chains import create_retrieval_chain

# Combine the retriever with the document chain into a single retrieval chain.
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)
retriever_chain

In [None]:
# Ask what the paper is about and display the whole response object.
paper_question = {"input": "What is the yolo research paper about?"}
response = retriever_chain.invoke(paper_question)
response

In [None]:
# Print only the 'answer' entry of the response from the previous cell.
print(response['answer'])

In [None]:
# Follow-up question about the model architecture; print just the answer text.
architecture_question = {"input": "What is the architecture of yolo"}
response = retriever_chain.invoke(architecture_question)
answer_text = response["answer"]
print(answer_text)