https://www.youtube.com/watch?v=KnoVFU0yCUc

In [18]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [19]:
def load_data(file_path):
    """Load the PDF files in a directory as LangChain documents.

    Args:
        file_path: Directory handed to ``DirectoryLoader``; files are selected
            with the ``*.pdf`` glob and parsed with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for that directory.
    """
    pdf_directory_loader = DirectoryLoader(file_path, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_directory_loader.load()
# Run the loader against the relative ../data directory; later cells read `loaded_data`.
loaded_data=load_data(r"../data")


In [20]:
# Prints the repr of every loaded Document (metadata plus full page text).
# NOTE(review): this output is very large; len(loaded_data) or loaded_data[0] would usually be enough.
print(loaded_data)


[Document(metadata={'producer': 'GPL Ghostscript 8.15', 'creator': 'PageMaker 7.0', 'creationdate': '2017-12-20T12:15:25+00:00', 'author': 'dtpcell5', 'moddate': '2025-04-04T12:10:04+05:30', 'title': 'CHAP 1.pmd', 'source': '..\\data\\chapter1.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='Chemical Reactions\nand Equations\n1CHAPTER\nC\nonsider the following situations of daily life and think what happens\nwhen –\n/square6milk is left at room temperature during summers.\n/square6an iron tawa/pan/nail is left exposed to humid atmosphere.\n/square6grapes get fermented.\n/square6food is cooked.\n/square6food gets digested in our body.\n/square6we respire.\nIn all the above situations, the nature and the identity of the initial\nsubstance have somewhat changed. We have already learnt about physical\nand chemical changes of matter in our previous classes. Whenever a chemical\nchange occurs, we can say that a chemical reaction has taken place.\nYou may perhaps be wonde

In [21]:
from typing import List
from langchain.schema import Document

def filter_important_texts(docs: List[Document]) -> List[Document]:
    """Reduce each document to its text plus a source-only metadata dict.

    Args:
        docs: Documents whose ``metadata`` supports ``.get``.

    Returns:
        A new list, in input order, of freshly built ``Document`` objects that
        keep the original ``page_content`` and carry a metadata dict with the
        single key ``"source"`` (``None`` when the input had no such key).
    """
    return [
        Document(
            page_content=page.page_content,
            metadata={"source": page.metadata.get("source")},
        )
        for page in docs
    ]


In [22]:
# Keep only the page text and the "source" metadata of every loaded page.
imp_documents=filter_important_texts(loaded_data)
# Bare last expression so the notebook displays the resulting list.
imp_documents

[Document(metadata={'source': '..\\data\\chapter1.pdf'}, page_content='Chemical Reactions\nand Equations\n1CHAPTER\nC\nonsider the following situations of daily life and think what happens\nwhen –\n/square6milk is left at room temperature during summers.\n/square6an iron tawa/pan/nail is left exposed to humid atmosphere.\n/square6grapes get fermented.\n/square6food is cooked.\n/square6food gets digested in our body.\n/square6we respire.\nIn all the above situations, the nature and the identity of the initial\nsubstance have somewhat changed. We have already learnt about physical\nand chemical changes of matter in our previous classes. Whenever a chemical\nchange occurs, we can say that a chemical reaction has taken place.\nYou may perhaps be wondering as to what is actually meant by a\nchemical reaction. How do we come to know that a chemical reaction\nhas taken place? Let us perform some activities to find the answer to\nthese questions.\nFigure 1.1\nBurning of a magnesium ribbon in a

## Splitting data into chunks


In [27]:
def text_split(imp_documents, chunk_size=500, chunk_overlap=50):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    The chunking parameters used to be hard-coded; they are now keyword
    arguments whose defaults reproduce the previous behaviour, so other cells
    can reuse this helper with different settings instead of building their
    own splitter.

    Args:
        imp_documents: Documents passed to ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size`` (default 500).
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``
            (default 50).

    Returns:
        The chunk list returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(imp_documents)

In [28]:
# Chunk the filtered documents and report how many chunks were produced.
text_chunk=text_split(imp_documents)
print(len(text_chunk))

79


In [30]:
# Display the first chunk to sanity-check the split.
text_chunk[0]

Document(metadata={'source': '..\\data\\chapter1.pdf'}, page_content='Chemical Reactions\nand Equations\n1CHAPTER\nC\nonsider the following situations of daily life and think what happens\nwhen –\n/square6milk is left at room temperature during summers.\n/square6an iron tawa/pan/nail is left exposed to humid atmosphere.\n/square6grapes get fermented.\n/square6food is cooked.\n/square6food gets digested in our body.\n/square6we respire.\nIn all the above situations, the nature and the identity of the initial')

## Embedding

In [None]:
from langchain_ollama import OllamaEmbeddings

def generate_embedding(text_chunk=None, model_name="nomic-embed-text:v1.5"):
    """Create the Ollama embedding model object.

    Despite its name, this function does not embed anything: it only
    constructs and returns the ``OllamaEmbeddings`` object, on which the
    caller can then call the embedding methods.

    Args:
        text_chunk: Unused. Kept as an optional first parameter so existing
            calls that pass the chunk list keep working.
        model_name: Model tag passed to ``OllamaEmbeddings``; defaults to the
            tag that was previously hard-coded.

    Returns:
        An ``OllamaEmbeddings`` instance configured with ``model_name``.
    """
    return OllamaEmbeddings(model=model_name)
# NOTE(review): generate_embedding does not use text_chunk; `embedding` is the model object, not vectors.
embedding= generate_embedding(text_chunk)

## Vector store


In [5]:
from langchain_ollama import OllamaEmbeddings
def get_embeddings():
    """Build the embedding model object used by this notebook.

    Returns:
        An ``OllamaEmbeddings`` instance bound to the
        ``nomic-embed-text:v1.5`` model (the original note says it is meant
        to be used with Chroma).
    """
    return OllamaEmbeddings(model="nomic-embed-text:v1.5")


def load_embeddings():
    """Fetch the embedding model via ``get_embeddings`` and print a status line.

    Returns:
        The object returned by ``get_embeddings()``, unchanged.
    """
    model = get_embeddings()
    # The status line is printed after the model object has been created.
    print("Embedding Generating...")
    return model

# Obtain the embedding model through the helper defined in this cell.
result = load_embeddings()


# Smoke test: embed a few sample strings and report the length of each vector.
texts = ["Hello world", "How are you?", "LangChain is awesome!"]
vectors = result.embed_documents(texts)
print("\nMultiple text embeddings:")
# NOTE(review): prints every float of every vector; the per-text lengths below are usually enough.
print(vectors)
# vectors[i] is paired with texts[i] by position.
for i, vec in enumerate(vectors):
    print(f"Text: {texts[i]} -> Embedding length: {len(vec)}")


Embedding Generated Successfully

Multiple text embeddings:
[[-0.0068068937, -0.0013317408, -0.1713692, 0.008486269, 0.0057576164, 0.069907546, -0.0001778986, -0.04310814, -0.014571766, -0.05410749, 0.0004833916, 0.039149854, 0.027707087, 0.08085989, 0.04535544, -0.06293717, 0.010284204, -0.029606659, -0.0428147, 0.02963792, -0.0037334608, -0.09433962, -0.0076002427, 0.03810766, 0.09213185, -0.014298273, -0.015048545, 0.061593615, 0.0065042116, -0.021995092, -0.0012198782, -0.010931785, -0.00021556119, 0.015677065, 0.03946626, 0.0027239285, 0.03260276, 0.017275644, 0.016295034, 0.0058946433, -0.004687081, -0.014834137, 0.012066562, 0.010206321, 0.06592599, -0.0015066613, -0.004174442, 0.00031046756, 0.08687279, -0.060576126, -0.018184286, 0.0053332997, -0.0009333106, 0.060244568, 0.06724294, 0.03540538, 0.04964917, -0.061643865, 0.024229009, 0.034566592, 0.02173924, 0.0436769, 0.032966163, 0.06524755, -0.017506905, -0.033568393, -0.025202312, 0.035453293, -0.0027622774, 0.018122848, 0.

[Document(metadata={'producer': 'GPL Ghostscript 8.15', 'creator': 'PageMaker 7.0', 'creationdate': '2017-12-20T12:15:25+00:00', 'author': 'dtpcell5', 'moddate': '2025-04-04T12:10:04+05:30', 'title': 'CHAP 1.pmd', 'source': 'chapter1.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='Chemical Reactions\nand Equations\n1CHAPTER\nC\nonsider the following situations of daily life and think what happens\nwhen –\n/square6milk is left at room temperature during summers.\n/square6an iron tawa/pan/nail is left exposed to humid atmosphere.\n/square6grapes get fermented.\n/square6food is cooked.\n/square6food gets digested in our body.\n/square6we respire.\nIn all the above situations, the nature and the identity of the initial\nsubstance have somewhat changed. We have already learnt about physical\nand chemical changes of matter in our previous classes. Whenever a chemical\nchange occurs, we can say that a chemical reaction has taken place.\nYou may perhaps be wondering as to

In [8]:
from langchain_community.document_loaders import PyPDFLoader
# Load one PDF directly through PyPDFLoader, using a relative path.
loader=PyPDFLoader('chapter1.pdf')
# NOTE(review): rebinds `result`, which the embedding smoke-test cell uses for the embeddings object.
result=loader.load()
# NOTE(review): prints the repr of every loaded page; consider a shorter preview.
print(result)   

[Document(metadata={'producer': 'GPL Ghostscript 8.15', 'creator': 'PageMaker 7.0', 'creationdate': '2017-12-20T12:15:25+00:00', 'author': 'dtpcell5', 'moddate': '2025-04-04T12:10:04+05:30', 'title': 'CHAP 1.pmd', 'source': 'chapter1.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='Chemical Reactions\nand Equations\n1CHAPTER\nC\nonsider the following situations of daily life and think what happens\nwhen –\n/square6milk is left at room temperature during summers.\n/square6an iron tawa/pan/nail is left exposed to humid atmosphere.\n/square6grapes get fermented.\n/square6food is cooked.\n/square6food gets digested in our body.\n/square6we respire.\nIn all the above situations, the nature and the identity of the initial\nsubstance have somewhat changed. We have already learnt about physical\nand chemical changes of matter in our previous classes. Whenever a chemical\nchange occurs, we can say that a chemical reaction has taken place.\nYou may perhaps be wondering as to

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the pages held in `result` with chunk_size=1000 / chunk_overlap=100
# (text_split earlier in the notebook uses 500 / 50).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)    
splitted_doc=text_splitter.split_documents(result)
# Bare last expression so the notebook displays the chunk list.
splitted_doc

[Document(metadata={'producer': 'GPL Ghostscript 8.15', 'creator': 'PageMaker 7.0', 'creationdate': '2017-12-20T12:15:25+00:00', 'author': 'dtpcell5', 'moddate': '2025-04-04T12:10:04+05:30', 'title': 'CHAP 1.pmd', 'source': 'chapter1.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='Chemical Reactions\nand Equations\n1CHAPTER\nC\nonsider the following situations of daily life and think what happens\nwhen –\n/square6milk is left at room temperature during summers.\n/square6an iron tawa/pan/nail is left exposed to humid atmosphere.\n/square6grapes get fermented.\n/square6food is cooked.\n/square6food gets digested in our body.\n/square6we respire.\nIn all the above situations, the nature and the identity of the initial\nsubstance have somewhat changed. We have already learnt about physical\nand chemical changes of matter in our previous classes. Whenever a chemical\nchange occurs, we can say that a chemical reaction has taken place.\nYou may perhaps be wondering as to

In [13]:
from langchain_ollama import ChatOllama
# Smoke-test the chat model with a trivial question.
llm=ChatOllama(
    model="mistral:7b",
)
output=llm.invoke("What is the capital of France?")
# Only the reply's .content attribute is printed.
print(output.content)

 The capital of France is Paris.


In [2]:
from langchain_ollama import OllamaEmbeddings
# Embed a single query string and print the raw vector.
# NOTE(review): `embedding` is also assigned in the "Embedding" section cell; the two bindings overwrite each other.
embedding=OllamaEmbeddings(model="nomic-embed-text:v1.5")
embedded_doc=embedding.embed_query("Hello world")
print(embedded_doc)

[-0.0068068937, -0.0013317408, -0.1713692, 0.008486269, 0.0057576164, 0.069907546, -0.0001778986, -0.04310814, -0.014571766, -0.05410749, 0.0004833916, 0.039149854, 0.027707087, 0.08085989, 0.04535544, -0.06293717, 0.010284204, -0.029606659, -0.0428147, 0.02963792, -0.0037334608, -0.09433962, -0.0076002427, 0.03810766, 0.09213185, -0.014298273, -0.015048545, 0.061593615, 0.0065042116, -0.021995092, -0.0012198782, -0.010931785, -0.00021556119, 0.015677065, 0.03946626, 0.0027239285, 0.03260276, 0.017275644, 0.016295034, 0.0058946433, -0.004687081, -0.014834137, 0.012066562, 0.010206321, 0.06592599, -0.0015066613, -0.004174442, 0.00031046756, 0.08687279, -0.060576126, -0.018184286, 0.0053332997, -0.0009333106, 0.060244568, 0.06724294, 0.03540538, 0.04964917, -0.061643865, 0.024229009, 0.034566592, 0.02173924, 0.0436769, 0.032966163, 0.06524755, -0.017506905, -0.033568393, -0.025202312, 0.035453293, -0.0027622774, 0.018122848, 0.073062226, 0.004253712, 0.010794255, 0.014032314, 0.024610993