In [1]:
import getpass
import os

# Set the LangSmith tracing flag in this process's environment.
os.environ["LANGSMITH_TRACING"] = "true"

# Ask for the key only when no non-empty value is already present, so the cell
# can be re-run (or run with the variable exported beforehand) without
# prompting again. The prompt names the secret being requested.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter API key for LangSmith: ")

 ········


In [2]:
from langchain_core.documents import Document

# Two hand-written sample documents; each one gets its own metadata dict
# carrying the same "source" tag.
documents = [
    Document(page_content=text, metadata={"source": "mammal-pets-doc"})
    for text in (
        "Dogs are great companions, known for their loyalty and friendliness.",
        "Cats are independent pets that often enjoy their own space.",
    )
]

In [31]:
from langchain_community.document_loaders import PyPDFLoader

# Path of the PDF to load, relative to the notebook's working directory.
file_path = "./research.pdf"
print(file_path)
loader = PyPDFLoader(file_path)
print("🔍 Loader created")

try:
    docs = loader.load()
except Exception as e:
    # Report the failure, then re-raise: if the error were swallowed here,
    # `docs` would stay unbound and every later cell that reads it would fail
    # with a NameError far away from the real cause.
    print("Error:", e)
    raise

print('hello')
# Number of Document objects returned by the loader.
print(len(docs))



./research.pdf
🔍 Loader created
hello
110


In [19]:

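# Optional preview of the second loaded document and its metadata (currently disabled).
# NOTE(review): the output recorded under this cell presumably comes from a run
# made before these lines were commented out — re-enable them or clear the output.
# print(f"{docs[1].page_content[:1800]}\n")
# print(docs[1].metadata)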

Contents
About 1 ................................................................................................................................................................................... 
Chapter 1: Getting started with React 2 ............................................................................................................... 
Section 1.1: What is ReactJS? 2 ........................................................................................................................................ 
Section 1.2: Installation or Setup 3 .................................................................................................................................. 
Section 1.3: Hello World with Stateless Functions 4 ....................................................................................................... 
Section 1.4: Absolute Basics of Creating Reusable Components 5 ............................................................................. 
Section 1.5

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Configure the splitter: chunks of up to 1000 characters with a 200-character
# overlap. With add_start_index=True the resulting chunks carry a
# "start_index" entry in their metadata (visible in the cell output).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

# Split every loaded document into chunks.
all_splits = text_splitter.split_documents(docs)

# Show the first chunk as the cell's output.
all_splits[0]

Document(metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'TCPDF', 'creationdate': "20180504111451+00'00'", 'title': 'React JS Notes for Professionals', 'subject': 'React JS', 'author': 'GoalKicker.com', 'keywords': 'React JS Notes for Professionals', 'moddate': '2018-05-04T12:14:31+00:00', 'source': './research.pdf', 'total_pages': 110, 'page': 0, 'page_label': '1', 'start_index': 0}, page_content='React JS\nNotes for Professionals\nReact JS\nNotes for Professionals\nGoalKicker.com\nFree Programming Books\nDisclaimer\nThis is an uno\ue024cial free book created for educational purposes and is\nnot a\ue024liated with o\ue024cial React JS group(s) or company(s).\nAll trademarks and registered trademarks are\nthe property of their respective owners\n100+ pages\nof professional hints and tricks')

In [5]:
import getpass
import os

# Prompt for the Gemini key only when the environment does not already hold a
# non-empty value for it.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass(
        "Enter API key for Google Gemini: "
    )

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client used for both document chunks and queries below.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

Enter API key for Google Gemini:  ········


In [8]:
# Embed the first two chunks, one query call each, in order.
vector_1, vector_2 = [
    embeddings.embed_query(all_splits[position].page_content)
    for position in (0, 1)
]

# Both embeddings must have the same dimensionality.
assert len(vector_2) == len(vector_1)

print(f"Generated vectors of length {len(vector_1)}\n")
# Peek at the first ten components only.
print(vector_1[:10])

Generated vectors of length 768

[0.05509152635931969, -0.01796991191804409, -0.033930450677871704, -0.057573217898607254, 0.07959714531898499, 0.0312516912817955, 0.009266228415071964, 0.0037142718210816383, 0.02444448508322239, 0.06996161490678787]


In [10]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory vector store that embeds content with the `embeddings` client.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [15]:
# Index every chunk; the store returns one id per added document.
# NOTE(review): re-running this cell presumably adds the chunks a second time — confirm.
ids = vector_store.add_documents(all_splits)

In [20]:
# Plain-text similarity query against the indexed chunks.
results = vector_store.similarity_search(query="What are benefits of react js ?")

# Print the top-ranked hit.
print(results[0])

page_content='React JS
Notes for Professionals
React JS
Notes for Professionals
GoalKicker.com
Free Programming Books
Disclaimer
This is an unocial free book created for educational purposes and is
not aliated with ocial React JS group(s) or company(s).
All trademarks and registered trademarks are
the property of their respective owners
100+ pages
of professional hints and tricks' metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'TCPDF', 'creationdate': "20180504111451+00'00'", 'title': 'React JS Notes for Professionals', 'subject': 'React JS', 'author': 'GoalKicker.com', 'keywords': 'React JS Notes for Professionals', 'moddate': '2018-05-04T12:14:31+00:00', 'source': './research.pdf', 'total_pages': 110, 'page': 0, 'page_label': '1', 'start_index': 0}


In [30]:
results = await vector_store.asimilarity_search("where lifecycle methods are used with example")

print(results[0])

page_content='You may also like' metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'TCPDF', 'creationdate': "20180504111451+00'00'", 'title': 'React JS Notes for Professionals', 'subject': 'React JS', 'author': 'GoalKicker.com', 'keywords': 'React JS Notes for Professionals', 'moddate': '2018-05-04T12:14:31+00:00', 'source': './research.pdf', 'total_pages': 110, 'page': 109, 'page_label': '110', 'start_index': 0}


In [23]:
# Score semantics differ between vector store providers. The note originally
# written here describes the score as a distance metric that varies inversely
# with similarity.
# NOTE(review): for InMemoryVectorStore the score may instead be a cosine
# similarity (higher = closer) — confirm against the API reference.

results = vector_store.similarity_search_with_score(
    "How react components communicate ?"
)

# Each result is a (document, score) pair; unpack the top-ranked one.
doc, score = results[0]
print(f"Score: {score}\n", doc, sep="\n")

Score: 0.8063224540448231

page_content='Chapter 22: React Forms 85 ........................................................................................................................................ 
Section 22.1: Controlled Components 85 ........................................................................................................................' metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'TCPDF', 'creationdate': "20180504111451+00'00'", 'title': 'React JS Notes for Professionals', 'subject': 'React JS', 'author': 'GoalKicker.com', 'keywords': 'React JS Notes for Professionals', 'moddate': '2018-05-04T12:14:31+00:00', 'source': './research.pdf', 'total_pages': 110, 'page': 2, 'page_label': '3', 'start_index': 7041}


In [34]:
# Embed the question first, then search with the raw vector instead of text.
embedding = embeddings.embed_query("How to setup webpack in react ?")
results = vector_store.similarity_search_by_vector(embedding=embedding)

# Note: this prints the second-ranked hit (index 1), not the first.
print(results[1])

page_content='Step 6: Create webpack conﬁguration
Create a ﬁle called webpack.conﬁg.js in the root of your project and copy this code into it:
webpack.conﬁg.js
var path = require('path');
var config = {
  context: path.resolve(__dirname + '/src'),
  entry: './index.jsx',
  output: {
    filename: 'app.js',
    path: path.resolve(__dirname + '/dist'),
  },
  devServer: {
    contentBase: path.join(__dirname + '/dist'),
    port: 3000,
    open: true,' metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'TCPDF', 'creationdate': "20180504111451+00'00'", 'title': 'React JS Notes for Professionals', 'subject': 'React JS', 'author': 'GoalKicker.com', 'keywords': 'React JS Notes for Professionals', 'moddate': '2018-05-04T12:14:31+00:00', 'source': './research.pdf', 'total_pages': 110, 'page': 59, 'page_label': '60', 'start_index': 773}


In [35]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    """Return the single best-matching document for ``query``.

    Args:
        query: Free-text question to search the vector store with.

    Returns:
        The list produced by ``vector_store.similarity_search`` with ``k=1``.
    """
    top_match = vector_store.similarity_search(query, k=1)
    return top_match


# Run both questions through the retriever in one batch call; the returned
# list of results is the cell's output.
retriever.batch(
    ["How react components communicate ?", "How to setup webpack in react ?"]
)

[[Document(id='304f5232-31f0-451b-a57b-e0ebb208099d', metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'TCPDF', 'creationdate': "20180504111451+00'00'", 'title': 'React JS Notes for Professionals', 'subject': 'React JS', 'author': 'GoalKicker.com', 'keywords': 'React JS Notes for Professionals', 'moddate': '2018-05-04T12:14:31+00:00', 'source': './research.pdf', 'total_pages': 110, 'page': 109, 'page_label': '110', 'start_index': 0}, page_content='You may also like')],
 [Document(id='0d7cfe91-b1c9-4c61-935a-e9afb1398adc', metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'TCPDF', 'creationdate': "20180504111451+00'00'", 'title': 'React JS Notes for Professionals', 'subject': 'React JS', 'author': 'GoalKicker.com', 'keywords': 'React JS Notes for Professionals', 'moddate': '2018-05-04T12:14:31+00:00', 'source': './research.pdf', 'total_pages': 110, 'page': 109, 'page_la

In [None]:
# Build a retriever straight from the vector store, configured for similarity
# search returning one hit per query. This rebinds the name `retriever`,
# replacing the @chain-decorated function defined in an earlier cell.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs=dict(k=1))

# Run the same two questions in one batch call; the result is the cell output.
retriever.batch(
    ["How react components communicate ?", "How to setup webpack in react ?"]
)