In [2]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
import os

# Build a FAISS vector index from a PDF and persist it to disk.
# NOTE: every run of this cell re-embeds all chunks via OpenAIEmbeddings,
# so re-running it repeats the embedding work from scratch.

# Tunable inputs, named here instead of being buried in the calls below
PDF_PATH = "BERT_Research_Paper.pdf"
INDEX_DIR = "faiss_index"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# 1. Load PDF
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

# 2. Split
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)
documents = splitter.split_documents(docs)

# Fail early with a readable message: building a FAISS index from an empty
# chunk list (e.g. a scanned PDF with no extractable text) otherwise errors
# deep inside the vector store with an unhelpful traceback.
if not documents:
    raise ValueError(
        f"No text chunks were extracted from {PDF_PATH!r}; nothing to index."
    )

# 3. Embeddings
# presumably picks up the OpenAI API key from the environment — verify setup
embedding = OpenAIEmbeddings()

# 4. Create FAISS Vector DB
vector_db = FAISS.from_documents(documents, embedding)

# 5. Save DB locally
vector_db.save_local(INDEX_DIR)

print("FAISS DB saved successfully!")

  from .autonotebook import tqdm as notebook_tqdm


FAISS DB saved successfully!


In [3]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Reload the persisted FAISS index and run a similarity query against it.

INDEX_DIR = "faiss_index"  # folder read by FAISS.load_local below
TOP_K = 3                  # number of nearest chunks to retrieve

# The same embedding class must be used for querying as for indexing
embedding = OpenAIEmbeddings()

# Load existing DB
# NOTE(review): allow_dangerous_deserialization=True is an explicit opt-in;
# per the LangChain docs the stored index is unpickled on load, so only point
# this at an index folder you created yourself.
db = FAISS.load_local(
    INDEX_DIR,
    embedding,
    allow_dangerous_deserialization=True,
)

# Query
query = "What is BERT architecture?"
results = db.similarity_search(query, k=TOP_K)

# Print a 1-based header followed by the chunk text for each hit
for i, doc in enumerate(results):
    print(f"\nResult {i+1}:", doc.page_content, sep="\n")


Result 1:
BERT: Pre-training of Deep Bidirectional Transformers for
Language Understanding
Jacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova
Google AI Language
{jacobdevlin,mingweichang,kentonl,kristout}@google.com
Abstract
We introduce a new language representa-
tion model called BERT, which stands for
Bidirectional Encoder Representations from
Transformers. Unlike recent language repre-
sentation models (Peters et al., 2018a; Rad-
ford et al., 2018), BERT is designed to pre-
train deep bidirectional representations from
unlabeled text by jointly conditioning on both
left and right context in all layers. As a re-
sult, the pre-trained BERT model can be ﬁne-
tuned with just one additional output layer
to create state-of-the-art models for a wide
range of tasks, such as question answering and
language inference, without substantial task-
speciﬁc architecture modiﬁcations.
BERT is conceptually simple and empirically
powerful. It obtains new state-of-the-art re-

Result 2:
ing pr