In [1]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
import os
import shutil
import pickle  # For saving FAISS index

In [29]:
import faiss

## Load and Split Documents

In [5]:
# Folder that holds the source PDFs.
Data_Path = r"D:\WorkPlace\Python\Training\May2025\Practice\PracLLM"
# The FAISS database lives in a sub-folder of the data folder.
FAISS_Path = Data_Path + r"\FAISS_DB\UFAREX"

In [7]:
# Splitter settings: chunk_size 1000 with an overlap of 100, breaking on blank
# lines, newlines, periods and finally spaces (tried in that order).
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " "],
    chunk_overlap=100,
    chunk_size=1000,
)

# Load the PDFs found in Data_Path, then cut the loaded documents into chunks.
loader = PyPDFDirectoryLoader(Data_Path)
documents = loader.load()
chunks = text_splitter.split_documents(documents)

In [9]:
# Sanity check: container type and number of chunks produced by the splitter.
print(f"{type(chunks)} {len(chunks)}")

<class 'list'> 93


In [11]:
# Peek at the first chunk to confirm the PDF text was extracted as expected.
first_chunk = chunks[0]
print(first_chunk)

page_content='IEEE TRANSACTIONS ON SYSTEMS, MAN, AND CYBERNETICS: SYSTEMS, VOL. 54, NO. 12, DECEMBER 2024 7419
UFAREX: A Universal Fully Autonomous Robust
Expansionist Fuzzy System for Optimal Online
Learning From Nonstationary Data Streams
Cyrus Hasanvand , Hamid Hasanvand , and Hamidreza Momeni , Senior Member, IEEE
Abstract—Online intelligent knowledge extraction from real-
world nonstationary data streams presents a multiobjective
optimization challenge. Here, we characterize the learning pro-
cess on a trajectory of global optimality to simultaneously satisfy
six high-proﬁle objectives: 1) optimum generalization for the
best bias-variance tradeoff; 2) compactness of knowledgebase;
3) memory retention and stability-plasticity balance; 4) univer-
sality and full autonomy; 5) robustness against outliers, noise,
and model uncertainty; and 6) active concept drift detection
and adaptation. We propose a ﬂexible Takagi–Sugeno (TS) fuzzy
system, named UFAREX, that self-constructs and self-

## Embed and Build FAISS Vector Store

In [16]:
# Embedding model used to turn each chunk into a vector.
embedding_model = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")

# Embed every chunk and collect the vectors in a FAISS vector store.
vectorstore = FAISS.from_documents(documents=chunks, embedding=embedding_model)

## Save the FAISS DB (Persistence)

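Unlike Chroma (which has `persist_directory`), a FAISS vector store is not persisted automatically and must be saved explicitly. Here this is done manually with `faiss.write_index` plus `pickle`; LangChain's built-in `FAISS.save_local` / `FAISS.load_local` are an alternative.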

In [31]:
# Start from an empty target folder: remove any previous save, then recreate it.
if os.path.exists(FAISS_Path):
    shutil.rmtree(FAISS_Path)
os.makedirs(FAISS_Path, exist_ok=True)

# Locations of the two artefacts written below.
index_path = os.path.join(FAISS_Path, "index.faiss")
store_path = os.path.join(FAISS_Path, "store.pkl")

# 1) The raw FAISS index, in faiss' own on-disk format.
faiss.write_index(vectorstore.index, index_path)

# 2) The complete vector store object, pickled as a whole.
with open(store_path, "wb") as store_file:
    pickle.dump(vectorstore, store_file)

print(f"Saved {len(chunks)} chunks to {FAISS_Path}")

Saved 93 chunks to D:\WorkPlace\Python\Training\May2025\Practice\PracLLM\FAISS_DB\UFAREX


## Load the FAISS DB Later

When you want to reload your saved FAISS database:

In [None]:
# Self-contained reload cell: it imports everything it uses so it also works
# in a fresh kernel. `os` was previously missing here, which made the
# os.path.join calls below fail with a NameError unless an earlier cell had
# already imported it.
import os
import pickle

import faiss
# Same import locations as the first cell of this notebook (langchain_community).
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

FAISS_Path = r"D:\WorkPlace\Python\Training\May2025\Practice\PracLLM\FAISS_DB\UFAREX"

# index.faiss is written by the save cell but is not read back here: the
# pickled store below is the whole vector store object.
index_path = os.path.join(FAISS_Path, "index.faiss")
store_path = os.path.join(FAISS_Path, "store.pkl")

# SECURITY: pickle.load can execute arbitrary code contained in the file.
# Only load a store.pkl that you created yourself.
with open(store_path, "rb") as f:
    vectorstore = pickle.load(f)

## Add New Documents to Existing FAISS VectorStore
### Load or prepare new documents
The `vectorstore` object is already available from the previous cell.

Use any method to get new chunks. For example:

In [None]:
# Loader imported from langchain_community, matching the first cell of this notebook.
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

New_Data_Path = r"D:\NewDocuments"

loader = PyPDFDirectoryLoader(New_Data_Path)
new_docs = loader.load()

# Use the same splitter settings (including the separators) as for the
# documents already in the index, so old and new chunks are cut the same way.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", " "]
)

new_chunks = text_splitter.split_documents(new_docs)

### Add new chunks to the existing vectorstore
This step embeds the new chunks using the original embedding model (automatically stored in the vectorstore) and adds them to the FAISS index.

In [None]:
# Embed the new chunks and append them to the existing vector store.
# The returned IDs are kept in `new_ids` so the cell prints a one-line summary
# instead of echoing one ID per chunk. An empty batch is skipped rather than
# handed to the index (NOTE(review): an empty add is expected to fail inside
# FAISS; confirm against the installed version).
if new_chunks:
    new_ids = vectorstore.add_documents(new_chunks)
    print(f"Added {len(new_ids)} chunks to the vector store")
else:
    new_ids = []
    print("No new chunks to add")

### Save the updated vectorstore again

In [None]:
import os
import pickle

import faiss

# Both files are overwritten in place inside FAISS_Path.
index_path = os.path.join(FAISS_Path, "index.faiss")
store_path = os.path.join(FAISS_Path, "store.pkl")

# Updated FAISS index, in faiss' own on-disk format.
faiss.write_index(vectorstore.index, index_path)

# Updated vector store object, pickled as a whole.
with open(store_path, "wb") as store_file:
    pickle.dump(vectorstore, store_file)

## Search the Vector Store

In [None]:
# Retrieve the 5 chunks whose embeddings are closest to the query text.
query = "your query"
results = vectorstore.similarity_search(query, k=5)