# Chat With Your Data

## Persist Data to Vector Stores

## Install libraries

In [None]:
pip install openai

In [None]:
pip install python-dotenv

In [None]:
pip install langchain

In [None]:
pip install langchain-openai

In [None]:
pip install pypdf

In [None]:
pip install faiss-cpu

In [None]:
pip install langchainhub

## Load OpenAI API Key to use OpenAI's embedding model

In [25]:
import os

from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load its entries into the process environment.
# The boolean result is bound to `_` so the cell produces no output.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

In [26]:
# Read the OpenAI API key from the process environment.
# A missing or blank value raises KeyError (the same exception type a plain
# os.environ[...] lookup raises) with a message explaining how to fix it,
# instead of failing later inside the first embedding request.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY.strip():
    raise KeyError(
        "OPENAI_API_KEY is not set; define it in your .env file or shell environment"
    )

## Load documents

In [27]:
# Import from langchain_community: the `langchain.document_loaders` path is a
# deprecated re-export, and this matches the langchain_community import used
# for the FAISS vector store.
from langchain_community.document_loaders import PyPDFLoader

# Read the speech PDF (path is relative to the notebook's working directory).
# `pages` holds the Document objects returned by the loader.
loader = PyPDFLoader('michelle_obama_speech.pdf')
pages = loader.load()

## Chunk documents

In [32]:
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Split the loaded pages into chunks: target size 1000 characters, no overlap.
# NOTE(review): CharacterTextSplitter splits on a single separator, so text
# without that separator may yield chunks larger than chunk_size — confirm the
# resulting chunk lengths are acceptable for this PDF.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
documents = text_splitter.split_documents(pages)

## Generate embeddings and store them in a vector database
### FAISS vector database

In [29]:
from langchain_community.vectorstores import FAISS

# Embed every chunk with OpenAI's text-embedding-3-small model and build the
# FAISS vector store from the resulting vectors.
openai_embedder = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
    model="text-embedding-3-small",
)
vectordb = FAISS.from_documents(documents, openai_embedder)

In [30]:
# Report how many vectors the FAISS index currently holds.
num_vectors = vectordb.index.ntotal
print(num_vectors)

8


## Persist Data in your Vector Store

In [31]:
# Write the vector store to the local "faiss2_index" folder so it can be
# reloaded later without re-embedding the documents.
vectordb.save_local(folder_path="faiss2_index")

## Load Vector Store

In [24]:
# Build the embedding model in this cell: `embeddings_model` was not defined by
# any earlier cell, so on a fresh kernel this load raised NameError. The model
# must be the same one used when the index was created so that query vectors
# are comparable with the stored ones.
embeddings_model = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY, model="text-embedding-3-small")

# SECURITY: load_local unpickles data from disk, which can execute arbitrary
# code. allow_dangerous_deserialization=True is acceptable here only because
# "faiss2_index" was written by this notebook; never enable it for index files
# from an untrusted source.
new_db = FAISS.load_local("faiss2_index", embeddings_model, allow_dangerous_deserialization=True)