# Chat With Your Data

## Solution: Implement a Vector Store and Load Data

## Install libraries

In [None]:
pip install openai

In [None]:
pip install python-dotenv

In [None]:
pip install langchain

In [None]:
pip install langchain-openai

In [None]:
pip install pypdf

In [None]:
pip install faiss-cpu

In [None]:
pip install langchainhub

## Load PDF documents

In [7]:
# PyPDFLoader is imported from langchain_community, the same package the FAISS
# import in this notebook uses; `langchain.document_loaders` is a deprecated
# import path.
from langchain_community.document_loaders import PyPDFLoader

# One loader per source PDF; paths are relative to the notebook's working
# directory.
loaders = [
    PyPDFLoader("../Data/botanical.pdf"),
    PyPDFLoader("../Data/astronomical.pdf"),
    PyPDFLoader("../Data/biological.pdf"),
    PyPDFLoader("../Data/cosmological.pdf"),
    PyPDFLoader("../Data/culinary.pdf"),
    PyPDFLoader("../Data/pharmaceutical.pdf"),
]

# Gather the documents returned by every loader into a single flat list
# (PyPDFLoader yields one document per PDF page).
pages = []

for loader in loaders:
    pages.extend(loader.load())

## Split PDF documents into chunks

In [8]:
from langchain.text_splitter import CharacterTextSplitter

# Split on newlines, aiming for chunks of about 1000 characters (measured with
# len) that share 150 characters of overlap with their neighbours.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=150,
    separator="\n",
    length_function=len,
)

In [9]:
# Break every loaded page into chunks using the splitter configured above.
docs = text_splitter.split_documents(pages)

In [10]:
# Display how many chunks the splitter produced.
len(docs)

42

## Convert chunks to embeddings and store in FAISS vector database

In [11]:
import os

from dotenv import find_dotenv, load_dotenv

# Load the variables from the .env file located by find_dotenv(); the result
# is bound to `_` so the cell produces no output.
_ = load_dotenv(find_dotenv())

In [12]:
# Read the API key from the environment; raises KeyError when it is not set.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [13]:
from langchain_openai import OpenAIEmbeddings

# Embedding client for the "text-embedding-3-small" model, authenticated with
# the key read from the environment.
embeddings_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=OPENAI_API_KEY,
)

In [14]:
from langchain_community.vectorstores import FAISS

# Embed every chunk and build a FAISS index from the resulting vectors.
vectordb = FAISS.from_documents(documents=docs, embedding=embeddings_model)

# Report how many vectors the index holds.
print(vectordb.index.ntotal)

42


## Persist the vector database to disk

In [15]:
# Persist the index under ../faiss_index (relative to the notebook's working
# directory).
vectordb.save_local("../faiss_index")