# Embedding Documents and Storing Them in a Vector Store
We use FAISS as the vector store.

In [1]:
import os
from pathlib import Path

# Pick an anchor path: this file when executed as a script, otherwise (e.g. in a
# notebook kernel, where __file__ is not defined) the current working directory.
if "__file__" in globals():
    file_path = Path(__file__).resolve()
else:
    file_path = Path.cwd()

# Continue from the anchor's parent directory.
# NOTE(review): on the notebook path this is one level above the current
# directory, so re-running this cell climbs one more level each time.
os.chdir(file_path.parent)

In [None]:
from config.settings import MODEL, EMBEDDING_MODEL, INPUT_DATA_SAMPLE_DIR, INPUT_DATA_TABLES_DIR, VECTOR_DB_DIR_SAMPLEQ, VECTOR_DB_DIR_TABLES

from langchain_community.vectorstores import FAISS
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.document_loaders import DirectoryLoader,UnstructuredMarkdownLoader, TextLoader
import tiktoken

In [17]:
# Tokenizer that tiktoken associates with MODEL; used further down to count
# the tokens of each document.
encoding = tiktoken.encoding_for_model(model_name=MODEL)

## Load Documents

### TABLES

In [None]:
# Load every file matching the `**/*.md` glob under the tables directory,
# reading each one as UTF-8 text via TextLoader.
tables_loader = DirectoryLoader(
    path=INPUT_DATA_TABLES_DIR,
    glob="**/*.md",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},
)
tables_docs = tables_loader.load()

Add `table_name`, `token_count`, and `index` to each document's metadata.

In [None]:
# Enrich each table document's metadata in place:
#   table_name  - stem of the source file (file name without directory or ".md")
#   token_count - number of tokens in the page content according to `encoding`
#   index       - running number, starting at 101
# NOTE(review): the file stem can differ from the table name declared inside the
# document (department.md declares `departments`) - confirm which one consumers expect.
for i, doc in enumerate(tables_docs, start=101):
    # Path(...).stem uses the platform's path separator; the previous
    # split('\\') only worked for Windows-style paths.
    doc.metadata['table_name'] = Path(doc.metadata['source']).stem
    doc.metadata['token_count'] = len(encoding.encode(doc.page_content))
    doc.metadata['index'] = i

In [None]:
# Display the loaded table documents, including the metadata added above.
tables_docs

[Document(metadata={'source': 'd:\\code\\text2sql\\data\\tables\\department.md', 'table_name': 'department', 'token_count': 54, 'index': 101}, page_content='## Table Name: `departments`\n\n- **Description**: Lists all departments within the organization.\n- **Primary Key**: `dept_no`\n\n### Columns:\n- `dept_no`: Unique identifier for each department.\n- `dept_name`: Name of the department.'),
 Document(metadata={'source': 'd:\\code\\text2sql\\data\\tables\\dep_emp.md', 'table_name': 'dep_emp', 'token_count': 122, 'index': 102}, page_content='## Table Name: `dept_emp`\n\n- **Description**: Associates employees with the departments they have worked in, including the duration.\n- **Composite Primary Key**: `emp_no`, `dept_no`, `from_date`\n- **Foreign Keys**:\n    - `emp_no`: References `employees(emp_no)`\n    - `dept_no`: References `departments(dept_no)`\n\n### Columns:\n- `emp_no`: Employee number.\n- `dept_no`: Department number.\n- `from_date`: Start date of the department assignme

### SAMPLE Questions

## Creating Vectors and storing in DB

In [5]:
# Embedding client for the model named by EMBEDDING_MODEL (from config.settings).
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

In [6]:
# Embed the loaded table documents and build a FAISS vector store from them.
# (`docs` is never defined in this notebook; the loaded documents are `tables_docs`.)
vector_store = FAISS.from_documents(tables_docs, embeddings)

## Store Vector DB locally

In [7]:
# Persist the vector store to the tables vector-DB directory from config.settings.
# (`VECTOR_DB_DIR` is not among the imported settings; `VECTOR_DB_DIR_TABLES` is.)
vector_store.save_local(VECTOR_DB_DIR_TABLES)