# PostgreSQL Setup

### Install dependencies

In [None]:
%pip install llama-index-vector-stores-postgres

In [None]:
!sudo apt update
!echo | sudo apt install -y postgresql-common
!echo | sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
!echo | sudo apt install postgresql-15-pgvector
!sudo service postgresql start
!sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'password';"
!sudo -u postgres psql -c "CREATE DATABASE vector_db;"

### Load credentials

In [None]:
import os
from getpass import getpass

# if "LLAMA_CLOUD_API_KEY" not in os.environ:
#     os.environ["LLAMA_CLOUD_API_KEY"] = getpass("Enter your Llama Cloud API Key: ")

# Ask for the OpenAI key interactively only when the environment does not
# already define it, so the secret never has to be typed into the notebook.
_OPENAI_KEY_VAR = "OPENAI_API_KEY"
if os.environ.get(_OPENAI_KEY_VAR) is None:
    os.environ[_OPENAI_KEY_VAR] = getpass("Enter your OpenAI API Key: ")

### Loading documents

In [None]:
import os
import textwrap
from pathlib import Path

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.postgres import PGVectorStore

# Directory holding the source documents. Set the RAG_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset the
# original location is used, so existing behaviour is unchanged.
DATA_DIR = Path(
    os.environ.get(
        "RAG_DATA_DIR",
        "/home/daghbeji/rag-factory/sandbox/vector-stores/data/",
    )
)

# Read every file in DATA_DIR into LlamaIndex Document objects and show the
# first document's id as a quick sanity check that loading worked.
documents = SimpleDirectoryReader(str(DATA_DIR)).load_data()
print("Document ID:", documents[0].doc_id)

### Create the Database
Using an existing PostgreSQL server running at localhost, create the database we'll be using.

In [None]:
import psycopg2
from psycopg2 import sql

# Server-level connection (no database in the URL) and the database to (re)create.
connection_string = "postgresql://postgres:password@localhost:5432"
db_name = "vector_db"

# WARNING: this cell drops `db_name` if it exists, discarding anything stored in it.
conn = psycopg2.connect(connection_string)
# DROP/CREATE DATABASE cannot run inside a transaction block, hence autocommit.
conn.autocommit = True
try:
    with conn.cursor() as cursor:
        # Compose the name as a quoted SQL identifier instead of splicing raw text.
        quoted_db = sql.Identifier(db_name)
        cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(quoted_db))
        cursor.execute(sql.SQL("CREATE DATABASE {}").format(quoted_db))
finally:
    # The `with` block above only closes the cursor; release the connection explicitly.
    conn.close()

### Create the index
The example below constructs a PGVectorStore with an HNSW index (m = 16, ef_construction = 64, ef_search = 40) using the `vector_cosine_ops` distance method.

In [None]:
from sqlalchemy import make_url

# Split the server URL into the separate fields PGVectorStore.from_params takes.
url = make_url(connection_string)
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    # Must be the same table name that the "Query existing index" cell reopens;
    # previously this cell wrote to "raganything-paper" while that cell read
    # "raganything", so the stored embeddings were never found.
    table_name="raganything",
    embed_dim=1536,  # openai embedding dimension
    # HNSW index parameters, matching the values described in the text above.
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)

# Embed the loaded documents and persist them through the Postgres vector store,
# then expose the index through a default query engine.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, show_progress=True
)
query_engine = index.as_query_engine()

### Query the index

In [None]:
# Ask each question against the freshly built index and print the answer
# wrapped to 100 columns.
# Expected answer to the benchmark question: DocBench and MMLongBench
questions = (
    "How many times was the word RAG used? Give me the line number at which each word occurs.",
    "Which Benchmarks were used to evaluate the pipeline?",
)
for question in questions:
    response = query_engine.query(question)
    print(textwrap.fill(str(response), 100))

### Query existing index

In [None]:
# Re-attach to a table that already holds embeddings instead of re-indexing the
# documents. Connection settings are written as literal values here.
# NOTE(review): table_name has to be the name used when the index was built —
# confirm it agrees with the index-creation cell.
hnsw_settings = {
    "hnsw_m": 16,
    "hnsw_ef_construction": 64,
    "hnsw_ef_search": 40,
    "hnsw_dist_method": "vector_cosine_ops",
}
vector_store = PGVectorStore.from_params(
    database="vector_db",
    host="localhost",
    password="password",
    port=5432,
    user="postgres",
    table_name="raganything",
    embed_dim=1536,  # openai embedding dimension
    hnsw_kwargs=hnsw_settings,
)

# Wrap the existing store in an index and build a default query engine on it.
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
query_engine = index.as_query_engine()

# Run the same two questions as before and print each answer wrapped to 100 columns.
for question in (
    "How many times was the word RAG used? Give me the line number at which each word occurs.",
    "Which Benchmarks were used to evaluate the pipeline?",
):
    response = query_engine.query(question)
    print(textwrap.fill(str(response), 100))

### Hybrid Search