# PostgreSQL Vectore Store (exploring **vector-** and **hybrid** search)

## Setup Postgres and Dependencies

In [None]:
%pip install llama-index-vector-stores-postgres

In [5]:
import os
import getpass
import subprocess

def run_sudo(cmd, sudo_password, check=True):
    """Run a command with sudo -S, providing password via stdin."""
    return subprocess.run(
        ["sudo", "-S"] + cmd,
        input=(sudo_password + "\n"),
        text=True,
        capture_output=True,
        check=check,
        cwd="/tmp",
    )

# --- passwords ---
sudo_password = getpass.getpass("Provide sudo password: ")
postgres_pw = getpass.getpass("Provide PostgreSQL password for user 'postgres': ")

In [None]:
# --- system packages ---
run_sudo(["apt", "update"], sudo_password)
run_sudo(["apt", "install", "-y", "postgresql-common"], sudo_password)
print("✅ system packages")

# Add PostgreSQL APT repo helper (from postgresql-common)
run_sudo(["/usr/share/postgresql-common/pgdg/apt.postgresql.org.sh"], sudo_password)
print("✅ PostgreSQL APT repo helper")

# Install PostgreSQL + pgvector
command = "sudo -S apt install postgresql-15-pgvector"
os.system(f'echo "{sudo_password}" | {command}')
# run_sudo(["apt", "install", "-y", "postgresql", "postgresql-15-pgvector"], sudo_password)
print("✅ Install PostgreSQL + pgvector")

✅ system packages
✅ PostgreSQL APT repo helper
Reading package lists...
Building dependency tree...






Reading state information...
postgresql-15-pgvector is already the newest version (0.8.2-1.pgdg24.04+1).
0 upgraded, 0 newly installed, 0 to remove and 748 not upgraded.
✅ Install PostgreSQL + pgvector
✅ service is running


## Run DB service

In [6]:
# Ensure service is running
run_sudo(["systemctl", "enable", "--now", "postgresql"], sudo_password)
print("✅ service is running")

# --- set postgres user password ---
sql_set_pw = f"ALTER USER postgres WITH PASSWORD '{postgres_pw}';"
res = subprocess.run(
    ["sudo", "-S", "-u", "postgres", "psql", "-c", sql_set_pw],
    input=(sudo_password + "\n"),
    text=True,
    check=True,
    cwd="/tmp",
)
# print("Return code:", res.returncode)
# print("STDOUT:\n", res.stdout)
# print("STDERR:\n", res.stderr)
print("✅ set postgres user password")

✅ service is running
ALTER ROLE
✅ set postgres user password


## Create the database

In [7]:
# --- create database (idempotent) ---
sql_create_db = "CREATE DATABASE vector_db;"
# If DB exists, CREATE DATABASE fails; so check first with psql:
sql_create_db_safe = """
DO $$
BEGIN
   IF NOT EXISTS (SELECT FROM pg_database WHERE datname = 'vector_db') THEN
      CREATE DATABASE vector_db;
   END IF;
END $$;
"""
subprocess.run(
    ["sudo", "-S", "-u", "postgres", "psql", "-c", sql_create_db_safe],
    input=(sudo_password + "\n"),
    text=True,
    check=True,
    cwd="/tmp",
)
print("✅ create database")

DO
✅ create database


### Load credentials

In [8]:
import os
from getpass import getpass

# if "LLAMA_CLOUD_API_KEY" not in os.environ:
#     os.environ["LLAMA_CLOUD_API_KEY"] = getpass("Enter your Llama Cloud API Key: ")

if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API Key: ")

### Loading documents

In [10]:
from llama_index.core import SimpleDirectoryReader, StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore
import textwrap

documents = SimpleDirectoryReader("../data/paul_graham").load_data()
print("Document ID:", documents[0].doc_id)

Document ID: f661d2f7-11a2-4587-91b4-dc9861fcc770


### Create the Database connection

In [11]:
import psycopg2

# --- connect with psycopg2 to the new DB and enable pgvector extension ---
connection_string=f"postgresql://postgres:{postgres_pw}@localhost:5432"

db_name = "vector_db"
conn = psycopg2.connect(
    dbname=db_name,
    user="postgres",
    password=postgres_pw,
    host="localhost",
    port=5432,
)
conn.autocommit = True

with conn.cursor() as c:
    # c.execute(f"DROP DATABASE IF EXISTS {db_name}")
    # c.execute(f"CREATE DATABASE {db_name}")
    c.execute("CREATE EXTENSION IF NOT EXISTS vector;")
    c.execute("SELECT extname, extversion FROM pg_extension WHERE extname='vector';")
    print("pgvector extension:", c.fetchone())

conn.close()

print("✅ PostgreSQL + pgvector ready. DB: vector_db, user: postgres")

pgvector extension: ('vector', '0.8.2')
✅ PostgreSQL + pgvector ready. DB: vector_db, user: postgres


## 1. VECTOR SEARCH

### Create the vector index
The example below:
1. generates embeddings for each document.
2. stores them in PostgreSQL.
3. creates a vector index (HNSW) on the embedding column with ``m = 16``, ``ef_construction = 64``, and ``ef_search = 40``, with the **vector_cosine_ops** method.

In [17]:
from sqlalchemy import make_url

url = make_url(connection_string)
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="paul_graham_essay",
    embed_dim=1536,  # openai embedding dimension
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, show_progress=True
)
query_engine = index.as_query_engine()

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 27.22it/s]
Generating embeddings:   0%|          | 0/22 [00:00<?, ?it/s]2026-02-28 00:07:37,579 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
Generating embeddings: 100%|██████████| 22/22 [00:00<00:00, 38.27it/s]


### Query the index

In [18]:
response = query_engine.query("What did the author do?")
print(textwrap.fill(str(response), 100))
response = query_engine.query("What happened in the mid 1980s?")
print(textwrap.fill(str(response), 100))

2026-02-28 00:10:20,031 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-28 00:10:21,831 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2026-02-28 00:10:21,982 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


The author worked on writing and programming before college, then transitioned to studying
philosophy in college before eventually switching to studying AI. Later on, the author bought a
house in the Santa Cruz Mountains, returned to New York, resumed painting, and eventually had the
idea to build a web app for making web apps, leading to the creation of a new company.


2026-02-28 00:10:23,390 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AI was in the air in the mid 1980s, with influences such as a novel by Heinlein called The Moon is a
Harsh Mistress featuring an intelligent computer called Mike, and a PBS documentary showing Terry
Winograd using SHRDLU.


### Query existing index

In [13]:
vector_store = PGVectorStore.from_params(
    database="vector_db",
    host="localhost",
    password="password",
    port=5432,
    user="postgres",
    table_name="raganything",
    embed_dim=1536,  # openai embedding dimension
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)

index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
query_engine = index.as_query_engine()

response = query_engine.query("What did the author do?")
print(textwrap.fill(str(response), 100))

2026-02-28 00:42:52,156 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
connection to server at "localhost" (127.0.0.1), port 5432 failed: FATAL:  password authentication failed for user "postgres"

(Background on this error at: https://sqlalche.me/e/20/e3q8)
connection to server at "localhost" (127.0.0.1), port 5432 failed: FATAL:  password authentication failed for user "postgres"

(Background on this error at: https://sqlalche.me/e/20/e3q8)
connection to server at "localhost" (127.0.0.1), port 5432 failed: FATAL:  password authentication failed for user "postgres"

(Background on this error at: https://sqlalche.me/e/20/e3q8)
connection to server at "localhost" (127.0.0.1), port 5432 failed: FATAL:  password authentication failed for user "postgres"

(Background on this error at: https://sqlalche.me/e/20/e3q8)


OperationalError: (psycopg2.OperationalError) connection to server at "localhost" (127.0.0.1), port 5432 failed: FATAL:  password authentication failed for user "postgres"
connection to server at "localhost" (127.0.0.1), port 5432 failed: FATAL:  password authentication failed for user "postgres"

(Background on this error at: https://sqlalche.me/e/20/e3q8)

## 2. HYBRID SEARCH

#### Create the hybrid index (vector + BM25)
* Set ``hybrid_search=True`` <br>
* Change ``text_search_config="english"``<br>
* Set ``vector_store_query_mode="hybrid"``<br>
* Adjust ``sparse_top_k`` (Default is "similarity_top_k")

In [14]:
from sqlalchemy import make_url

url = make_url(connection_string)
hybrid_vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="paul_graham_essay_hybrid_search",
    embed_dim=1536,  # openai embedding dimension
    hybrid_search=True,
    text_search_config="english",
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)

storage_context = StorageContext.from_defaults(
    vector_store=hybrid_vector_store
)
hybrid_index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

2026-02-28 00:43:00,256 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [28]:
hybrid_query_engine = hybrid_index.as_query_engine(
    vector_store_query_mode="hybrid", sparse_top_k=3
)

hybrid_response = hybrid_query_engine.query(
    "Who does Paul Graham think of with the word schtick"
)
print(hybrid_response)

2026-02-28 00:51:36,038 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-28 00:51:36,609 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Painting students


## Improving hybrid search with ``QueryFusionRetriever``:

NOTE: score values from vector and sparse are on different scales; **mode="relative_score"** normalizes/fuses them. If combined naively, one may dominate where the other may be underevaluated and ranked too low. That's why we do **smart score fusion**(agreement between methods).

In [None]:
from llama_index.core.response_synthesizers import CompactAndRefine
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

vector_retriever = hybrid_index.as_retriever(
    vector_store_query_mode="default",
    similarity_top_k=5,
)
text_retriever = hybrid_index.as_retriever(
    vector_store_query_mode="sparse",
    similarity_top_k=5,  # interchangeable with sparse_top_k in this context
)
retriever = QueryFusionRetriever(
    [vector_retriever, text_retriever],
    similarity_top_k=5,
    num_queries=1,  # set this to 1 to disable query generation
    mode="relative_score",  
    use_async=False,
)

response_synthesizer = CompactAndRefine()
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

In [None]:
response = query_engine.query(
    "Who does Paul Graham think of with the word schtick, and why?"
)
print(response)

2026-02-28 00:58:54,433 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2026-02-28 00:58:56,047 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Paul Graham thinks of a signature style when he uses the word "schtick" because it is something that immediately identifies the work as belonging to a specific individual and distinguishes it from the work of others.


### See each retriever’s results + scores (vector vs text)

In [34]:
from llama_index.core.schema import QueryBundle

qb = QueryBundle("Who does Paul Graham think of with the word schtick, and why?")

vec_nodes  = vector_retriever.retrieve(qb)
text_nodes = text_retriever.retrieve(qb)

print("=== VECTOR ===")
for n in vec_nodes:
    # print(n.score, n.node.node_id, n.node.get_text()[:120])
    print(n.score, n.node.get_text()[:120])

print("\n=== TEXT/SPARSE ===")
for n in text_nodes:
    # print(n.score, n.node.node_id, n.node.get_text()[:120])
    print(n.score, n.node.get_text()[:120])

2026-02-28 01:12:31,129 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


=== VECTOR ===
0.809428000731111 Occasionally after wrestling for hours with some gruesome bug I'd check Twitter or HN and see someone asking "Does Paul 
0.8038267008427997 I knew that online essays would be a marginal medium at first. Socially they'd seem more like rants posted by nutjobs on
0.7974737878849818 This name didn't last long before it was replaced by "software as a service," but it was current for long enough that I 

=== TEXT/SPARSE ===
0.0516738 Occasionally after wrestling for hours with some gruesome bug I'd check Twitter or HN and see someone asking "Does Paul 
0.036475625 But the Accademia wasn't teaching me anything except Italian, and my money was running out, so at the end of the first y
0.030396353 Grad students could take classes in any department, and my advisor, Tom Cheatham, was very easy going. If he even knew a


___
* Vector scores are typically ``in [0, 1]`` (cosine-ish)
* Sparse/BM25-like scores are ``often small decimals`` (or sometimes larger numbers) depending on normalization.
___

### See the fused (final) list + fused scores

In [33]:
fused_nodes = retriever.retrieve(qb)

print("=== FUSED ===")
for n in fused_nodes:
    # print(n.score, n.node.node_id, n.node.get_text()[:120])
    print(n.score, n.node.get_text()[:120])

=== FUSED ===
1.0 Occasionally after wrestling for hours with some gruesome bug I'd check Twitter or HN and see someone asking "Does Paul 
0.26571858137338666 I knew that online essays would be a marginal medium at first. Socially they'd seem more like rants posted by nutjobs on
0.14285717642722826 But the Accademia wasn't teaching me anything except Italian, and my money was running out, so at the end of the first y
0.0 This name didn't last long before it was replaced by "software as a service," but it was current for long enough that I 
0.0 Grad students could take classes in any department, and my advisor, Tom Cheatham, was very easy going. If he even knew a


___
* Node 8862... appears in **BOTH lists** and is **ranked #1** in both → strong signal it’s relevant (good candidate for fusion).
* The other TEXT/SPARSE hits (c2ee..., 5b90...) contain query terms more literally (or match common words), but are not semantically close enough to appear in the vector top-k.
* The other VECTOR hits (825e..., 773d...) are semantically related to the “Paul Graham essay” context, even if they don’t share exact keywords.

-> Balance ``literal keyword matche`` vs ``meaning matches`` without being fooled by the different score scales.
___