diff --git a/.github/workflows/server_ci.yml b/.github/workflows/server_ci.yml
index e0e851f..5557d29 100644
--- a/.github/workflows/server_ci.yml
+++ b/.github/workflows/server_ci.yml
@@ -37,15 +37,9 @@ jobs:
- name: start containers
env:
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/postgres
- VECTORIZE_PROXY_ENABLED: 1
run: |
- docker compose -f docker-compose.server.yml up postgres -d;
- sleep 5;
- docker compose -f docker-compose.server.yml up vector-serve server --build -d;
- - name: Test Core
- run: |
- cargo test
- - name: Test Core - Integration
+ make compose-server-up
+ - name: Test
# skip when on external forks
if: github.repository_owner == 'ChuckHend'
env:
@@ -56,11 +50,12 @@ jobs:
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
DATABASE_URL: postgresql://postgres:postgres@0.0.0.0:5432/postgres
run: |
- cargo test -- --ignored
+ cd server
+ cargo test
- name: debugging info
if: failure()
run: |
- docker compose -f docker-compose.server.yml logs
+ docker compose -f server/docker-compose.yml logs
docker ps
build_and_push_amd64:
diff --git a/Makefile b/Makefile
index 25bb346..8c4506a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,8 @@
.PHONY: docs
+compose-server-up:
+ docker compose -f server/docker-compose.yml up --build -d
+
docs:
poetry install --no-directory --no-root
poetry run mkdocs serve
diff --git a/README.md b/README.md
index 1c62edd..92d0016 100644
--- a/README.md
+++ b/README.md
@@ -1,324 +1,95 @@
- pg_vectorize: a VectorDB for Postgres
+ pg_vectorize: a VectorDB on Postgres
-A Postgres extension that automates the transformation and orchestration of text to embeddings and provides hooks into the most popular LLMs. This allows you to do vector search and build LLM applications on existing data with as little as two function calls.
+A Postgres server and extension that automates the transformation and orchestration of text to embeddings and provides hooks into the most popular LLMs. This lets you get up and running quickly and automates maintenance for vector search, full text search, and hybrid search, so you can rapidly build RAG and search engines on Postgres.
This project relies heavily on the work by [pgvector](https://github.com/pgvector/pgvector) for vector similarity search, [pgmq](https://github.com/pgmq/pgmq) for orchestration in background workers, and [SentenceTransformers](https://huggingface.co/sentence-transformers).
---
-[](https://pgxn.org/dist/vectorize/)
-[](https://ossrank.com/p/3815)
-
+[](https://www.postgresql.org/)
**API Documentation**: https://chuckhend.github.io/pg_vectorize/
-**Source**: https://github.com/ChuckHend/pg_vectorize
+**Source**: https://github.com/tembo-io/pg_vectorize
+
+## Overview
-## Features
+pg_vectorize provides two ways to add semantic, full text, and hybrid search to any Postgres database, making it easy to build retrieval-augmented generation (RAG) on Postgres. The project ships both an external server-only implementation and an in-database SQL experience via a Postgres extension.
-- Workflows for both vector search and RAG
-- Integrations with OpenAI's [embeddings](https://platform.openai.com/docs/guides/embeddings) and [Text-Generation](https://platform.openai.com/docs/guides/text-generation) endpoints and a self-hosted container for running [Hugging Face Sentence-Transformers](https://huggingface.co/sentence-transformers)
-- Automated creation of Postgres triggers to keep your embeddings up to date
-- High level API - one function to initialize embeddings transformations, and another function to search
+Modes at a glance:
-## Table of Contents
-- [Features](#features)
-- [Table of Contents](#table-of-contents)
-- [Installation](#installation)
-- [Vector Search Example](#vector-search-example)
-- [RAG Example](#rag-example)
-- [Updating Embeddings](#updating-embeddings)
-- [Directly Interact with LLMs](#directly-interact-with-llms)
-- [Importing Pre-existing Embeddings](#importing-pre-existing-embeddings)
-- [Creating a Table from Existing Embeddings](#creating-a-table-from-existing-embeddings)
+- HTTP server (recommended for managed DBs): run a standalone service that connects to Postgres and exposes a REST API (POST /api/v1/table, GET /api/v1/search).
+- Postgres extension (SQL): install the extension into Postgres and use SQL functions like `vectorize.table()` and `vectorize.search()` (requires filesystem access to Postgres; see [./extension/README.md](./extension/README.md)).
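+
+For the extension path, the SQL surface is essentially two calls. A minimal sketch, reusing the example `products` table from the extension README:
+
+```sql
+-- initialize embedding generation for a table
+SELECT vectorize.table(
+    job_name    => 'product_search_hf',
+    relation    => 'products',
+    primary_key => 'product_id',
+    columns     => ARRAY['product_name', 'description'],
+    transformer => 'sentence-transformers/all-MiniLM-L6-v2'
+);
+
+-- then search it
+SELECT * FROM vectorize.search(
+    job_name       => 'product_search_hf',
+    query          => 'accessories for mobile devices',
+    return_columns => ARRAY['product_id', 'product_name'],
+    num_results    => 3
+);
+```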
-## Installation
+## Quick start — HTTP server
-The fastest way to get started is by using [docker compose](https://docs.docker.com/compose/).
+Run Postgres and the HTTP servers locally using docker compose:
```bash
+# runs Postgres, the embeddings server, and the management API
docker compose up -d
```
-Then connect to Postgres:
-
-```text
-docker compose exec -it postgres psql
-```
-
-Enable the extension and its dependencies
-
-```sql
-CREATE EXTENSION vectorize CASCADE;
-```
-
-
-
-Install into an existing Postgres instance
-
-If you're installing in an existing Postgres instance, you will need the following dependencies:
-
-Rust:
-
-- [pgrx toolchain](https://github.com/pgcentralfoundation/pgrx)
-
-Postgres Extensions:
-
-- [pg_cron](https://github.com/citusdata/pg_cron) ^1.5
-- [pgmq](https://github.com/pgmq/pgmq) ^1
-- [pgvector](https://github.com/pgvector/pgvector) ^0.5.0
-
-Then set the following either in postgresql.conf or as a configuration parameter:
-
-```sql
--- requires restart of Postgres
-alter system set shared_preload_libraries = 'vectorize,pg_cron';
-alter system set cron.database_name = 'postgres';
-```
-
-And if you're running the vector-serve container, set the following url as a configuration parameter in Postgres.
- The host may need to change from `localhost` to something else depending on where you are running the container.
-
-```sql
-alter system set vectorize.embedding_service_url = 'http://localhost:3000/v1';
-
-SELECT pg_reload_conf();
-```
-
-
-
-## Vector Search Example
-
-Text-to-embedding transformation can be done with either Hugging Face's Sentence-Transformers or OpenAI's embeddings. The following examples use Hugging Face's Sentence-Transformers. See the project [documentation](https://chuckhend.github.io/pg_vectorize/examples/openai_embeddings/) for OpenAI examples.
-
-Follow the [installation](#installation) steps if you haven't already.
-
-Setup a products table. Copy from the example data provided by the extension.
-
-```sql
-CREATE TABLE products (LIKE vectorize.example_products INCLUDING ALL);
-INSERT INTO products SELECT * FROM vectorize.example_products;
-```
-
-```sql
-SELECT * FROM products limit 2;
-```
-
-```text
- product_id | product_name | description | last_updated_at
-------------+--------------+--------------------------------------------------------+-------------------------------
- 1 | Pencil | Utensil used for writing and often works best on paper | 2023-07-26 17:20:43.639351-05
- 2 | Laptop Stand | Elevated platform for laptops, enhancing ergonomics | 2023-07-26 17:20:43.639351-05
-```
-
-Create a job to vectorize the products table. We'll specify the tables primary key (product_id) and the columns that we want to search (product_name and description).
-
-```sql
-SELECT vectorize.table(
- job_name => 'product_search_hf',
- relation => 'products',
- primary_key => 'product_id',
- columns => ARRAY['product_name', 'description'],
- transformer => 'sentence-transformers/all-MiniLM-L6-v2',
- schedule => 'realtime'
-);
-```
-
-This adds a new column to your table, in our case it is named `product_search_embeddings`, then populates that data with the transformed embeddings from the `product_name` and `description` columns.
-
-Then search,
-
-```sql
-SELECT * FROM vectorize.search(
- job_name => 'product_search_hf',
- query => 'accessories for mobile devices',
- return_columns => ARRAY['product_id', 'product_name'],
- num_results => 3
-);
-```
-
-```text
- search_results
----------------------------------------------------------------------------------------------
- {"product_id": 13, "product_name": "Phone Charger", "similarity_score": 0.8147814132322894}
- {"product_id": 6, "product_name": "Backpack", "similarity_score": 0.7743061352550308}
- {"product_id": 11, "product_name": "Stylus Pen", "similarity_score": 0.7709902653575383}
-```
-
-## RAG Example
-
-Ask raw text questions of the example `products` dataset and get chat responses from an OpenAI LLM.
-
-Follow the [installation](#installation) steps if you haven't already.
-
-Set the [OpenAI API key](https://platform.openai.com/docs/guides/embeddings), this is required to for use with OpenAI's chat-completion models.
-
-```sql
-ALTER SYSTEM SET vectorize.openai_key TO '';
-SELECT pg_reload_conf();
-```
-
-Create an example table if it does not already exist.
-
-```sql
-CREATE TABLE products (LIKE vectorize.example_products INCLUDING ALL);
-INSERT INTO products SELECT * FROM vectorize.example_products;
-```
-
-Initialize a table for RAG. We'll use an open source Sentence Transformer to generate embeddings.
-
-Create a new column that we want to use as the context. In this case, we'll concatenate both `product_name` and `description`.
-
-```sql
-ALTER TABLE products
-ADD COLUMN context TEXT GENERATED ALWAYS AS (product_name || ': ' || description) STORED;
-```
-
-Initialize the RAG project.
- We'll use the `openai/text-embedding-3-small` model to generate embeddings on our source documents.
+Load the example dataset into Postgres (optional):
-```sql
-SELECT vectorize.table(
- job_name => 'product_chat',
- relation => 'products',
- primary_key => 'product_id',
- columns => ARRAY['context'],
- transformer => 'openai/text-embedding-3-small',
- schedule => 'realtime'
-);
-```
-
-Now we can ask questions of the `products` table and get responses from the `product_chat` agent using the `openai/gpt-3.5-turbo` generative model.
-
-```sql
-SELECT vectorize.rag(
- job_name => 'product_chat',
- query => 'What is a pencil?',
- chat_model => 'openai/gpt-3.5-turbo'
-) -> 'chat_response';
-```
-
-```text
-"A pencil is an item that is commonly used for writing and is known to be most effective on paper."
-```
-
-And to use a locally hosted Ollama service, change the `chat_model` parameter:
-
-```sql
-SELECT vectorize.rag(
- job_name => 'product_chat',
- query => 'What is a pencil?',
- chat_model => 'ollama/wizardlm2:7b'
-) -> 'chat_response';
+```bash
+psql postgres://postgres:postgres@localhost:5432/postgres -f server/sql/example.sql
```
```text
-" A pencil is a writing instrument that consists of a solid or gelignola wood core, known as the \"lead,\" encased in a cylindrical piece of breakable material (traditionally wood or plastic), which serves as the body of the pencil. The tip of the body is tapered to a point for writing, and it can mark paper with the imprint of the lead. When used on a sheet of paper, the combination of the pencil's lead and the paper creates a visible mark that is distinct from unmarked areas of the paper. Pencils are particularly well-suited for writing on paper, as they allow for precise control over the marks made."
-```
-
-
-:bulb: Note that the `-> 'chat_response'` addition selects for that field of the JSON object output. Removing it will show the full JSON object, including information on which documents were included in the contextual prompt.
-
-## Updating Embeddings
-
-When the source text data is updated, how and when the embeddings are updated is determined by the value set to the `schedule` parameter in `vectorize.table`.
-
-The default behavior is `schedule => '* * * * *'`, which means the background worker process checks for changes every minute, and updates the embeddings accordingly. This method requires setting the `updated_at_col` value to point to a colum on the table indicating the time that the input text columns were last changed. `schedule` can be set to any cron-like value.
-
-Alternatively, `schedule => 'realtime` creates triggers on the source table and updates embeddings anytime new records are inserted to the source table or existing records are updated.
-
-Statements below would will result in new embeddings being generated either immediately (`schedule => 'realtime'`) or within the cron schedule set in the `schedule` parameter.
-
-```sql
-INSERT INTO products (product_id, product_name, description, product_category, price)
-VALUES (12345, 'pizza', 'dish of Italian origin consisting of a flattened disk of bread', 'food', 5.99);
-
-UPDATE products
-SET description = 'sling made of fabric, rope, or netting, suspended between two or more points, used for swinging, sleeping, or resting'
-WHERE product_name = 'Hammock';
+CREATE TABLE
+INSERT 0 40
```
-## Directly Interact with LLMs
+Create an embedding job via the HTTP API. This generates embeddings for the existing data and continuously watches for updates or new data:
-Sometimes you want more control over the handling of embeddings.
- For those situations you can directly call various LLM providers using SQL:
-
-For text generation:
-
-```sql
-select vectorize.generate(
- input => 'Tell me the difference between a cat and a dog in 1 sentence',
- model => 'openai/gpt-4o'
-);
+```bash
+curl -X POST http://localhost:8080/api/v1/table -d '{
+ "job_name": "my_job",
+ "src_table": "my_products",
+ "src_schema": "public",
+ "src_columns": ["product_name", "description"],
+ "primary_key": "product_id",
+ "update_time_col": "updated_at",
+ "model": "sentence-transformers/all-MiniLM-L6-v2"
+ }' -H "Content-Type: application/json"
```
-```text
- generate
------------------------------------------------------------------------------------------------------------
- Cats are generally more independent and solitary, while dogs tend to be more social and loyal companions.
-(1 row)
+```json
+{"id":"16b80184-2e8e-4ee6-b7e2-1a068ff4b314"}
```
-And for embedding generation:
+Search using the HTTP API:
-```sql
-select vectorize.encode(
- input => 'Tell me the difference between a cat and a dog in 1 sentence',
- model => 'openai/text-embedding-3-large'
-);
+```bash
+curl -X GET "http://localhost:8080/api/v1/search?job_name=my_job&query=camping%20backpack&limit=1" | jq .
```
-```text
-{0.0028769304,-0.005826319,-0.0035932811, ...}
+```json
+[
+ {
+ "description": "Storage solution for carrying personal items on ones back",
+ "fts_rank": 1,
+ "price": 45.0,
+ "product_category": "accessories",
+ "product_id": 6,
+ "product_name": "Backpack",
+ "rrf_score": 0.03278688524590164,
+ "semantic_rank": 1,
+ "similarity_score": 0.6296013593673706,
+ "updated_at": "2025-10-04T14:45:16.152526+00:00"
+ }
+]
```
-## Importing Pre-existing Embeddings
+## Which should I pick?
-If you have already computed embeddings using a compatible model (e.g., using Sentence-Transformers directly), you can import these into pg_vectorize without recomputation:
-
-```sql
--- First create the vectorize project
-SELECT vectorize.table(
- job_name => 'my_search',
- relation => 'my_table',
- primary_key => 'id',
- columns => ARRAY['content'],
- transformer => 'sentence-transformers/all-MiniLM-L6-v2'
-);
-
--- Then import your pre-computed embeddings
-SELECT vectorize.import_embeddings(
- job_name => 'my_search',
- src_table => 'my_embeddings_table',
- src_primary_key => 'id',
- src_embeddings_col => 'embedding'
-);
-```
-
-The embeddings must match the dimensions of the specified transformer model. For example, 'sentence-transformers/all-MiniLM-L6-v2' expects 384-dimensional vectors.
-
-## Creating a Table from Existing Embeddings
-
-If you have already computed embeddings using a compatible model, you can create a new vectorize table directly from them:
-
-```sql
--- Create a vectorize table from existing embeddings
-SELECT vectorize.table_from(
- relation => 'my_table',
- columns => ARRAY['content'],
- job_name => 'my_search',
- primary_key => 'id',
- src_table => 'my_embeddings_table',
- src_primary_key => 'id',
- src_embeddings_col => 'embedding',
- transformer => 'sentence-transformers/all-MiniLM-L6-v2'
-);
-```
+- Use the HTTP server when your Postgres is managed (RDS, Cloud SQL, etc.) or you cannot install extensions. It requires only that `pgvector` is available in the database. You run the HTTP services separately.
+- Use the Postgres extension when you self-host Postgres and can install extensions. This provides an in-database experience and direct SQL APIs for vectorization and RAG.
-The embeddings must match the dimensions of the specified transformer model. This approach ensures your pre-computed embeddings are properly imported before any automatic updates are enabled.
+If you want hands-on SQL examples or to install the extension into Postgres, see `./extension/README.md`. For full HTTP API docs and deployment notes, see `./server/README.md`.
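+
+For the managed-Postgres path, a minimal sketch of running the services with Docker (image names, env vars, and the `vectorize-server` command are taken from this repo's compose files; point `DATABASE_URL` at your own instance):
+
+```bash
+# local embedding server
+docker run -d -p 3000:3000 ghcr.io/chuckhend/vector-serve:latest
+
+# HTTP search service, connected to an existing Postgres that has pgvector installed
+docker run -d -p 8080:8080 \
+  -e DATABASE_URL=postgresql://user:password@your-host:5432/postgres \
+  -e EMBEDDING_SVC_URL=http://host.docker.internal:3000/v1 \
+  ghcr.io/chuckhend/vectorize-server:latest vectorize-server
+```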
-## Contributing
-We welcome contributions from the community! If you're interested in contributing to `pg_vectorize`, please check out our [Contributing Guide](CONTRIBUTING.md). You can also open an issue.
+For contribution guidelines see `CONTRIBUTING.md` in the repo root.
diff --git a/docker-compose.server.yml b/docker-compose.server.yml
deleted file mode 100644
index b8e77ac..0000000
--- a/docker-compose.server.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-x-logging: &default-logging
- driver: json-file
- options:
- max-size: 10m
- max-file: 50
-
-x-env: &default-env
- OPENAI_API_KEY: ${OPENAI_API_KEY}
- CO_API_KEY: ${CO_API_KEY}
- VOYAGE_API_KEY: ${VOYAGE_API_KEY}
- DATABASE_URL: ${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/postgres}
- EMBEDDING_SVC_URL: ${EMBEDDING_SVC_URL:-http://vector-serve:3000/v1}
-
-services:
- postgres:
- restart: always
- logging: *default-logging
- environment:
- <<: *default-env
- POSTGRES_PASSWORD: postgres
- image: pgvector/pgvector:0.8.0-pg17
- ports:
- - 5432:5432
- server:
- restart: always
- logging: *default-logging
- depends_on:
- - postgres
- build:
- dockerfile: server/Dockerfile
- context: ./
- ports:
- - 8080:8080 # http server
- - 5433:5433 # proxy
- environment:
- <<: *default-env
- RUST_LOG: debug
- VECTORIZE_PROXY_ENABLED: ${VECTORIZE_PROXY_ENABLED:-1}
- command: vectorize-server
- vector-serve:
- restart: always
- logging: *default-logging
- image: ghcr.io/chuckhend/vector-serve:latest
- ports:
- - 3000:3000
diff --git a/docker-compose.yml b/docker-compose.yml
index 19a9205..943c155 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,27 +1,44 @@
+x-logging: &default-logging
+ driver: json-file
+ options:
+ max-size: 10m
+ max-file: 50
+
+x-env: &default-env
+ OPENAI_API_KEY: ${OPENAI_API_KEY}
+ CO_API_KEY: ${CO_API_KEY}
+ VOYAGE_API_KEY: ${VOYAGE_API_KEY}
+ DATABASE_URL: ${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/postgres}
+ EMBEDDING_SVC_URL: ${EMBEDDING_SVC_URL:-http://vector-serve:3000/v1}
+
services:
postgres:
restart: always
- image: ghcr.io/chuckhend/vectorize-pg:latest
+ logging: *default-logging
+ environment:
+ <<: *default-env
+ POSTGRES_PASSWORD: postgres
+ image: pgvector/pgvector:0.8.1-pg18
ports:
- 5432:5432
+ server:
+ restart: always
+ logging: *default-logging
+ depends_on:
+ - postgres
+ build:
+ dockerfile: server/Dockerfile
+ context: ./
+ ports:
+ - 8080:8080 # http server
environment:
- - POSTGRES_PASSWORD=postgres
+ <<: *default-env
+ RUST_LOG: debug
+ VECTORIZE_PROXY_ENABLED: ${VECTORIZE_PROXY_ENABLED:-1}
+ command: vectorize-server
vector-serve:
restart: always
+ logging: *default-logging
image: ghcr.io/chuckhend/vector-serve:latest
ports:
- 3000:3000
- ollama-serve:
- image: quay.io/tembo/ollama-serve:latest
- ports:
- - 3001:3001
- environment:
- - OLLAMA_HOST=0.0.0.0:3001
- # deploy:
- # replicas: 1
- # resources:
- # reservations:
- # devices:
- # - driver: nvidia
- # count: 1
- # capabilities: [gpu]
\ No newline at end of file
diff --git a/extension/README.md b/extension/README.md
new file mode 100644
index 0000000..0b23dfe
--- /dev/null
+++ b/extension/README.md
@@ -0,0 +1,322 @@
+
+ pg_vectorize: a VectorDB for Postgres
+
+
+A Postgres extension that automates the transformation and orchestration of text to embeddings and provides hooks into the most popular LLMs. This allows you to do vector search and build LLM applications on existing data with as little as two function calls.
+
+This project relies heavily on the work by [pgvector](https://github.com/pgvector/pgvector) for vector similarity search, [pgmq](https://github.com/pgmq/pgmq) for orchestration in background workers, and [SentenceTransformers](https://huggingface.co/sentence-transformers).
+
+---
+
+[](https://pgxn.org/dist/vectorize/)
+
+**API Documentation**: https://chuckhend.github.io/pg_vectorize/
+
+**Source**: https://github.com/ChuckHend/pg_vectorize
+
+## Features
+
+- Workflows for both vector search and RAG
+- Integrations with OpenAI's [embeddings](https://platform.openai.com/docs/guides/embeddings) and [Text-Generation](https://platform.openai.com/docs/guides/text-generation) endpoints and a self-hosted container for running [Hugging Face Sentence-Transformers](https://huggingface.co/sentence-transformers)
+- Automated creation of Postgres triggers to keep your embeddings up to date
+- High level API - one function to initialize embeddings transformations, and another function to search
+
+## Table of Contents
+- [Features](#features)
+- [Table of Contents](#table-of-contents)
+- [Installation](#installation)
+- [Vector Search Example](#vector-search-example)
+- [RAG Example](#rag-example)
+- [Updating Embeddings](#updating-embeddings)
+- [Directly Interact with LLMs](#directly-interact-with-llms)
+- [Importing Pre-existing Embeddings](#importing-pre-existing-embeddings)
+- [Creating a Table from Existing Embeddings](#creating-a-table-from-existing-embeddings)
+
+## Installation
+
+The fastest way to get started is by using [docker compose](https://docs.docker.com/compose/).
+
+```bash
+docker compose up -d
+```
+
+Then connect to Postgres:
+
+```text
+docker compose exec -it postgres psql
+```
+
+Enable the extension and its dependencies
+
+```sql
+CREATE EXTENSION vectorize CASCADE;
+```
+
+
+
+Install into an existing Postgres instance
+
+If you're installing in an existing Postgres instance, you will need the following dependencies:
+
+Rust:
+
+- [pgrx toolchain](https://github.com/pgcentralfoundation/pgrx)
+
+Postgres Extensions:
+
+- [pg_cron](https://github.com/citusdata/pg_cron) ^1.5
+- [pgmq](https://github.com/pgmq/pgmq) ^1
+- [pgvector](https://github.com/pgvector/pgvector) ^0.5.0
+
+Then set the following either in postgresql.conf or as a configuration parameter:
+
+```sql
+-- requires restart of Postgres
+alter system set shared_preload_libraries = 'vectorize,pg_cron';
+alter system set cron.database_name = 'postgres';
+```
+
+And if you're running the vector-serve container, set the following url as a configuration parameter in Postgres.
+ The host may need to change from `localhost` to something else depending on where you are running the container.
+
+```sql
+alter system set vectorize.embedding_service_url = 'http://localhost:3000/v1';
+
+SELECT pg_reload_conf();
+```
+
+
+
+## Vector Search Example
+
+Text-to-embedding transformation can be done with either Hugging Face's Sentence-Transformers or OpenAI's embeddings. The following examples use Hugging Face's Sentence-Transformers. See the project [documentation](https://chuckhend.github.io/pg_vectorize/examples/openai_embeddings/) for OpenAI examples.
+
+Follow the [installation](#installation) steps if you haven't already.
+
+Set up a products table. Copy from the example data provided by the extension.
+
+```sql
+CREATE TABLE products (LIKE vectorize.example_products INCLUDING ALL);
+INSERT INTO products SELECT * FROM vectorize.example_products;
+```
+
+```sql
+SELECT * FROM products limit 2;
+```
+
+```text
+ product_id | product_name | description | last_updated_at
+------------+--------------+--------------------------------------------------------+-------------------------------
+ 1 | Pencil | Utensil used for writing and often works best on paper | 2023-07-26 17:20:43.639351-05
+ 2 | Laptop Stand | Elevated platform for laptops, enhancing ergonomics | 2023-07-26 17:20:43.639351-05
+```
+
+Create a job to vectorize the products table. We'll specify the table's primary key (`product_id`) and the columns that we want to search (`product_name` and `description`).
+
+```sql
+SELECT vectorize.table(
+ job_name => 'product_search_hf',
+ relation => 'products',
+ primary_key => 'product_id',
+ columns => ARRAY['product_name', 'description'],
+ transformer => 'sentence-transformers/all-MiniLM-L6-v2',
+ schedule => 'realtime'
+);
+```
+
+This adds a new column to your table (in our case named `product_search_embeddings`), then populates it with embeddings generated from the `product_name` and `description` columns.
+
+Then search:
+
+```sql
+SELECT * FROM vectorize.search(
+ job_name => 'product_search_hf',
+ query => 'accessories for mobile devices',
+ return_columns => ARRAY['product_id', 'product_name'],
+ num_results => 3
+);
+```
+
+```text
+ search_results
+---------------------------------------------------------------------------------------------
+ {"product_id": 13, "product_name": "Phone Charger", "similarity_score": 0.8147814132322894}
+ {"product_id": 6, "product_name": "Backpack", "similarity_score": 0.7743061352550308}
+ {"product_id": 11, "product_name": "Stylus Pen", "similarity_score": 0.7709902653575383}
+```
+
+## RAG Example
+
+Ask raw text questions of the example `products` dataset and get chat responses from an OpenAI LLM.
+
+Follow the [installation](#installation) steps if you haven't already.
+
+Set the [OpenAI API key](https://platform.openai.com/docs/guides/embeddings); this is required for use with OpenAI's chat-completion models.
+
+```sql
+ALTER SYSTEM SET vectorize.openai_key TO '';
+SELECT pg_reload_conf();
+```
+
+Create an example table if it does not already exist.
+
+```sql
+CREATE TABLE products (LIKE vectorize.example_products INCLUDING ALL);
+INSERT INTO products SELECT * FROM vectorize.example_products;
+```
+
+Initialize a table for RAG.
+
+Create a new column that we want to use as the context. In this case, we'll concatenate both `product_name` and `description`.
+
+```sql
+ALTER TABLE products
+ADD COLUMN context TEXT GENERATED ALWAYS AS (product_name || ': ' || description) STORED;
+```
+
+Initialize the RAG project.
+ We'll use the `openai/text-embedding-3-small` model to generate embeddings on our source documents.
+
+```sql
+SELECT vectorize.table(
+ job_name => 'product_chat',
+ relation => 'products',
+ primary_key => 'product_id',
+ columns => ARRAY['context'],
+ transformer => 'openai/text-embedding-3-small',
+ schedule => 'realtime'
+);
+```
+
+Now we can ask questions of the `products` table and get responses from the `product_chat` agent using the `openai/gpt-3.5-turbo` generative model.
+
+```sql
+SELECT vectorize.rag(
+ job_name => 'product_chat',
+ query => 'What is a pencil?',
+ chat_model => 'openai/gpt-3.5-turbo'
+) -> 'chat_response';
+```
+
+```text
+"A pencil is an item that is commonly used for writing and is known to be most effective on paper."
+```
+
+And to use a locally hosted Ollama service, change the `chat_model` parameter:
+
+```sql
+SELECT vectorize.rag(
+ job_name => 'product_chat',
+ query => 'What is a pencil?',
+ chat_model => 'ollama/wizardlm2:7b'
+) -> 'chat_response';
+```
+
+```text
+" A pencil is a writing instrument that consists of a solid or gelignola wood core, known as the \"lead,\" encased in a cylindrical piece of breakable material (traditionally wood or plastic), which serves as the body of the pencil. The tip of the body is tapered to a point for writing, and it can mark paper with the imprint of the lead. When used on a sheet of paper, the combination of the pencil's lead and the paper creates a visible mark that is distinct from unmarked areas of the paper. Pencils are particularly well-suited for writing on paper, as they allow for precise control over the marks made."
+```
+
+
+:bulb: Note that the `-> 'chat_response'` addition selects for that field of the JSON object output. Removing it will show the full JSON object, including information on which documents were included in the contextual prompt.
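+
+For example, dropping the field selection returns the full object:
+
+```sql
+SELECT vectorize.rag(
+    job_name => 'product_chat',
+    query => 'What is a pencil?',
+    chat_model => 'openai/gpt-3.5-turbo'
+);
+```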
+
+## Updating Embeddings
+
+When the source text data is updated, how and when the embeddings are updated is determined by the value set to the `schedule` parameter in `vectorize.table`.
+
+The default behavior is `schedule => '* * * * *'`, which means the background worker process checks for changes every minute and updates the embeddings accordingly. This method requires setting the `updated_at_col` value to point to a column on the table that indicates when the input text columns were last changed. `schedule` can be set to any cron-like value.
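+
+For example, a cron-based job on the example `products` table might look like the sketch below (`last_updated_at` is the timestamp column from the example data):
+
+```sql
+SELECT vectorize.table(
+    job_name => 'product_search_cron',
+    relation => 'products',
+    primary_key => 'product_id',
+    columns => ARRAY['product_name', 'description'],
+    transformer => 'sentence-transformers/all-MiniLM-L6-v2',
+    schedule => '* * * * *',
+    updated_at_col => 'last_updated_at'
+);
+```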
+
+Alternatively, `schedule => 'realtime'` creates triggers on the source table and updates embeddings anytime new records are inserted into the source table or existing records are updated.
+
+The statements below will result in new embeddings being generated either immediately (`schedule => 'realtime'`) or on the cron schedule set in the `schedule` parameter.
+
+```sql
+INSERT INTO products (product_id, product_name, description, product_category, price)
+VALUES (12345, 'pizza', 'dish of Italian origin consisting of a flattened disk of bread', 'food', 5.99);
+
+UPDATE products
+SET description = 'sling made of fabric, rope, or netting, suspended between two or more points, used for swinging, sleeping, or resting'
+WHERE product_name = 'Hammock';
+```
+
+## Directly Interact with LLMs
+
+Sometimes you want more control over the handling of embeddings.
+ For those situations you can directly call various LLM providers using SQL:
+
+For text generation:
+
+```sql
+select vectorize.generate(
+ input => 'Tell me the difference between a cat and a dog in 1 sentence',
+ model => 'openai/gpt-4o'
+);
+```
+
+```text
+ generate
+-----------------------------------------------------------------------------------------------------------
+ Cats are generally more independent and solitary, while dogs tend to be more social and loyal companions.
+(1 row)
+```
+
+And for embedding generation:
+
+```sql
+select vectorize.encode(
+ input => 'Tell me the difference between a cat and a dog in 1 sentence',
+ model => 'openai/text-embedding-3-large'
+);
+```
+
+```text
+{0.0028769304,-0.005826319,-0.0035932811, ...}
+```
+
+## Importing Pre-existing Embeddings
+
+If you have already computed embeddings using a compatible model (e.g., using Sentence-Transformers directly), you can import these into pg_vectorize without recomputation:
+
+```sql
+-- First create the vectorize project
+SELECT vectorize.table(
+ job_name => 'my_search',
+ relation => 'my_table',
+ primary_key => 'id',
+ columns => ARRAY['content'],
+ transformer => 'sentence-transformers/all-MiniLM-L6-v2'
+);
+
+-- Then import your pre-computed embeddings
+SELECT vectorize.import_embeddings(
+ job_name => 'my_search',
+ src_table => 'my_embeddings_table',
+ src_primary_key => 'id',
+ src_embeddings_col => 'embedding'
+);
+```
+
+The embeddings must match the dimensions of the specified transformer model. For example, 'sentence-transformers/all-MiniLM-L6-v2' expects 384-dimensional vectors.
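+
+If you're unsure of the dimensions of your existing embeddings, you can check them first. A quick sketch, assuming the source column uses pgvector's `vector` type:
+
+```sql
+SELECT vector_dims(embedding) FROM my_embeddings_table LIMIT 1;
+```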
+
+## Creating a Table from Existing Embeddings
+
+If you have already computed embeddings using a compatible model, you can create a new vectorize table directly from them:
+
+```sql
+-- Create a vectorize table from existing embeddings
+SELECT vectorize.table_from(
+ relation => 'my_table',
+ columns => ARRAY['content'],
+ job_name => 'my_search',
+ primary_key => 'id',
+ src_table => 'my_embeddings_table',
+ src_primary_key => 'id',
+ src_embeddings_col => 'embedding',
+ transformer => 'sentence-transformers/all-MiniLM-L6-v2'
+);
+```
+
+The embeddings must match the dimensions of the specified transformer model. This approach ensures your pre-computed embeddings are properly imported before any automatic updates are enabled.
+
+## Contributing
+
+We welcome contributions from the community! If you're interested in contributing to `pg_vectorize`, please check out our [Contributing Guide](../CONTRIBUTING.md). You can also open an issue.
\ No newline at end of file
diff --git a/extension/docker-compose.yml b/extension/docker-compose.yml
new file mode 100644
index 0000000..19a9205
--- /dev/null
+++ b/extension/docker-compose.yml
@@ -0,0 +1,27 @@
+services:
+ postgres:
+ restart: always
+ image: ghcr.io/chuckhend/vectorize-pg:latest
+ ports:
+ - 5432:5432
+ environment:
+ - POSTGRES_PASSWORD=postgres
+ vector-serve:
+ restart: always
+ image: ghcr.io/chuckhend/vector-serve:latest
+ ports:
+ - 3000:3000
+ ollama-serve:
+ image: quay.io/tembo/ollama-serve:latest
+ ports:
+ - 3001:3001
+ environment:
+ - OLLAMA_HOST=0.0.0.0:3001
+ # deploy:
+ # replicas: 1
+ # resources:
+ # reservations:
+ # devices:
+ # - driver: nvidia
+ # count: 1
+ # capabilities: [gpu]
\ No newline at end of file
diff --git a/server/Dockerfile b/server/Dockerfile
index 4c16681..113b1f4 100644
--- a/server/Dockerfile
+++ b/server/Dockerfile
@@ -1,4 +1,4 @@
-FROM rust:1.88.0-slim-bookworm AS builder
+FROM rust:1.90.0-slim-bookworm AS builder
RUN apt-get update && \
apt-get install -y curl pkg-config libssl-dev postgresql-client && apt-get clean && \
@@ -16,7 +16,7 @@ COPY Cargo.toml Cargo.lock ./
ENV SQLX_OFFLINE=1
RUN cargo build --bin vectorize-server --release
-FROM rust:1.88.0-slim-bookworm
+FROM rust:1.90.0-slim-bookworm
RUN apt-get update && \
apt-get install -y postgresql-client && apt-get clean && \
diff --git a/server/README.md b/server/README.md
index c50c8ca..cd0ce7a 100644
--- a/server/README.md
+++ b/server/README.md
@@ -2,6 +2,10 @@
An HTTP server that sits in between your application and Postgres.
+## See also
+
+- Top-level project overview: `../README.md`
+
## Features
- Quickly sets up semantic and full text search on any Postgres table.
- Generate embeddings from OpenAI, Hugging Face, and many other embedding model providers.
@@ -13,8 +17,8 @@ An HTTP server that sits in between your application and Postgres.
Run Postgres and the HTTP servers in separate containers locally:
```bash
-# docker-compose.server.yml is located in the root of this repository
-docker compose -f docker-compose.server.yml up -d
+# docker-compose.yml is located in the root of this repository
+docker compose -f docker-compose.yml up -d
```
There are three containers: postgres, a local embedding server, and the HTTP search service.
@@ -88,36 +92,6 @@ curl -X GET "http://localhost:8080/api/v1/search?job_name=my_job&query=camping%2
]
```
-## SQL proxy example
-
-We can also use the SQL proxy to perform the same search query, but using SQL instead of the HTTP API. This is useful if you have additional joins are advanced SQL queries that you want to perform.
-
-Note that this query routes through the proxy on port 5433.
-
-```sql
-psql postgres://postgres:postgres@localhost:5433/postgres -c \
-"SELECT * FROM (
- SELECT t0.*, t1.similarity_score
- FROM (
- SELECT
- product_id,
- 1 - (embeddings <=> vectorize.embed('plants', 'my_job')) as similarity_score
- FROM vectorize._embeddings_my_job
- ) t1
- INNER JOIN public.my_products t0 on t0.product_id = t1.product_id
-) t
-ORDER BY t.similarity_score DESC
-LIMIT 2;"
-```
-
-```plaintext
- product_id | product_name | description | product_category | price | updated_at | similarity_score
-------------+------------------+---------------------------------------------------+------------------+-------+-------------------------------+---------------------
- 8 | Plant Pot | Container for holding plants, often with drainage | garden | 12.00 | 2025-06-25 20:27:07.725765+00 | 0.46105278002586925
- 35 | Gardening Gloves | Handwear for protection during gardening tasks | garden | 8.00 | 2025-06-25 20:27:07.725765+00 | 0.2909192990160845
-(2 rows)
-```
-
## Running on an existing Postgres instance
Assuming you have an existing Postgres instance with `pgvector` installed, you can run the HTTP servers using Docker and get started quickly.
diff --git a/server/docker-compose.yml b/server/docker-compose.yml
index 97d0117..55d66b0 100644
--- a/server/docker-compose.yml
+++ b/server/docker-compose.yml
@@ -4,19 +4,40 @@ x-logging: &default-logging
max-size: 10m
max-file: 50
+x-env: &default-env
+ OPENAI_API_KEY: ${OPENAI_API_KEY}
+ CO_API_KEY: ${CO_API_KEY}
+ VOYAGE_API_KEY: ${VOYAGE_API_KEY}
+ DATABASE_URL: ${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/postgres}
+ EMBEDDING_SVC_URL: ${EMBEDDING_SVC_URL:-http://vector-serve:3000/v1}
+
+
services:
+ postgres:
+ restart: always
+ logging: *default-logging
+ environment:
+ POSTGRES_PASSWORD: postgres
+ image: pgvector/pgvector:0.8.1-pg18
+ ports:
+ - 5432:5432
+ healthcheck:
+ test: ["CMD-SHELL", "pg_isready"]
+ interval: 5s
+ timeout: 5s
+ retries: 5
server:
restart: always
logging: *default-logging
depends_on:
- vector-serve
- image: ghcr.io/chuckhend/vectorize-server:latest
+ build:
+ dockerfile: server/Dockerfile
+ context: ../
ports:
- 8080:8080
environment:
- EMBEDDING_SVC_URL: ${EMBEDDING_SVC_URL:-http://vector-serve:3000/v1}
- DATABASE_URL: ${DATABASE_URL:-postgresql://postgres:postgres@host.docker.internal:5432/postgres}
- OPENAI_API_KEY: ${OPENAI_API_KEY}
+ <<: *default-env
RUST_LOG: ${RUST_LOG:-info}
command: vectorize-server
vector-serve:
@@ -26,4 +47,5 @@ services:
ports:
- 3000:3000
environment:
+ <<: *default-env
HF_API_KEY: ${HF_API_KEY}
diff --git a/server/tests/tests.rs b/server/tests/tests.rs
index 43c532c..3c92a21 100644
--- a/server/tests/tests.rs
+++ b/server/tests/tests.rs
@@ -9,7 +9,6 @@ use util::common;
use vectorize_server::routes::table::JobResponse;
// these tests require the following main server, vector-serve, and Postgres to be running
// easiest way is to use the docker-compose file in the root of the project
-#[ignore]
#[tokio::test]
async fn test_search_server() {
common::init_test_environment().await;
@@ -102,7 +101,6 @@ async fn test_search_server() {
);
}
-#[ignore]
#[tokio::test]
async fn test_search_filters() {
let mut rng = rand::rng();
@@ -175,9 +173,10 @@ async fn test_search_filters() {
);
}
+/// proxy is an incomplete feature
#[ignore]
#[tokio::test]
-async fn test_lifecycle() {
+async fn test_proxy() {
// Initialize the project (database setup, etc.) without creating test app
common::init_test_environment().await;
@@ -282,21 +281,84 @@ async fn test_lifecycle() {
// test prepared statements
// Use parameter binding instead of string formatting
- // let row = sqlx::query("SELECT vectorize.embed('food'::text, $1);")
- // .bind(&job_name)
- // .fetch_one(&pool)
- // .await
- // .unwrap();
- // let result_str: String = row.get(0);
- // let result_str = result_str.trim_start_matches('[').trim_end_matches(']');
- // let values: Vec = result_str
- // .split(',')
- // .map(|s| s.trim().parse::().unwrap())
- // .collect();
- // assert_eq!(values.len(), 384); // sentence-transformers/all-MiniLM-L6-v2 has 384 dimensions
+ let row = sqlx::query("SELECT vectorize.embed('food'::text, $1);")
+ .bind(&job_name)
+ .fetch_one(&pool)
+ .await
+ .unwrap();
+ let result_str: String = row.get(0);
+ let result_str = result_str.trim_start_matches('[').trim_end_matches(']');
+ let values: Vec<f32> = result_str
+ .split(',')
+ .map(|s| s.trim().parse::<f32>().unwrap())
+ .collect();
+ assert_eq!(values.len(), 384); // sentence-transformers/all-MiniLM-L6-v2 has 384 dimensions
+}
+
+#[tokio::test]
+async fn test_lifecycle() {
+ // Initialize the project (database setup, etc.) without creating test app
+ common::init_test_environment().await;
+
+ // Create test table with required columns
+ let table = common::create_test_table().await;
+
+ let job_name = format!("test_job_{table}");
+
+ // Create a valid VectorizeJob payload
+ let payload = json!({
+ "job_name": job_name,
+ "src_table": table,
+ "src_schema": "vectorize_test",
+ "src_columns": ["content"],
+ "primary_key": "id",
+ "update_time_col": "updated_at",
+ "model": "sentence-transformers/all-MiniLM-L6-v2"
+ });
+
+ // Use reqwest to make HTTP request to running server
+ let client = reqwest::Client::new();
+ let resp = client
+ .post("http://localhost:8080/api/v1/table")
+ .header("Content-Type", "application/json")
+ .json(&payload)
+ .send()
+ .await
+ .expect("Failed to send request");
+
+ assert_eq!(
+ resp.status(),
+ reqwest::StatusCode::OK,
+ "Response status: {:?}",
+ resp.status()
+ );
+
+ let response: JobResponse = resp.json().await.expect("Failed to parse response");
+ assert!(!response.id.is_nil(), "Job ID should not be nil");
+
+ // request a job that does not exist should be a 404
+ let resp = client
+ .get("http://localhost:8080/api/v1/search?job_name=does_not_exist")
+ .send()
+ .await
+ .expect("Failed to send request");
+ assert_eq!(resp.status(), reqwest::StatusCode::BAD_REQUEST);
+
+ // sleep for 2 seconds
+ tokio::time::sleep(std::time::Duration::from_secs(2)).await;
+
+ // test searching the job
+ let params = format!("job_name={job_name}&query=food");
+ let search_results = common::search_with_retry(¶ms, 3).await.unwrap();
+
+ // Should return 3 results
+ assert_eq!(search_results.len(), 3);
+
+ // First result should be pizza (highest similarity)
+ assert_eq!(search_results[0]["content"].as_str().unwrap(), "pizza");
+ assert!(search_results[0]["similarity_score"].as_f64().unwrap() > 0.5);
}
-#[ignore]
#[tokio::test]
async fn test_health_monitoring() {
// Initialize the test environment without creating test app
diff --git a/server/tests/util.rs b/server/tests/util.rs
index 21a9535..b7b2899 100644
--- a/server/tests/util.rs
+++ b/server/tests/util.rs
@@ -93,10 +93,10 @@ pub mod common {
.await
.expect("unable to connect to postgres");
- sqlx::query("create schema if not exists vectorize_test;")
+ // there is a race condition on create schema during test cases, so ignore errors
+ let _created = sqlx::query("create schema if not exists vectorize_test;")
.execute(&pool)
- .await
- .expect("unable to create vectorize_test schema");
+ .await;
let mut rng = rand::rng();
let test_num = rng.random_range(1..1000);