diff --git a/external/Makefile b/external/Makefile index 3a235e3b157..18e5edbbc4d 100755 --- a/external/Makefile +++ b/external/Makefile @@ -10,6 +10,7 @@ SUBDIRS += polar_monitor_preload # NB: those will be ignored in minimal mode. ifeq ($(enable_polar_minimal),no) +SUBDIRS += pgvector SUBDIRS += polar_worker SUBDIRS += polar_tde_utils SUBDIRS += polar_parameter_check diff --git a/external/pgvector/.dockerignore b/external/pgvector/.dockerignore new file mode 100644 index 00000000000..6c60e6d5d4f --- /dev/null +++ b/external/pgvector/.dockerignore @@ -0,0 +1,8 @@ +/.git/ +/dist/ +/results/ +/tmp_check/ +/sql/vector--?.?.?.sql +regression.* +*.o +*.so diff --git a/external/pgvector/.editorconfig b/external/pgvector/.editorconfig new file mode 100644 index 00000000000..54324ab335a --- /dev/null +++ b/external/pgvector/.editorconfig @@ -0,0 +1,6 @@ +root = true + +[*.{c,h,pl,pm,sql}] +indent_style = tab +indent_size = tab +tab_width = 4 diff --git a/external/pgvector/.github/workflows/build.yml b/external/pgvector/.github/workflows/build.yml new file mode 100644 index 00000000000..249400d06cb --- /dev/null +++ b/external/pgvector/.github/workflows/build.yml @@ -0,0 +1,102 @@ +name: build +on: [push, pull_request] +jobs: + ubuntu: + runs-on: ${{ matrix.os }} + if: ${{ !startsWith(github.ref_name, 'mac') && !startsWith(github.ref_name, 'windows') }} + strategy: + fail-fast: false + matrix: + include: + - postgres: 17 + os: ubuntu-22.04 + - postgres: 16 + os: ubuntu-22.04 + - postgres: 15 + os: ubuntu-22.04 + - postgres: 14 + os: ubuntu-22.04 + - postgres: 13 + os: ubuntu-20.04 + - postgres: 12 + os: ubuntu-20.04 + - postgres: 11 + os: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - uses: ankane/setup-postgres@v1 + with: + postgres-version: ${{ matrix.postgres }} + dev-files: true + - run: make + env: + PG_CFLAGS: -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare + - run: | + export PG_CONFIG=`which pg_config` + sudo --preserve-env=PG_CONFIG make 
install + - run: make installcheck + - if: ${{ failure() }} + run: cat regression.diffs + - run: | + sudo apt-get update + sudo apt-get install libipc-run-perl + - run: make prove_installcheck + mac: + runs-on: macos-latest + if: ${{ !startsWith(github.ref_name, 'windows') }} + steps: + - uses: actions/checkout@v4 + - uses: ankane/setup-postgres@v1 + with: + postgres-version: 14 + - run: make + env: + PG_CFLAGS: -Wall -Wextra -Werror -Wno-unused-parameter + - run: make install + - run: make installcheck + - if: ${{ failure() }} + run: cat regression.diffs + - run: | + brew install cpanm + cpanm --notest IPC::Run + wget -q https://github.com/postgres/postgres/archive/refs/tags/REL_14_5.tar.gz + tar xf REL_14_5.tar.gz + - run: make prove_installcheck PROVE_FLAGS="-I ./postgres-REL_14_5/src/test/perl" PERL5LIB="/Users/runner/perl5/lib/perl5" + - run: make clean && /usr/local/opt/llvm@15/bin/scan-build --status-bugs make + windows: + runs-on: windows-latest + if: ${{ !startsWith(github.ref_name, 'mac') }} + steps: + - uses: actions/checkout@v4 + - uses: ankane/setup-postgres@v1 + with: + postgres-version: 14 + - run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && ^ + nmake /NOLOGO /F Makefile.win && ^ + nmake /NOLOGO /F Makefile.win install && ^ + nmake /NOLOGO /F Makefile.win installcheck && ^ + nmake /NOLOGO /F Makefile.win clean && ^ + nmake /NOLOGO /F Makefile.win uninstall + shell: cmd + i386: + if: ${{ !startsWith(github.ref_name, 'mac') && !startsWith(github.ref_name, 'windows') }} + runs-on: ubuntu-latest + container: + image: debian:11 + options: --platform linux/386 + steps: + - run: apt-get update && apt-get install -y build-essential git libipc-run-perl postgresql-13 postgresql-server-dev-13 sudo + - run: service postgresql start + - run: | + git clone https://github.com/${{ github.repository }}.git pgvector + cd pgvector + git fetch origin ${{ github.ref }} + git reset --hard FETCH_HEAD + make + make 
install + chown -R postgres . + sudo -u postgres make installcheck + sudo -u postgres make prove_installcheck + env: + PG_CFLAGS: -Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare diff --git a/external/pgvector/.gitignore b/external/pgvector/.gitignore new file mode 100644 index 00000000000..9c51258de26 --- /dev/null +++ b/external/pgvector/.gitignore @@ -0,0 +1,13 @@ +/dist/ +/log/ +/results/ +/tmp_check/ +/sql/vector--?.?.?.sql +*.o +*.so +*.bc +*.dll +*.dylib +*.obj +*.lib +*.exp diff --git a/external/pgvector/CHANGELOG.md b/external/pgvector/CHANGELOG.md new file mode 100644 index 00000000000..07040d09a5f --- /dev/null +++ b/external/pgvector/CHANGELOG.md @@ -0,0 +1,148 @@ +## 0.5.1 (2023-10-10) + +- Improved performance of HNSW index builds +- Added check for MVCC-compliant snapshot for index scans + +## 0.5.0 (2023-08-28) + +- Added HNSW index type +- Added support for parallel index builds for IVFFlat +- Added `l1_distance` function +- Added element-wise multiplication for vectors +- Added `sum` aggregate +- Improved performance of distance functions +- Fixed out of range results for cosine distance +- Fixed results for NULL and NaN distances for IVFFlat + +## 0.4.4 (2023-06-12) + +- Improved error message for malformed vector literal +- Fixed segmentation fault with text input +- Fixed consecutive delimiters with text input + +## 0.4.3 (2023-06-10) + +- Improved cost estimation +- Improved support for spaces with text input +- Fixed infinite and NaN values with binary input +- Fixed infinite values with vector addition and subtraction +- Fixed infinite values with list centers +- Fixed compilation error when `float8` is pass by reference +- Fixed compilation error on PowerPC +- Fixed segmentation fault with index creation on i386 + +## 0.4.2 (2023-05-13) + +- Added notice when index created with little data +- Fixed dimensions check for some direct function calls +- Fixed installation error with Postgres 12.0-12.2 + +## 0.4.1 (2023-03-21) + +- 
Improved performance of cosine distance +- Fixed index scan count + +## 0.4.0 (2023-01-11) + +If upgrading with Postgres < 13, see [this note](https://github.com/pgvector/pgvector#040). + +- Changed text representation for vector elements to match `real` +- Changed storage for vector from `plain` to `extended` +- Increased max dimensions for vector from 1024 to 16000 +- Increased max dimensions for index from 1024 to 2000 +- Improved accuracy of text parsing for certain inputs +- Added `avg` aggregate for vector +- Added experimental support for Windows +- Dropped support for Postgres 10 + +## 0.3.2 (2022-11-22) + +- Fixed `invalid memory alloc request size` error + +## 0.3.1 (2022-11-02) + +If upgrading from 0.2.7 or 0.3.0, [recreate](https://github.com/pgvector/pgvector#031) all `ivfflat` indexes after upgrading to ensure all data is indexed. + +- Fixed issue with inserts silently corrupting `ivfflat` indexes (introduced in 0.2.7) +- Fixed segmentation fault with index creation when lists > 6500 + +## 0.3.0 (2022-10-15) + +- Added support for Postgres 15 +- Dropped support for Postgres 9.6 + +## 0.2.7 (2022-07-31) + +- Fixed `unexpected data beyond EOF` error + +## 0.2.6 (2022-05-22) + +- Improved performance of index creation for Postgres < 12 + +## 0.2.5 (2022-02-11) + +- Reduced memory usage during index creation +- Fixed index creation exceeding `maintenance_work_mem` +- Fixed error with index creation when lists > 1600 + +## 0.2.4 (2022-02-06) + +- Added support for parallel vacuum +- Fixed issue with index not reusing space + +## 0.2.3 (2022-01-30) + +- Added indexing progress for Postgres 12+ +- Improved interrupt handling during index creation + +## 0.2.2 (2022-01-15) + +- Fixed compilation error on Mac ARM + +## 0.2.1 (2022-01-02) + +- Fixed `operator is not unique` error + +## 0.2.0 (2021-10-03) + +- Added support for Postgres 14 + +## 0.1.8 (2021-09-07) + +- Added cast for `vector` to `real[]` + +## 0.1.7 (2021-06-13) + +- Added cast for `numeric[]` to 
`vector` + +## 0.1.6 (2021-06-09) + +- Fixed segmentation fault with `COUNT` + +## 0.1.5 (2021-05-25) + +- Reduced memory usage during index creation + +## 0.1.4 (2021-05-09) + +- Fixed kmeans for inner product +- Fixed multiple definition error with GCC 10 + +## 0.1.3 (2021-05-06) + +- Added Dockerfile +- Fixed version + +## 0.1.2 (2021-04-26) + +- Vectorized distance calculations +- Improved cost estimation + +## 0.1.1 (2021-04-25) + +- Added binary representation for `COPY` +- Marked functions as `PARALLEL SAFE` + +## 0.1.0 (2021-04-20) + +- First release diff --git a/external/pgvector/Dockerfile b/external/pgvector/Dockerfile new file mode 100644 index 00000000000..f3ded45905b --- /dev/null +++ b/external/pgvector/Dockerfile @@ -0,0 +1,20 @@ +ARG PG_MAJOR=15 +FROM postgres:$PG_MAJOR +ARG PG_MAJOR + +COPY . /tmp/pgvector + +RUN apt-get update && \ + apt-mark hold locales && \ + apt-get install -y --no-install-recommends build-essential postgresql-server-dev-$PG_MAJOR && \ + cd /tmp/pgvector && \ + make clean && \ + make OPTFLAGS="" && \ + make install && \ + mkdir /usr/share/doc/pgvector && \ + cp LICENSE README.md /usr/share/doc/pgvector && \ + rm -r /tmp/pgvector && \ + apt-get remove -y build-essential postgresql-server-dev-$PG_MAJOR && \ + apt-get autoremove -y && \ + apt-mark unhold locales && \ + rm -rf /var/lib/apt/lists/* diff --git a/external/pgvector/LICENSE b/external/pgvector/LICENSE new file mode 100644 index 00000000000..483e2b9ae90 --- /dev/null +++ b/external/pgvector/LICENSE @@ -0,0 +1,20 @@ +Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + +Portions Copyright (c) 1994, The Regents of the University of California + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement +is hereby granted, provided that the above copyright notice and this +paragraph and the following two paragraphs appear in all copies. 
+ +IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR +DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING +LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS +DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO +PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. diff --git a/external/pgvector/META.json b/external/pgvector/META.json new file mode 100644 index 00000000000..38d3919f5d2 --- /dev/null +++ b/external/pgvector/META.json @@ -0,0 +1,49 @@ +{ + "name": "vector", + "abstract": "Open-source vector similarity search for Postgres", + "description": "Supports L2 distance, inner product, and cosine distance", + "version": "0.5.1", + "maintainer": [ + "Andrew Kane " + ], + "license": { + "PostgreSQL": "http://www.postgresql.org/about/licence" + }, + "prereqs": { + "runtime": { + "requires": { + "PostgreSQL": "11.0.0" + } + } + }, + "provides": { + "vector": { + "file": "sql/vector.sql", + "docfile": "README.md", + "version": "0.5.1", + "abstract": "Open-source vector similarity search for Postgres" + } + }, + "resources": { + "homepage": "https://github.com/pgvector/pgvector", + "bugtracker": { + "web": "https://github.com/pgvector/pgvector/issues" + }, + "repository": { + "url": "https://github.com/pgvector/pgvector.git", + "web": "https://github.com/pgvector/pgvector", + "type": "git" + } + }, + "generated_by": "Andrew Kane", + "meta-spec": { + "version": "1.0.0", + "url": "http://pgxn.org/meta/spec.txt" + }, + "tags": [ + "vectors", + "datatype", + "nearest neighbor search", + "approximate nearest neighbors" + ] +} diff --git 
a/external/pgvector/Makefile b/external/pgvector/Makefile new file mode 100644 index 00000000000..5386fdd0f6f --- /dev/null +++ b/external/pgvector/Makefile @@ -0,0 +1,67 @@ +EXTENSION = vector +EXTVERSION = 0.5.1 + +MODULE_big = vector +DATA = $(wildcard sql/*--*.sql) +OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o +HEADERS = src/vector.h + +TESTS = $(wildcard test/sql/*.sql) +REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS)) +REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION) + +TAP_TEST = 1 + +# Mac ARM doesn't support -march=native +ifeq ($(shell uname -s), Darwin) + ifeq ($(shell uname -p), arm) + # no difference with -march=armv8.5-a + OPTFLAGS = + endif +endif + +# For auto-vectorization: +# - GCC (needs -ftree-vectorize OR -O3) - https://gcc.gnu.org/projects/tree-ssa/vectorization.html +# - Clang (could use pragma instead) - https://llvm.org/docs/Vectorizers.html +PG_CFLAGS += $(OPTFLAGS) -ftree-vectorize -fassociative-math -fno-signed-zeros -fno-trapping-math + +all: sql/$(EXTENSION)--$(EXTVERSION).sql + +sql/$(EXTENSION)--$(EXTVERSION).sql: sql/$(EXTENSION).sql + cp $< $@ + +EXTRA_CLEAN = sql/$(EXTENSION)--$(EXTVERSION).sql + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +top_builddir = ../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +# for Postgres 15 +PROVE_FLAGS += -I ./test/perl + +prove_installcheck: + rm -rf $(CURDIR)/tmp_check + cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" PGPORT='6$(DEF_PGPORT)' PG_REGRESS='$(top_builddir)/src/test/regress/pg_regress' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),test/t/*.pl) + +.PHONY: dist + +dist: + mkdir -p dist + git archive --format zip --prefix=$(EXTENSION)-$(EXTVERSION)/ --output dist/$(EXTENSION)-$(EXTVERSION).zip master + +.PHONY: docker + +docker: + docker build --pull --no-cache --platform linux/amd64 -t ankane/pgvector:latest . + +.PHONY: docker-release + +docker-release: + docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 -t ankane/pgvector:latest . + docker buildx build --push --platform linux/amd64,linux/arm64 -t ankane/pgvector:v$(EXTVERSION) . diff --git a/external/pgvector/Makefile.win b/external/pgvector/Makefile.win new file mode 100644 index 00000000000..fbe57683fdd --- /dev/null +++ b/external/pgvector/Makefile.win @@ -0,0 +1,75 @@ +EXTENSION = vector +EXTVERSION = 0.5.1 + +OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj +HEADERS = src\vector.h + +REGRESS = btree cast copy functions input ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_options ivfflat_unlogged +REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION) + +# For /arch flags +# https://learn.microsoft.com/en-us/cpp/build/reference/arch-minimum-cpu-architecture +OPTFLAGS = + +# For auto-vectorization: +# - MSVC (needs /O2 /fp:fast) - https://learn.microsoft.com/en-us/cpp/parallel/auto-parallelization-and-auto-vectorization?#auto-vectorizer +PG_CFLAGS = $(PG_CFLAGS) $(OPTFLAGS) /O2 /fp:fast + +# 
Debug MSVC auto-vectorization +# https://learn.microsoft.com/en-us/cpp/error-messages/tool-errors/vectorizer-and-parallelizer-messages +# PG_CFLAGS = $(PG_CFLAGS) /Qvec-report:2 + +all: sql\$(EXTENSION)--$(EXTVERSION).sql + +sql\$(EXTENSION)--$(EXTVERSION).sql: sql\$(EXTENSION).sql + copy sql\$(EXTENSION).sql $@ + +# TODO use pg_config +!ifndef PGROOT +!error PGROOT is not set +!endif +BINDIR = $(PGROOT)\bin +INCLUDEDIR = $(PGROOT)\include +INCLUDEDIR_SERVER = $(PGROOT)\include\server +LIBDIR = $(PGROOT)\lib +PKGLIBDIR = $(PGROOT)\lib +SHAREDIR = $(PGROOT)\share + +CFLAGS = /nologo /I"$(INCLUDEDIR_SERVER)\port\win32_msvc" /I"$(INCLUDEDIR_SERVER)\port\win32" /I"$(INCLUDEDIR_SERVER)" /I"$(INCLUDEDIR)" + +CFLAGS = $(CFLAGS) $(PG_CFLAGS) + +SHLIB = $(EXTENSION).dll + +LIBS = "$(LIBDIR)\postgres.lib" + +.c.obj: + $(CC) $(CFLAGS) /c $< /Fo$@ + +$(SHLIB): $(OBJS) + $(CC) $(CFLAGS) $(OBJS) $(LIBS) /link /DLL /OUT:$(SHLIB) + +all: $(SHLIB) + +install: + copy $(SHLIB) "$(PKGLIBDIR)" + copy $(EXTENSION).control "$(SHAREDIR)\extension" + copy sql\$(EXTENSION)--*.sql "$(SHAREDIR)\extension" + mkdir "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)" + copy $(HEADERS) "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)" + +installcheck: + "$(BINDIR)\pg_regress" --bindir="$(BINDIR)" $(REGRESS_OPTS) $(REGRESS) + +uninstall: + del /f "$(PKGLIBDIR)\$(SHLIB)" + del /f "$(SHAREDIR)\extension\$(EXTENSION).control" + del /f "$(SHAREDIR)\extension\$(EXTENSION)--*.sql" + del /f "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)\*.h" + rmdir "$(INCLUDEDIR_SERVER)\extension\$(EXTENSION)" + +clean: + del /f $(SHLIB) $(EXTENSION).lib $(EXTENSION).exp + del /f $(OBJS) + del /f sql\$(EXTENSION)--$(EXTVERSION).sql + del /f /s /q results regression.diffs regression.out tmp_check tmp_check_iso log output_iso diff --git a/external/pgvector/README.md b/external/pgvector/README.md new file mode 100644 index 00000000000..20a151689ee --- /dev/null +++ b/external/pgvector/README.md @@ -0,0 +1,709 @@ +# pgvector + 
+Open-source vector similarity search for Postgres + +Store your vectors with the rest of your data. Supports: + +- exact and approximate nearest neighbor search +- L2 distance, inner product, and cosine distance +- any [language](#languages) with a Postgres client + +Plus [ACID](https://en.wikipedia.org/wiki/ACID) compliance, point-in-time recovery, JOINs, and all of the other [great features](https://www.postgresql.org/about/) of Postgres + +[![Build Status](https://github.com/pgvector/pgvector/workflows/build/badge.svg?branch=master)](https://github.com/pgvector/pgvector/actions) + +## Installation + +Compile and install the extension (supports Postgres 11+) + +```sh +cd /tmp +git clone --branch v0.5.1 https://github.com/pgvector/pgvector.git +cd pgvector +make +make install # may need sudo +``` + +See the [installation notes](#installation-notes) if you run into issues + +You can also install it with [Docker](#docker), [Homebrew](#homebrew), [PGXN](#pgxn), [APT](#apt), [Yum](#yum), or [conda-forge](#conda-forge), and it comes preinstalled with [Postgres.app](#postgresapp) and many [hosted providers](#hosted-postgres) + +## Getting Started + +Enable the extension (do this once in each database where you want to use it) + +```tsql +CREATE EXTENSION vector; +``` + +Create a vector column with 3 dimensions + +```sql +CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3)); +``` + +Insert vectors + +```sql +INSERT INTO items (embedding) VALUES ('[1,2,3]'), ('[4,5,6]'); +``` + +Get the nearest neighbors by L2 distance + +```sql +SELECT * FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 5; +``` + +Also supports inner product (`<#>`) and cosine distance (`<=>`) + +Note: `<#>` returns the negative inner product since Postgres only supports `ASC` order index scans on operators + +## Storing + +Create a new table with a vector column + +```sql +CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3)); +``` + +Or add a vector column to an existing 
table + +```sql +ALTER TABLE items ADD COLUMN embedding vector(3); +``` + +Insert vectors + +```sql +INSERT INTO items (embedding) VALUES ('[1,2,3]'), ('[4,5,6]'); +``` + +Upsert vectors + +```sql +INSERT INTO items (id, embedding) VALUES (1, '[1,2,3]'), (2, '[4,5,6]') + ON CONFLICT (id) DO UPDATE SET embedding = EXCLUDED.embedding; +``` + +Update vectors + +```sql +UPDATE items SET embedding = '[1,2,3]' WHERE id = 1; +``` + +Delete vectors + +```sql +DELETE FROM items WHERE id = 1; +``` + +## Querying + +Get the nearest neighbors to a vector + +```sql +SELECT * FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 5; +``` + +Get the nearest neighbors to a row + +```sql +SELECT * FROM items WHERE id != 1 ORDER BY embedding <-> (SELECT embedding FROM items WHERE id = 1) LIMIT 5; +``` + +Get rows within a certain distance + +```sql +SELECT * FROM items WHERE embedding <-> '[3,1,2]' < 5; +``` + +Note: Combine with `ORDER BY` and `LIMIT` to use an index + +#### Distances + +Get the distance + +```sql +SELECT embedding <-> '[3,1,2]' AS distance FROM items; +``` + +For inner product, multiply by -1 (since `<#>` returns the negative inner product) + +```tsql +SELECT (embedding <#> '[3,1,2]') * -1 AS inner_product FROM items; +``` + +For cosine similarity, use 1 - cosine distance + +```sql +SELECT 1 - (embedding <=> '[3,1,2]') AS cosine_similarity FROM items; +``` + +#### Aggregates + +Average vectors + +```sql +SELECT AVG(embedding) FROM items; +``` + +Average groups of vectors + +```sql +SELECT category_id, AVG(embedding) FROM items GROUP BY category_id; +``` + +## Indexing + +By default, pgvector performs exact nearest neighbor search, which provides perfect recall. + +You can add an index to use approximate nearest neighbor search, which trades some recall for speed. Unlike typical indexes, you will see different results for queries after adding an approximate index. 
+ +Supported index types are: + +- [IVFFlat](#ivfflat) +- [HNSW](#hnsw) - added in 0.5.0 + +## IVFFlat + +An IVFFlat index divides vectors into lists, and then searches a subset of those lists that are closest to the query vector. It has faster build times and uses less memory than HNSW, but has lower query performance (in terms of speed-recall tradeoff). + +Three keys to achieving good recall are: + +1. Create the index *after* the table has some data +2. Choose an appropriate number of lists - a good place to start is `rows / 1000` for up to 1M rows and `sqrt(rows)` for over 1M rows +3. When querying, specify an appropriate number of [probes](#query-options) (higher is better for recall, lower is better for speed) - a good place to start is `sqrt(lists)` + +Add an index for each distance function you want to use. + +L2 distance + +```sql +CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100); +``` + +Inner product + +```sql +CREATE INDEX ON items USING ivfflat (embedding vector_ip_ops) WITH (lists = 100); +``` + +Cosine distance + +```sql +CREATE INDEX ON items USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); +``` + +Vectors with up to 2,000 dimensions can be indexed. + +### Query Options + +Specify the number of probes (1 by default) + +```sql +SET ivfflat.probes = 10; +``` + +A higher value provides better recall at the cost of speed, and it can be set to the number of lists for exact nearest neighbor search (at which point the planner won’t use the index) + +Use `SET LOCAL` inside a transaction to set it for a single query + +```sql +BEGIN; +SET LOCAL ivfflat.probes = 10; +SELECT ... 
+COMMIT; +``` + +### Indexing Progress + +Check [indexing progress](https://www.postgresql.org/docs/current/progress-reporting.html#CREATE-INDEX-PROGRESS-REPORTING) with Postgres 12+ + +```sql +SELECT phase, round(100.0 * tuples_done / nullif(tuples_total, 0), 1) AS "%" FROM pg_stat_progress_create_index; +``` + +The phases for IVFFlat are: + +1. `initializing` +2. `performing k-means` +3. `assigning tuples` +4. `loading tuples` + +Note: `%` is only populated during the `loading tuples` phase + +## HNSW + +An HNSW index creates a multilayer graph. It has slower build times and uses more memory than IVFFlat, but has better query performance (in terms of speed-recall tradeoff). There’s no training step like IVFFlat, so the index can be created without any data in the table. + +Add an index for each distance function you want to use. + +L2 distance + +```sql +CREATE INDEX ON items USING hnsw (embedding vector_l2_ops); +``` + +Inner product + +```sql +CREATE INDEX ON items USING hnsw (embedding vector_ip_ops); +``` + +Cosine distance + +```sql +CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops); +``` + +Vectors with up to 2,000 dimensions can be indexed. + +### Index Options + +Specify HNSW parameters + +- `m` - the max number of connections per layer (16 by default) +- `ef_construction` - the size of the dynamic candidate list for constructing the graph (64 by default) + +```sql +CREATE INDEX ON items USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64); +``` + +### Query Options + +Specify the size of the dynamic candidate list for search (40 by default) + +```sql +SET hnsw.ef_search = 100; +``` + +A higher value provides better recall at the cost of speed. + +Use `SET LOCAL` inside a transaction to set it for a single query + +```sql +BEGIN; +SET LOCAL hnsw.ef_search = 100; +SELECT ... 
+COMMIT; +``` + +### Indexing Progress + +Check [indexing progress](https://www.postgresql.org/docs/current/progress-reporting.html#CREATE-INDEX-PROGRESS-REPORTING) with Postgres 12+ + +```sql +SELECT phase, round(100.0 * blocks_done / nullif(blocks_total, 0), 1) AS "%" FROM pg_stat_progress_create_index; +``` + +The phases for HNSW are: + +1. `initializing` +2. `loading tuples` + +## Filtering + +There are a few ways to index nearest neighbor queries with a `WHERE` clause + +```sql +SELECT * FROM items WHERE category_id = 123 ORDER BY embedding <-> '[3,1,2]' LIMIT 5; +``` + +Create an index on one [or more](https://www.postgresql.org/docs/current/indexes-multicolumn.html) of the `WHERE` columns for exact search + +```sql +CREATE INDEX ON items (category_id); +``` + +Or a [partial index](https://www.postgresql.org/docs/current/indexes-partial.html) on the vector column for approximate search + +```sql +CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100) + WHERE (category_id = 123); +``` + +Use [partitioning](https://www.postgresql.org/docs/current/ddl-partitioning.html) for approximate search on many different values of the `WHERE` columns + +```sql +CREATE TABLE items (embedding vector(3), category_id int) PARTITION BY LIST(category_id); +``` + +## Hybrid Search + +Use together with Postgres [full-text search](https://www.postgresql.org/docs/current/textsearch-intro.html) for hybrid search ([Python example](https://github.com/pgvector/pgvector-python/blob/master/examples/hybrid_search.py)). + +```sql +SELECT id, content FROM items, plainto_tsquery('hello search') query + WHERE textsearch @@ query ORDER BY ts_rank_cd(textsearch, query) DESC LIMIT 5; +``` + +## Performance + +Use `EXPLAIN ANALYZE` to debug performance. + +```sql +EXPLAIN ANALYZE SELECT * FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 5; +``` + +### Exact Search + +To speed up queries without an index, increase `max_parallel_workers_per_gather`. 
+ +```sql +SET max_parallel_workers_per_gather = 4; +``` + +If vectors are normalized to length 1 (like [OpenAI embeddings](https://platform.openai.com/docs/guides/embeddings/which-distance-function-should-i-use)), use inner product for best performance. + +```tsql +SELECT * FROM items ORDER BY embedding <#> '[3,1,2]' LIMIT 5; +``` + +### Approximate Search + +To speed up queries with an IVFFlat index, increase the number of inverted lists (at the expense of recall). + +```sql +CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 1000); +``` + +## Languages + +Use pgvector from any language with a Postgres client. You can even generate and store vectors in one language and query them in another. + +Language | Libraries / Examples +--- | --- +C | [pgvector-c](https://github.com/pgvector/pgvector-c) +C++ | [pgvector-cpp](https://github.com/pgvector/pgvector-cpp) +C# | [pgvector-dotnet](https://github.com/pgvector/pgvector-dotnet) +Crystal | [pgvector-crystal](https://github.com/pgvector/pgvector-crystal) +Dart | [pgvector-dart](https://github.com/pgvector/pgvector-dart) +Elixir | [pgvector-elixir](https://github.com/pgvector/pgvector-elixir) +Go | [pgvector-go](https://github.com/pgvector/pgvector-go) +Haskell | [pgvector-haskell](https://github.com/pgvector/pgvector-haskell) +Java, Kotlin, Groovy, Scala | [pgvector-java](https://github.com/pgvector/pgvector-java) +JavaScript, TypeScript | [pgvector-node](https://github.com/pgvector/pgvector-node) +Julia | [pgvector-julia](https://github.com/pgvector/pgvector-julia) +Lua | [pgvector-lua](https://github.com/pgvector/pgvector-lua) +Nim | [pgvector-nim](https://github.com/pgvector/pgvector-nim) +Perl | [pgvector-perl](https://github.com/pgvector/pgvector-perl) +PHP | [pgvector-php](https://github.com/pgvector/pgvector-php) +Python | [pgvector-python](https://github.com/pgvector/pgvector-python) +R | [pgvector-r](https://github.com/pgvector/pgvector-r) +Ruby | 
[pgvector-ruby](https://github.com/pgvector/pgvector-ruby), [Neighbor](https://github.com/ankane/neighbor) +Rust | [pgvector-rust](https://github.com/pgvector/pgvector-rust) +Swift | [pgvector-swift](https://github.com/pgvector/pgvector-swift) +Zig | [pgvector-zig](https://github.com/pgvector/pgvector-zig) + +## Frequently Asked Questions + +#### How many vectors can be stored in a single table? + +A non-partitioned table has a limit of 32 TB by default in Postgres. A partitioned table can have thousands of partitions of that size. + +#### Is replication supported? + +Yes, pgvector uses the write-ahead log (WAL), which allows for replication and point-in-time recovery. + +#### What if I want to index vectors with more than 2,000 dimensions? + +You’ll need to use [dimensionality reduction](https://en.wikipedia.org/wiki/Dimensionality_reduction) at the moment. + +## Troubleshooting + +#### Why isn’t a query using an index? + +The cost estimation in pgvector < 0.4.3 does not always work well with the planner. You can encourage the planner to use an index for a query with: + +```sql +BEGIN; +SET LOCAL enable_seqscan = off; +SELECT ... +COMMIT; +``` + +#### Why isn’t a query using a parallel table scan? + +The planner doesn’t consider [out-of-line storage](https://www.postgresql.org/docs/current/storage-toast.html) in cost estimates, which can make a serial scan look cheaper. You can reduce the cost of a parallel scan for a query with: + +```sql +BEGIN; +SET LOCAL min_parallel_table_scan_size = 1; +SET LOCAL parallel_setup_cost = 1; +SELECT ... +COMMIT; +``` + +or choose to store vectors inline: + +```sql +ALTER TABLE items ALTER COLUMN embedding SET STORAGE PLAIN; +``` + +#### Why are there less results for a query after adding an IVFFlat index? + +The index was likely created with too little data for the number of lists. Drop the index until the table has more data. 
+ +```sql +DROP INDEX index_name; +``` + +## Reference + +### Vector Type + +Each vector takes `4 * dimensions + 8` bytes of storage. Each element is a single precision floating-point number (like the `real` type in Postgres), and all elements must be finite (no `NaN`, `Infinity` or `-Infinity`). Vectors can have up to 16,000 dimensions. + +### Vector Operators + +Operator | Description | Added +--- | --- | --- +\+ | element-wise addition | +\- | element-wise subtraction | +\* | element-wise multiplication | 0.5.0 +<-> | Euclidean distance | +<#> | negative inner product | +<=> | cosine distance | + +### Vector Functions + +Function | Description | Added +--- | --- | --- +cosine_distance(vector, vector) → double precision | cosine distance | +inner_product(vector, vector) → double precision | inner product | +l2_distance(vector, vector) → double precision | Euclidean distance | +l1_distance(vector, vector) → double precision | taxicab distance | 0.5.0 +vector_dims(vector) → integer | number of dimensions | +vector_norm(vector) → double precision | Euclidean norm | + +### Aggregate Functions + +Function | Description | Added +--- | --- | --- +avg(vector) → vector | average | +sum(vector) → vector | sum | 0.5.0 + +## Installation Notes + +### Postgres Location + +If your machine has multiple Postgres installations, specify the path to [pg_config](https://www.postgresql.org/docs/current/app-pgconfig.html) with: + +```sh +export PG_CONFIG=/Applications/Postgres.app/Contents/Versions/latest/bin/pg_config +``` + +Then re-run the installation instructions (run `make clean` before `make` if needed). If `sudo` is needed for `make install`, use: + +```sh +sudo --preserve-env=PG_CONFIG make install +``` + +### Missing Header + +If compilation fails with `fatal error: postgres.h: No such file or directory`, make sure Postgres development files are installed on the server. 
+ +For Ubuntu and Debian, use: + +```sh +sudo apt install postgresql-server-dev-15 +``` + +Note: Replace `15` with your Postgres server version + +### Windows + +Support for Windows is currently experimental. Ensure [C++ support in Visual Studio](https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line?view=msvc-170#download-and-install-the-tools) is installed, and run: + +```cmd +call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat" +``` + +Note: The exact path will vary depending on your Visual Studio version and edition + +Then use `nmake` to build: + +```cmd +set "PGROOT=C:\Program Files\PostgreSQL\15" +git clone --branch v0.5.1 https://github.com/pgvector/pgvector.git +cd pgvector +nmake /F Makefile.win +nmake /F Makefile.win install +``` + +## Additional Installation Methods + +### Docker + +Get the [Docker image](https://hub.docker.com/r/ankane/pgvector) with: + +```sh +docker pull ankane/pgvector +``` + +This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (run it the same way). + +You can also build the image manually: + +```sh +git clone --branch v0.5.1 https://github.com/pgvector/pgvector.git +cd pgvector +docker build --build-arg PG_MAJOR=15 -t myuser/pgvector . +``` + +### Homebrew + +With Homebrew Postgres, you can use: + +```sh +brew install pgvector +``` + +Note: This only adds it to the `postgresql@14` formula + +### PGXN + +Install from the [PostgreSQL Extension Network](https://pgxn.org/dist/vector) with: + +```sh +pgxn install vector +``` + +### APT + +Debian and Ubuntu packages are available from the [PostgreSQL APT Repository](https://wiki.postgresql.org/wiki/Apt). 
Follow the [setup instructions](https://wiki.postgresql.org/wiki/Apt#Quickstart) and run: + +```sh +sudo apt install postgresql-15-pgvector +``` + +Note: Replace `15` with your Postgres server version + +### Yum + +RPM packages are available from the [PostgreSQL Yum Repository](https://yum.postgresql.org/). Follow the [setup instructions](https://www.postgresql.org/download/linux/redhat/) for your distribution and run: + +```sh +sudo yum install pgvector_15 +# or +sudo dnf install pgvector_15 +``` + +Note: Replace `15` with your Postgres server version + +### conda-forge + +With Conda Postgres, install from [conda-forge](https://anaconda.org/conda-forge/pgvector) with: + +```sh +conda install -c conda-forge pgvector +``` + +This method is [community-maintained](https://github.com/conda-forge/pgvector-feedstock) by [@mmcauliffe](https://github.com/mmcauliffe) + +### Postgres.app + +Download the [latest release](https://postgresapp.com/downloads.html) with Postgres 15+. + +## Hosted Postgres + +pgvector is available on [these providers](https://github.com/pgvector/pgvector/issues/54). + +## Upgrading + +Install the latest version. Then in each database you want to upgrade, run: + +```sql +ALTER EXTENSION vector UPDATE; +``` + +You can check the version in the current database with: + +```sql +SELECT extversion FROM pg_extension WHERE extname = 'vector'; +``` + +## Upgrade Notes + +### 0.4.0 + +If upgrading with Postgres < 13, remove this line from `sql/vector--0.3.2--0.4.0.sql`: + +```sql +ALTER TYPE vector SET (STORAGE = extended); +``` + +Then run `make install` and `ALTER EXTENSION vector UPDATE;`. + +### 0.3.1 + +If upgrading from 0.2.7 or 0.3.0, recreate all `ivfflat` indexes after upgrading to ensure all data is indexed. 
+ +```sql +-- Postgres 12+ +REINDEX INDEX CONCURRENTLY index_name; + +-- Postgres < 12 +CREATE INDEX CONCURRENTLY temp_name ON table USING ivfflat (column opclass); +DROP INDEX CONCURRENTLY index_name; +ALTER INDEX temp_name RENAME TO index_name; +``` + +## Thanks + +Thanks to: + +- [PASE: PostgreSQL Ultra-High-Dimensional Approximate Nearest Neighbor Search Extension](https://dl.acm.org/doi/pdf/10.1145/3318464.3386131) +- [Faiss: A Library for Efficient Similarity Search and Clustering of Dense Vectors](https://github.com/facebookresearch/faiss) +- [Using the Triangle Inequality to Accelerate k-means](https://cdn.aaai.org/ICML/2003/ICML03-022.pdf) +- [k-means++: The Advantage of Careful Seeding](https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf) +- [Concept Decompositions for Large Sparse Text Data using Clustering](https://www.cs.utexas.edu/users/inderjit/public_papers/concept_mlj.pdf) +- [Efficient and Robust Approximate Nearest Neighbor Search using Hierarchical Navigable Small World Graphs](https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf) + +## History + +View the [changelog](https://github.com/pgvector/pgvector/blob/master/CHANGELOG.md) + +## Contributing + +Everyone is encouraged to help improve this project. 
Here are a few ways you can help: + +- [Report bugs](https://github.com/pgvector/pgvector/issues) +- Fix bugs and [submit pull requests](https://github.com/pgvector/pgvector/pulls) +- Write, clarify, or fix documentation +- Suggest or add new features + +To get started with development: + +```sh +git clone https://github.com/pgvector/pgvector.git +cd pgvector +make +make install +``` + +To run all tests: + +```sh +make installcheck # regression tests +make prove_installcheck # TAP tests +``` + +To run single tests: + +```sh +make installcheck REGRESS=functions # regression test +make prove_installcheck PROVE_TESTS=test/t/001_wal.pl # TAP test +``` + +To enable benchmarking: + +```sh +make clean && PG_CFLAGS=-DIVFFLAT_BENCH make && make install +``` + +Resources for contributors + +- [Extension Building Infrastructure](https://www.postgresql.org/docs/current/extend-pgxs.html) +- [Index Access Method Interface Definition](https://www.postgresql.org/docs/current/indexam.html) +- [Generic WAL Records](https://www.postgresql.org/docs/current/generic-wal.html) diff --git a/external/pgvector/sql/vector--0.1.0--0.1.1.sql b/external/pgvector/sql/vector--0.1.0--0.1.1.sql new file mode 100644 index 00000000000..959a0d72261 --- /dev/null +++ b/external/pgvector/sql/vector--0.1.0--0.1.1.sql @@ -0,0 +1,39 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.1.1'" to load this file. 
\quit + +CREATE FUNCTION vector_recv(internal, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION vector_send(vector) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; + +ALTER TYPE vector SET ( RECEIVE = vector_recv, SEND = vector_send ); + +-- functions + +ALTER FUNCTION vector_in(cstring, oid, integer) PARALLEL SAFE; +ALTER FUNCTION vector_out(vector) PARALLEL SAFE; +ALTER FUNCTION vector_typmod_in(cstring[]) PARALLEL SAFE; +ALTER FUNCTION vector_recv(internal, oid, integer) PARALLEL SAFE; +ALTER FUNCTION vector_send(vector) PARALLEL SAFE; +ALTER FUNCTION l2_distance(vector, vector) PARALLEL SAFE; +ALTER FUNCTION inner_product(vector, vector) PARALLEL SAFE; +ALTER FUNCTION cosine_distance(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_dims(vector) PARALLEL SAFE; +ALTER FUNCTION vector_norm(vector) PARALLEL SAFE; +ALTER FUNCTION vector_add(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_sub(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_lt(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_le(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_eq(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_ne(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_ge(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_gt(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_cmp(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_l2_squared_distance(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_negative_inner_product(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector_spherical_distance(vector, vector) PARALLEL SAFE; +ALTER FUNCTION vector(vector, integer, boolean) PARALLEL SAFE; +ALTER FUNCTION array_to_vector(integer[], integer, boolean) PARALLEL SAFE; +ALTER FUNCTION array_to_vector(real[], integer, boolean) PARALLEL SAFE; +ALTER FUNCTION array_to_vector(double precision[], integer, boolean) PARALLEL SAFE; diff --git a/external/pgvector/sql/vector--0.1.1--0.1.3.sql 
b/external/pgvector/sql/vector--0.1.1--0.1.3.sql new file mode 100644 index 00000000000..391835f865c --- /dev/null +++ b/external/pgvector/sql/vector--0.1.1--0.1.3.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.1.3'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.1.3--0.1.4.sql b/external/pgvector/sql/vector--0.1.3--0.1.4.sql new file mode 100644 index 00000000000..56ab0eb501c --- /dev/null +++ b/external/pgvector/sql/vector--0.1.3--0.1.4.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.1.4'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.1.4--0.1.5.sql b/external/pgvector/sql/vector--0.1.4--0.1.5.sql new file mode 100644 index 00000000000..3996b2dcd84 --- /dev/null +++ b/external/pgvector/sql/vector--0.1.4--0.1.5.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.1.5'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.1.5--0.1.6.sql b/external/pgvector/sql/vector--0.1.5--0.1.6.sql new file mode 100644 index 00000000000..fdb605b0b95 --- /dev/null +++ b/external/pgvector/sql/vector--0.1.5--0.1.6.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.1.6'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.1.6--0.1.7.sql b/external/pgvector/sql/vector--0.1.6--0.1.7.sql new file mode 100644 index 00000000000..fcd32f45a90 --- /dev/null +++ b/external/pgvector/sql/vector--0.1.6--0.1.7.sql @@ -0,0 +1,8 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.1.7'" to load this file. 
\quit + +CREATE FUNCTION array_to_vector(numeric[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE CAST (numeric[] AS vector) + WITH FUNCTION array_to_vector(numeric[], integer, boolean) AS IMPLICIT; diff --git a/external/pgvector/sql/vector--0.1.7--0.1.8.sql b/external/pgvector/sql/vector--0.1.7--0.1.8.sql new file mode 100644 index 00000000000..5a387a76b6d --- /dev/null +++ b/external/pgvector/sql/vector--0.1.7--0.1.8.sql @@ -0,0 +1,8 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.1.8'" to load this file. \quit + +CREATE FUNCTION vector_to_float4(vector, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE CAST (vector AS real[]) + WITH FUNCTION vector_to_float4(vector, integer, boolean) AS IMPLICIT; diff --git a/external/pgvector/sql/vector--0.1.8--0.2.0.sql b/external/pgvector/sql/vector--0.1.8--0.2.0.sql new file mode 100644 index 00000000000..1ce0d1efd65 --- /dev/null +++ b/external/pgvector/sql/vector--0.1.8--0.2.0.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.2.0'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.2.0--0.2.1.sql b/external/pgvector/sql/vector--0.2.0--0.2.1.sql new file mode 100644 index 00000000000..47606deb3ad --- /dev/null +++ b/external/pgvector/sql/vector--0.2.0--0.2.1.sql @@ -0,0 +1,19 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.2.1'" to load this file. 
\quit + +DROP CAST (integer[] AS vector); +DROP CAST (real[] AS vector); +DROP CAST (double precision[] AS vector); +DROP CAST (numeric[] AS vector); + +CREATE CAST (integer[] AS vector) + WITH FUNCTION array_to_vector(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS vector) + WITH FUNCTION array_to_vector(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS vector) + WITH FUNCTION array_to_vector(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS vector) + WITH FUNCTION array_to_vector(numeric[], integer, boolean) AS ASSIGNMENT; diff --git a/external/pgvector/sql/vector--0.2.1--0.2.2.sql b/external/pgvector/sql/vector--0.2.1--0.2.2.sql new file mode 100644 index 00000000000..697c1408d70 --- /dev/null +++ b/external/pgvector/sql/vector--0.2.1--0.2.2.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.2.2'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.2.2--0.2.3.sql b/external/pgvector/sql/vector--0.2.2--0.2.3.sql new file mode 100644 index 00000000000..32b07dc228f --- /dev/null +++ b/external/pgvector/sql/vector--0.2.2--0.2.3.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.2.3'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.2.3--0.2.4.sql b/external/pgvector/sql/vector--0.2.3--0.2.4.sql new file mode 100644 index 00000000000..5d1b34168ba --- /dev/null +++ b/external/pgvector/sql/vector--0.2.3--0.2.4.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.2.4'" to load this file. 
\quit diff --git a/external/pgvector/sql/vector--0.2.4--0.2.5.sql b/external/pgvector/sql/vector--0.2.4--0.2.5.sql new file mode 100644 index 00000000000..b372ed0c8c3 --- /dev/null +++ b/external/pgvector/sql/vector--0.2.4--0.2.5.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.2.5'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.2.5--0.2.6.sql b/external/pgvector/sql/vector--0.2.5--0.2.6.sql new file mode 100644 index 00000000000..e68c1ac0374 --- /dev/null +++ b/external/pgvector/sql/vector--0.2.5--0.2.6.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.2.6'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.2.6--0.2.7.sql b/external/pgvector/sql/vector--0.2.6--0.2.7.sql new file mode 100644 index 00000000000..227c2171c41 --- /dev/null +++ b/external/pgvector/sql/vector--0.2.6--0.2.7.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.2.7'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.2.7--0.3.0.sql b/external/pgvector/sql/vector--0.2.7--0.3.0.sql new file mode 100644 index 00000000000..7e62d39e728 --- /dev/null +++ b/external/pgvector/sql/vector--0.2.7--0.3.0.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.3.0'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.3.0--0.3.1.sql b/external/pgvector/sql/vector--0.3.0--0.3.1.sql new file mode 100644 index 00000000000..f1a8fbce5ae --- /dev/null +++ b/external/pgvector/sql/vector--0.3.0--0.3.1.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.3.1'" to load this file. 
\quit diff --git a/external/pgvector/sql/vector--0.3.1--0.3.2.sql b/external/pgvector/sql/vector--0.3.1--0.3.2.sql new file mode 100644 index 00000000000..c3461a10339 --- /dev/null +++ b/external/pgvector/sql/vector--0.3.1--0.3.2.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.3.2'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.3.2--0.4.0.sql b/external/pgvector/sql/vector--0.3.2--0.4.0.sql new file mode 100644 index 00000000000..3652664777c --- /dev/null +++ b/external/pgvector/sql/vector--0.3.2--0.4.0.sql @@ -0,0 +1,23 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.4.0'" to load this file. \quit + +-- remove this single line for Postgres < 13 +ALTER TYPE vector SET (STORAGE = extended); + +CREATE FUNCTION vector_accum(double precision[], vector) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_avg(double precision[]) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE AGGREGATE avg(vector) ( + SFUNC = vector_accum, + STYPE = double precision[], + FINALFUNC = vector_avg, + COMBINEFUNC = vector_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); diff --git a/external/pgvector/sql/vector--0.4.0--0.4.1.sql b/external/pgvector/sql/vector--0.4.0--0.4.1.sql new file mode 100644 index 00000000000..67ba57ef924 --- /dev/null +++ b/external/pgvector/sql/vector--0.4.0--0.4.1.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.4.1'" to load this file. 
\quit diff --git a/external/pgvector/sql/vector--0.4.1--0.4.2.sql b/external/pgvector/sql/vector--0.4.1--0.4.2.sql new file mode 100644 index 00000000000..24abacce05f --- /dev/null +++ b/external/pgvector/sql/vector--0.4.1--0.4.2.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.4.2'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.4.2--0.4.3.sql b/external/pgvector/sql/vector--0.4.2--0.4.3.sql new file mode 100644 index 00000000000..3db510e557e --- /dev/null +++ b/external/pgvector/sql/vector--0.4.2--0.4.3.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.4.3'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.4.3--0.4.4.sql b/external/pgvector/sql/vector--0.4.3--0.4.4.sql new file mode 100644 index 00000000000..49c4ab4ef77 --- /dev/null +++ b/external/pgvector/sql/vector--0.4.3--0.4.4.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.4.4'" to load this file. \quit diff --git a/external/pgvector/sql/vector--0.4.4--0.5.0.sql b/external/pgvector/sql/vector--0.4.4--0.5.0.sql new file mode 100644 index 00000000000..48572bf67a7 --- /dev/null +++ b/external/pgvector/sql/vector--0.4.4--0.5.0.sql @@ -0,0 +1,43 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.5.0'" to load this file. 
\quit + +CREATE FUNCTION l1_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_mul(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE OPERATOR * ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_mul, + COMMUTATOR = * +); + +CREATE AGGREGATE sum(vector) ( + SFUNC = vector_add, + STYPE = vector, + COMBINEFUNC = vector_add, + PARALLEL = SAFE +); + +CREATE FUNCTION hnswhandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnswhandler; + +COMMENT ON ACCESS METHOD hnsw IS 'hnsw index access method'; + +CREATE OPERATOR CLASS vector_l2_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector); diff --git a/external/pgvector/sql/vector--0.5.0--0.5.1.sql b/external/pgvector/sql/vector--0.5.0--0.5.1.sql new file mode 100644 index 00000000000..54e09c5e5b1 --- /dev/null +++ b/external/pgvector/sql/vector--0.5.0--0.5.1.sql @@ -0,0 +1,2 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.5.1'" to load this file. 
\quit diff --git a/external/pgvector/sql/vector.sql b/external/pgvector/sql/vector.sql new file mode 100644 index 00000000000..137931fed6d --- /dev/null +++ b/external/pgvector/sql/vector.sql @@ -0,0 +1,292 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION vector" to load this file. \quit + +-- type + +CREATE TYPE vector; + +CREATE FUNCTION vector_in(cstring, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_out(vector) RETURNS cstring + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_typmod_in(cstring[]) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_recv(internal, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_send(vector) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE TYPE vector ( + INPUT = vector_in, + OUTPUT = vector_out, + TYPMOD_IN = vector_typmod_in, + RECEIVE = vector_recv, + SEND = vector_send, + STORAGE = extended +); + +-- functions + +CREATE FUNCTION l2_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION cosine_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION l1_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_dims(vector) RETURNS integer + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_norm(vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION 
vector_add(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_sub(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_mul(vector, vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- private functions + +CREATE FUNCTION vector_lt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_le(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_eq(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ne(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_ge(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_gt(vector, vector) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_cmp(vector, vector) RETURNS int4 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_l2_squared_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_negative_inner_product(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_spherical_distance(vector, vector) RETURNS float8 + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_accum(double precision[], vector) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_avg(double precision[]) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION 
vector_combine(double precision[], double precision[]) RETURNS double precision[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- aggregates + +CREATE AGGREGATE avg(vector) ( + SFUNC = vector_accum, + STYPE = double precision[], + FINALFUNC = vector_avg, + COMBINEFUNC = vector_combine, + INITCOND = '{0}', + PARALLEL = SAFE +); + +CREATE AGGREGATE sum(vector) ( + SFUNC = vector_add, + STYPE = vector, + COMBINEFUNC = vector_add, + PARALLEL = SAFE +); + +-- cast functions + +CREATE FUNCTION vector(vector, integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(integer[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(real[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(double precision[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_vector(numeric[], integer, boolean) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION vector_to_float4(vector, integer, boolean) RETURNS real[] + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +-- casts + +CREATE CAST (vector AS vector) + WITH FUNCTION vector(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (vector AS real[]) + WITH FUNCTION vector_to_float4(vector, integer, boolean) AS IMPLICIT; + +CREATE CAST (integer[] AS vector) + WITH FUNCTION array_to_vector(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS vector) + WITH FUNCTION array_to_vector(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS vector) + WITH FUNCTION array_to_vector(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS vector) + WITH FUNCTION 
array_to_vector(numeric[], integer, boolean) AS ASSIGNMENT; + +-- operators + +CREATE OPERATOR <-> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l2_distance, + COMMUTATOR = '<->' +); + +CREATE OPERATOR <#> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_negative_inner_product, + COMMUTATOR = '<#>' +); + +CREATE OPERATOR <=> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = cosine_distance, + COMMUTATOR = '<=>' +); + +CREATE OPERATOR + ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_add, + COMMUTATOR = + +); + +CREATE OPERATOR - ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_sub, + COMMUTATOR = - +); + +CREATE OPERATOR * ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_mul, + COMMUTATOR = * +); + +CREATE OPERATOR < ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_lt, + COMMUTATOR = > , NEGATOR = >= , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +-- should use scalarlesel and scalarlejoinsel, but not supported in Postgres < 11 +CREATE OPERATOR <= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_le, + COMMUTATOR = >= , NEGATOR = > , + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +CREATE OPERATOR = ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_eq, + COMMUTATOR = = , NEGATOR = <> , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +CREATE OPERATOR <> ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ne, + COMMUTATOR = <> , NEGATOR = = , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +-- should use scalargesel and scalargejoinsel, but not supported in Postgres < 11 +CREATE OPERATOR >= ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_ge, + COMMUTATOR = <= , NEGATOR = < , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +CREATE OPERATOR > ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_gt, + COMMUTATOR = < , NEGATOR = <= , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +-- access methods + +CREATE FUNCTION 
ivfflathandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD ivfflat TYPE INDEX HANDLER ivfflathandler; + +COMMENT ON ACCESS METHOD ivfflat IS 'ivfflat index access method'; + +CREATE FUNCTION hnswhandler(internal) RETURNS index_am_handler + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnswhandler; + +COMMENT ON ACCESS METHOD hnsw IS 'hnsw index access method'; + +-- opclasses + +CREATE OPERATOR CLASS vector_ops + DEFAULT FOR TYPE vector USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 vector_cmp(vector, vector); + +CREATE OPERATOR CLASS vector_l2_ops + DEFAULT FOR TYPE vector USING ivfflat AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector), + FUNCTION 3 l2_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING ivfflat AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING ivfflat AS + OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector), + FUNCTION 3 vector_spherical_distance(vector, vector), + FUNCTION 4 vector_norm(vector); + +CREATE OPERATOR CLASS vector_l2_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_l2_squared_distance(vector, vector); + +CREATE OPERATOR CLASS vector_ip_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector); + +CREATE OPERATOR CLASS vector_cosine_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 <=> (vector, vector) FOR 
ORDER BY float_ops, + FUNCTION 1 vector_negative_inner_product(vector, vector), + FUNCTION 2 vector_norm(vector); diff --git a/external/pgvector/src/hnsw.c b/external/pgvector/src/hnsw.c new file mode 100644 index 00000000000..758e418424a --- /dev/null +++ b/external/pgvector/src/hnsw.c @@ -0,0 +1,224 @@ +#include "postgres.h" + +#include +#include + +#include "access/amapi.h" +#include "commands/vacuum.h" +#include "hnsw.h" +#include "utils/guc.h" +#include "utils/selfuncs.h" + +#if PG_VERSION_NUM >= 120000 +#include "commands/progress.h" +#endif + +int hnsw_ef_search; +static relopt_kind hnsw_relopt_kind; + +/* + * Initialize index options and variables + */ +void +HnswInit(void) +{ + hnsw_relopt_kind = add_reloption_kind(); + add_int_reloption(hnsw_relopt_kind, "m", "Max number of connections", + HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M +#if PG_VERSION_NUM >= 130000 + ,AccessExclusiveLock +#endif + ); + add_int_reloption(hnsw_relopt_kind, "ef_construction", "Size of the dynamic candidate list for construction", + HNSW_DEFAULT_EF_CONSTRUCTION, HNSW_MIN_EF_CONSTRUCTION, HNSW_MAX_EF_CONSTRUCTION +#if PG_VERSION_NUM >= 130000 + ,AccessExclusiveLock +#endif + ); + + DefineCustomIntVariable("hnsw.ef_search", "Sets the size of the dynamic candidate list for search", + "Valid range is 1..1000.", &hnsw_ef_search, + HNSW_DEFAULT_EF_SEARCH, HNSW_MIN_EF_SEARCH, HNSW_MAX_EF_SEARCH, PGC_USERSET, 0, NULL, NULL, NULL); +} + +/* + * Get the name of index build phase + */ +#if PG_VERSION_NUM >= 120000 +static char * +hnswbuildphasename(int64 phasenum) +{ + switch (phasenum) + { + case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE: + return "initializing"; + case PROGRESS_HNSW_PHASE_LOAD: + return "loading tuples"; + default: + return NULL; + } +} +#endif + +/* + * Estimate the cost of an index scan + */ +static void +hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double 
*indexCorrelation, + double *indexPages) +{ + GenericCosts costs; + int m; + int entryLevel; + Relation index; +#if PG_VERSION_NUM < 120000 + List *qinfos; +#endif + + /* Never use index without order */ + if (path->indexorderbys == NULL) + { + *indexStartupCost = DBL_MAX; + *indexTotalCost = DBL_MAX; + *indexSelectivity = 0; + *indexCorrelation = 0; + *indexPages = 0; + return; + } + + MemSet(&costs, 0, sizeof(costs)); + + index = index_open(path->indexinfo->indexoid, NoLock); + HnswGetMetaPageInfo(index, &m, NULL); + index_close(index, NoLock); + + /* Approximate entry level */ + entryLevel = (int) -log(1.0 / path->indexinfo->tuples) * HnswGetMl(m); + + /* TODO Improve estimate of visited tuples (currently underestimates) */ + /* Account for number of tuples (or entry level), m, and ef_search */ + costs.numIndexTuples = (entryLevel + 2) * m; + +#if PG_VERSION_NUM >= 120000 + genericcostestimate(root, path, loop_count, &costs); +#else + qinfos = deconstruct_indexquals(path); + genericcostestimate(root, path, loop_count, qinfos, &costs); +#endif + + /* Use total cost since most work happens before first tuple is returned */ + *indexStartupCost = costs.indexTotalCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; +} + +/* + * Parse and validate the reloptions + */ +static bytea * +hnswoptions(Datum reloptions, bool validate) +{ + static const relopt_parse_elt tab[] = { + {"m", RELOPT_TYPE_INT, offsetof(HnswOptions, m)}, + {"ef_construction", RELOPT_TYPE_INT, offsetof(HnswOptions, efConstruction)}, + }; + +#if PG_VERSION_NUM >= 130000 + return (bytea *) build_reloptions(reloptions, validate, + hnsw_relopt_kind, + sizeof(HnswOptions), + tab, lengthof(tab)); +#else + relopt_value *options; + int numoptions; + HnswOptions *rdopts; + + options = parseRelOptions(reloptions, validate, hnsw_relopt_kind, &numoptions); + rdopts = 
allocateReloptStruct(sizeof(HnswOptions), options, numoptions); + fillRelOptions((void *) rdopts, sizeof(HnswOptions), options, numoptions, + validate, tab, lengthof(tab)); + + return (bytea *) rdopts; +#endif +} + +/* + * Validate catalog entries for the specified operator class + */ +static bool +hnswvalidate(Oid opclassoid) +{ + return true; +} + +/* + * Define index handler + * + * See https://www.postgresql.org/docs/current/index-api.html + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswhandler); +Datum +hnswhandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 0; + amroutine->amsupport = 2; +#if PG_VERSION_NUM >= 130000 + amroutine->amoptsprocnum = 0; +#endif + amroutine->amcanorder = false; + amroutine->amcanorderbyop = true; + amroutine->amcanbackward = false; /* can change direction mid-scan */ + amroutine->amcanunique = false; + amroutine->amcanmulticol = false; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = false; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amcanparallel = false; + amroutine->amcaninclude = false; +#if PG_VERSION_NUM >= 130000 + amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */ + amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL; +#endif + amroutine->amkeytype = InvalidOid; + + /* Interface functions */ + amroutine->ambuild = hnswbuild; + amroutine->ambuildempty = hnswbuildempty; + amroutine->aminsert = hnswinsert; + amroutine->ambulkdelete = hnswbulkdelete; + amroutine->amvacuumcleanup = hnswvacuumcleanup; + amroutine->amcanreturn = NULL; + amroutine->amcostestimate = hnswcostestimate; + amroutine->amoptions = hnswoptions; + amroutine->amproperty = NULL; /* TODO AMPROP_DISTANCE_ORDERABLE */ +#if PG_VERSION_NUM >= 120000 + amroutine->ambuildphasename = hnswbuildphasename; +#endif + amroutine->amvalidate = hnswvalidate; 
+#if PG_VERSION_NUM >= 140000 + amroutine->amadjustmembers = NULL; +#endif + amroutine->ambeginscan = hnswbeginscan; + amroutine->amrescan = hnswrescan; + amroutine->amgettuple = hnswgettuple; + amroutine->amgetbitmap = NULL; + amroutine->amendscan = hnswendscan; + amroutine->ammarkpos = NULL; + amroutine->amrestrpos = NULL; + + /* Interface functions to support parallel index scans */ + amroutine->amestimateparallelscan = NULL; + amroutine->aminitparallelscan = NULL; + amroutine->amparallelrescan = NULL; + + PG_RETURN_POINTER(amroutine); +} diff --git a/external/pgvector/src/hnsw.h b/external/pgvector/src/hnsw.h new file mode 100644 index 00000000000..eb2aa9f729f --- /dev/null +++ b/external/pgvector/src/hnsw.h @@ -0,0 +1,309 @@ +#ifndef HNSW_H +#define HNSW_H + +#include "postgres.h" + +#include "access/generic_xlog.h" +#include "access/reloptions.h" +#include "nodes/execnodes.h" +#include "port.h" /* for random() */ +#include "utils/sampling.h" +#include "vector.h" + +#if PG_VERSION_NUM < 110000 +#error "Requires PostgreSQL 11+" +#endif + +#define HNSW_MAX_DIM 2000 + +/* Support functions */ +#define HNSW_DISTANCE_PROC 1 +#define HNSW_NORM_PROC 2 + +#define HNSW_VERSION 1 +#define HNSW_MAGIC_NUMBER 0xA953A953 +#define HNSW_PAGE_ID 0xFF90 + +/* Preserved page numbers */ +#define HNSW_METAPAGE_BLKNO 0 +#define HNSW_HEAD_BLKNO 1 /* first element page */ + +/* Must correspond to page numbers since page lock is used */ +#define HNSW_UPDATE_LOCK 0 +#define HNSW_SCAN_LOCK 1 + +/* HNSW parameters */ +#define HNSW_DEFAULT_M 16 +#define HNSW_MIN_M 2 +#define HNSW_MAX_M 100 +#define HNSW_DEFAULT_EF_CONSTRUCTION 64 +#define HNSW_MIN_EF_CONSTRUCTION 4 +#define HNSW_MAX_EF_CONSTRUCTION 1000 +#define HNSW_DEFAULT_EF_SEARCH 40 +#define HNSW_MIN_EF_SEARCH 1 +#define HNSW_MAX_EF_SEARCH 1000 + +/* Tuple types */ +#define HNSW_ELEMENT_TUPLE_TYPE 1 +#define HNSW_NEIGHBOR_TUPLE_TYPE 2 + +/* Make graph robust against non-HOT updates */ +#define HNSW_HEAPTIDS 10 + +#define 
HNSW_UPDATE_ENTRY_GREATER 1 +#define HNSW_UPDATE_ENTRY_ALWAYS 2 + +/* Build phases */ +/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */ +#define PROGRESS_HNSW_PHASE_LOAD 2 + +#define HNSW_MAX_SIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - sizeof(ItemIdData)) + +#define HNSW_ELEMENT_TUPLE_SIZE(_dim) MAXALIGN(offsetof(HnswElementTupleData, vec) + VECTOR_SIZE(_dim)) +#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) MAXALIGN(offsetof(HnswNeighborTupleData, indextids) + ((level) + 2) * (m) * sizeof(ItemPointerData)) + +#define HnswPageGetOpaque(page) ((HnswPageOpaque) PageGetSpecialPointer(page)) +#define HnswPageGetMeta(page) ((HnswMetaPageData *) PageGetContents(page)) + +#if PG_VERSION_NUM >= 150000 +#define RandomDouble() pg_prng_double(&pg_global_prng_state) +#else +#define RandomDouble() (((double) random()) / MAX_RANDOM_VALUE) +#endif + +#if PG_VERSION_NUM < 130000 +#define list_delete_last(list) list_truncate(list, list_length(list) - 1) +#define list_sort(list, cmp) list_qsort(list, cmp) +#endif + +#define HnswIsElementTuple(tup) ((tup)->type == HNSW_ELEMENT_TUPLE_TYPE) +#define HnswIsNeighborTuple(tup) ((tup)->type == HNSW_NEIGHBOR_TUPLE_TYPE) + +/* 2 * M connections for ground layer */ +#define HnswGetLayerM(m, layer) (layer == 0 ? 
(m) * 2 : (m)) + +/* Optimal ML from paper */ +#define HnswGetMl(m) (1 / log(m)) + +/* Ensure fits on page and in uint8 */ +#define HnswGetMaxLevel(m) Min(((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - offsetof(HnswNeighborTupleData, indextids) - sizeof(ItemIdData)) / (sizeof(ItemPointerData)) / m) - 2, 255) + +/* Variables */ +extern int hnsw_ef_search; + +typedef struct HnswNeighborArray HnswNeighborArray; + +typedef struct HnswElementData +{ + List *heaptids; + uint8 level; + uint8 deleted; + HnswNeighborArray *neighbors; + BlockNumber blkno; + OffsetNumber offno; + OffsetNumber neighborOffno; + BlockNumber neighborPage; + Vector *vec; +} HnswElementData; + +typedef HnswElementData * HnswElement; + +typedef struct HnswCandidate +{ + HnswElement element; + float distance; + bool closer; +} HnswCandidate; + +typedef struct HnswNeighborArray +{ + int length; + bool closerSet; + HnswCandidate *items; +} HnswNeighborArray; + +typedef struct HnswPairingHeapNode +{ + pairingheap_node ph_node; + HnswCandidate *inner; +} HnswPairingHeapNode; + +/* HNSW index options */ +typedef struct HnswOptions +{ + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int m; /* number of connections */ + int efConstruction; /* size of dynamic candidate list */ +} HnswOptions; + +typedef struct HnswBuildState +{ + /* Info */ + Relation heap; + Relation index; + IndexInfo *indexInfo; + ForkNumber forkNum; + + /* Settings */ + int dimensions; + int m; + int efConstruction; + + /* Statistics */ + double indtuples; + double reltuples; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + + /* Variables */ + List *elements; + HnswElement entryPoint; + double ml; + int maxLevel; + double maxInMemoryElements; + bool flushed; + Vector *normvec; + + /* Memory */ + MemoryContext tmpCtx; +} HnswBuildState; + +typedef struct HnswMetaPageData +{ + uint32 magicNumber; + uint32 version; + uint32 dimensions; + uint16 m; + uint16 efConstruction; + BlockNumber entryBlkno; + OffsetNumber entryOffno; + int16 entryLevel; + BlockNumber insertPage; +} HnswMetaPageData; + +typedef HnswMetaPageData * HnswMetaPage; + +typedef struct HnswPageOpaqueData +{ + BlockNumber nextblkno; + uint16 unused; + uint16 page_id; /* for identification of HNSW indexes */ +} HnswPageOpaqueData; + +typedef HnswPageOpaqueData * HnswPageOpaque; + +typedef struct HnswElementTupleData +{ + uint8 type; + uint8 level; + uint8 deleted; + uint8 unused; + ItemPointerData heaptids[HNSW_HEAPTIDS]; + ItemPointerData neighbortid; + uint16 unused2; + Vector vec; +} HnswElementTupleData; + +typedef HnswElementTupleData * HnswElementTuple; + +typedef struct HnswNeighborTupleData +{ + uint8 type; + uint8 unused; + uint16 count; + ItemPointerData indextids[FLEXIBLE_ARRAY_MEMBER]; +} HnswNeighborTupleData; + +typedef HnswNeighborTupleData * HnswNeighborTuple; + +typedef struct HnswScanOpaqueData +{ + bool first; + List *w; + MemoryContext tmpCtx; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; +} HnswScanOpaqueData; + +typedef HnswScanOpaqueData * HnswScanOpaque; + +typedef struct HnswVacuumState +{ + /* 
Info */ + Relation index; + IndexBulkDeleteResult *stats; + IndexBulkDeleteCallback callback; + void *callback_state; + + /* Settings */ + int m; + int efConstruction; + + /* Support functions */ + FmgrInfo *procinfo; + Oid collation; + + /* Variables */ + HTAB *deleted; + BufferAccessStrategy bas; + HnswNeighborTuple ntup; + HnswElementData highestPoint; + + /* Memory */ + MemoryContext tmpCtx; +} HnswVacuumState; + +/* Methods */ +int HnswGetM(Relation index); +int HnswGetEfConstruction(Relation index); +FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum); +bool HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, Vector * result); +void HnswCommitBuffer(Buffer buf, GenericXLogState *state); +Buffer HnswNewBuffer(Relation index, ForkNumber forkNum); +void HnswInitPage(Buffer buf, Page page); +void HnswInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state); +void HnswInit(void); +List *HnswSearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement); +HnswElement HnswGetEntryPoint(Relation index); +void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint); +HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel); +void HnswFreeElement(HnswElement element); +HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno); +void HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing); +HnswElement HnswFindDuplicate(HnswElement e); +HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec); +void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum); +void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m); +void HnswAddHeapTid(HnswElement 
element, ItemPointer heaptid); +void HnswInitNeighbors(HnswElement element, int m); +bool HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heapRel); +void HnswUpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting); +void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); +void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec); +void HnswSetElementTuple(HnswElementTuple etup, HnswElement element); +void HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); +void HnswLoadNeighbors(HnswElement element, Relation index, int m); + +/* Index access methods */ +IndexBuildResult *hnswbuild(Relation heap, Relation index, IndexInfo *indexInfo); +void hnswbuildempty(Relation index); +bool hnswinsert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, IndexUniqueCheck checkUnique +#if PG_VERSION_NUM >= 140000 + ,bool indexUnchanged +#endif + ,IndexInfo *indexInfo +); +IndexBulkDeleteResult *hnswbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state); +IndexBulkDeleteResult *hnswvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); +IndexScanDesc hnswbeginscan(Relation index, int nkeys, int norderbys); +void hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys); +bool hnswgettuple(IndexScanDesc scan, ScanDirection dir); +void hnswendscan(IndexScanDesc scan); + +#endif diff --git a/external/pgvector/src/hnswbuild.c b/external/pgvector/src/hnswbuild.c new file mode 100644 index 00000000000..18959d5c7f8 --- /dev/null +++ b/external/pgvector/src/hnswbuild.c @@ -0,0 +1,523 @@ +#include "postgres.h" + +#include + +#include 
"catalog/index.h" +#include "hnsw.h" +#include "miscadmin.h" +#include "lib/pairingheap.h" +#include "nodes/pg_list.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" + +#if PG_VERSION_NUM >= 140000 +#include "utils/backend_progress.h" +#elif PG_VERSION_NUM >= 120000 +#include "pgstat.h" +#endif + +#if PG_VERSION_NUM >= 120000 +#include "access/tableam.h" +#include "commands/progress.h" +#else +#define PROGRESS_CREATEIDX_TUPLES_DONE 0 +#endif + +#if PG_VERSION_NUM >= 130000 +#define CALLBACK_ITEM_POINTER ItemPointer tid +#else +#define CALLBACK_ITEM_POINTER HeapTuple hup +#endif + +#if PG_VERSION_NUM >= 120000 +#define UpdateProgress(index, val) pgstat_progress_update_param(index, val) +#else +#define UpdateProgress(index, val) ((void)val) +#endif + +/* + * Create the metapage + */ +static void +CreateMetaPage(HnswBuildState * buildstate) +{ + Relation index = buildstate->index; + ForkNumber forkNum = buildstate->forkNum; + Buffer buf; + Page page; + GenericXLogState *state; + HnswMetaPage metap; + + buf = HnswNewBuffer(index, forkNum); + HnswInitRegisterPage(index, &buf, &page, &state); + + /* Set metapage data */ + metap = HnswPageGetMeta(page); + metap->magicNumber = HNSW_MAGIC_NUMBER; + metap->version = HNSW_VERSION; + metap->dimensions = buildstate->dimensions; + metap->m = buildstate->m; + metap->efConstruction = buildstate->efConstruction; + metap->entryBlkno = InvalidBlockNumber; + metap->entryOffno = InvalidOffsetNumber; + metap->entryLevel = -1; + metap->insertPage = InvalidBlockNumber; + ((PageHeader) page)->pd_lower = + ((char *) metap + sizeof(HnswMetaPageData)) - (char *) page; + + HnswCommitBuffer(buf, state); +} + +/* + * Add a new page + */ +static void +HnswBuildAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum) +{ + /* Add a new page */ + Buffer newbuf = HnswNewBuffer(index, forkNum); + + /* Update previous page */ + HnswPageGetOpaque(*page)->nextblkno = BufferGetBlockNumber(newbuf); + + /* 
Commit */ + GenericXLogFinish(*state); + UnlockReleaseBuffer(*buf); + + /* Can take a while, so ensure we can interrupt */ + /* Needs to be called when no buffer locks are held */ + LockBuffer(newbuf, BUFFER_LOCK_UNLOCK); + CHECK_FOR_INTERRUPTS(); + LockBuffer(newbuf, BUFFER_LOCK_EXCLUSIVE); + + /* Prepare new page */ + *buf = newbuf; + *state = GenericXLogStart(index); + *page = GenericXLogRegisterBuffer(*state, *buf, GENERIC_XLOG_FULL_IMAGE); + HnswInitPage(*buf, *page); +} + +/* + * Create element pages + */ +static void +CreateElementPages(HnswBuildState * buildstate) +{ + Relation index = buildstate->index; + ForkNumber forkNum = buildstate->forkNum; + int dimensions = buildstate->dimensions; + Size etupSize; + Size maxSize; + HnswElementTuple etup; + HnswNeighborTuple ntup; + BlockNumber insertPage; + Buffer buf; + Page page; + GenericXLogState *state; + ListCell *lc; + + /* Calculate sizes */ + maxSize = HNSW_MAX_SIZE; + etupSize = HNSW_ELEMENT_TUPLE_SIZE(dimensions); + + /* Allocate once */ + etup = palloc0(etupSize); + ntup = palloc0(BLCKSZ); + + /* Prepare first page */ + buf = HnswNewBuffer(index, forkNum); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE); + HnswInitPage(buf, page); + + foreach(lc, buildstate->elements) + { + HnswElement element = lfirst(lc); + Size ntupSize; + Size combinedSize; + + HnswSetElementTuple(etup, element); + + /* Calculate sizes */ + ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m); + combinedSize = etupSize + ntupSize + sizeof(ItemIdData); + + /* Keep element and neighbors on the same page if possible */ + if (PageGetFreeSpace(page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(page) < combinedSize)) + HnswBuildAppendPage(index, &buf, &page, &state, forkNum); + + /* Calculate offsets */ + element->blkno = BufferGetBlockNumber(buf); + element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + if (combinedSize <= maxSize) + { + 
element->neighborPage = element->blkno; + element->neighborOffno = OffsetNumberNext(element->offno); + } + else + { + element->neighborPage = element->blkno + 1; + element->neighborOffno = FirstOffsetNumber; + } + + ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno); + + /* Add element */ + if (PageAddItem(page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != element->offno) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Add new page if needed */ + if (PageGetFreeSpace(page) < ntupSize) + HnswBuildAppendPage(index, &buf, &page, &state, forkNum); + + /* Add placeholder for neighbors */ + if (PageAddItem(page, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + + insertPage = BufferGetBlockNumber(buf); + + /* Commit */ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_ALWAYS, buildstate->entryPoint, insertPage, forkNum); + + pfree(etup); + pfree(ntup); +} + +/* + * Create neighbor pages + */ +static void +CreateNeighborPages(HnswBuildState * buildstate) +{ + Relation index = buildstate->index; + ForkNumber forkNum = buildstate->forkNum; + int m = buildstate->m; + ListCell *lc; + HnswNeighborTuple ntup; + + /* Allocate once */ + ntup = palloc0(BLCKSZ); + + foreach(lc, buildstate->elements) + { + HnswElement e = lfirst(lc); + Buffer buf; + Page page; + GenericXLogState *state; + Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m); + + /* Can take a while, so ensure we can interrupt */ + /* Needs to be called when no buffer locks are held */ + CHECK_FOR_INTERRUPTS(); + + buf = ReadBufferExtended(index, forkNum, e->neighborPage, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + + HnswSetNeighborTuple(ntup, e, 
m); + + if (!PageIndexTupleOverwrite(page, e->neighborOffno, (Item) ntup, ntupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Commit */ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + } + + pfree(ntup); +} + +/* + * Free elements + */ +static void +FreeElements(HnswBuildState * buildstate) +{ + ListCell *lc; + + foreach(lc, buildstate->elements) + HnswFreeElement(lfirst(lc)); + + list_free(buildstate->elements); +} + +/* + * Flush pages + */ +static void +FlushPages(HnswBuildState * buildstate) +{ + CreateMetaPage(buildstate); + CreateElementPages(buildstate); + CreateNeighborPages(buildstate); + + buildstate->flushed = true; + FreeElements(buildstate); +} + +/* + * Insert tuple + */ +static bool +InsertTuple(Relation index, Datum *values, HnswElement element, HnswBuildState * buildstate, HnswElement * dup) +{ + FmgrInfo *procinfo = buildstate->procinfo; + Oid collation = buildstate->collation; + HnswElement entryPoint = buildstate->entryPoint; + int efConstruction = buildstate->efConstruction; + int m = buildstate->m; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Normalize if needed */ + if (buildstate->normprocinfo != NULL) + { + if (!HnswNormValue(buildstate->normprocinfo, collation, &value, buildstate->normvec)) + return false; + } + + /* Copy value to element so accessible outside of memory context */ + memcpy(element->vec, DatumGetVector(value), VECTOR_SIZE(buildstate->dimensions)); + + /* Insert element in graph */ + HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false); + + /* Look for duplicate */ + *dup = HnswFindDuplicate(element); + + /* Update neighbors if needed */ + if (*dup == NULL) + { + for (int lc = element->level; lc >= 0; lc--) + { + int lm = HnswGetLayerM(m, lc); + HnswNeighborArray *neighbors = &element->neighbors[lc]; + + for (int i = 0; i < neighbors->length; i++) + 
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation); + } + } + + /* Update entry point if needed */ + if (*dup == NULL && (entryPoint == NULL || element->level > entryPoint->level)) + buildstate->entryPoint = element; + + UpdateProgress(PROGRESS_CREATEIDX_TUPLES_DONE, ++buildstate->indtuples); + + return *dup == NULL; +} + +/* + * Callback for table_index_build_scan + */ +static void +BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, + bool *isnull, bool tupleIsAlive, void *state) +{ + HnswBuildState *buildstate = (HnswBuildState *) state; + MemoryContext oldCtx; + HnswElement element; + HnswElement dup = NULL; + bool inserted; + +#if PG_VERSION_NUM < 130000 + ItemPointer tid = &hup->t_self; +#endif + + /* Skip nulls */ + if (isnull[0]) + return; + + if (buildstate->indtuples >= buildstate->maxInMemoryElements) + { + if (!buildstate->flushed) + { + ereport(NOTICE, + (errmsg("hnsw graph no longer fits into maintenance_work_mem after " INT64_FORMAT " tuples", (int64) buildstate->indtuples), + errdetail("Building will take significantly more time."), + errhint("Increase maintenance_work_mem to speed up builds."))); + + FlushPages(buildstate); + } + + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + if (HnswInsertTuple(buildstate->index, values, isnull, tid, buildstate->heap)) + UpdateProgress(PROGRESS_CREATEIDX_TUPLES_DONE, ++buildstate->indtuples); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); + + return; + } + + /* Allocate necessary memory outside of memory context */ + element = HnswInitElement(tid, buildstate->m, buildstate->ml, buildstate->maxLevel); + element->vec = palloc(VECTOR_SIZE(buildstate->dimensions)); + + /* Use memory context since detoast can allocate */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Insert tuple */ + inserted = InsertTuple(index, values, element, buildstate, &dup); + + /* Reset memory context */ 
+ MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); + + /* Add outside memory context */ + if (dup != NULL) + HnswAddHeapTid(dup, tid); + + /* Add to buildstate or free */ + if (inserted) + buildstate->elements = lappend(buildstate->elements, element); + else + HnswFreeElement(element); +} + +/* + * Get the max number of elements that fit into maintenance_work_mem + */ +static double +HnswGetMaxInMemoryElements(int m, double ml, int dimensions) +{ + Size elementSize = sizeof(HnswElementData); + double avgLevel = -log(0.5) * ml; + + elementSize += sizeof(HnswNeighborArray) * (avgLevel + 1); + elementSize += sizeof(HnswCandidate) * (m * (avgLevel + 2)); + elementSize += sizeof(ItemPointerData); + elementSize += VECTOR_SIZE(dimensions); + return (maintenance_work_mem * 1024L) / elementSize; +} + +/* + * Initialize the build state + */ +static void +InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, IndexInfo *indexInfo, ForkNumber forkNum) +{ + buildstate->heap = heap; + buildstate->index = index; + buildstate->indexInfo = indexInfo; + buildstate->forkNum = forkNum; + + buildstate->m = HnswGetM(index); + buildstate->efConstruction = HnswGetEfConstruction(index); + buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod; + + /* Require column to have dimensions to be indexed */ + if (buildstate->dimensions < 0) + elog(ERROR, "column does not have dimensions"); + + if (buildstate->dimensions > HNSW_MAX_DIM) + elog(ERROR, "column cannot have more than %d dimensions for hnsw index", HNSW_MAX_DIM); + + if (buildstate->efConstruction < 2 * buildstate->m) + elog(ERROR, "ef_construction must be greater than or equal to 2 * m"); + + buildstate->reltuples = 0; + buildstate->indtuples = 0; + + /* Get support functions */ + buildstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + buildstate->collation = index->rd_indcollation[0]; + 
+ buildstate->elements = NIL; + buildstate->entryPoint = NULL; + buildstate->ml = HnswGetMl(buildstate->m); + buildstate->maxLevel = HnswGetMaxLevel(buildstate->m); + buildstate->maxInMemoryElements = HnswGetMaxInMemoryElements(buildstate->m, buildstate->ml, buildstate->dimensions); + buildstate->flushed = false; + + /* Reuse for each tuple */ + buildstate->normvec = InitVector(buildstate->dimensions); + + buildstate->tmpCtx = AllocSetContextCreate(CurrentMemoryContext, + "Hnsw build temporary context", + ALLOCSET_DEFAULT_SIZES); +} + +/* + * Free resources + */ +static void +FreeBuildState(HnswBuildState * buildstate) +{ + pfree(buildstate->normvec); + MemoryContextDelete(buildstate->tmpCtx); +} + +/* + * Build graph + */ +static void +BuildGraph(HnswBuildState * buildstate, ForkNumber forkNum) +{ + UpdateProgress(PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_HNSW_PHASE_LOAD); + +#if PG_VERSION_NUM >= 120000 + buildstate->reltuples = table_index_build_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, + true, true, BuildCallback, (void *) buildstate, NULL); +#else + buildstate->reltuples = IndexBuildHeapScan(buildstate->heap, buildstate->index, buildstate->indexInfo, + true, BuildCallback, (void *) buildstate, NULL); +#endif +} + +/* + * Build the index + */ +static void +BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, + HnswBuildState * buildstate, ForkNumber forkNum) +{ + InitBuildState(buildstate, heap, index, indexInfo, forkNum); + + if (buildstate->heap != NULL) + BuildGraph(buildstate, forkNum); + + if (!buildstate->flushed) + FlushPages(buildstate); + + FreeBuildState(buildstate); +} + +/* + * Build the index for a logged table + */ +IndexBuildResult * +hnswbuild(Relation heap, Relation index, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + HnswBuildState buildstate; + + BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM); + + result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); + result->heap_tuples = 
buildstate.reltuples; + result->index_tuples = buildstate.indtuples; + + return result; +} + +/* + * Build the index for an unlogged table + */ +void +hnswbuildempty(Relation index) +{ + IndexInfo *indexInfo = BuildIndexInfo(index); + HnswBuildState buildstate; + + BuildIndex(NULL, index, indexInfo, &buildstate, INIT_FORKNUM); +} diff --git a/external/pgvector/src/hnswinsert.c b/external/pgvector/src/hnswinsert.c new file mode 100644 index 00000000000..f7cd51f6ba4 --- /dev/null +++ b/external/pgvector/src/hnswinsert.c @@ -0,0 +1,582 @@ +#include "postgres.h" + +#include + +#include "hnsw.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Get the insert page + */ +static BlockNumber +GetInsertPage(Relation index) +{ + Buffer buf; + Page page; + HnswMetaPage metap; + BlockNumber insertPage; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = HnswPageGetMeta(page); + + insertPage = metap->insertPage; + + UnlockReleaseBuffer(buf); + + return insertPage; +} + +/* + * Check for a free offset + */ +static bool +HnswFreeOffset(Relation index, Buffer buf, Page page, HnswElement element, Size ntupSize, Buffer *nbuf, Page *npage, OffsetNumber *freeOffno, OffsetNumber *freeNeighborOffno, BlockNumber *newInsertPage) +{ + OffsetNumber offno; + OffsetNumber maxoffno = PageGetMaxOffsetNumber(page); + + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) + { + HnswElementTuple etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, offno)); + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + if (etup->deleted) + { + BlockNumber elementPage = BufferGetBlockNumber(buf); + BlockNumber neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + OffsetNumber neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + ItemId itemid; + + if (!BlockNumberIsValid(*newInsertPage)) + 
*newInsertPage = elementPage; + + if (neighborPage == elementPage) + { + *nbuf = buf; + *npage = page; + } + else + { + *nbuf = ReadBuffer(index, neighborPage); + LockBuffer(*nbuf, BUFFER_LOCK_EXCLUSIVE); + + /* Skip WAL for now */ + *npage = BufferGetPage(*nbuf); + } + + itemid = PageGetItemId(*npage, neighborOffno); + + /* Check for space on neighbor tuple page */ + if (PageGetFreeSpace(*npage) + ItemIdGetLength(itemid) - sizeof(ItemIdData) >= ntupSize) + { + *freeOffno = offno; + *freeNeighborOffno = neighborOffno; + return true; + } + else if (*nbuf != buf) + UnlockReleaseBuffer(*nbuf); + } + } + + return false; +} + +/* + * Add a new page + */ +static void +HnswInsertAppendPage(Relation index, Buffer *nbuf, Page *npage, GenericXLogState *state, Page page) +{ + /* Add a new page */ + LockRelationForExtension(index, ExclusiveLock); + *nbuf = HnswNewBuffer(index, MAIN_FORKNUM); + UnlockRelationForExtension(index, ExclusiveLock); + + /* Init new page */ + *npage = GenericXLogRegisterBuffer(state, *nbuf, GENERIC_XLOG_FULL_IMAGE); + HnswInitPage(*nbuf, *npage); + + /* Update previous buffer */ + HnswPageGetOpaque(page)->nextblkno = BufferGetBlockNumber(*nbuf); +} + +/* + * Add to element and neighbor pages + */ +static void +WriteNewElementPages(Relation index, HnswElement e, int m, BlockNumber insertPage, BlockNumber *updatedInsertPage) +{ + Buffer buf; + Page page; + GenericXLogState *state; + Size etupSize; + Size ntupSize; + Size combinedSize; + Size maxSize; + Size minCombinedSize; + HnswElementTuple etup; + BlockNumber currentPage = insertPage; + int dimensions = e->vec->dim; + HnswNeighborTuple ntup; + Buffer nbuf; + Page npage; + OffsetNumber freeOffno = InvalidOffsetNumber; + OffsetNumber freeNeighborOffno = InvalidOffsetNumber; + BlockNumber newInsertPage = InvalidBlockNumber; + + /* Calculate sizes */ + etupSize = HNSW_ELEMENT_TUPLE_SIZE(dimensions); + ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m); + combinedSize = etupSize + ntupSize + 
sizeof(ItemIdData); + maxSize = HNSW_MAX_SIZE; + minCombinedSize = etupSize + HNSW_NEIGHBOR_TUPLE_SIZE(0, m) + sizeof(ItemIdData); + + /* Prepare element tuple */ + etup = palloc0(etupSize); + HnswSetElementTuple(etup, e); + + /* Prepare neighbor tuple */ + ntup = palloc0(ntupSize); + HnswSetNeighborTuple(ntup, e, m); + + /* Find a page (or two if needed) to insert the tuples */ + for (;;) + { + buf = ReadBuffer(index, currentPage); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + + /* Keep track of first page where element at level 0 can fit */ + if (!BlockNumberIsValid(newInsertPage) && PageGetFreeSpace(page) >= minCombinedSize) + newInsertPage = currentPage; + + /* First, try the fastest path */ + /* Space for both tuples on the current page */ + /* This can split existing tuples in rare cases */ + if (PageGetFreeSpace(page) >= combinedSize) + { + nbuf = buf; + npage = page; + break; + } + + /* Next, try space from a deleted element */ + if (HnswFreeOffset(index, buf, page, e, ntupSize, &nbuf, &npage, &freeOffno, &freeNeighborOffno, &newInsertPage)) + { + if (nbuf != buf) + npage = GenericXLogRegisterBuffer(state, nbuf, 0); + + break; + } + + /* Finally, try space for element only if last page */ + /* Skip if both tuples can fit on the same page */ + if (combinedSize > maxSize && PageGetFreeSpace(page) >= etupSize && !BlockNumberIsValid(HnswPageGetOpaque(page)->nextblkno)) + { + HnswInsertAppendPage(index, &nbuf, &npage, state, page); + break; + } + + currentPage = HnswPageGetOpaque(page)->nextblkno; + + if (BlockNumberIsValid(currentPage)) + { + /* Move to next page */ + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } + else + { + Buffer newbuf; + Page newpage; + + HnswInsertAppendPage(index, &newbuf, &newpage, state, page); + + /* Commit */ + GenericXLogFinish(state); + + /* Unlock previous buffer */ + UnlockReleaseBuffer(buf); + + /* Prepare new buffer */ + state = 
GenericXLogStart(index); + buf = newbuf; + page = GenericXLogRegisterBuffer(state, buf, 0); + + /* Create new page for neighbors if needed */ + if (PageGetFreeSpace(page) < combinedSize) + HnswInsertAppendPage(index, &nbuf, &npage, state, page); + else + { + nbuf = buf; + npage = page; + } + + break; + } + } + + e->blkno = BufferGetBlockNumber(buf); + e->neighborPage = BufferGetBlockNumber(nbuf); + + /* Added tuple to new page if newInsertPage is not set */ + /* So can set to neighbor page instead of element page */ + if (!BlockNumberIsValid(newInsertPage)) + newInsertPage = e->neighborPage; + + if (OffsetNumberIsValid(freeOffno)) + { + e->offno = freeOffno; + e->neighborOffno = freeNeighborOffno; + } + else + { + e->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + if (nbuf == buf) + e->neighborOffno = OffsetNumberNext(e->offno); + else + e->neighborOffno = FirstOffsetNumber; + } + + ItemPointerSet(&etup->neighbortid, e->neighborPage, e->neighborOffno); + + /* Add element and neighbors */ + if (OffsetNumberIsValid(freeOffno)) + { + if (!PageIndexTupleOverwrite(page, e->offno, (Item) etup, etupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + if (!PageIndexTupleOverwrite(npage, e->neighborOffno, (Item) ntup, ntupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + else + { + if (PageAddItem(page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != e->offno) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + if (PageAddItem(npage, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != e->neighborOffno) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + + /* Commit */ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + if (nbuf != buf) + UnlockReleaseBuffer(nbuf); + + /* Update the insert page */ + if (BlockNumberIsValid(newInsertPage) && newInsertPage != insertPage) + 
*updatedInsertPage = newInsertPage; +} + +/* + * Check if connection already exists + */ +static bool +ConnectionExists(HnswElement e, HnswNeighborTuple ntup, int startIdx, int lm) +{ + for (int i = 0; i < lm; i++) + { + ItemPointer indextid = &ntup->indextids[startIdx + i]; + + if (!ItemPointerIsValid(indextid)) + break; + + if (ItemPointerGetBlockNumber(indextid) == e->blkno && ItemPointerGetOffsetNumber(indextid) == e->offno) + return true; + } + + return false; +} + +/* + * Update neighbors + */ +void +HnswUpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting) +{ + for (int lc = e->level; lc >= 0; lc--) + { + int lm = HnswGetLayerM(m, lc); + HnswNeighborArray *neighbors = &e->neighbors[lc]; + + for (int i = 0; i < neighbors->length; i++) + { + HnswCandidate *hc = &neighbors->items[i]; + Buffer buf; + Page page; + GenericXLogState *state; + ItemId itemid; + HnswNeighborTuple ntup; + Size ntupSize; + int idx = -1; + int startIdx; + OffsetNumber offno = hc->element->neighborOffno; + + /* Get latest neighbors since they may have changed */ + /* Do not lock yet since selecting neighbors can take time */ + HnswLoadNeighbors(hc->element, index, m); + + /* + * Could improve performance for vacuuming by checking neighbors + * against list of elements being deleted to find index. It's + * important to exclude already deleted elements for this since + * they can be replaced at any time. 
+ */ + + /* Select neighbors */ + HnswUpdateConnection(e, hc, lm, lc, &idx, index, procinfo, collation); + + /* New element was not selected as a neighbor */ + if (idx == -1) + continue; + + /* Register page */ + buf = ReadBuffer(index, hc->element->neighborPage); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + + /* Get tuple */ + itemid = PageGetItemId(page, offno); + ntup = (HnswNeighborTuple) PageGetItem(page, itemid); + ntupSize = ItemIdGetLength(itemid); + + /* Calculate index for update */ + startIdx = (hc->element->level - lc) * m; + + /* Check for existing connection */ + if (checkExisting && ConnectionExists(e, ntup, startIdx, lm)) + idx = -1; + else if (idx == -2) + { + /* Find free offset if still exists */ + /* TODO Retry updating connections if not */ + for (int j = 0; j < lm; j++) + { + if (!ItemPointerIsValid(&ntup->indextids[startIdx + j])) + { + idx = startIdx + j; + break; + } + } + } + else + idx += startIdx; + + /* Make robust to issues */ + if (idx >= 0 && idx < ntup->count) + { + ItemPointer indextid = &ntup->indextids[idx]; + + /* Update neighbor */ + ItemPointerSet(indextid, e->blkno, e->offno); + + /* Overwrite tuple */ + if (!PageIndexTupleOverwrite(page, offno, (Item) ntup, ntupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Commit */ + GenericXLogFinish(state); + } + else + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); + } + } +} + +/* + * Add a heap TID to an existing element + */ +static bool +HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup) +{ + Buffer buf; + Page page; + GenericXLogState *state; + Size etupSize = HNSW_ELEMENT_TUPLE_SIZE(dup->vec->dim); + HnswElementTuple etup; + int i; + + /* Read page */ + buf = ReadBuffer(index, dup->blkno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + + 
/* Find space */ + etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, dup->offno)); + for (i = 0; i < HNSW_HEAPTIDS; i++) + { + if (!ItemPointerIsValid(&etup->heaptids[i])) + break; + } + + /* Either being deleted or we lost our chance to another backend */ + if (i == 0 || i == HNSW_HEAPTIDS) + { + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + return false; + } + + /* Add heap TID */ + etup->heaptids[i] = *((ItemPointer) linitial(element->heaptids)); + + /* Overwrite tuple */ + if (!PageIndexTupleOverwrite(page, dup->offno, (Item) etup, etupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Commit */ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + + return true; +} + +/* + * Write changes to disk + */ +static void +WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, int efConstruction, HnswElement dup, HnswElement entryPoint) +{ + BlockNumber newInsertPage = InvalidBlockNumber; + + /* Try to add to existing page */ + if (dup != NULL) + { + if (HnswAddDuplicate(index, element, dup)) + return; + } + + /* Write element and neighbor tuples */ + WriteNewElementPages(index, element, m, GetInsertPage(index), &newInsertPage); + + /* Update insert page if needed */ + if (BlockNumberIsValid(newInsertPage)) + HnswUpdateMetaPage(index, 0, NULL, newInsertPage, MAIN_FORKNUM); + + /* Update neighbors */ + HnswUpdateNeighborPages(index, procinfo, collation, element, m, false); + + /* Update metapage if needed */ + if (entryPoint == NULL || element->level > entryPoint->level) + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM); +} + +/* + * Insert a tuple into the index + */ +bool +HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heapRel) +{ + Datum value; + FmgrInfo *normprocinfo; + HnswElement entryPoint; + HnswElement element; + int m; + int efConstruction = 
HnswGetEfConstruction(index); + FmgrInfo *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + Oid collation = index->rd_indcollation[0]; + HnswElement dup; + LOCKMODE lockmode = ShareLock; + + /* Detoast once for all calls */ + value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Normalize if needed */ + normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + if (normprocinfo != NULL) + { + if (!HnswNormValue(normprocinfo, collation, &value, NULL)) + return false; + } + + /* + * Get a shared lock. This allows vacuum to ensure no in-flight inserts + * before repairing graph. Use a page lock so it does not interfere with + * buffer lock (or reads when vacuuming). + */ + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get m and entry point */ + HnswGetMetaPageInfo(index, &m, &entryPoint); + + /* Create an element */ + element = HnswInitElement(heap_tid, m, HnswGetMl(m), HnswGetMaxLevel(m)); + element->vec = DatumGetVector(value); + + /* Prevent concurrent inserts when likely updating entry point */ + if (entryPoint == NULL || element->level > entryPoint->level) + { + /* Release shared lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get exclusive lock */ + lockmode = ExclusiveLock; + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get latest entry point after lock is acquired */ + entryPoint = HnswGetEntryPoint(index); + } + + /* Insert element in graph */ + HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, false); + + /* Look for duplicate */ + dup = HnswFindDuplicate(element); + + /* Write to disk */ + WriteElement(index, procinfo, collation, element, m, efConstruction, dup, entryPoint); + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + return true; +} + +/* + * Insert a tuple into the index + */ +bool +hnswinsert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, + Relation heap, IndexUniqueCheck checkUnique +#if PG_VERSION_NUM >= 140000 + 
,bool indexUnchanged +#endif + ,IndexInfo *indexInfo +) +{ + MemoryContext oldCtx; + MemoryContext insertCtx; + + /* Skip nulls */ + if (isnull[0]) + return false; + + /* Create memory context */ + insertCtx = AllocSetContextCreate(CurrentMemoryContext, + "Hnsw insert temporary context", + ALLOCSET_DEFAULT_SIZES); + oldCtx = MemoryContextSwitchTo(insertCtx); + + /* Insert tuple */ + HnswInsertTuple(index, values, isnull, heap_tid, heap); + + /* Delete memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + + return false; +} diff --git a/external/pgvector/src/hnswscan.c b/external/pgvector/src/hnswscan.c new file mode 100644 index 00000000000..7cf2bf0d1c7 --- /dev/null +++ b/external/pgvector/src/hnswscan.c @@ -0,0 +1,229 @@ +#include "postgres.h" + +#include "access/relscan.h" +#include "hnsw.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Algorithm 5 from paper + */ +static List * +GetScanItems(IndexScanDesc scan, Datum q) +{ + HnswScanOpaque so = (HnswScanOpaque) scan->opaque; + Relation index = scan->indexRelation; + FmgrInfo *procinfo = so->procinfo; + Oid collation = so->collation; + List *ep; + List *w; + int m; + HnswElement entryPoint; + + /* Get m and entry point */ + HnswGetMetaPageInfo(index, &m, &entryPoint); + + if (entryPoint == NULL) + return NIL; + + ep = list_make1(HnswEntryCandidate(entryPoint, q, index, procinfo, collation, false)); + + for (int lc = entryPoint->level; lc >= 1; lc--) + { + w = HnswSearchLayer(q, ep, 1, lc, index, procinfo, collation, m, false, NULL); + ep = w; + } + + return HnswSearchLayer(q, ep, hnsw_ef_search, 0, index, procinfo, collation, m, false, NULL); +} + +/* + * Get dimensions from metapage + */ +static int +GetDimensions(Relation index) +{ + Buffer buf; + Page page; + HnswMetaPage metap; + int dimensions; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = 
BufferGetPage(buf); + metap = HnswPageGetMeta(page); + + dimensions = metap->dimensions; + + UnlockReleaseBuffer(buf); + + return dimensions; +} + +/* + * Get scan value + */ +static Datum +GetScanValue(IndexScanDesc scan) +{ + HnswScanOpaque so = (HnswScanOpaque) scan->opaque; + Datum value; + + if (scan->orderByData->sk_flags & SK_ISNULL) + value = PointerGetDatum(InitVector(GetDimensions(scan->indexRelation))); + else + { + value = scan->orderByData->sk_argument; + + /* Value should not be compressed or toasted */ + Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); + Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value))); + + /* Fine if normalization fails */ + if (so->normprocinfo != NULL) + HnswNormValue(so->normprocinfo, so->collation, &value, NULL); + } + + return value; +} + +/* + * Prepare for an index scan + */ +IndexScanDesc +hnswbeginscan(Relation index, int nkeys, int norderbys) +{ + IndexScanDesc scan; + HnswScanOpaque so; + + scan = RelationGetIndexScan(index, nkeys, norderbys); + + so = (HnswScanOpaque) palloc(sizeof(HnswScanOpaqueData)); + so->first = true; + so->tmpCtx = AllocSetContextCreate(CurrentMemoryContext, + "Hnsw scan temporary context", + ALLOCSET_DEFAULT_SIZES); + + /* Set support functions */ + so->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + so->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + so->collation = index->rd_indcollation[0]; + + scan->opaque = so; + + return scan; +} + +/* + * Start or restart an index scan + */ +void +hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys) +{ + HnswScanOpaque so = (HnswScanOpaque) scan->opaque; + + so->first = true; + MemoryContextReset(so->tmpCtx); + + if (keys && scan->numberOfKeys > 0) + memmove(scan->keyData, keys, scan->numberOfKeys * sizeof(ScanKeyData)); + + if (orderbys && scan->numberOfOrderBys > 0) + memmove(scan->orderByData, orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); +} + +/* + * Fetch the next tuple 
in the given scan + */ +bool +hnswgettuple(IndexScanDesc scan, ScanDirection dir) +{ + HnswScanOpaque so = (HnswScanOpaque) scan->opaque; + MemoryContext oldCtx = MemoryContextSwitchTo(so->tmpCtx); + + /* + * Index can be used to scan backward, but Postgres doesn't support + * backward scan on operators + */ + Assert(ScanDirectionIsForward(dir)); + + if (so->first) + { + Datum value; + + /* Count index scan for stats */ + pgstat_count_index_scan(scan->indexRelation); + + /* Safety check */ + if (scan->orderByData == NULL) + elog(ERROR, "cannot scan hnsw index without order"); + + /* Requires MVCC-compliant snapshot as not able to maintain a pin */ + /* https://www.postgresql.org/docs/current/index-locking.html */ + if (!IsMVCCSnapshot(scan->xs_snapshot)) + elog(ERROR, "non-MVCC snapshots are not supported with hnsw"); + + /* Get scan value */ + value = GetScanValue(scan); + + /* + * Get a shared lock. This allows vacuum to ensure no in-flight scans + * before marking tuples as deleted. + */ + LockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock); + + so->w = GetScanItems(scan, value); + + /* Release shared lock */ + UnlockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock); + + so->first = false; + } + + while (list_length(so->w) > 0) + { + HnswCandidate *hc = llast(so->w); + ItemPointer heaptid; + + /* Move to next element if no valid heap TIDs */ + if (list_length(hc->element->heaptids) == 0) + { + so->w = list_delete_last(so->w); + continue; + } + + heaptid = llast(hc->element->heaptids); + + hc->element->heaptids = list_delete_last(hc->element->heaptids); + + MemoryContextSwitchTo(oldCtx); + +#if PG_VERSION_NUM >= 120000 + scan->xs_heaptid = *heaptid; +#else + scan->xs_ctup.t_self = *heaptid; +#endif + + scan->xs_recheckorderby = false; + return true; + } + + MemoryContextSwitchTo(oldCtx); + return false; +} + +/* + * End a scan and release resources + */ +void +hnswendscan(IndexScanDesc scan) +{ + HnswScanOpaque so = (HnswScanOpaque) scan->opaque; + + 
MemoryContextDelete(so->tmpCtx); + + pfree(so); + scan->opaque = NULL; +} diff --git a/external/pgvector/src/hnswutils.c b/external/pgvector/src/hnswutils.c new file mode 100644 index 00000000000..e7d17051aa7 --- /dev/null +++ b/external/pgvector/src/hnswutils.c @@ -0,0 +1,1072 @@ +#include "postgres.h" + +#include + +#include "hnsw.h" +#include "storage/bufmgr.h" +#include "vector.h" + +/* + * Get the max number of connections in an upper layer for each element in the index + */ +int +HnswGetM(Relation index) +{ + HnswOptions *opts = (HnswOptions *) index->rd_options; + + if (opts) + return opts->m; + + return HNSW_DEFAULT_M; +} + +/* + * Get the size of the dynamic candidate list in the index + */ +int +HnswGetEfConstruction(Relation index) +{ + HnswOptions *opts = (HnswOptions *) index->rd_options; + + if (opts) + return opts->efConstruction; + + return HNSW_DEFAULT_EF_CONSTRUCTION; +} + +/* + * Get proc + */ +FmgrInfo * +HnswOptionalProcInfo(Relation index, uint16 procnum) +{ + if (!OidIsValid(index_getprocid(index, 1, procnum))) + return NULL; + + return index_getprocinfo(index, 1, procnum); +} + +/* + * Divide by the norm + * + * Returns false if value should not be indexed + * + * The caller needs to free the pointer stored in value + * if it's different than the original value + */ +bool +HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, Vector * result) +{ + double norm = DatumGetFloat8(FunctionCall1Coll(procinfo, collation, *value)); + + if (norm > 0) + { + Vector *v = DatumGetVector(*value); + + if (result == NULL) + result = InitVector(v->dim); + + for (int i = 0; i < v->dim; i++) + result->x[i] = v->x[i] / norm; + + *value = PointerGetDatum(result); + + return true; + } + + return false; +} + +/* + * New buffer + */ +Buffer +HnswNewBuffer(Relation index, ForkNumber forkNum) +{ + Buffer buf = ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + return buf; +} + +/* + * Init page + */ 
+void +HnswInitPage(Buffer buf, Page page) +{ + PageInit(page, BufferGetPageSize(buf), sizeof(HnswPageOpaqueData)); + HnswPageGetOpaque(page)->nextblkno = InvalidBlockNumber; + HnswPageGetOpaque(page)->page_id = HNSW_PAGE_ID; +} + +/* + * Init and register page + */ +void +HnswInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state) +{ + *state = GenericXLogStart(index); + *page = GenericXLogRegisterBuffer(*state, *buf, GENERIC_XLOG_FULL_IMAGE); + HnswInitPage(*buf, *page); +} + +/* + * Commit buffer + */ +void +HnswCommitBuffer(Buffer buf, GenericXLogState *state) +{ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); +} + +/* + * Allocate neighbors + */ +void +HnswInitNeighbors(HnswElement element, int m) +{ + int level = element->level; + + element->neighbors = palloc(sizeof(HnswNeighborArray) * (level + 1)); + + for (int lc = 0; lc <= level; lc++) + { + HnswNeighborArray *a; + int lm = HnswGetLayerM(m, lc); + + a = &element->neighbors[lc]; + a->length = 0; + a->items = palloc(sizeof(HnswCandidate) * lm); + a->closerSet = false; + } +} + +/* + * Free neighbors + */ +static void +HnswFreeNeighbors(HnswElement element) +{ + for (int lc = 0; lc <= element->level; lc++) + pfree(element->neighbors[lc].items); + pfree(element->neighbors); +} + +/* + * Allocate an element + */ +HnswElement +HnswInitElement(ItemPointer heaptid, int m, double ml, int maxLevel) +{ + HnswElement element = palloc(sizeof(HnswElementData)); + + int level = (int) (-log(RandomDouble()) * ml); + + /* Cap level */ + if (level > maxLevel) + level = maxLevel; + + element->heaptids = NIL; + HnswAddHeapTid(element, heaptid); + + element->level = level; + element->deleted = 0; + + HnswInitNeighbors(element, m); + + return element; +} + +/* + * Free an element + */ +void +HnswFreeElement(HnswElement element) +{ + HnswFreeNeighbors(element); + list_free_deep(element->heaptids); + pfree(element->vec); + pfree(element); +} + +/* + * Add a heap TID to an element + */ +void 
+HnswAddHeapTid(HnswElement element, ItemPointer heaptid) +{ + ItemPointer copy = palloc(sizeof(ItemPointerData)); + + ItemPointerCopy(heaptid, copy); + element->heaptids = lappend(element->heaptids, copy); +} + +/* + * Allocate an element from block and offset numbers + */ +HnswElement +HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno) +{ + HnswElement element = palloc(sizeof(HnswElementData)); + + element->blkno = blkno; + element->offno = offno; + element->neighbors = NULL; + element->vec = NULL; + return element; +} + +/* + * Get the metapage info + */ +void +HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint) +{ + Buffer buf; + Page page; + HnswMetaPage metap; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = HnswPageGetMeta(page); + + if (m != NULL) + *m = metap->m; + + if (entryPoint != NULL) + { + if (BlockNumberIsValid(metap->entryBlkno)) + *entryPoint = HnswInitElementFromBlock(metap->entryBlkno, metap->entryOffno); + else + *entryPoint = NULL; + } + + UnlockReleaseBuffer(buf); +} + +/* + * Get the entry point + */ +HnswElement +HnswGetEntryPoint(Relation index) +{ + HnswElement entryPoint; + + HnswGetMetaPageInfo(index, NULL, &entryPoint); + + return entryPoint; +} + +/* + * Update the metapage info + */ +static void +HnswUpdateMetaPageInfo(Page page, int updateEntry, HnswElement entryPoint, BlockNumber insertPage) +{ + HnswMetaPage metap = HnswPageGetMeta(page); + + if (updateEntry) + { + if (entryPoint == NULL) + { + metap->entryBlkno = InvalidBlockNumber; + metap->entryOffno = InvalidOffsetNumber; + metap->entryLevel = -1; + } + else if (entryPoint->level > metap->entryLevel || updateEntry == HNSW_UPDATE_ENTRY_ALWAYS) + { + metap->entryBlkno = entryPoint->blkno; + metap->entryOffno = entryPoint->offno; + metap->entryLevel = entryPoint->level; + } + } + + if (BlockNumberIsValid(insertPage)) + metap->insertPage = insertPage; +} + +/* + * Update 
the metapage + */ +void +HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum) +{ + Buffer buf; + Page page; + GenericXLogState *state; + + buf = ReadBufferExtended(index, forkNum, HNSW_METAPAGE_BLKNO, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + + HnswUpdateMetaPageInfo(page, updateEntry, entryPoint, insertPage); + + HnswCommitBuffer(buf, state); +} + +/* + * Set element tuple, except for neighbor info + */ +void +HnswSetElementTuple(HnswElementTuple etup, HnswElement element) +{ + etup->type = HNSW_ELEMENT_TUPLE_TYPE; + etup->level = element->level; + etup->deleted = 0; + for (int i = 0; i < HNSW_HEAPTIDS; i++) + { + if (i < list_length(element->heaptids)) + etup->heaptids[i] = *((ItemPointer) list_nth(element->heaptids, i)); + else + ItemPointerSetInvalid(&etup->heaptids[i]); + } + memcpy(&etup->vec, element->vec, VECTOR_SIZE(element->vec->dim)); +} + +/* + * Set neighbor tuple + */ +void +HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m) +{ + int idx = 0; + + ntup->type = HNSW_NEIGHBOR_TUPLE_TYPE; + + for (int lc = e->level; lc >= 0; lc--) + { + HnswNeighborArray *neighbors = &e->neighbors[lc]; + int lm = HnswGetLayerM(m, lc); + + for (int i = 0; i < lm; i++) + { + ItemPointer indextid = &ntup->indextids[idx++]; + + if (i < neighbors->length) + { + HnswCandidate *hc = &neighbors->items[i]; + + ItemPointerSet(indextid, hc->element->blkno, hc->element->offno); + } + else + ItemPointerSetInvalid(indextid); + } + } + + ntup->count = idx; +} + +/* + * Load neighbors from page + */ +static void +LoadNeighborsFromPage(HnswElement element, Relation index, Page page, int m) +{ + HnswNeighborTuple ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + int neighborCount = (element->level + 2) * m; + + Assert(HnswIsNeighborTuple(ntup)); + + 
HnswInitNeighbors(element, m); + + /* Ensure expected neighbors */ + if (ntup->count != neighborCount) + return; + + for (int i = 0; i < neighborCount; i++) + { + HnswElement e; + int level; + HnswCandidate *hc; + ItemPointer indextid; + HnswNeighborArray *neighbors; + + indextid = &ntup->indextids[i]; + + if (!ItemPointerIsValid(indextid)) + continue; + + e = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid)); + + /* Calculate level based on offset */ + level = element->level - i / m; + if (level < 0) + level = 0; + + neighbors = &element->neighbors[level]; + hc = &neighbors->items[neighbors->length++]; + hc->element = e; + } +} + +/* + * Load neighbors + */ +void +HnswLoadNeighbors(HnswElement element, Relation index, int m) +{ + Buffer buf; + Page page; + + buf = ReadBuffer(index, element->neighborPage); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + LoadNeighborsFromPage(element, index, page, m); + + UnlockReleaseBuffer(buf); +} + +/* + * Load an element from a tuple + */ +void +HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec) +{ + element->level = etup->level; + element->deleted = etup->deleted; + element->neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + element->neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + element->heaptids = NIL; + + if (loadHeaptids) + { + for (int i = 0; i < HNSW_HEAPTIDS; i++) + { + /* Can stop at first invalid */ + if (!ItemPointerIsValid(&etup->heaptids[i])) + break; + + HnswAddHeapTid(element, &etup->heaptids[i]); + } + } + + if (loadVec) + { + element->vec = palloc(VECTOR_SIZE(etup->vec.dim)); + memcpy(element->vec, &etup->vec, VECTOR_SIZE(etup->vec.dim)); + } +} + +/* + * Load an element and optionally get its distance from q + */ +void +HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec) +{ + 
Buffer buf; + Page page; + HnswElementTuple etup; + + /* Read vector */ + buf = ReadBuffer(index, element->blkno); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, element->offno)); + + Assert(HnswIsElementTuple(etup)); + + /* Load element */ + HnswLoadElementFromTuple(element, etup, true, loadVec); + + /* Calculate distance */ + if (distance != NULL) + *distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->vec))); + + UnlockReleaseBuffer(buf); +} + +/* + * Get the distance for a candidate + */ +static float +GetCandidateDistance(HnswCandidate * hc, Datum q, FmgrInfo *procinfo, Oid collation) +{ + return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, q, PointerGetDatum(hc->element->vec))); +} + +/* + * Create a candidate for the entry point + */ +HnswCandidate * +HnswEntryCandidate(HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec) +{ + HnswCandidate *hc = palloc(sizeof(HnswCandidate)); + + hc->element = entryPoint; + if (index == NULL) + hc->distance = GetCandidateDistance(hc, q, procinfo, collation); + else + HnswLoadElement(hc->element, &hc->distance, &q, index, procinfo, collation, loadVec); + return hc; +} + +/* + * Compare candidate distances + */ +static int +CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (((const HnswPairingHeapNode *) a)->inner->distance < ((const HnswPairingHeapNode *) b)->inner->distance) + return 1; + + if (((const HnswPairingHeapNode *) a)->inner->distance > ((const HnswPairingHeapNode *) b)->inner->distance) + return -1; + + return 0; +} + +/* + * Compare candidate distances + */ +static int +CompareFurthestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (((const HnswPairingHeapNode *) a)->inner->distance < ((const HnswPairingHeapNode *) b)->inner->distance) + return 
-1; + + if (((const HnswPairingHeapNode *) a)->inner->distance > ((const HnswPairingHeapNode *) b)->inner->distance) + return 1; + + return 0; +} + +/* + * Create a pairing heap node for a candidate + */ +static HnswPairingHeapNode * +CreatePairingHeapNode(HnswCandidate * c) +{ + HnswPairingHeapNode *node = palloc(sizeof(HnswPairingHeapNode)); + + node->inner = c; + return node; +} + +/* + * Add to visited + */ +static inline void +AddToVisited(HTAB *v, HnswCandidate * hc, Relation index, bool *found) +{ + if (index == NULL) + hash_search(v, &hc->element, HASH_ENTER, found); + else + { + ItemPointerData indextid; + + ItemPointerSet(&indextid, hc->element->blkno, hc->element->offno); + hash_search(v, &indextid, HASH_ENTER, found); + } +} + +/* + * Algorithm 2 from paper + */ +List * +HnswSearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement) +{ + ListCell *lc2; + + List *w = NIL; + pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL); + pairingheap *W = pairingheap_allocate(CompareFurthestCandidates, NULL); + int wlen = 0; + HASHCTL hash_ctl; + HTAB *v; + + /* Create hash table */ + if (index == NULL) + { + hash_ctl.keysize = sizeof(HnswElement *); + hash_ctl.entrysize = sizeof(HnswElement *); + } + else + { + hash_ctl.keysize = sizeof(ItemPointerData); + hash_ctl.entrysize = sizeof(ItemPointerData); + } + + hash_ctl.hcxt = CurrentMemoryContext; + v = hash_create("hnsw visited", 256, &hash_ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + /* Add entry points to v, C, and W */ + foreach(lc2, ep) + { + HnswCandidate *hc = (HnswCandidate *) lfirst(lc2); + + AddToVisited(v, hc, index, NULL); + + pairingheap_add(C, &(CreatePairingHeapNode(hc)->ph_node)); + pairingheap_add(W, &(CreatePairingHeapNode(hc)->ph_node)); + + /* + * Do not count elements being deleted towards ef when vacuuming. 
It + * would be ideal to do this for inserts as well, but this could + * affect insert performance. + */ + if (skipElement == NULL || list_length(hc->element->heaptids) != 0) + wlen++; + } + + while (!pairingheap_is_empty(C)) + { + HnswNeighborArray *neighborhood; + HnswCandidate *c = ((HnswPairingHeapNode *) pairingheap_remove_first(C))->inner; + HnswCandidate *f = ((HnswPairingHeapNode *) pairingheap_first(W))->inner; + + if (c->distance > f->distance) + break; + + if (c->element->neighbors == NULL) + HnswLoadNeighbors(c->element, index, m); + + /* Get the neighborhood at layer lc */ + neighborhood = &c->element->neighbors[lc]; + + for (int i = 0; i < neighborhood->length; i++) + { + HnswCandidate *e = &neighborhood->items[i]; + bool visited; + + AddToVisited(v, e, index, &visited); + + if (!visited) + { + float eDistance; + + f = ((HnswPairingHeapNode *) pairingheap_first(W))->inner; + + if (index == NULL) + eDistance = GetCandidateDistance(e, q, procinfo, collation); + else + HnswLoadElement(e->element, &eDistance, &q, index, procinfo, collation, inserting); + + Assert(!e->element->deleted); + + /* Make robust to issues */ + if (e->element->level < lc) + continue; + + if (eDistance < f->distance || wlen < ef) + { + /* Copy e */ + HnswCandidate *ec = palloc(sizeof(HnswCandidate)); + + ec->element = e->element; + ec->distance = eDistance; + + pairingheap_add(C, &(CreatePairingHeapNode(ec)->ph_node)); + pairingheap_add(W, &(CreatePairingHeapNode(ec)->ph_node)); + + /* + * Do not count elements being deleted towards ef when + * vacuuming. It would be ideal to do this for inserts as + * well, but this could affect insert performance. 
+ */ + if (skipElement == NULL || list_length(e->element->heaptids) != 0) + { + wlen++; + + /* No need to decrement wlen */ + if (wlen > ef) + pairingheap_remove_first(W); + } + } + } + } + } + + /* Add each element of W to w */ + while (!pairingheap_is_empty(W)) + { + HnswCandidate *hc = ((HnswPairingHeapNode *) pairingheap_remove_first(W))->inner; + + w = lappend(w, hc); + } + + return w; +} + +/* + * Compare candidate distances + */ +static int +#if PG_VERSION_NUM >= 130000 +CompareCandidateDistances(const ListCell *a, const ListCell *b) +#else +CompareCandidateDistances(const void *a, const void *b) +#endif +{ + HnswCandidate *hca = lfirst((ListCell *) a); + HnswCandidate *hcb = lfirst((ListCell *) b); + + if (hca->distance < hcb->distance) + return 1; + + if (hca->distance > hcb->distance) + return -1; + + if (hca->element < hcb->element) + return 1; + + if (hca->element > hcb->element) + return -1; + + return 0; +} + +/* + * Calculate the distance between elements + */ +static float +HnswGetDistance(HnswElement a, HnswElement b, int lc, FmgrInfo *procinfo, Oid collation) +{ + /* Look for cached distance */ + if (a->neighbors != NULL) + { + Assert(a->level >= lc); + + for (int i = 0; i < a->neighbors[lc].length; i++) + { + if (a->neighbors[lc].items[i].element == b) + return a->neighbors[lc].items[i].distance; + } + } + + if (b->neighbors != NULL) + { + Assert(b->level >= lc); + + for (int i = 0; i < b->neighbors[lc].length; i++) + { + if (b->neighbors[lc].items[i].element == a) + return b->neighbors[lc].items[i].distance; + } + } + + return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(a->vec), PointerGetDatum(b->vec))); +} + +/* + * Check if an element is closer to q than any element from R + */ +static bool +CheckElementCloser(HnswCandidate * e, List *r, int lc, FmgrInfo *procinfo, Oid collation) +{ + ListCell *lc2; + + foreach(lc2, r) + { + HnswCandidate *ri = lfirst(lc2); + float distance = HnswGetDistance(e->element, ri->element, 
lc, procinfo, collation); + + if (distance <= e->distance) + return false; + } + + return true; +} + +/* + * Algorithm 4 from paper + */ +static List * +SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswElement e2, HnswCandidate * newCandidate, HnswCandidate * *pruned, bool sortCandidates) +{ + List *r = NIL; + List *w = list_copy(c); + pairingheap *wd; + bool mustCalculate = !e2->neighbors[lc].closerSet; + List *added = NIL; + bool removedAny = false; + + if (list_length(w) <= m) + return w; + + wd = pairingheap_allocate(CompareNearestCandidates, NULL); + + /* Ensure order of candidates is deterministic for closer caching */ + if (sortCandidates) + list_sort(w, CompareCandidateDistances); + + while (list_length(w) > 0 && list_length(r) < m) + { + /* Assumes w is already ordered desc */ + HnswCandidate *e = llast(w); + + w = list_delete_last(w); + + /* Use previous state of r and wd to skip work when possible */ + if (mustCalculate) + e->closer = CheckElementCloser(e, r, lc, procinfo, collation); + else if (list_length(added) > 0) + { + /* + * If the current candidate was closer, we only need to compare it + * with the other candidates that we have added. + */ + if (e->closer) + { + e->closer = CheckElementCloser(e, added, lc, procinfo, collation); + + if (!e->closer) + removedAny = true; + } + else + { + /* + * If we have removed any candidates from closer, a candidate + * that was not closer earlier might now be. 
+ */ + if (removedAny) + { + e->closer = CheckElementCloser(e, r, lc, procinfo, collation); + if (e->closer) + added = lappend(added, e); + } + } + } + else if (e == newCandidate) + { + e->closer = CheckElementCloser(e, r, lc, procinfo, collation); + if (e->closer) + added = lappend(added, e); + } + + if (e->closer) + r = lappend(r, e); + else + pairingheap_add(wd, &(CreatePairingHeapNode(e)->ph_node)); + } + + /* Cached value can only be used in future if sorted deterministically */ + e2->neighbors[lc].closerSet = sortCandidates; + + /* Keep pruned connections */ + while (!pairingheap_is_empty(wd) && list_length(r) < m) + r = lappend(r, ((HnswPairingHeapNode *) pairingheap_remove_first(wd))->inner); + + /* Return pruned for update connections */ + if (pruned != NULL) + { + if (!pairingheap_is_empty(wd)) + *pruned = ((HnswPairingHeapNode *) pairingheap_first(wd))->inner; + else + *pruned = linitial(w); + } + + return r; +} + +/* + * Find duplicate element + */ +HnswElement +HnswFindDuplicate(HnswElement e) +{ + HnswNeighborArray *neighbors = &e->neighbors[0]; + + for (int i = 0; i < neighbors->length; i++) + { + HnswCandidate *neighbor = &neighbors->items[i]; + + /* Exit early since ordered by distance */ + if (vector_cmp_internal(e->vec, neighbor->element->vec) != 0) + break; + + /* Check for space */ + if (list_length(neighbor->element->heaptids) < HNSW_HEAPTIDS) + return neighbor->element; + } + + return NULL; +} + +/* + * Add connections + */ +static void +AddConnections(HnswElement element, List *neighbors, int m, int lc) +{ + ListCell *lc2; + HnswNeighborArray *a = &element->neighbors[lc]; + + foreach(lc2, neighbors) + a->items[a->length++] = *((HnswCandidate *) lfirst(lc2)); +} + +/* + * Update connections + */ +void +HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) +{ + HnswNeighborArray *currentNeighbors = &hc->element->neighbors[lc]; + + HnswCandidate hc2; + + 
hc2.element = element; + hc2.distance = hc->distance; + + if (currentNeighbors->length < m) + { + currentNeighbors->items[currentNeighbors->length++] = hc2; + + /* Track update */ + if (updateIdx != NULL) + *updateIdx = -2; + } + else + { + /* Shrink connections */ + HnswCandidate *pruned = NULL; + + /* Load elements on insert */ + if (index != NULL) + { + Datum q = PointerGetDatum(hc->element->vec); + + for (int i = 0; i < currentNeighbors->length; i++) + { + HnswCandidate *hc3 = &currentNeighbors->items[i]; + + if (hc3->element->vec == NULL) + HnswLoadElement(hc3->element, &hc3->distance, &q, index, procinfo, collation, true); + else + hc3->distance = GetCandidateDistance(hc3, q, procinfo, collation); + + /* Prune element if being deleted */ + if (list_length(hc3->element->heaptids) == 0) + { + pruned = &currentNeighbors->items[i]; + break; + } + } + } + + if (pruned == NULL) + { + List *c = NIL; + + /* Add candidates */ + for (int i = 0; i < currentNeighbors->length; i++) + c = lappend(c, &currentNeighbors->items[i]); + c = lappend(c, &hc2); + + SelectNeighbors(c, m, lc, procinfo, collation, hc->element, &hc2, &pruned, true); + + /* Should not happen */ + if (pruned == NULL) + return; + } + + /* Find and replace the pruned element */ + for (int i = 0; i < currentNeighbors->length; i++) + { + if (currentNeighbors->items[i].element == pruned->element) + { + currentNeighbors->items[i] = hc2; + + /* Track update */ + if (updateIdx != NULL) + *updateIdx = i; + + break; + } + } + } +} + +/* + * Remove elements being deleted or skipped + */ +static List * +RemoveElements(List *w, HnswElement skipElement) +{ + ListCell *lc2; + List *w2 = NIL; + + foreach(lc2, w) + { + HnswCandidate *hc = (HnswCandidate *) lfirst(lc2); + + /* Skip self for vacuuming update */ + if (skipElement != NULL && hc->element->blkno == skipElement->blkno && hc->element->offno == skipElement->offno) + continue; + + if (list_length(hc->element->heaptids) != 0) + w2 = lappend(w2, hc); + } + + return w2; +} + +/* + * 
Algorithm 1 from paper + */ +void +HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing) +{ + List *ep; + List *w; + int level = element->level; + int entryLevel; + Datum q = PointerGetDatum(element->vec); + HnswElement skipElement = existing ? element : NULL; + + /* No neighbors if no entry point */ + if (entryPoint == NULL) + return; + + /* Get entry point and level */ + ep = list_make1(HnswEntryCandidate(entryPoint, q, index, procinfo, collation, true)); + entryLevel = entryPoint->level; + + /* 1st phase: greedy search to insert level */ + for (int lc = entryLevel; lc >= level + 1; lc--) + { + w = HnswSearchLayer(q, ep, 1, lc, index, procinfo, collation, m, true, skipElement); + ep = w; + } + + if (level > entryLevel) + level = entryLevel; + + /* Add one for existing element */ + if (existing) + efConstruction++; + + /* 2nd phase */ + for (int lc = level; lc >= 0; lc--) + { + int lm = HnswGetLayerM(m, lc); + List *neighbors; + List *lw; + + w = HnswSearchLayer(q, ep, efConstruction, lc, index, procinfo, collation, m, true, skipElement); + + /* Elements being deleted or skipped can help with search */ + /* but should be removed before selecting neighbors */ + if (index != NULL) + lw = RemoveElements(w, skipElement); + else + lw = w; + + /* + * Candidates are sorted, but not deterministically. Could set + * sortCandidates to true for in-memory builds to enable closer + * caching, but there does not seem to be a difference in performance. 
+ */ + neighbors = SelectNeighbors(lw, lm, lc, procinfo, collation, element, NULL, NULL, false); + + AddConnections(element, neighbors, lm, lc); + + ep = w; + } +} diff --git a/external/pgvector/src/hnswvacuum.c b/external/pgvector/src/hnswvacuum.c new file mode 100644 index 00000000000..29b675fcae1 --- /dev/null +++ b/external/pgvector/src/hnswvacuum.c @@ -0,0 +1,660 @@ +#include "postgres.h" + +#include + +#include "commands/vacuum.h" +#include "hnsw.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Check if deleted list contains an index TID + */ +static bool +DeletedContains(HTAB *deleted, ItemPointer indextid) +{ + bool found; + + hash_search(deleted, indextid, HASH_FIND, &found); + return found; +} + +/* + * Remove deleted heap TIDs + * + * OK to remove for entry point, since always considered for searches and inserts + */ +static void +RemoveHeapTids(HnswVacuumState * vacuumstate) +{ + BlockNumber blkno = HNSW_HEAD_BLKNO; + HnswElement highestPoint = &vacuumstate->highestPoint; + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + HnswElement entryPoint = HnswGetEntryPoint(vacuumstate->index); + IndexBulkDeleteResult *stats = vacuumstate->stats; + + /* Store separately since highestPoint.level is uint8 */ + int highestLevel = -1; + + /* Initialize highest point */ + highestPoint->blkno = InvalidBlockNumber; + highestPoint->offno = InvalidOffsetNumber; + + while (BlockNumberIsValid(blkno)) + { + Buffer buf; + Page page; + GenericXLogState *state; + OffsetNumber offno; + OffsetNumber maxoffno; + bool updated = false; + + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Iterate over nodes */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = 
OffsetNumberNext(offno)) + { + HnswElementTuple etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, offno)); + int idx = 0; + bool itemUpdated = false; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + if (ItemPointerIsValid(&etup->heaptids[0])) + { + for (int i = 0; i < HNSW_HEAPTIDS; i++) + { + /* Stop at first unused */ + if (!ItemPointerIsValid(&etup->heaptids[i])) + break; + + if (vacuumstate->callback(&etup->heaptids[i], vacuumstate->callback_state)) + { + itemUpdated = true; + stats->tuples_removed++; + } + else + { + /* Move to front of list */ + etup->heaptids[idx++] = etup->heaptids[i]; + stats->num_index_tuples++; + } + } + + if (itemUpdated) + { + Size etupSize = HNSW_ELEMENT_TUPLE_SIZE(etup->vec.dim); + + /* Mark rest as invalid */ + for (int i = idx; i < HNSW_HEAPTIDS; i++) + ItemPointerSetInvalid(&etup->heaptids[i]); + + if (!PageIndexTupleOverwrite(page, offno, (Item) etup, etupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + updated = true; + } + } + + if (!ItemPointerIsValid(&etup->heaptids[0])) + { + ItemPointerData ip; + + /* Add to deleted list */ + ItemPointerSet(&ip, blkno, offno); + + (void) hash_search(vacuumstate->deleted, &ip, HASH_ENTER, NULL); + } + else if (etup->level > highestLevel && !(entryPoint != NULL && blkno == entryPoint->blkno && offno == entryPoint->offno)) + { + /* Keep track of highest non-entry point */ + highestPoint->blkno = blkno; + highestPoint->offno = offno; + highestPoint->level = etup->level; + highestLevel = etup->level; + } + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + if (updated) + GenericXLogFinish(state); + else + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); + } +} + +/* + * Check for deleted neighbors + */ +static bool +NeedsUpdated(HnswVacuumState * vacuumstate, HnswElement element) +{ + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + Buffer buf; + Page page; + 
HnswNeighborTuple ntup; + bool needsUpdated = false; + + buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + + Assert(HnswIsNeighborTuple(ntup)); + + /* Check neighbors */ + for (int i = 0; i < ntup->count; i++) + { + ItemPointer indextid = &ntup->indextids[i]; + + if (!ItemPointerIsValid(indextid)) + continue; + + /* Check if in deleted list */ + if (DeletedContains(vacuumstate->deleted, indextid)) + { + needsUpdated = true; + break; + } + } + + /* Also update if layer 0 is not full */ + /* This could indicate too many candidates being deleted during insert */ + if (!needsUpdated) + needsUpdated = !ItemPointerIsValid(&ntup->indextids[ntup->count - 1]); + + UnlockReleaseBuffer(buf); + + return needsUpdated; +} + +/* + * Repair graph for a single element + */ +static void +RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element, HnswElement entryPoint) +{ + Relation index = vacuumstate->index; + Buffer buf; + Page page; + GenericXLogState *state; + int m = vacuumstate->m; + int efConstruction = vacuumstate->efConstruction; + FmgrInfo *procinfo = vacuumstate->procinfo; + Oid collation = vacuumstate->collation; + BufferAccessStrategy bas = vacuumstate->bas; + HnswNeighborTuple ntup = vacuumstate->ntup; + Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, m); + + /* Skip if element is entry point */ + if (entryPoint != NULL && element->blkno == entryPoint->blkno && element->offno == entryPoint->offno) + return; + + /* Init fields */ + HnswInitNeighbors(element, m); + element->heaptids = NIL; + + /* Add element to graph, skipping itself */ + HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, true); + + /* Update neighbor tuple */ + /* Do this before getting page to minimize locking */ + HnswSetNeighborTuple(ntup, element, 
m); + + /* Get neighbor page */ + buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + + /* Overwrite tuple */ + if (!PageIndexTupleOverwrite(page, element->neighborOffno, (Item) ntup, ntupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Commit */ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + + /* Update neighbors */ + HnswUpdateNeighborPages(index, procinfo, collation, element, m, true); +} + +/* + * Repair graph entry point + */ +static void +RepairGraphEntryPoint(HnswVacuumState * vacuumstate) +{ + Relation index = vacuumstate->index; + HnswElement highestPoint = &vacuumstate->highestPoint; + HnswElement entryPoint; + MemoryContext oldCtx = MemoryContextSwitchTo(vacuumstate->tmpCtx); + + if (!BlockNumberIsValid(highestPoint->blkno)) + highestPoint = NULL; + + /* + * Repair graph for highest non-entry point. Highest point may be outdated + * due to inserts that happen during and after RemoveHeapTids. 
+ */ + if (highestPoint != NULL) + { + /* Get a shared lock */ + LockPage(index, HNSW_UPDATE_LOCK, ShareLock); + + /* Load element */ + HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true); + + /* Repair if needed */ + if (NeedsUpdated(vacuumstate, highestPoint)) + RepairGraphElement(vacuumstate, highestPoint, HnswGetEntryPoint(index)); + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, ShareLock); + } + + /* Prevent concurrent inserts when possibly updating entry point */ + LockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Get latest entry point */ + entryPoint = HnswGetEntryPoint(index); + + if (entryPoint != NULL) + { + ItemPointerData epData; + + ItemPointerSet(&epData, entryPoint->blkno, entryPoint->offno); + + if (DeletedContains(vacuumstate->deleted, &epData)) + { + /* + * Replace the entry point with the highest point. If highest + * point is outdated and empty, the entry point will be empty + * until an element is repaired. + */ + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_ALWAYS, highestPoint, InvalidBlockNumber, MAIN_FORKNUM); + } + else + { + /* + * Repair the entry point with the highest point. If highest point + * is outdated, this can remove connections at higher levels in + * the graph until they are repaired, but this should be fine. 
+ */ + HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true); + + if (NeedsUpdated(vacuumstate, entryPoint)) + { + /* Reset neighbors from previous update */ + if (highestPoint != NULL) + highestPoint->neighbors = NULL; + + RepairGraphElement(vacuumstate, entryPoint, highestPoint); + } + } + } + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(vacuumstate->tmpCtx); +} + +/* + * Repair graph for all elements + */ +static void +RepairGraph(HnswVacuumState * vacuumstate) +{ + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + BlockNumber blkno = HNSW_HEAD_BLKNO; + + /* + * Wait for inserts to complete. Inserts before this point may have + * neighbors about to be deleted. Inserts after this point will not. + */ + LockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + UnlockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Repair entry point first */ + RepairGraphEntryPoint(vacuumstate); + + while (BlockNumberIsValid(blkno)) + { + Buffer buf; + Page page; + OffsetNumber offno; + OffsetNumber maxoffno; + List *elements = NIL; + ListCell *lc2; + MemoryContext oldCtx; + + vacuum_delay_point(); + + oldCtx = MemoryContextSwitchTo(vacuumstate->tmpCtx); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Load items into memory to minimize locking */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) + { + HnswElementTuple etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, offno)); + HnswElement element; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + /* Skip updating neighbors if being deleted */ + if (!ItemPointerIsValid(&etup->heaptids[0])) + continue; + + /* Create an 
element */ + element = HnswInitElementFromBlock(blkno, offno); + HnswLoadElementFromTuple(element, etup, false, true); + + elements = lappend(elements, element); + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + UnlockReleaseBuffer(buf); + + /* Update neighbor pages */ + foreach(lc2, elements) + { + HnswElement element = (HnswElement) lfirst(lc2); + HnswElement entryPoint; + LOCKMODE lockmode = ShareLock; + + /* Check if any neighbors point to deleted values */ + if (!NeedsUpdated(vacuumstate, element)) + continue; + + /* Get a shared lock */ + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Refresh entry point for each element */ + entryPoint = HnswGetEntryPoint(index); + + /* Prevent concurrent inserts when likely updating entry point */ + if (entryPoint == NULL || element->level > entryPoint->level) + { + /* Release shared lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get exclusive lock */ + lockmode = ExclusiveLock; + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get latest entry point after lock is acquired */ + entryPoint = HnswGetEntryPoint(index); + } + + /* Repair connections */ + RepairGraphElement(vacuumstate, element, entryPoint); + + /* + * Update metapage if needed. Should only happen if entry point + * was replaced and highest point was outdated. + */ + if (entryPoint == NULL || element->level > entryPoint->level) + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM); + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + } + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(vacuumstate->tmpCtx); + } +} + +/* + * Mark items as deleted + */ +static void +MarkDeleted(HnswVacuumState * vacuumstate) +{ + BlockNumber blkno = HNSW_HEAD_BLKNO; + BlockNumber insertPage = InvalidBlockNumber; + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + + /* + * Wait for index scans to complete. 
Scans before this point may contain + * tuples about to be deleted. Scans after this point will not, since the + * graph has been repaired. + */ + LockPage(index, HNSW_SCAN_LOCK, ExclusiveLock); + UnlockPage(index, HNSW_SCAN_LOCK, ExclusiveLock); + + while (BlockNumberIsValid(blkno)) + { + Buffer buf; + Page page; + GenericXLogState *state; + OffsetNumber offno; + OffsetNumber maxoffno; + + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + + /* + * ambulkdelete cannot delete entries from pages that are pinned by + * other backends + * + * https://www.postgresql.org/docs/current/index-locking.html + */ + LockBufferForCleanup(buf); + + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Update element and neighbors together */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) + { + HnswElementTuple etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, offno)); + HnswNeighborTuple ntup; + Size etupSize; + Size ntupSize; + Buffer nbuf; + Page npage; + BlockNumber neighborPage; + OffsetNumber neighborOffno; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + /* Skip deleted tuples */ + if (etup->deleted) + { + /* Set to first free page */ + if (!BlockNumberIsValid(insertPage)) + insertPage = blkno; + + continue; + } + + /* Skip live tuples */ + if (ItemPointerIsValid(&etup->heaptids[0])) + continue; + + /* Calculate sizes */ + etupSize = HNSW_ELEMENT_TUPLE_SIZE(etup->vec.dim); + ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(etup->level, vacuumstate->m); + + /* Get neighbor page */ + neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + + if (neighborPage == blkno) + { + nbuf = buf; + npage = page; + } + else + { + nbuf = ReadBufferExtended(index, MAIN_FORKNUM, neighborPage, RBM_NORMAL, bas); + 
LockBuffer(nbuf, BUFFER_LOCK_EXCLUSIVE); + npage = GenericXLogRegisterBuffer(state, nbuf, 0); + } + + ntup = (HnswNeighborTuple) PageGetItem(npage, PageGetItemId(npage, neighborOffno)); + + /* Overwrite element */ + etup->deleted = 1; + MemSet(&etup->vec.x, 0, etup->vec.dim * sizeof(float)); + + /* Overwrite neighbors */ + for (int i = 0; i < ntup->count; i++) + ItemPointerSetInvalid(&ntup->indextids[i]); + + /* Overwrite element tuple */ + if (!PageIndexTupleOverwrite(page, offno, (Item) etup, etupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Overwrite neighbor tuple */ + if (!PageIndexTupleOverwrite(npage, neighborOffno, (Item) ntup, ntupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Commit */ + GenericXLogFinish(state); + if (nbuf != buf) + UnlockReleaseBuffer(nbuf); + + /* Set to first free page */ + if (!BlockNumberIsValid(insertPage)) + insertPage = blkno; + + /* Prepare new xlog */ + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } + + /* Update insert page last, after everything has been marked as deleted */ + HnswUpdateMetaPage(index, 0, NULL, insertPage, MAIN_FORKNUM); +} + +/* + * Initialize the vacuum state + */ +static void +InitVacuumState(HnswVacuumState * vacuumstate, IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state) +{ + Relation index = info->index; + HASHCTL hash_ctl; + + if (stats == NULL) + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + + vacuumstate->index = index; + vacuumstate->stats = stats; + vacuumstate->callback = callback; + vacuumstate->callback_state = callback_state; + vacuumstate->efConstruction = HnswGetEfConstruction(index); + vacuumstate->bas = GetAccessStrategy(BAS_BULKREAD); + 
vacuumstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + vacuumstate->collation = index->rd_indcollation[0]; + vacuumstate->ntup = palloc0(BLCKSZ); + vacuumstate->tmpCtx = AllocSetContextCreate(CurrentMemoryContext, + "Hnsw vacuum temporary context", + ALLOCSET_DEFAULT_SIZES); + + /* Get m from metapage */ + HnswGetMetaPageInfo(index, &vacuumstate->m, NULL); + + /* Create hash table */ + hash_ctl.keysize = sizeof(ItemPointerData); + hash_ctl.entrysize = sizeof(ItemPointerData); + hash_ctl.hcxt = CurrentMemoryContext; + vacuumstate->deleted = hash_create("hnswbulkdelete indextids", 256, &hash_ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); +} + +/* + * Free resources + */ +static void +FreeVacuumState(HnswVacuumState * vacuumstate) +{ + hash_destroy(vacuumstate->deleted); + FreeAccessStrategy(vacuumstate->bas); + pfree(vacuumstate->ntup); + MemoryContextDelete(vacuumstate->tmpCtx); +} + +/* + * Bulk delete tuples from the index + */ +IndexBulkDeleteResult * +hnswbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callback_state) +{ + HnswVacuumState vacuumstate; + + InitVacuumState(&vacuumstate, info, stats, callback, callback_state); + + /* Pass 1: Remove heap TIDs */ + RemoveHeapTids(&vacuumstate); + + /* Pass 2: Repair graph */ + RepairGraph(&vacuumstate); + + /* Pass 3: Mark as deleted */ + MarkDeleted(&vacuumstate); + + FreeVacuumState(&vacuumstate); + + return vacuumstate.stats; +} + +/* + * Clean up after a VACUUM operation + */ +IndexBulkDeleteResult * +hnswvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + Relation rel = info->index; + + if (info->analyze_only) + return stats; + + /* stats is NULL if ambulkdelete not called */ + /* OK to return NULL if index not changed */ + if (stats == NULL) + return NULL; + + stats->num_pages = RelationGetNumberOfBlocks(rel); + + return stats; +} diff --git a/external/pgvector/src/ivfbuild.c b/external/pgvector/src/ivfbuild.c new 
file mode 100644 index 00000000000..15ba9aac044 --- /dev/null +++ b/external/pgvector/src/ivfbuild.c @@ -0,0 +1,1112 @@ +#include "postgres.h" + +#include + +#include "access/parallel.h" +#include "access/xact.h" +#include "catalog/index.h" +#include "catalog/pg_operator_d.h" +#include "catalog/pg_type_d.h" +#include "ivfflat.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "tcop/tcopprot.h" +#include "utils/memutils.h" + +#if PG_VERSION_NUM >= 140000 +#include "utils/backend_progress.h" +#elif PG_VERSION_NUM >= 120000 +#include "pgstat.h" +#endif + +#if PG_VERSION_NUM >= 120000 +#include "access/tableam.h" +#include "commands/progress.h" +#else +#define PROGRESS_CREATEIDX_SUBPHASE 0 +#define PROGRESS_CREATEIDX_TUPLES_TOTAL 0 +#define PROGRESS_CREATEIDX_TUPLES_DONE 0 +#endif + +#if PG_VERSION_NUM >= 130000 +#define CALLBACK_ITEM_POINTER ItemPointer tid +#else +#define CALLBACK_ITEM_POINTER HeapTuple hup +#endif + +#if PG_VERSION_NUM >= 120000 +#define UpdateProgress(index, val) pgstat_progress_update_param(index, val) +#else +#define UpdateProgress(index, val) ((void)val) +#endif + +#if PG_VERSION_NUM >= 140000 +#include "utils/backend_status.h" +#include "utils/wait_event.h" +#endif + +#if PG_VERSION_NUM >= 120000 +#include "access/table.h" +#include "optimizer/optimizer.h" +#else +#include "access/heapam.h" +#include "optimizer/planner.h" +#include "pgstat.h" +#endif + +#define PARALLEL_KEY_IVFFLAT_SHARED UINT64CONST(0xA000000000000001) +#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002) +#define PARALLEL_KEY_IVFFLAT_CENTERS UINT64CONST(0xA000000000000003) +#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004) + +/* + * Add sample + */ +static void +AddSample(Datum *values, IvfflatBuildState * buildstate) +{ + VectorArray samples = buildstate->samples; + int targsamples = samples->maxlen; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* + * Normalize with 
KMEANS_NORM_PROC since spherical distance function + * expects unit vectors + */ + if (buildstate->kmeansnormprocinfo != NULL) + { + if (!IvfflatNormValue(buildstate->kmeansnormprocinfo, buildstate->collation, &value, buildstate->normvec)) + return; + } + + if (samples->length < targsamples) + { + VectorArraySet(samples, samples->length, DatumGetVector(value)); + samples->length++; + } + else + { + if (buildstate->rowstoskip < 0) + buildstate->rowstoskip = reservoir_get_next_S(&buildstate->rstate, samples->length, targsamples); + + if (buildstate->rowstoskip <= 0) + { +#if PG_VERSION_NUM >= 150000 + int k = (int) (targsamples * sampler_random_fract(&buildstate->rstate.randstate)); +#else + int k = (int) (targsamples * sampler_random_fract(buildstate->rstate.randstate)); +#endif + + Assert(k >= 0 && k < targsamples); + VectorArraySet(samples, k, DatumGetVector(value)); + } + + buildstate->rowstoskip -= 1; + } +} + +/* + * Callback for sampling + */ +static void +SampleCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, + bool *isnull, bool tupleIsAlive, void *state) +{ + IvfflatBuildState *buildstate = (IvfflatBuildState *) state; + MemoryContext oldCtx; + + /* Skip nulls */ + if (isnull[0]) + return; + + /* Use memory context since detoast can allocate */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Add sample */ + AddSample(values, state); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); +} + +/* + * Sample rows with same logic as ANALYZE + */ +static void +SampleRows(IvfflatBuildState * buildstate) +{ + int targsamples = buildstate->samples->maxlen; + BlockNumber totalblocks = RelationGetNumberOfBlocks(buildstate->heap); + + buildstate->rowstoskip = -1; + + BlockSampler_Init(&buildstate->bs, totalblocks, targsamples, RandomInt()); + + reservoir_init_selection_state(&buildstate->rstate, targsamples); + while (BlockSampler_HasMore(&buildstate->bs)) + { + BlockNumber targblock = 
BlockSampler_Next(&buildstate->bs); + +#if PG_VERSION_NUM >= 120000 + table_index_build_range_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, + false, true, false, targblock, 1, SampleCallback, (void *) buildstate, NULL); +#else + IndexBuildHeapRangeScan(buildstate->heap, buildstate->index, buildstate->indexInfo, + false, true, targblock, 1, SampleCallback, (void *) buildstate, NULL); +#endif + } +} + +/* + * Add tuple to sort + */ +static void +AddTupleToSort(Relation index, ItemPointer tid, Datum *values, IvfflatBuildState * buildstate) +{ + double distance; + double minDistance = DBL_MAX; + int closestCenter = 0; + VectorArray centers = buildstate->centers; + TupleTableSlot *slot = buildstate->slot; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Normalize if needed */ + if (buildstate->normprocinfo != NULL) + { + if (!IvfflatNormValue(buildstate->normprocinfo, buildstate->collation, &value, buildstate->normvec)) + return; + } + + /* Find the list that minimizes the distance */ + for (int i = 0; i < centers->length; i++) + { + distance = DatumGetFloat8(FunctionCall2Coll(buildstate->procinfo, buildstate->collation, value, PointerGetDatum(VectorArrayGet(centers, i)))); + + if (distance < minDistance) + { + minDistance = distance; + closestCenter = i; + } + } + +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia += minDistance; + buildstate->listSums[closestCenter] += minDistance; + buildstate->listCounts[closestCenter]++; +#endif + + /* Create a virtual tuple */ + ExecClearTuple(slot); + slot->tts_values[0] = Int32GetDatum(closestCenter); + slot->tts_isnull[0] = false; + slot->tts_values[1] = PointerGetDatum(tid); + slot->tts_isnull[1] = false; + slot->tts_values[2] = value; + slot->tts_isnull[2] = false; + ExecStoreVirtualTuple(slot); + + /* + * Add tuple to sort + * + * tuplesort_puttupleslot comment: Input data is always copied; the caller + * need not save it. 
+ */ + tuplesort_puttupleslot(buildstate->sortstate, slot); + + buildstate->indtuples++; +} + +/* + * Callback for table_index_build_scan + */ +static void +BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, + bool *isnull, bool tupleIsAlive, void *state) +{ + IvfflatBuildState *buildstate = (IvfflatBuildState *) state; + MemoryContext oldCtx; + +#if PG_VERSION_NUM < 130000 + ItemPointer tid = &hup->t_self; +#endif + + /* Skip nulls */ + if (isnull[0]) + return; + + /* Use memory context since detoast can allocate */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Add tuple to sort */ + AddTupleToSort(index, tid, values, buildstate); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); +} + +/* + * Get index tuple from sort state + */ +static inline void +GetNextTuple(Tuplesortstate *sortstate, TupleDesc tupdesc, TupleTableSlot *slot, IndexTuple *itup, int *list) +{ + Datum value; + bool isnull; + + if (tuplesort_gettupleslot(sortstate, true, false, slot, NULL)) + { + *list = DatumGetInt32(slot_getattr(slot, 1, &isnull)); + value = slot_getattr(slot, 3, &isnull); + + /* Form the index tuple */ + *itup = index_form_tuple(tupdesc, &value, &isnull); + (*itup)->t_tid = *((ItemPointer) DatumGetPointer(slot_getattr(slot, 2, &isnull))); + } + else + *list = -1; +} + +/* + * Create initial entry pages + */ +static void +InsertTuples(Relation index, IvfflatBuildState * buildstate, ForkNumber forkNum) +{ + int list; + IndexTuple itup = NULL; /* silence compiler warning */ + int64 inserted = 0; + +#if PG_VERSION_NUM >= 120000 + TupleTableSlot *slot = MakeSingleTupleTableSlot(buildstate->tupdesc, &TTSOpsMinimalTuple); +#else + TupleTableSlot *slot = MakeSingleTupleTableSlot(buildstate->tupdesc); +#endif + TupleDesc tupdesc = RelationGetDescr(index); + + UpdateProgress(PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_IVFFLAT_PHASE_LOAD); + + UpdateProgress(PROGRESS_CREATEIDX_TUPLES_TOTAL, 
buildstate->indtuples); + + GetNextTuple(buildstate->sortstate, tupdesc, slot, &itup, &list); + + for (int i = 0; i < buildstate->centers->length; i++) + { + Buffer buf; + Page page; + GenericXLogState *state; + BlockNumber startPage; + BlockNumber insertPage; + + /* Can take a while, so ensure we can interrupt */ + /* Needs to be called when no buffer locks are held */ + CHECK_FOR_INTERRUPTS(); + + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + + startPage = BufferGetBlockNumber(buf); + + /* Get all tuples for list */ + while (list == i) + { + /* Check for free space */ + Size itemsz = MAXALIGN(IndexTupleSize(itup)); + + if (PageGetFreeSpace(page) < itemsz) + IvfflatAppendPage(index, &buf, &page, &state, forkNum); + + /* Add the item */ + if (PageAddItem(page, (Item) itup, itemsz, InvalidOffsetNumber, false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + pfree(itup); + + UpdateProgress(PROGRESS_CREATEIDX_TUPLES_DONE, ++inserted); + + GetNextTuple(buildstate->sortstate, tupdesc, slot, &itup, &list); + } + + insertPage = BufferGetBlockNumber(buf); + + IvfflatCommitBuffer(buf, state); + + /* Set the start and insert pages */ + IvfflatUpdateList(index, buildstate->listInfo[i], insertPage, InvalidBlockNumber, startPage, forkNum); + } +} + +/* + * Initialize the build state + */ +static void +InitBuildState(IvfflatBuildState * buildstate, Relation heap, Relation index, IndexInfo *indexInfo) +{ + buildstate->heap = heap; + buildstate->index = index; + buildstate->indexInfo = indexInfo; + + buildstate->lists = IvfflatGetLists(index); + buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod; + + /* Require column to have dimensions to be indexed */ + if (buildstate->dimensions < 0) + elog(ERROR, "column does not have dimensions"); + + if (buildstate->dimensions > IVFFLAT_MAX_DIM) + elog(ERROR, "column cannot have more than %d dimensions 
for ivfflat index", IVFFLAT_MAX_DIM); + + buildstate->reltuples = 0; + buildstate->indtuples = 0; + + /* Get support functions */ + buildstate->procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + buildstate->normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + buildstate->kmeansnormprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC); + buildstate->collation = index->rd_indcollation[0]; + + /* Require more than one dimension for spherical k-means */ + if (buildstate->kmeansnormprocinfo != NULL && buildstate->dimensions == 1) + elog(ERROR, "dimensions must be greater than one for this opclass"); + + /* Create tuple description for sorting */ +#if PG_VERSION_NUM >= 120000 + buildstate->tupdesc = CreateTemplateTupleDesc(3); +#else + buildstate->tupdesc = CreateTemplateTupleDesc(3, false); +#endif + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber) 1, "list", INT4OID, -1, 0); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber) 2, "tid", TIDOID, -1, 0); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber) 3, "vector", RelationGetDescr(index)->attrs[0].atttypid, -1, 0); + +#if PG_VERSION_NUM >= 120000 + buildstate->slot = MakeSingleTupleTableSlot(buildstate->tupdesc, &TTSOpsVirtual); +#else + buildstate->slot = MakeSingleTupleTableSlot(buildstate->tupdesc); +#endif + + buildstate->centers = VectorArrayInit(buildstate->lists, buildstate->dimensions); + buildstate->listInfo = palloc(sizeof(ListInfo) * buildstate->lists); + + /* Reuse for each tuple */ + buildstate->normvec = InitVector(buildstate->dimensions); + + buildstate->tmpCtx = AllocSetContextCreate(CurrentMemoryContext, + "Ivfflat build temporary context", + ALLOCSET_DEFAULT_SIZES); + +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia = 0; + buildstate->listSums = palloc0(sizeof(double) * buildstate->lists); + buildstate->listCounts = palloc0(sizeof(int) * buildstate->lists); +#endif + + buildstate->ivfleader = NULL; +} + +/* + * Free resources + */ +static void 
+FreeBuildState(IvfflatBuildState * buildstate) +{ + VectorArrayFree(buildstate->centers); + pfree(buildstate->listInfo); + pfree(buildstate->normvec); + +#ifdef IVFFLAT_KMEANS_DEBUG + pfree(buildstate->listSums); + pfree(buildstate->listCounts); +#endif + + MemoryContextDelete(buildstate->tmpCtx); +} + +/* + * Compute centers + */ +static void +ComputeCenters(IvfflatBuildState * buildstate) +{ + int numSamples; + + UpdateProgress(PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_IVFFLAT_PHASE_KMEANS); + + /* Target 50 samples per list, with at least 10000 samples */ + /* The number of samples has a large effect on index build time */ + numSamples = buildstate->lists * 50; + if (numSamples < 10000) + numSamples = 10000; + + /* Skip samples for unlogged table */ + if (buildstate->heap == NULL) + numSamples = 1; + + /* Sample rows */ + /* TODO Ensure within maintenance_work_mem */ + buildstate->samples = VectorArrayInit(numSamples, buildstate->dimensions); + if (buildstate->heap != NULL) + { + SampleRows(buildstate); + + if (buildstate->samples->length < buildstate->lists) + { + ereport(NOTICE, + (errmsg("ivfflat index created with little data"), + errdetail("This will cause low recall."), + errhint("Drop the index until the table has more data."))); + } + } + + /* Calculate centers */ + IvfflatBench("k-means", IvfflatKmeans(buildstate->index, buildstate->samples, buildstate->centers)); + + /* Free samples before we allocate more memory */ + VectorArrayFree(buildstate->samples); +} + +/* + * Create the metapage + */ +static void +CreateMetaPage(Relation index, int dimensions, int lists, ForkNumber forkNum) +{ + Buffer buf; + Page page; + GenericXLogState *state; + IvfflatMetaPage metap; + + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + + /* Set metapage data */ + metap = IvfflatPageGetMeta(page); + metap->magicNumber = IVFFLAT_MAGIC_NUMBER; + metap->version = IVFFLAT_VERSION; + metap->dimensions = dimensions; + metap->lists = 
lists; + ((PageHeader) page)->pd_lower = + ((char *) metap + sizeof(IvfflatMetaPageData)) - (char *) page; + + IvfflatCommitBuffer(buf, state); +} + +/* + * Create list pages + */ +static void +CreateListPages(Relation index, VectorArray centers, int dimensions, + int lists, ForkNumber forkNum, ListInfo * *listInfo) +{ + Buffer buf; + Page page; + GenericXLogState *state; + Size listSize; + IvfflatList list; + + listSize = MAXALIGN(IVFFLAT_LIST_SIZE(dimensions)); + list = palloc(listSize); + + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + + for (int i = 0; i < lists; i++) + { + OffsetNumber offno; + + /* Load list */ + list->startPage = InvalidBlockNumber; + list->insertPage = InvalidBlockNumber; + memcpy(&list->center, VectorArrayGet(centers, i), VECTOR_SIZE(dimensions)); + + /* Ensure free space */ + if (PageGetFreeSpace(page) < listSize) + IvfflatAppendPage(index, &buf, &page, &state, forkNum); + + /* Add the item */ + offno = PageAddItem(page, (Item) list, listSize, InvalidOffsetNumber, false, false); + if (offno == InvalidOffsetNumber) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Save location info */ + (*listInfo)[i].blkno = BufferGetBlockNumber(buf); + (*listInfo)[i].offno = offno; + } + + IvfflatCommitBuffer(buf, state); + + pfree(list); +} + +/* + * Print k-means metrics + */ +#ifdef IVFFLAT_KMEANS_DEBUG +static void +PrintKmeansMetrics(IvfflatBuildState * buildstate) +{ + elog(INFO, "inertia: %.3e", buildstate->inertia); + + /* Calculate Davies-Bouldin index */ + if (buildstate->lists > 1 && !buildstate->ivfleader) + { + double db = 0.0; + + /* Calculate average distance */ + for (int i = 0; i < buildstate->lists; i++) + { + if (buildstate->listCounts[i] > 0) + buildstate->listSums[i] /= buildstate->listCounts[i]; + } + + for (int i = 0; i < buildstate->lists; i++) + { + double max = 0.0; + double distance; + + for (int j = 0; j < buildstate->lists; j++) + { 
+ if (j == i) + continue; + + distance = DatumGetFloat8(FunctionCall2Coll(buildstate->procinfo, buildstate->collation, PointerGetDatum(VectorArrayGet(buildstate->centers, i)), PointerGetDatum(VectorArrayGet(buildstate->centers, j)))); + distance = (buildstate->listSums[i] + buildstate->listSums[j]) / distance; + + if (distance > max) + max = distance; + } + db += max; + } + db /= buildstate->lists; + elog(INFO, "davies-bouldin: %.3f", db); + } +} +#endif + +/* + * Within leader, wait for end of heap scan + */ +static double +ParallelHeapScan(IvfflatBuildState * buildstate) +{ + IvfflatShared *ivfshared = buildstate->ivfleader->ivfshared; + int nparticipanttuplesorts; + double reltuples; + + nparticipanttuplesorts = buildstate->ivfleader->nparticipanttuplesorts; + for (;;) + { + SpinLockAcquire(&ivfshared->mutex); + if (ivfshared->nparticipantsdone == nparticipanttuplesorts) + { + buildstate->indtuples = ivfshared->indtuples; + reltuples = ivfshared->reltuples; +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia = ivfshared->inertia; +#endif + SpinLockRelease(&ivfshared->mutex); + break; + } + SpinLockRelease(&ivfshared->mutex); + + ConditionVariableSleep(&ivfshared->workersdonecv, + WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN); + } + + ConditionVariableCancelSleep(); + + return reltuples; +} + +/* + * Perform a worker's portion of a parallel sort + */ +static void +IvfflatParallelScanAndSort(IvfflatSpool * ivfspool, IvfflatShared * ivfshared, Sharedsort *sharedsort, Vector * ivfcenters, int sortmem, bool progress) +{ + SortCoordinate coordinate; + IvfflatBuildState buildstate; +#if PG_VERSION_NUM >= 120000 + TableScanDesc scan; +#else + HeapScanDesc scan; +#endif + double reltuples; + IndexInfo *indexInfo; + + /* Sort options, which must match AssignTuples */ + AttrNumber attNums[] = {1}; + Oid sortOperators[] = {Int4LessOperator}; + Oid sortCollations[] = {InvalidOid}; + bool nullsFirstFlags[] = {false}; + + /* Initialize local tuplesort coordination state */ + 
coordinate = palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = true; + coordinate->nParticipants = -1; + coordinate->sharedsort = sharedsort; + + /* Join parallel scan */ + indexInfo = BuildIndexInfo(ivfspool->index); + indexInfo->ii_Concurrent = ivfshared->isconcurrent; + InitBuildState(&buildstate, ivfspool->heap, ivfspool->index, indexInfo); + memcpy(buildstate.centers->items, ivfcenters, VECTOR_SIZE(buildstate.centers->dim) * buildstate.centers->maxlen); + buildstate.centers->length = buildstate.centers->maxlen; + ivfspool->sortstate = tuplesort_begin_heap(buildstate.tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags, sortmem, coordinate, false); + buildstate.sortstate = ivfspool->sortstate; +#if PG_VERSION_NUM >= 120000 + scan = table_beginscan_parallel(ivfspool->heap, + ParallelTableScanFromIvfflatShared(ivfshared)); + reltuples = table_index_build_scan(ivfspool->heap, ivfspool->index, indexInfo, + true, progress, BuildCallback, + (void *) &buildstate, scan); +#else + scan = heap_beginscan_parallel(ivfspool->heap, &ivfshared->heapdesc); + reltuples = IndexBuildHeapScan(ivfspool->heap, ivfspool->index, indexInfo, + true, BuildCallback, + (void *) &buildstate, scan); +#endif + + /* Execute this worker's part of the sort */ + tuplesort_performsort(ivfspool->sortstate); + + /* Record statistics */ + SpinLockAcquire(&ivfshared->mutex); + ivfshared->nparticipantsdone++; + ivfshared->reltuples += reltuples; + ivfshared->indtuples += buildstate.indtuples; +#ifdef IVFFLAT_KMEANS_DEBUG + ivfshared->inertia += buildstate.inertia; +#endif + SpinLockRelease(&ivfshared->mutex); + + /* Log statistics */ + if (progress) + ereport(DEBUG1, (errmsg("leader processed " INT64_FORMAT " tuples", (int64) reltuples))); + else + ereport(DEBUG1, (errmsg("worker processed " INT64_FORMAT " tuples", (int64) reltuples))); + + /* Notify leader */ + ConditionVariableSignal(&ivfshared->workersdonecv); + + /* We can end tuplesorts immediately */ + 
tuplesort_end(ivfspool->sortstate); + + FreeBuildState(&buildstate); +} + +/* + * Perform work within a launched parallel process + */ +void +IvfflatParallelBuildMain(dsm_segment *seg, shm_toc *toc) +{ + char *sharedquery; + IvfflatSpool *ivfspool; + IvfflatShared *ivfshared; + Sharedsort *sharedsort; + Vector *ivfcenters; + Relation heapRel; + Relation indexRel; + LOCKMODE heapLockmode; + LOCKMODE indexLockmode; + int sortmem; + + /* Set debug_query_string for individual workers first */ + sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true); + debug_query_string = sharedquery; + + /* Report the query string from leader */ + pgstat_report_activity(STATE_RUNNING, debug_query_string); + + /* Look up shared state */ + ivfshared = shm_toc_lookup(toc, PARALLEL_KEY_IVFFLAT_SHARED, false); + + /* Open relations using lock modes known to be obtained by index.c */ + if (!ivfshared->isconcurrent) + { + heapLockmode = ShareLock; + indexLockmode = AccessExclusiveLock; + } + else + { + heapLockmode = ShareUpdateExclusiveLock; + indexLockmode = RowExclusiveLock; + } + + /* Open relations within worker */ +#if PG_VERSION_NUM >= 120000 + heapRel = table_open(ivfshared->heaprelid, heapLockmode); +#else + heapRel = heap_open(ivfshared->heaprelid, heapLockmode); +#endif + indexRel = index_open(ivfshared->indexrelid, indexLockmode); + + /* Initialize worker's own spool */ + ivfspool = (IvfflatSpool *) palloc0(sizeof(IvfflatSpool)); + ivfspool->heap = heapRel; + ivfspool->index = indexRel; + + /* Look up shared state private to tuplesort.c */ + sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false); + tuplesort_attach_shared(sharedsort, seg); + + ivfcenters = shm_toc_lookup(toc, PARALLEL_KEY_IVFFLAT_CENTERS, false); + + /* Perform sorting */ + sortmem = maintenance_work_mem / ivfshared->scantuplesortstates; + IvfflatParallelScanAndSort(ivfspool, ivfshared, sharedsort, ivfcenters, sortmem, false); + + /* Close relations within worker */ + index_close(indexRel, 
indexLockmode); +#if PG_VERSION_NUM >= 120000 + table_close(heapRel, heapLockmode); +#else + heap_close(heapRel, heapLockmode); +#endif +} + +/* + * End parallel build + */ +static void +IvfflatEndParallel(IvfflatLeader * ivfleader) +{ + /* Shutdown worker processes */ + WaitForParallelWorkersToFinish(ivfleader->pcxt); + + /* Free last reference to MVCC snapshot, if one was used */ + if (IsMVCCSnapshot(ivfleader->snapshot)) + UnregisterSnapshot(ivfleader->snapshot); + DestroyParallelContext(ivfleader->pcxt); + ExitParallelMode(); +} + +/* + * Return size of shared memory required for parallel index build + */ +static Size +ParallelEstimateShared(Relation heap, Snapshot snapshot) +{ +#if PG_VERSION_NUM >= 120000 + return add_size(BUFFERALIGN(sizeof(IvfflatShared)), table_parallelscan_estimate(heap, snapshot)); +#else + if (!IsMVCCSnapshot(snapshot)) + { + Assert(snapshot == SnapshotAny); + return sizeof(IvfflatShared); + } + + return add_size(offsetof(IvfflatShared, heapdesc) + + offsetof(ParallelHeapScanDescData, phs_snapshot_data), + EstimateSnapshotSpace(snapshot)); +#endif +} + +/* + * Within leader, participate as a parallel worker + */ +static void +IvfflatLeaderParticipateAsWorker(IvfflatBuildState * buildstate) +{ + IvfflatLeader *ivfleader = buildstate->ivfleader; + IvfflatSpool *leaderworker; + int sortmem; + + /* Allocate memory and initialize private spool */ + leaderworker = (IvfflatSpool *) palloc0(sizeof(IvfflatSpool)); + leaderworker->heap = buildstate->heap; + leaderworker->index = buildstate->index; + + /* Perform work common to all participants */ + sortmem = maintenance_work_mem / ivfleader->nparticipanttuplesorts; + IvfflatParallelScanAndSort(leaderworker, ivfleader->ivfshared, + ivfleader->sharedsort, ivfleader->ivfcenters, + sortmem, true); +} + +/* + * Begin parallel build + */ +static void +IvfflatBeginParallel(IvfflatBuildState * buildstate, bool isconcurrent, int request) +{ + ParallelContext *pcxt; + int scantuplesortstates; + Snapshot 
snapshot; + Size estivfshared; + Size estsort; + Size estcenters; + IvfflatShared *ivfshared; + Sharedsort *sharedsort; + Vector *ivfcenters; + IvfflatLeader *ivfleader = (IvfflatLeader *) palloc0(sizeof(IvfflatLeader)); + bool leaderparticipates = true; + int querylen; + +#ifdef DISABLE_LEADER_PARTICIPATION + leaderparticipates = false; +#endif + + /* Enter parallel mode and create context */ + EnterParallelMode(); + Assert(request > 0); +#if PG_VERSION_NUM >= 120000 + pcxt = CreateParallelContext("vector", "IvfflatParallelBuildMain", request); +#else + pcxt = CreateParallelContext("vector", "IvfflatParallelBuildMain", request, true); +#endif + + scantuplesortstates = leaderparticipates ? request + 1 : request; + + /* Get snapshot for table scan */ + if (!isconcurrent) + snapshot = SnapshotAny; + else + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + + /* Estimate size of workspaces */ + estivfshared = ParallelEstimateShared(buildstate->heap, snapshot); + shm_toc_estimate_chunk(&pcxt->estimator, estivfshared); + estsort = tuplesort_estimate_shared(scantuplesortstates); + shm_toc_estimate_chunk(&pcxt->estimator, estsort); + estcenters = VECTOR_SIZE(buildstate->dimensions) * buildstate->lists; + shm_toc_estimate_chunk(&pcxt->estimator, estcenters); + shm_toc_estimate_keys(&pcxt->estimator, 3); + + /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ + if (debug_query_string) + { + querylen = strlen(debug_query_string); + shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1); + shm_toc_estimate_keys(&pcxt->estimator, 1); + } + else + querylen = 0; /* keep compiler quiet */ + + /* Everyone's had a chance to ask for space, so now create the DSM */ + InitializeParallelDSM(pcxt); + + /* If no DSM segment was available, back out (do serial build) */ + if (pcxt->seg == NULL) + { + if (IsMVCCSnapshot(snapshot)) + UnregisterSnapshot(snapshot); + DestroyParallelContext(pcxt); + ExitParallelMode(); + return; + } + + /* Store shared build state, for which we reserved 
space */ + ivfshared = (IvfflatShared *) shm_toc_allocate(pcxt->toc, estivfshared); + /* Initialize immutable state */ + ivfshared->heaprelid = RelationGetRelid(buildstate->heap); + ivfshared->indexrelid = RelationGetRelid(buildstate->index); + ivfshared->isconcurrent = isconcurrent; + ivfshared->scantuplesortstates = scantuplesortstates; + ConditionVariableInit(&ivfshared->workersdonecv); + SpinLockInit(&ivfshared->mutex); + /* Initialize mutable state */ + ivfshared->nparticipantsdone = 0; + ivfshared->reltuples = 0; + ivfshared->indtuples = 0; +#ifdef IVFFLAT_KMEANS_DEBUG + ivfshared->inertia = 0; +#endif +#if PG_VERSION_NUM >= 120000 + table_parallelscan_initialize(buildstate->heap, + ParallelTableScanFromIvfflatShared(ivfshared), + snapshot); +#else + heap_parallelscan_initialize(&ivfshared->heapdesc, buildstate->heap, snapshot); +#endif + + /* Store shared tuplesort-private state, for which we reserved space */ + sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort); + tuplesort_initialize_shared(sharedsort, scantuplesortstates, + pcxt->seg); + + ivfcenters = (Vector *) shm_toc_allocate(pcxt->toc, estcenters); + memcpy(ivfcenters, buildstate->centers->items, estcenters); + + shm_toc_insert(pcxt->toc, PARALLEL_KEY_IVFFLAT_SHARED, ivfshared); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_IVFFLAT_CENTERS, ivfcenters); + + /* Store query string for workers */ + if (debug_query_string) + { + char *sharedquery; + + sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1); + memcpy(sharedquery, debug_query_string, querylen + 1); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery); + } + + /* Launch workers, saving status for leader/caller */ + LaunchParallelWorkers(pcxt); + ivfleader->pcxt = pcxt; + ivfleader->nparticipanttuplesorts = pcxt->nworkers_launched; + if (leaderparticipates) + ivfleader->nparticipanttuplesorts++; + ivfleader->ivfshared = ivfshared; + 
ivfleader->sharedsort = sharedsort; + ivfleader->snapshot = snapshot; + ivfleader->ivfcenters = ivfcenters; + + /* If no workers were successfully launched, back out (do serial build) */ + if (pcxt->nworkers_launched == 0) + { + IvfflatEndParallel(ivfleader); + return; + } + + /* Log participants */ + ereport(DEBUG1, (errmsg("using %d parallel workers", pcxt->nworkers_launched))); + + /* Save leader state now that it's clear build will be parallel */ + buildstate->ivfleader = ivfleader; + + /* Join heap scan ourselves */ + if (leaderparticipates) + IvfflatLeaderParticipateAsWorker(buildstate); + + /* Wait for all launched workers */ + WaitForParallelWorkersToAttach(pcxt); +} + +/* + * Scan table for tuples to index + */ +static void +AssignTuples(IvfflatBuildState * buildstate) +{ + int parallel_workers = 0; + SortCoordinate coordinate = NULL; + + /* Sort options, which must match IvfflatParallelScanAndSort */ + AttrNumber attNums[] = {1}; + Oid sortOperators[] = {Int4LessOperator}; + Oid sortCollations[] = {InvalidOid}; + bool nullsFirstFlags[] = {false}; + + UpdateProgress(PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_IVFFLAT_PHASE_ASSIGN); + + /* Calculate parallel workers */ + if (buildstate->heap != NULL) + parallel_workers = plan_create_index_workers(RelationGetRelid(buildstate->heap), RelationGetRelid(buildstate->index)); + + /* Attempt to launch parallel worker scan when required */ + if (parallel_workers > 0) + IvfflatBeginParallel(buildstate, buildstate->indexInfo->ii_Concurrent, parallel_workers); + + /* Set up coordination state if at least one worker launched */ + if (buildstate->ivfleader) + { + coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = false; + coordinate->nParticipants = buildstate->ivfleader->nparticipanttuplesorts; + coordinate->sharedsort = buildstate->ivfleader->sharedsort; + } + + /* Begin serial/leader tuplesort */ + buildstate->sortstate = tuplesort_begin_heap(buildstate->tupdesc, 1, attNums, 
sortOperators, sortCollations, nullsFirstFlags, maintenance_work_mem, coordinate, false); + + /* Add tuples to sort */ + if (buildstate->heap != NULL) + { + if (buildstate->ivfleader) + buildstate->reltuples = ParallelHeapScan(buildstate); + else + { +#if PG_VERSION_NUM >= 120000 + buildstate->reltuples = table_index_build_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, + true, true, BuildCallback, (void *) buildstate, NULL); +#else + buildstate->reltuples = IndexBuildHeapScan(buildstate->heap, buildstate->index, buildstate->indexInfo, + true, BuildCallback, (void *) buildstate, NULL); +#endif + } + +#ifdef IVFFLAT_KMEANS_DEBUG + PrintKmeansMetrics(buildstate); +#endif + } +} + +/* + * Create entry pages + */ +static void +CreateEntryPages(IvfflatBuildState * buildstate, ForkNumber forkNum) +{ + /* Assign */ + IvfflatBench("assign tuples", AssignTuples(buildstate)); + + /* Sort */ + IvfflatBench("sort tuples", tuplesort_performsort(buildstate->sortstate)); + + /* Load */ + IvfflatBench("load tuples", InsertTuples(buildstate->index, buildstate, forkNum)); + + /* End sort */ + tuplesort_end(buildstate->sortstate); + + /* End parallel build */ + if (buildstate->ivfleader) + IvfflatEndParallel(buildstate->ivfleader); +} + +/* + * Build the index + */ +static void +BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, + IvfflatBuildState * buildstate, ForkNumber forkNum) +{ + InitBuildState(buildstate, heap, index, indexInfo); + + ComputeCenters(buildstate); + + /* Create pages */ + CreateMetaPage(index, buildstate->dimensions, buildstate->lists, forkNum); + CreateListPages(index, buildstate->centers, buildstate->dimensions, buildstate->lists, forkNum, &buildstate->listInfo); + CreateEntryPages(buildstate, forkNum); + + FreeBuildState(buildstate); +} + +/* + * Build the index for a logged table + */ +IndexBuildResult * +ivfflatbuild(Relation heap, Relation index, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + IvfflatBuildState 
buildstate; + + BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM); + + result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); + result->heap_tuples = buildstate.reltuples; + result->index_tuples = buildstate.indtuples; + + return result; +} + +/* + * Build the index for an unlogged table + */ +void +ivfflatbuildempty(Relation index) +{ + IndexInfo *indexInfo = BuildIndexInfo(index); + IvfflatBuildState buildstate; + + BuildIndex(NULL, index, indexInfo, &buildstate, INIT_FORKNUM); +} diff --git a/external/pgvector/src/ivfflat.c b/external/pgvector/src/ivfflat.c new file mode 100644 index 00000000000..d6383f460c9 --- /dev/null +++ b/external/pgvector/src/ivfflat.c @@ -0,0 +1,251 @@ +#include "postgres.h" + +#include + +#include "access/amapi.h" +#include "commands/vacuum.h" +#include "ivfflat.h" +#include "utils/guc.h" +#include "utils/selfuncs.h" +#include "utils/spccache.h" + +#if PG_VERSION_NUM >= 120000 +#include "commands/progress.h" +#endif + +int ivfflat_probes; +static relopt_kind ivfflat_relopt_kind; + +/* + * Initialize index options and variables + */ +void +IvfflatInit(void) +{ + ivfflat_relopt_kind = add_reloption_kind(); + add_int_reloption(ivfflat_relopt_kind, "lists", "Number of inverted lists", + IVFFLAT_DEFAULT_LISTS, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS +#if PG_VERSION_NUM >= 130000 + ,AccessExclusiveLock +#endif + ); + + DefineCustomIntVariable("ivfflat.probes", "Sets the number of probes", + "Valid range is 1..lists.", &ivfflat_probes, + IVFFLAT_DEFAULT_PROBES, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL); +} + +/* + * Get the name of index build phase + */ +#if PG_VERSION_NUM >= 120000 +static char * +ivfflatbuildphasename(int64 phasenum) +{ + switch (phasenum) + { + case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE: + return "initializing"; + case PROGRESS_IVFFLAT_PHASE_KMEANS: + return "performing k-means"; + case PROGRESS_IVFFLAT_PHASE_ASSIGN: + return "assigning tuples"; + case 
PROGRESS_IVFFLAT_PHASE_LOAD: + return "loading tuples"; + default: + return NULL; + } +} +#endif + +/* + * Estimate the cost of an index scan + */ +static void +ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, + Cost *indexStartupCost, Cost *indexTotalCost, + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) +{ + GenericCosts costs; + int lists; + double ratio; + double spc_seq_page_cost; + Relation index; +#if PG_VERSION_NUM < 120000 + List *qinfos; +#endif + + /* Never use index without order */ + if (path->indexorderbys == NULL) + { + *indexStartupCost = DBL_MAX; + *indexTotalCost = DBL_MAX; + *indexSelectivity = 0; + *indexCorrelation = 0; + *indexPages = 0; + return; + } + + MemSet(&costs, 0, sizeof(costs)); + + index = index_open(path->indexinfo->indexoid, NoLock); + IvfflatGetMetaPageInfo(index, &lists, NULL); + index_close(index, NoLock); + + /* Get the ratio of lists that we need to visit */ + ratio = ((double) ivfflat_probes) / lists; + if (ratio > 1.0) + ratio = 1.0; + + /* + * This gives us the subset of tuples to visit. This value is passed into + * the generic cost estimator to determine the number of pages to visit + * during the index scan. 
+ */ + costs.numIndexTuples = path->indexinfo->tuples * ratio; + +#if PG_VERSION_NUM >= 120000 + genericcostestimate(root, path, loop_count, &costs); +#else + qinfos = deconstruct_indexquals(path); + genericcostestimate(root, path, loop_count, qinfos, &costs); +#endif + + get_tablespace_page_costs(path->indexinfo->reltablespace, NULL, &spc_seq_page_cost); + + /* Adjust cost if needed since TOAST not included in seq scan cost */ + if (costs.numIndexPages > path->indexinfo->rel->pages && ratio < 0.5) + { + /* Change all page cost from random to sequential */ + costs.indexTotalCost -= costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); + + /* Remove cost of extra pages */ + costs.indexTotalCost -= (costs.numIndexPages - path->indexinfo->rel->pages) * spc_seq_page_cost; + } + else + { + /* Change some page cost from random to sequential */ + costs.indexTotalCost -= 0.5 * costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); + } + + /* + * If the list selectivity is lower than what is returned from the generic + * cost estimator, use that. 
 */
+	if (ratio < costs.indexSelectivity)
+		costs.indexSelectivity = ratio;
+
+	/* Use total cost since most work happens before first tuple is returned */
+	*indexStartupCost = costs.indexTotalCost;
+	*indexTotalCost = costs.indexTotalCost;
+	*indexSelectivity = costs.indexSelectivity;
+	*indexCorrelation = costs.indexCorrelation;
+	*indexPages = costs.numIndexPages;
+}
+
+/*
+ * Parse and validate the reloptions ("lists" is currently the only option)
+ */
+static bytea *
+ivfflatoptions(Datum reloptions, bool validate)
+{
+	static const relopt_parse_elt tab[] = {
+		{"lists", RELOPT_TYPE_INT, offsetof(IvfflatOptions, lists)},
+	};
+
+#if PG_VERSION_NUM >= 130000
+	return (bytea *) build_reloptions(reloptions, validate,
+									  ivfflat_relopt_kind,
+									  sizeof(IvfflatOptions),
+									  tab, lengthof(tab));
+#else
+	/* Pre-13 path: hand-rolled equivalent of build_reloptions() */
+	relopt_value *options;
+	int			numoptions;
+	IvfflatOptions *rdopts;
+
+	options = parseRelOptions(reloptions, validate, ivfflat_relopt_kind, &numoptions);
+	rdopts = allocateReloptStruct(sizeof(IvfflatOptions), options, numoptions);
+	fillRelOptions((void *) rdopts, sizeof(IvfflatOptions), options, numoptions,
+				   validate, tab, lengthof(tab));
+
+	return (bytea *) rdopts;
+#endif
+}
+
+/*
+ * Validate catalog entries for the specified operator class
+ */
+static bool
+ivfflatvalidate(Oid opclassoid)
+{
+	return true;				/* no checks performed; every opclass accepted */
+}
+
+/*
+ * Define index handler
+ *
+ * See https://www.postgresql.org/docs/current/index-api.html
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflathandler);
+Datum
+ivfflathandler(PG_FUNCTION_ARGS)
+{
+	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
+
+	amroutine->amstrategies = 0;
+	amroutine->amsupport = 4;	/* see IVFFLAT_*_PROC numbers in ivfflat.h */
+#if PG_VERSION_NUM >= 130000
+	amroutine->amoptsprocnum = 0;
+#endif
+	amroutine->amcanorder = false;
+	amroutine->amcanorderbyop = true;	/* distance operators usable in ORDER BY */
+	amroutine->amcanbackward = false;	/* can change direction mid-scan */
+	amroutine->amcanunique = false;
+	amroutine->amcanmulticol = false;
+	amroutine->amoptionalkey = true;
+	amroutine->amsearcharray = false;
+	amroutine->amsearchnulls = false;
+	amroutine->amstorage = false;
+	amroutine->amclusterable = false;
+	amroutine->ampredlocks = false;
+	amroutine->amcanparallel = false;
+	amroutine->amcaninclude = false;
+#if PG_VERSION_NUM >= 130000
+	amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */
+	amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL;
+#endif
+	amroutine->amkeytype = InvalidOid;
+
+	/* Interface functions */
+	amroutine->ambuild = ivfflatbuild;
+	amroutine->ambuildempty = ivfflatbuildempty;
+	amroutine->aminsert = ivfflatinsert;
+	amroutine->ambulkdelete = ivfflatbulkdelete;
+	amroutine->amvacuumcleanup = ivfflatvacuumcleanup;
+	amroutine->amcanreturn = NULL;	/* tuple not included in heapsort */
+	amroutine->amcostestimate = ivfflatcostestimate;
+	amroutine->amoptions = ivfflatoptions;
+	amroutine->amproperty = NULL;	/* TODO AMPROP_DISTANCE_ORDERABLE */
+#if PG_VERSION_NUM >= 120000
+	amroutine->ambuildphasename = ivfflatbuildphasename;
+#endif
+	amroutine->amvalidate = ivfflatvalidate;
+#if PG_VERSION_NUM >= 140000
+	amroutine->amadjustmembers = NULL;
+#endif
+	amroutine->ambeginscan = ivfflatbeginscan;
+	amroutine->amrescan = ivfflatrescan;
+	amroutine->amgettuple = ivfflatgettuple;
+	amroutine->amgetbitmap = NULL;	/* ordered (ORDER BY) scans only */
+	amroutine->amendscan = ivfflatendscan;
+	amroutine->ammarkpos = NULL;
+	amroutine->amrestrpos = NULL;
+
+	/* Interface functions to support parallel index scans */
+	amroutine->amestimateparallelscan = NULL;
+	amroutine->aminitparallelscan = NULL;
+	amroutine->amparallelrescan = NULL;
+
+	PG_RETURN_POINTER(amroutine);
+}
diff --git a/external/pgvector/src/ivfflat.h b/external/pgvector/src/ivfflat.h
new file mode 100644
index 00000000000..1eb35b0cf1b
--- /dev/null
+++ b/external/pgvector/src/ivfflat.h
@@ -0,0 +1,307 @@
+#ifndef IVFFLAT_H
+#define IVFFLAT_H
+
+#include "postgres.h"
+
+#include "access/generic_xlog.h"
+#include "access/parallel.h"
+#include "access/reloptions.h"
+#include "nodes/execnodes.h"
+#include "port.h"				/* for
random() */ +#include "utils/sampling.h" +#include "utils/tuplesort.h" +#include "vector.h" + +#if PG_VERSION_NUM >= 150000 +#include "common/pg_prng.h" +#endif + +#if PG_VERSION_NUM < 120000 +#include "access/relscan.h" +#endif + +#ifdef IVFFLAT_BENCH +#include "portability/instr_time.h" +#endif + +#define IVFFLAT_MAX_DIM 2000 + +/* Support functions */ +#define IVFFLAT_DISTANCE_PROC 1 +#define IVFFLAT_NORM_PROC 2 +#define IVFFLAT_KMEANS_DISTANCE_PROC 3 +#define IVFFLAT_KMEANS_NORM_PROC 4 + +#define IVFFLAT_VERSION 1 +#define IVFFLAT_MAGIC_NUMBER 0x14FF1A7 +#define IVFFLAT_PAGE_ID 0xFF84 + +/* Preserved page numbers */ +#define IVFFLAT_METAPAGE_BLKNO 0 +#define IVFFLAT_HEAD_BLKNO 1 /* first list page */ + +/* IVFFlat parameters */ +#define IVFFLAT_DEFAULT_LISTS 100 +#define IVFFLAT_MIN_LISTS 1 +#define IVFFLAT_MAX_LISTS 32768 +#define IVFFLAT_DEFAULT_PROBES 1 + +/* Build phases */ +/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */ +#define PROGRESS_IVFFLAT_PHASE_KMEANS 2 +#define PROGRESS_IVFFLAT_PHASE_ASSIGN 3 +#define PROGRESS_IVFFLAT_PHASE_LOAD 4 + +#define IVFFLAT_LIST_SIZE(_dim) (offsetof(IvfflatListData, center) + VECTOR_SIZE(_dim)) + +#define IvfflatPageGetOpaque(page) ((IvfflatPageOpaque) PageGetSpecialPointer(page)) +#define IvfflatPageGetMeta(page) ((IvfflatMetaPageData *) PageGetContents(page)) + +#ifdef IVFFLAT_BENCH +#define IvfflatBench(name, code) \ + do { \ + instr_time start; \ + instr_time duration; \ + INSTR_TIME_SET_CURRENT(start); \ + (code); \ + INSTR_TIME_SET_CURRENT(duration); \ + INSTR_TIME_SUBTRACT(duration, start); \ + elog(INFO, "%s: %.3f ms", name, INSTR_TIME_GET_MILLISEC(duration)); \ + } while (0) +#else +#define IvfflatBench(name, code) (code) +#endif + +#if PG_VERSION_NUM >= 150000 +#define RandomDouble() pg_prng_double(&pg_global_prng_state) +#define RandomInt() pg_prng_uint32(&pg_global_prng_state) +#else +#define RandomDouble() (((double) random()) / MAX_RANDOM_VALUE) +#define RandomInt() random() +#endif + +/* Variables */ 
+extern int ivfflat_probes; + +typedef struct VectorArrayData +{ + int length; + int maxlen; + int dim; + Vector *items; +} VectorArrayData; + +typedef VectorArrayData * VectorArray; + +typedef struct ListInfo +{ + BlockNumber blkno; + OffsetNumber offno; +} ListInfo; + +/* IVFFlat index options */ +typedef struct IvfflatOptions +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int lists; /* number of lists */ +} IvfflatOptions; + +typedef struct IvfflatSpool +{ + Tuplesortstate *sortstate; + Relation heap; + Relation index; +} IvfflatSpool; + +typedef struct IvfflatShared +{ + /* Immutable state */ + Oid heaprelid; + Oid indexrelid; + bool isconcurrent; + int scantuplesortstates; + + /* Worker progress */ + ConditionVariable workersdonecv; + + /* Mutex for mutable state */ + slock_t mutex; + + /* Mutable state */ + int nparticipantsdone; + double reltuples; + double indtuples; + +#ifdef IVFFLAT_KMEANS_DEBUG + double inertia; +#endif + +#if PG_VERSION_NUM < 120000 + ParallelHeapScanDescData heapdesc; /* must come last */ +#endif +} IvfflatShared; + +#if PG_VERSION_NUM >= 120000 +#define ParallelTableScanFromIvfflatShared(shared) \ + (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(IvfflatShared))) +#endif + +typedef struct IvfflatLeader +{ + ParallelContext *pcxt; + int nparticipanttuplesorts; + IvfflatShared *ivfshared; + Sharedsort *sharedsort; + Snapshot snapshot; + Vector *ivfcenters; +} IvfflatLeader; + +typedef struct IvfflatBuildState +{ + /* Info */ + Relation heap; + Relation index; + IndexInfo *indexInfo; + + /* Settings */ + int dimensions; + int lists; + + /* Statistics */ + double indtuples; + double reltuples; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + FmgrInfo *kmeansnormprocinfo; + Oid collation; + + /* Variables */ + VectorArray samples; + VectorArray centers; + ListInfo *listInfo; + Vector *normvec; + +#ifdef IVFFLAT_KMEANS_DEBUG + double inertia; + double *listSums; + int 
*listCounts; +#endif + + /* Sampling */ + BlockSamplerData bs; + ReservoirStateData rstate; + int rowstoskip; + + /* Sorting */ + Tuplesortstate *sortstate; + TupleDesc tupdesc; + TupleTableSlot *slot; + + /* Memory */ + MemoryContext tmpCtx; + + /* Parallel builds */ + IvfflatLeader *ivfleader; +} IvfflatBuildState; + +typedef struct IvfflatMetaPageData +{ + uint32 magicNumber; + uint32 version; + uint16 dimensions; + uint16 lists; +} IvfflatMetaPageData; + +typedef IvfflatMetaPageData * IvfflatMetaPage; + +typedef struct IvfflatPageOpaqueData +{ + BlockNumber nextblkno; + uint16 unused; + uint16 page_id; /* for identification of IVFFlat indexes */ +} IvfflatPageOpaqueData; + +typedef IvfflatPageOpaqueData * IvfflatPageOpaque; + +typedef struct IvfflatListData +{ + BlockNumber startPage; + BlockNumber insertPage; + Vector center; +} IvfflatListData; + +typedef IvfflatListData * IvfflatList; + +typedef struct IvfflatScanList +{ + pairingheap_node ph_node; + BlockNumber startPage; + double distance; +} IvfflatScanList; + +typedef struct IvfflatScanOpaqueData +{ + int probes; + int dimensions; + bool first; + + /* Sorting */ + Tuplesortstate *sortstate; + TupleDesc tupdesc; + TupleTableSlot *slot; + bool isnull; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + + /* Lists */ + pairingheap *listQueue; + IvfflatScanList lists[FLEXIBLE_ARRAY_MEMBER]; /* must come last */ +} IvfflatScanOpaqueData; + +typedef IvfflatScanOpaqueData * IvfflatScanOpaque; + +#define VECTOR_ARRAY_SIZE(_length, _dim) (sizeof(VectorArrayData) + (_length) * VECTOR_SIZE(_dim)) +#define VECTOR_ARRAY_OFFSET(_arr, _offset) ((char*) (_arr)->items + (_offset) * VECTOR_SIZE((_arr)->dim)) +#define VectorArrayGet(_arr, _offset) ((Vector *) VECTOR_ARRAY_OFFSET(_arr, _offset)) +#define VectorArraySet(_arr, _offset, _val) memcpy(VECTOR_ARRAY_OFFSET(_arr, _offset), _val, VECTOR_SIZE((_arr)->dim)) + +/* Methods */ +VectorArray VectorArrayInit(int maxlen, int 
dimensions); +void VectorArrayFree(VectorArray arr); +void PrintVectorArray(char *msg, VectorArray arr); +void IvfflatKmeans(Relation index, VectorArray samples, VectorArray centers); +FmgrInfo *IvfflatOptionalProcInfo(Relation index, uint16 procnum); +bool IvfflatNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, Vector * result); +int IvfflatGetLists(Relation index); +void IvfflatGetMetaPageInfo(Relation index, int *lists, int *dimensions); +void IvfflatUpdateList(Relation index, ListInfo listInfo, BlockNumber insertPage, BlockNumber originalInsertPage, BlockNumber startPage, ForkNumber forkNum); +void IvfflatCommitBuffer(Buffer buf, GenericXLogState *state); +void IvfflatAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum); +Buffer IvfflatNewBuffer(Relation index, ForkNumber forkNum); +void IvfflatInitPage(Buffer buf, Page page); +void IvfflatInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state); +void IvfflatInit(void); +PGDLLEXPORT void IvfflatParallelBuildMain(dsm_segment *seg, shm_toc *toc); + +/* Index access methods */ +IndexBuildResult *ivfflatbuild(Relation heap, Relation index, IndexInfo *indexInfo); +void ivfflatbuildempty(Relation index); +bool ivfflatinsert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, IndexUniqueCheck checkUnique +#if PG_VERSION_NUM >= 140000 + ,bool indexUnchanged +#endif + ,IndexInfo *indexInfo +); +IndexBulkDeleteResult *ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state); +IndexBulkDeleteResult *ivfflatvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); +IndexScanDesc ivfflatbeginscan(Relation index, int nkeys, int norderbys); +void ivfflatrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys); +bool ivfflatgettuple(IndexScanDesc scan, ScanDirection dir); +void ivfflatendscan(IndexScanDesc 
scan);

#endif
diff --git a/external/pgvector/src/ivfinsert.c b/external/pgvector/src/ivfinsert.c
new file mode 100644
index 00000000000..103fe49fd55
--- /dev/null
+++ b/external/pgvector/src/ivfinsert.c
@@ -0,0 +1,205 @@
+#include "postgres.h"
+
+#include <float.h>
+
+#include "ivfflat.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "utils/memutils.h"
+
+/*
+ * Find the list whose center minimizes the distance function.
+ * On return, *insertPage is that list's current insert page and
+ * *listInfo records where the list entry itself lives (block, offset).
+ */
+static void
+FindInsertPage(Relation index, Datum *values, BlockNumber *insertPage, ListInfo * listInfo)
+{
+	double		minDistance = DBL_MAX;
+	BlockNumber nextblkno = IVFFLAT_HEAD_BLKNO;
+	FmgrInfo   *procinfo;
+	Oid			collation;
+
+	/* Avoid compiler warning */
+	listInfo->blkno = nextblkno;
+	listInfo->offno = FirstOffsetNumber;
+
+	procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC);
+	collation = index->rd_indcollation[0];
+
+	/* Search all list pages */
+	while (BlockNumberIsValid(nextblkno))
+	{
+		Buffer		cbuf;
+		Page		cpage;
+		OffsetNumber maxoffno;
+
+		cbuf = ReadBuffer(index, nextblkno);
+		LockBuffer(cbuf, BUFFER_LOCK_SHARE);
+		cpage = BufferGetPage(cbuf);
+		maxoffno = PageGetMaxOffsetNumber(cpage);
+
+		for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno))
+		{
+			IvfflatList list;
+			double		distance;
+
+			list = (IvfflatList) PageGetItem(cpage, PageGetItemId(cpage, offno));
+			distance = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, values[0], PointerGetDatum(&list->center)));	/* NOTE(review): uses raw values[0], not the detoasted/normalized copy made in InsertTuple -- confirm intended */
+
+			if (distance < minDistance || !BlockNumberIsValid(*insertPage))	/* second test seeds the first list */
+			{
+				*insertPage = list->insertPage;
+				listInfo->blkno = nextblkno;
+				listInfo->offno = offno;
+				minDistance = distance;
+			}
+		}
+
+		nextblkno = IvfflatPageGetOpaque(cpage)->nextblkno;
+
+		UnlockReleaseBuffer(cbuf);
+	}
+}
+
+/*
+ * Insert a tuple into the index
+ */
+static void
+InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heapRel)
+{
+	IndexTuple	itup;
+	Datum		value;
+	FmgrInfo   *normprocinfo;
+	Buffer		buf;
+	Page		page;
+	GenericXLogState *state;
+	Size		itemsz;
+	BlockNumber insertPage = InvalidBlockNumber;
+	ListInfo	listInfo;
+	BlockNumber originalInsertPage;
+
+	/* Detoast once for all calls */
+	value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
+
+	/* Normalize if needed */
+	normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC);
+	if (normprocinfo != NULL)
+	{
+		if (!IvfflatNormValue(normprocinfo, index->rd_indcollation[0], &value, NULL))
+			return;				/* presumably a zero-norm vector; skipped -- confirm in IvfflatNormValue */
+	}
+
+	/* Find the insert page - sets the page and list info */
+	FindInsertPage(index, values, &insertPage, &listInfo);
+	Assert(BlockNumberIsValid(insertPage));
+	originalInsertPage = insertPage;
+
+	/* Form tuple */
+	itup = index_form_tuple(RelationGetDescr(index), &value, isnull);
+	itup->t_tid = *heap_tid;
+
+	/* Get tuple size */
+	itemsz = MAXALIGN(IndexTupleSize(itup));
+	Assert(itemsz <= BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(IvfflatPageOpaqueData)) - sizeof(ItemIdData));
+
+	/* Find a page to insert the item; walk the list's page chain */
+	for (;;)
+	{
+		buf = ReadBuffer(index, insertPage);
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+		state = GenericXLogStart(index);
+		page = GenericXLogRegisterBuffer(state, buf, 0);
+
+		if (PageGetFreeSpace(page) >= itemsz)
+			break;
+
+		insertPage = IvfflatPageGetOpaque(page)->nextblkno;
+
+		if (BlockNumberIsValid(insertPage))
+		{
+			/* Move to next page */
+			GenericXLogAbort(state);
+			UnlockReleaseBuffer(buf);
+		}
+		else
+		{
+			Buffer		newbuf;
+			Page		newpage;
+
+			/* Add a new page */
+			LockRelationForExtension(index, ExclusiveLock);
+			newbuf = IvfflatNewBuffer(index, MAIN_FORKNUM);
+			UnlockRelationForExtension(index, ExclusiveLock);
+
+			/* Init new page */
+			newpage = GenericXLogRegisterBuffer(state, newbuf, GENERIC_XLOG_FULL_IMAGE);
+			IvfflatInitPage(newbuf, newpage);
+
+			/* Update insert page */
+			insertPage = BufferGetBlockNumber(newbuf);
+
+			/* Update previous buffer */
+			IvfflatPageGetOpaque(page)->nextblkno = insertPage;
+
+			/* Commit */
+			GenericXLogFinish(state);
+
+			/* Unlock previous buffer */
+			UnlockReleaseBuffer(buf);
+
+			/* Prepare new buffer */
+			state = GenericXLogStart(index);
+			buf = newbuf;
+			page = GenericXLogRegisterBuffer(state, buf, 0);
+			break;
+		}
+	}
+
+	/* Add to next offset */
+	if (PageAddItem(page, (Item) itup, itemsz, InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
+		elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
+
+	IvfflatCommitBuffer(buf, state);
+
+	/* Update the insert page */
+	if (insertPage != originalInsertPage)
+		IvfflatUpdateList(index, listInfo, insertPage, originalInsertPage, InvalidBlockNumber, MAIN_FORKNUM);
+}
+
+/*
+ * Insert a tuple into the index (aminsert entry point)
+ */
+bool
+ivfflatinsert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid,
+			  Relation heap, IndexUniqueCheck checkUnique
+#if PG_VERSION_NUM >= 140000
+			  ,bool indexUnchanged
+#endif
+			  ,IndexInfo *indexInfo
+)
+{
+	MemoryContext oldCtx;
+	MemoryContext insertCtx;
+
+	/* Skip nulls */
+	if (isnull[0])
+		return false;
+
+	/*
+	 * Use memory context since detoast, IvfflatNormValue, and
+	 * index_form_tuple can allocate
+	 */
+	insertCtx = AllocSetContextCreate(CurrentMemoryContext,
+									  "Ivfflat insert temporary context",
+									  ALLOCSET_DEFAULT_SIZES);
+	oldCtx = MemoryContextSwitchTo(insertCtx);
+
+	/* Insert tuple */
+	InsertTuple(index, values, isnull, heap_tid, heap);
+
+	/* Delete memory context */
+	MemoryContextSwitchTo(oldCtx);
+	MemoryContextDelete(insertCtx);
+
+	return false;				/* index does not enforce uniqueness */
+}
diff --git a/external/pgvector/src/ivfkmeans.c b/external/pgvector/src/ivfkmeans.c
new file mode 100644
index 00000000000..a87edcbd1d7
--- /dev/null
+++ b/external/pgvector/src/ivfkmeans.c
@@ -0,0 +1,525 @@
+#include "postgres.h"
+
+#include <float.h>
+#include <math.h>
+
+#include "ivfflat.h"
+#include "miscadmin.h"
+
+/*
+ * Initialize with kmeans++
+ *
+ * https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf
+ */
+static void
+InitCenters(Relation index, VectorArray samples, VectorArray
centers, float *lowerBound)
+{
+	FmgrInfo   *procinfo;
+	Oid			collation;
+	int64		j;
+	float	   *weight = palloc(samples->length * sizeof(float));
+	int			numCenters = centers->maxlen;
+	int			numSamples = samples->length;
+
+	procinfo = index_getprocinfo(index, 1, IVFFLAT_KMEANS_DISTANCE_PROC);
+	collation = index->rd_indcollation[0];
+
+	/* Choose an initial center uniformly at random */
+	VectorArraySet(centers, 0, VectorArrayGet(samples, RandomInt() % samples->length));	/* NOTE(review): modulo introduces slight bias */
+	centers->length++;
+
+	for (j = 0; j < numSamples; j++)
+		weight[j] = FLT_MAX;
+
+	for (int i = 0; i < numCenters; i++)
+	{
+		double		sum;
+		double		choice;
+
+		CHECK_FOR_INTERRUPTS();
+
+		sum = 0.0;
+
+		for (j = 0; j < numSamples; j++)
+		{
+			Vector	   *vec = VectorArrayGet(samples, j);
+			double		distance;
+
+			/* Only need to compute distance for new center */
+			/* TODO Use triangle inequality to reduce distance calculations */
+			distance = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(vec), PointerGetDatum(VectorArrayGet(centers, i))));
+
+			/* Set lower bound (consumed later by ElkanKmeans) */
+			lowerBound[j * numCenters + i] = distance;
+
+			/* Use distance squared for weighted probability distribution */
+			distance *= distance;
+
+			if (distance < weight[j])
+				weight[j] = distance;
+
+			sum += weight[j];
+		}
+
+		/* Only compute lower bound on last iteration */
+		if (i + 1 == numCenters)
+			break;
+
+		/* Choose new center using weighted probability distribution.
 */
+		choice = sum * RandomDouble();
+		for (j = 0; j < numSamples - 1; j++)
+		{
+			choice -= weight[j];
+			if (choice <= 0)
+				break;
+		}
+
+		VectorArraySet(centers, i + 1, VectorArrayGet(samples, j));
+		centers->length++;
+	}
+
+	pfree(weight);
+}
+
+/*
+ * Apply norm to vector (divide each component by the vector's norm)
+ */
+static inline void
+ApplyNorm(FmgrInfo *normprocinfo, Oid collation, Vector * vec)
+{
+	double		norm = DatumGetFloat8(FunctionCall1Coll(normprocinfo, collation, PointerGetDatum(vec)));
+
+	/* TODO Handle zero norm */
+	if (norm > 0)
+	{
+		for (int i = 0; i < vec->dim; i++)
+			vec->x[i] /= norm;
+	}
+}
+
+/*
+ * Compare vectors (qsort callback)
+ */
+static int
+CompareVectors(const void *a, const void *b)
+{
+	return vector_cmp_internal((Vector *) a, (Vector *) b);
+}
+
+/*
+ * Quick approach if we have little data (samples <= requested centers):
+ * use the distinct samples as centers and pad with random vectors
+ */
+static void
+QuickCenters(Relation index, VectorArray samples, VectorArray centers)
+{
+	int			dimensions = centers->dim;
+	Oid			collation = index->rd_indcollation[0];
+	FmgrInfo   *normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC);
+
+	/* Copy existing vectors while avoiding duplicates */
+	if (samples->length > 0)
+	{
+		qsort(samples->items, samples->length, VECTOR_SIZE(samples->dim), CompareVectors);	/* sort so duplicates are adjacent */
+		for (int i = 0; i < samples->length; i++)
+		{
+			Vector	   *vec = VectorArrayGet(samples, i);
+
+			if (i == 0 || CompareVectors(vec, VectorArrayGet(samples, i - 1)) != 0)
+			{
+				VectorArraySet(centers, centers->length, vec);
+				centers->length++;
+			}
+		}
+	}
+
+	/* Fill remaining with random data */
+	while (centers->length < centers->maxlen)
+	{
+		Vector	   *vec = VectorArrayGet(centers, centers->length);
+
+		SET_VARSIZE(vec, VECTOR_SIZE(dimensions));
+		vec->dim = dimensions;
+
+		for (int j = 0; j < dimensions; j++)
+			vec->x[j] = RandomDouble();
+
+		/* Normalize if needed (only needed for random centers) */
+		if (normprocinfo != NULL)
+			ApplyNorm(normprocinfo, collation, vec);
+
+		centers->length++;
+	}
+}
+
+/*
+ * Use Elkan for performance.
 This requires distance function to satisfy triangle inequality.
+ *
+ * We use L2 distance for L2 (not L2 squared like index scan)
+ * and angular distance for inner product and cosine distance
+ *
+ * https://www.aaai.org/Papers/ICML/2003/ICML03-022.pdf
+ */
+static void
+ElkanKmeans(Relation index, VectorArray samples, VectorArray centers)
+{
+	FmgrInfo   *procinfo;
+	FmgrInfo   *normprocinfo;
+	Oid			collation;
+	Vector	   *vec;
+	Vector	   *newCenter;
+	int64		j;
+	int64		k;
+	int			dimensions = centers->dim;
+	int			numCenters = centers->maxlen;
+	int			numSamples = samples->length;
+	VectorArray newCenters;
+	int		   *centerCounts;
+	int		   *closestCenters;
+	float	   *lowerBound;
+	float	   *upperBound;
+	float	   *s;
+	float	   *halfcdist;
+	float	   *newcdist;
+
+	/* Calculate allocation sizes */
+	Size		samplesSize = VECTOR_ARRAY_SIZE(samples->maxlen, samples->dim);
+	Size		centersSize = VECTOR_ARRAY_SIZE(centers->maxlen, centers->dim);
+	Size		newCentersSize = VECTOR_ARRAY_SIZE(numCenters, dimensions);
+	Size		centerCountsSize = sizeof(int) * numCenters;
+	Size		closestCentersSize = sizeof(int) * numSamples;
+	Size		lowerBoundSize = sizeof(float) * numSamples * numCenters;
+	Size		upperBoundSize = sizeof(float) * numSamples;
+	Size		sSize = sizeof(float) * numCenters;
+	Size		halfcdistSize = sizeof(float) * numCenters * numCenters;
+	Size		newcdistSize = sizeof(float) * numCenters;
+
+	/* Calculate total size */
+	Size		totalSize = samplesSize + centersSize + newCentersSize + centerCountsSize + closestCentersSize + lowerBoundSize + upperBoundSize + sSize + halfcdistSize + newcdistSize;
+
+	/* Check memory requirements */
+	/* Add one to error message to ceil */
+	if (totalSize > (Size) maintenance_work_mem * 1024L)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("memory required is %zu MB, maintenance_work_mem is %d MB",
+						totalSize / (1024 * 1024) + 1, maintenance_work_mem / 1024)));
+
+	/* Ensure indexing does not overflow */
+	if (numCenters * numCenters > INT_MAX)	/* NOTE(review): with lists <= 32768 the product fits in int, so this is a defensive guard only */
+		elog(ERROR,
 "Indexing overflow detected. Please report a bug.");
+
+	/* Set support functions */
+	procinfo = index_getprocinfo(index, 1, IVFFLAT_KMEANS_DISTANCE_PROC);
+	normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC);
+	collation = index->rd_indcollation[0];
+
+	/* Allocate space */
+	/* Use float instead of double to save memory */
+	centerCounts = palloc(centerCountsSize);
+	closestCenters = palloc(closestCentersSize);
+	lowerBound = palloc_extended(lowerBoundSize, MCXT_ALLOC_HUGE);
+	upperBound = palloc(upperBoundSize);
+	s = palloc(sSize);
+	halfcdist = palloc_extended(halfcdistSize, MCXT_ALLOC_HUGE);
+	newcdist = palloc(newcdistSize);
+
+	newCenters = VectorArrayInit(numCenters, dimensions);
+	for (j = 0; j < numCenters; j++)
+	{
+		vec = VectorArrayGet(newCenters, j);
+		SET_VARSIZE(vec, VECTOR_SIZE(dimensions));
+		vec->dim = dimensions;
+	}
+
+	/* Pick initial centers */
+	InitCenters(index, samples, centers, lowerBound);
+
+	/* Assign each x to its closest initial center c(x) = argmin d(x,c) */
+	for (j = 0; j < numSamples; j++)
+	{
+		float		minDistance = FLT_MAX;
+		int			closestCenter = 0;
+
+		/* Find closest center */
+		for (k = 0; k < numCenters; k++)
+		{
+			/* TODO Use Lemma 1 in k-means++ initialization */
+			float		distance = lowerBound[j * numCenters + k];
+
+			if (distance < minDistance)
+			{
+				minDistance = distance;
+				closestCenter = k;
+			}
+		}
+
+		upperBound[j] = minDistance;
+		closestCenters[j] = closestCenter;
+	}
+
+	/* Give 500 iterations to converge */
+	for (int iteration = 0; iteration < 500; iteration++)
+	{
+		int			changes = 0;
+		bool		rjreset;
+
+		/* Can take a while, so ensure we can interrupt */
+		CHECK_FOR_INTERRUPTS();
+
+		/* Step 1: For all centers, compute distance (half, for Elkan's pruning) */
+		for (j = 0; j < numCenters; j++)
+		{
+			vec = VectorArrayGet(centers, j);
+
+			for (k = j + 1; k < numCenters; k++)
+			{
+				float		distance = 0.5 * DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(vec), PointerGetDatum(VectorArrayGet(centers,
 k))));
+
+				halfcdist[j * numCenters + k] = distance;
+				halfcdist[k * numCenters + j] = distance;
+			}
+		}
+
+		/* For all centers c, compute s(c) */
+		for (j = 0; j < numCenters; j++)
+		{
+			float		minDistance = FLT_MAX;
+
+			for (k = 0; k < numCenters; k++)
+			{
+				float		distance;
+
+				if (j == k)
+					continue;
+
+				distance = halfcdist[j * numCenters + k];
+				if (distance < minDistance)
+					minDistance = distance;
+			}
+
+			s[j] = minDistance;
+		}
+
+		rjreset = iteration != 0;
+
+		for (j = 0; j < numSamples; j++)
+		{
+			bool		rj;
+
+			/* Step 2: Identify all points x such that u(x) <= s(c(x)) */
+			if (upperBound[j] <= s[closestCenters[j]])
+				continue;
+
+			rj = rjreset;
+
+			for (k = 0; k < numCenters; k++)
+			{
+				float		dxcx;
+
+				/* Step 3: For all remaining points x and centers c */
+				if (k == closestCenters[j])
+					continue;
+
+				if (upperBound[j] <= lowerBound[j * numCenters + k])
+					continue;
+
+				if (upperBound[j] <= halfcdist[closestCenters[j] * numCenters + k])
+					continue;
+
+				vec = VectorArrayGet(samples, j);
+
+				/* Step 3a */
+				if (rj)
+				{
+					dxcx = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(vec), PointerGetDatum(VectorArrayGet(centers, closestCenters[j]))));
+
+					/* d(x,c(x)) computed, which is a form of d(x,c) */
+					lowerBound[j * numCenters + closestCenters[j]] = dxcx;
+					upperBound[j] = dxcx;
+
+					rj = false;
+				}
+				else
+					dxcx = upperBound[j];
+
+				/* Step 3b */
+				if (dxcx > lowerBound[j * numCenters + k] || dxcx > halfcdist[closestCenters[j] * numCenters + k])
+				{
+					float		dxc = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(vec), PointerGetDatum(VectorArrayGet(centers, k))));
+
+					/* d(x,c) calculated */
+					lowerBound[j * numCenters + k] = dxc;
+
+					if (dxc < dxcx)
+					{
+						closestCenters[j] = k;
+
+						/* c(x) changed */
+						upperBound[j] = dxc;
+
+						changes++;
+					}
+				}
+			}
+		}
+
+		/* Step 4: For each center c, let m(c) be mean of all points assigned */
+		for (j = 0; j < numCenters; j++)
+		{
+			vec = VectorArrayGet(newCenters, j);
+			for (k = 0;
 k < dimensions; k++)
+				vec->x[k] = 0.0;
+
+			centerCounts[j] = 0;
+		}
+
+		for (j = 0; j < numSamples; j++)
+		{
+			int			closestCenter;
+
+			vec = VectorArrayGet(samples, j);
+			closestCenter = closestCenters[j];
+
+			/* Increment sum and count of closest center */
+			newCenter = VectorArrayGet(newCenters, closestCenter);
+			for (k = 0; k < dimensions; k++)
+				newCenter->x[k] += vec->x[k];
+
+			centerCounts[closestCenter] += 1;
+		}
+
+		for (j = 0; j < numCenters; j++)
+		{
+			vec = VectorArrayGet(newCenters, j);
+
+			if (centerCounts[j] > 0)
+			{
+				/* Double avoids overflow, but requires more memory */
+				/* TODO Update bounds */
+				for (k = 0; k < dimensions; k++)
+				{
+					if (isinf(vec->x[k]))
+						vec->x[k] = vec->x[k] > 0 ? FLT_MAX : -FLT_MAX;	/* clamp float overflow from summation */
+				}
+
+				for (k = 0; k < dimensions; k++)
+					vec->x[k] /= centerCounts[j];
+			}
+			else
+			{
+				/* TODO Handle empty centers properly */
+				for (k = 0; k < dimensions; k++)
+					vec->x[k] = RandomDouble();
+			}
+
+			/* Normalize if needed */
+			if (normprocinfo != NULL)
+				ApplyNorm(normprocinfo, collation, vec);
+		}
+
+		/* Step 5 */
+		for (j = 0; j < numCenters; j++)
+			newcdist[j] = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(VectorArrayGet(centers, j)), PointerGetDatum(VectorArrayGet(newCenters, j))));
+
+		for (j = 0; j < numSamples; j++)
+		{
+			for (k = 0; k < numCenters; k++)
+			{
+				float		distance = lowerBound[j * numCenters + k] - newcdist[k];
+
+				if (distance < 0)
+					distance = 0;
+
+				lowerBound[j * numCenters + k] = distance;
+			}
+		}
+
+		/* Step 6 */
+		/* We reset r(x) before Step 3 in the next iteration */
+		for (j = 0; j < numSamples; j++)
+			upperBound[j] += newcdist[closestCenters[j]];
+
+		/* Step 7 */
+		for (j = 0; j < numCenters; j++)
+			VectorArraySet(centers, j, VectorArrayGet(newCenters, j));
+
+		if (changes == 0 && iteration != 0)
+			break;
+	}
+
+	VectorArrayFree(newCenters);
+	pfree(centerCounts);
+	pfree(closestCenters);
+	pfree(lowerBound);
+	pfree(upperBound);
+	pfree(s);
+	pfree(halfcdist);
+	pfree(newcdist);
+}
+
+/*
+ * Detect issues with the computed centers (sanity checks on build)
+ */
+static void
+CheckCenters(Relation index, VectorArray centers)
+{
+	FmgrInfo   *normprocinfo;
+
+	if (centers->length != centers->maxlen)
+		elog(ERROR, "Not enough centers. Please report a bug.");
+
+	/* Ensure no NaN or infinite values */
+	for (int i = 0; i < centers->length; i++)
+	{
+		Vector	   *vec = VectorArrayGet(centers, i);
+
+		for (int j = 0; j < vec->dim; j++)
+		{
+			if (isnan(vec->x[j]))
+				elog(ERROR, "NaN detected. Please report a bug.");
+
+			if (isinf(vec->x[j]))
+				elog(ERROR, "Infinite value detected. Please report a bug.");
+		}
+	}
+
+	/* Ensure no duplicate centers */
+	/* Fine to sort in-place */
+	qsort(centers->items, centers->length, VECTOR_SIZE(centers->dim), CompareVectors);
+	for (int i = 1; i < centers->length; i++)
+	{
+		if (CompareVectors(VectorArrayGet(centers, i), VectorArrayGet(centers, i - 1)) == 0)
+			elog(ERROR, "Duplicate centers detected. Please report a bug.");
+	}
+
+	/* Ensure no zero vectors for cosine distance */
+	/* Check NORM_PROC instead of KMEANS_NORM_PROC */
+	normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC);
+	if (normprocinfo != NULL)
+	{
+		Oid			collation = index->rd_indcollation[0];
+
+		for (int i = 0; i < centers->length; i++)
+		{
+			double		norm = DatumGetFloat8(FunctionCall1Coll(normprocinfo, collation, PointerGetDatum(VectorArrayGet(centers, i))));
+
+			if (norm == 0)
+				elog(ERROR, "Zero norm detected.
 Please report a bug.");
+		}
+	}
+}
+
+/*
+ * Perform naive k-means centering
+ * We use spherical k-means for inner product and cosine
+ */
+void
+IvfflatKmeans(Relation index, VectorArray samples, VectorArray centers)
+{
+	if (samples->length <= centers->maxlen)
+		QuickCenters(index, samples, centers);	/* too few samples: skip clustering */
+	else
+		ElkanKmeans(index, samples, centers);
+
+	CheckCenters(index, centers);
+}
diff --git a/external/pgvector/src/ivfscan.c b/external/pgvector/src/ivfscan.c
new file mode 100644
index 00000000000..e6a96bb8c91
--- /dev/null
+++ b/external/pgvector/src/ivfscan.c
@@ -0,0 +1,350 @@
+#include "postgres.h"
+
+#include <float.h>
+
+#include "access/relscan.h"
+#include "catalog/pg_operator_d.h"
+#include "catalog/pg_type_d.h"
+#include "ivfflat.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+
+/*
+ * Compare list distances (pairingheap callback; larger distance sorts first)
+ */
+static int
+CompareLists(const pairingheap_node *a, const pairingheap_node *b, void *arg)
+{
+	if (((const IvfflatScanList *) a)->distance > ((const IvfflatScanList *) b)->distance)
+		return 1;
+
+	if (((const IvfflatScanList *) a)->distance < ((const IvfflatScanList *) b)->distance)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Get lists and sort by distance: keep the "probes" lists whose centers
+ * are closest to the query value, using the heap as a bounded max-heap
+ */
+static void
+GetScanLists(IndexScanDesc scan, Datum value)
+{
+	IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
+	BlockNumber nextblkno = IVFFLAT_HEAD_BLKNO;
+	int			listCount = 0;
+	double		maxDistance = DBL_MAX;
+
+	/* Search all list pages */
+	while (BlockNumberIsValid(nextblkno))
+	{
+		Buffer		cbuf;
+		Page		cpage;
+		OffsetNumber maxoffno;
+
+		cbuf = ReadBuffer(scan->indexRelation, nextblkno);
+		LockBuffer(cbuf, BUFFER_LOCK_SHARE);
+		cpage = BufferGetPage(cbuf);
+
+		maxoffno = PageGetMaxOffsetNumber(cpage);
+
+		for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno))
+		{
+			IvfflatList list = (IvfflatList) PageGetItem(cpage, PageGetItemId(cpage, offno));
+			double		distance;
+
+			/* Use procinfo from the index instead of
scan key for performance */
+			distance = DatumGetFloat8(FunctionCall2Coll(so->procinfo, so->collation, PointerGetDatum(&list->center), value));
+
+			if (listCount < so->probes)
+			{
+				IvfflatScanList *scanlist;
+
+				scanlist = &so->lists[listCount];
+				scanlist->startPage = list->startPage;
+				scanlist->distance = distance;
+				listCount++;
+
+				/* Add to heap */
+				pairingheap_add(so->listQueue, &scanlist->ph_node);
+
+				/* Calculate max distance */
+				if (listCount == so->probes)
+					maxDistance = ((IvfflatScanList *) pairingheap_first(so->listQueue))->distance;
+			}
+			else if (distance < maxDistance)
+			{
+				IvfflatScanList *scanlist;
+
+				/* Remove */
+				scanlist = (IvfflatScanList *) pairingheap_remove_first(so->listQueue);
+
+				/* Reuse */
+				scanlist->startPage = list->startPage;
+				scanlist->distance = distance;
+				pairingheap_add(so->listQueue, &scanlist->ph_node);
+
+				/* Update max distance */
+				maxDistance = ((IvfflatScanList *) pairingheap_first(so->listQueue))->distance;
+			}
+		}
+
+		nextblkno = IvfflatPageGetOpaque(cpage)->nextblkno;
+
+		UnlockReleaseBuffer(cbuf);
+	}
+}
+
+/*
+ * Get items: load every tuple from the selected probe lists into the
+ * tuplesort state (distance in column 0, heap TID in column 1)
+ */
+static void
+GetScanItems(IndexScanDesc scan, Datum value)
+{
+	IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
+	TupleDesc	tupdesc = RelationGetDescr(scan->indexRelation);
+	double		tuples = 0;
+
+#if PG_VERSION_NUM >= 120000
+	TupleTableSlot *slot = MakeSingleTupleTableSlot(so->tupdesc, &TTSOpsVirtual);
+#else
+	TupleTableSlot *slot = MakeSingleTupleTableSlot(so->tupdesc);
+#endif
+
+	/*
+	 * Reuse same set of shared buffers for scan
+	 *
+	 * See postgres/src/backend/storage/buffer/README for description
+	 */
+	BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD);
+
+	/* Search closest probes lists */
+	while (!pairingheap_is_empty(so->listQueue))
+	{
+		BlockNumber searchPage = ((IvfflatScanList *) pairingheap_remove_first(so->listQueue))->startPage;
+
+		/* Search all entry pages for list */
+		while (BlockNumberIsValid(searchPage))
+		{
+			Buffer		buf;
+			Page		page;
+			OffsetNumber maxoffno;
+
+			buf = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM, searchPage, RBM_NORMAL, bas);
+			LockBuffer(buf, BUFFER_LOCK_SHARE);
+			page = BufferGetPage(buf);
+			maxoffno = PageGetMaxOffsetNumber(page);
+
+			for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno))
+			{
+				IndexTuple	itup;
+				Datum		datum;
+				bool		isnull;
+				ItemId		itemid = PageGetItemId(page, offno);
+
+				itup = (IndexTuple) PageGetItem(page, itemid);
+				datum = index_getattr(itup, 1, tupdesc, &isnull);
+
+				/*
+				 * Add virtual tuple
+				 *
+				 * Use procinfo from the index instead of scan key for
+				 * performance
+				 */
+				ExecClearTuple(slot);
+				slot->tts_values[0] = FunctionCall2Coll(so->procinfo, so->collation, datum, value);
+				slot->tts_isnull[0] = false;
+				slot->tts_values[1] = PointerGetDatum(&itup->t_tid);
+				slot->tts_isnull[1] = false;
+				ExecStoreVirtualTuple(slot);
+
+				tuplesort_puttupleslot(so->sortstate, slot);
+
+				tuples++;
+			}
+
+			searchPage = IvfflatPageGetOpaque(page)->nextblkno;
+
+			UnlockReleaseBuffer(buf);
+		}
+	}
+
+	FreeAccessStrategy(bas);
+
+	if (tuples < 100)
+		ereport(DEBUG1,
+				(errmsg("index scan found few tuples"),
+				 errdetail("Index may have been created with little data."),
+				 errhint("Recreate the index and possibly decrease lists.")));
+
+	tuplesort_performsort(so->sortstate);
+}
+
+/*
+ * Prepare for an index scan
+ */
+IndexScanDesc
+ivfflatbeginscan(Relation index, int nkeys, int norderbys)
+{
+	IndexScanDesc scan;
+	IvfflatScanOpaque so;
+	int			lists;
+	int			dimensions;
+	AttrNumber	attNums[] = {1};
+	Oid			sortOperators[] = {Float8LessOperator};
+	Oid			sortCollations[] = {InvalidOid};
+	bool		nullsFirstFlags[] = {false};
+	int			probes = ivfflat_probes;
+
+	scan = RelationGetIndexScan(index, nkeys, norderbys);
+
+	/* Get lists and dimensions from metapage */
+	IvfflatGetMetaPageInfo(index, &lists, &dimensions);
+
+	if (probes > lists)
+		probes = lists;
+
+	so = (IvfflatScanOpaque) palloc(offsetof(IvfflatScanOpaqueData, lists) +
probes * sizeof(IvfflatScanList)); + so->first = true; + so->probes = probes; + so->dimensions = dimensions; + + /* Set support functions */ + so->procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + so->normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + so->collation = index->rd_indcollation[0]; + + /* Create tuple description for sorting */ +#if PG_VERSION_NUM >= 120000 + so->tupdesc = CreateTemplateTupleDesc(2); +#else + so->tupdesc = CreateTemplateTupleDesc(2, false); +#endif + TupleDescInitEntry(so->tupdesc, (AttrNumber) 1, "distance", FLOAT8OID, -1, 0); + TupleDescInitEntry(so->tupdesc, (AttrNumber) 2, "heaptid", TIDOID, -1, 0); + + /* Prep sort */ + so->sortstate = tuplesort_begin_heap(so->tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags, work_mem, NULL, false); + +#if PG_VERSION_NUM >= 120000 + so->slot = MakeSingleTupleTableSlot(so->tupdesc, &TTSOpsMinimalTuple); +#else + so->slot = MakeSingleTupleTableSlot(so->tupdesc); +#endif + + so->listQueue = pairingheap_allocate(CompareLists, scan); + + scan->opaque = so; + + return scan; +} + +/* + * Start or restart an index scan + */ +void +ivfflatrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque; + +#if PG_VERSION_NUM >= 130000 + if (!so->first) + tuplesort_reset(so->sortstate); +#endif + + so->first = true; + pairingheap_reset(so->listQueue); + + if (keys && scan->numberOfKeys > 0) + memmove(scan->keyData, keys, scan->numberOfKeys * sizeof(ScanKeyData)); + + if (orderbys && scan->numberOfOrderBys > 0) + memmove(scan->orderByData, orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); +} + +/* + * Fetch the next tuple in the given scan + */ +bool +ivfflatgettuple(IndexScanDesc scan, ScanDirection dir) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque; + + /* + * Index can be used to scan backward, but Postgres doesn't support + * backward scan on operators + 
*/ + Assert(ScanDirectionIsForward(dir)); + + if (so->first) + { + Datum value; + + /* Count index scan for stats */ + pgstat_count_index_scan(scan->indexRelation); + + /* Safety check */ + if (scan->orderByData == NULL) + elog(ERROR, "cannot scan ivfflat index without order"); + + /* Requires MVCC-compliant snapshot as not able to pin during sorting */ + /* https://www.postgresql.org/docs/current/index-locking.html */ + if (!IsMVCCSnapshot(scan->xs_snapshot)) + elog(ERROR, "non-MVCC snapshots are not supported with ivfflat"); + + if (scan->orderByData->sk_flags & SK_ISNULL) + value = PointerGetDatum(InitVector(so->dimensions)); + else + { + value = scan->orderByData->sk_argument; + + /* Value should not be compressed or toasted */ + Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); + Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value))); + + /* Fine if normalization fails */ + if (so->normprocinfo != NULL) + IvfflatNormValue(so->normprocinfo, so->collation, &value, NULL); + } + + IvfflatBench("GetScanLists", GetScanLists(scan, value)); + IvfflatBench("GetScanItems", GetScanItems(scan, value)); + so->first = false; + + /* Clean up if we allocated a new value */ + if (value != scan->orderByData->sk_argument) + pfree(DatumGetPointer(value)); + } + + if (tuplesort_gettupleslot(so->sortstate, true, false, so->slot, NULL)) + { + ItemPointer heaptid = (ItemPointer) DatumGetPointer(slot_getattr(so->slot, 2, &so->isnull)); + +#if PG_VERSION_NUM >= 120000 + scan->xs_heaptid = *heaptid; +#else + scan->xs_ctup.t_self = *heaptid; +#endif + + scan->xs_recheckorderby = false; + return true; + } + + return false; +} + +/* + * End a scan and release resources + */ +void +ivfflatendscan(IndexScanDesc scan) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque; + + pairingheap_free(so->listQueue); + tuplesort_end(so->sortstate); + + pfree(so); + scan->opaque = NULL; +} diff --git a/external/pgvector/src/ivfutils.c b/external/pgvector/src/ivfutils.c new file mode 100644 
index 00000000000..7959a175c86 --- /dev/null +++ b/external/pgvector/src/ivfutils.c @@ -0,0 +1,240 @@ +#include "postgres.h" + +#include "ivfflat.h" +#include "storage/bufmgr.h" +#include "vector.h" + +/* + * Allocate a vector array + */ +VectorArray +VectorArrayInit(int maxlen, int dimensions) +{ + VectorArray res = palloc(sizeof(VectorArrayData)); + + res->length = 0; + res->maxlen = maxlen; + res->dim = dimensions; + res->items = palloc_extended(maxlen * VECTOR_SIZE(dimensions), MCXT_ALLOC_ZERO | MCXT_ALLOC_HUGE); + return res; +} + +/* + * Free a vector array + */ +void +VectorArrayFree(VectorArray arr) +{ + pfree(arr->items); + pfree(arr); +} + +/* + * Print vector array - useful for debugging + */ +void +PrintVectorArray(char *msg, VectorArray arr) +{ + for (int i = 0; i < arr->length; i++) + PrintVector(msg, VectorArrayGet(arr, i)); +} + +/* + * Get the number of lists in the index + */ +int +IvfflatGetLists(Relation index) +{ + IvfflatOptions *opts = (IvfflatOptions *) index->rd_options; + + if (opts) + return opts->lists; + + return IVFFLAT_DEFAULT_LISTS; +} + +/* + * Get proc + */ +FmgrInfo * +IvfflatOptionalProcInfo(Relation index, uint16 procnum) +{ + if (!OidIsValid(index_getprocid(index, 1, procnum))) + return NULL; + + return index_getprocinfo(index, 1, procnum); +} + +/* + * Divide by the norm + * + * Returns false if value should not be indexed + * + * The caller needs to free the pointer stored in value + * if it's different than the original value + */ +bool +IvfflatNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, Vector * result) +{ + double norm = DatumGetFloat8(FunctionCall1Coll(procinfo, collation, *value)); + + if (norm > 0) + { + Vector *v = DatumGetVector(*value); + + if (result == NULL) + result = InitVector(v->dim); + + for (int i = 0; i < v->dim; i++) + result->x[i] = v->x[i] / norm; + + *value = PointerGetDatum(result); + + return true; + } + + return false; +} + +/* + * New buffer + */ +Buffer +IvfflatNewBuffer(Relation 
index, ForkNumber forkNum) +{ + Buffer buf = ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + return buf; +} + +/* + * Init page + */ +void +IvfflatInitPage(Buffer buf, Page page) +{ + PageInit(page, BufferGetPageSize(buf), sizeof(IvfflatPageOpaqueData)); + IvfflatPageGetOpaque(page)->nextblkno = InvalidBlockNumber; + IvfflatPageGetOpaque(page)->page_id = IVFFLAT_PAGE_ID; +} + +/* + * Init and register page + */ +void +IvfflatInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state) +{ + *state = GenericXLogStart(index); + *page = GenericXLogRegisterBuffer(*state, *buf, GENERIC_XLOG_FULL_IMAGE); + IvfflatInitPage(*buf, *page); +} + +/* + * Commit buffer + */ +void +IvfflatCommitBuffer(Buffer buf, GenericXLogState *state) +{ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); +} + +/* + * Add a new page + * + * The order is very important!! + */ +void +IvfflatAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum) +{ + /* Get new buffer */ + Buffer newbuf = IvfflatNewBuffer(index, forkNum); + Page newpage = GenericXLogRegisterBuffer(*state, newbuf, GENERIC_XLOG_FULL_IMAGE); + + /* Update the previous buffer */ + IvfflatPageGetOpaque(*page)->nextblkno = BufferGetBlockNumber(newbuf); + + /* Init new page */ + IvfflatInitPage(newbuf, newpage); + + /* Commit */ + GenericXLogFinish(*state); + + /* Unlock */ + UnlockReleaseBuffer(*buf); + + *state = GenericXLogStart(index); + *page = GenericXLogRegisterBuffer(*state, newbuf, GENERIC_XLOG_FULL_IMAGE); + *buf = newbuf; +} + +/* + * Get the metapage info + */ +void +IvfflatGetMetaPageInfo(Relation index, int *lists, int *dimensions) +{ + Buffer buf; + Page page; + IvfflatMetaPage metap; + + buf = ReadBuffer(index, IVFFLAT_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = IvfflatPageGetMeta(page); + + *lists = metap->lists; + + if (dimensions != NULL) + 
*dimensions = metap->dimensions; + + UnlockReleaseBuffer(buf); +} + +/* + * Update the start or insert page of a list + */ +void +IvfflatUpdateList(Relation index, ListInfo listInfo, + BlockNumber insertPage, BlockNumber originalInsertPage, + BlockNumber startPage, ForkNumber forkNum) +{ + Buffer buf; + Page page; + GenericXLogState *state; + IvfflatList list; + bool changed = false; + + buf = ReadBufferExtended(index, forkNum, listInfo.blkno, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + list = (IvfflatList) PageGetItem(page, PageGetItemId(page, listInfo.offno)); + + if (BlockNumberIsValid(insertPage) && insertPage != list->insertPage) + { + /* Skip update if insert page is lower than original insert page */ + /* This is needed to prevent insert from overwriting vacuum */ + if (!BlockNumberIsValid(originalInsertPage) || insertPage >= originalInsertPage) + { + list->insertPage = insertPage; + changed = true; + } + } + + if (BlockNumberIsValid(startPage) && startPage != list->startPage) + { + list->startPage = startPage; + changed = true; + } + + /* Only commit if changed */ + if (changed) + IvfflatCommitBuffer(buf, state); + else + { + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } +} diff --git a/external/pgvector/src/ivfvacuum.c b/external/pgvector/src/ivfvacuum.c new file mode 100644 index 00000000000..b548af15dd5 --- /dev/null +++ b/external/pgvector/src/ivfvacuum.c @@ -0,0 +1,156 @@ +#include "postgres.h" + +#include "commands/vacuum.h" +#include "ivfflat.h" +#include "storage/bufmgr.h" + +/* + * Bulk delete tuples from the index + */ +IndexBulkDeleteResult * +ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callback_state) +{ + Relation index = info->index; + BlockNumber blkno = IVFFLAT_HEAD_BLKNO; + BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD); + + if (stats == NULL) + 
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + + /* Iterate over list pages */ + while (BlockNumberIsValid(blkno)) + { + Buffer cbuf; + Page cpage; + OffsetNumber coffno; + OffsetNumber cmaxoffno; + BlockNumber startPages[MaxOffsetNumber]; + ListInfo listInfo; + + cbuf = ReadBuffer(index, blkno); + LockBuffer(cbuf, BUFFER_LOCK_SHARE); + cpage = BufferGetPage(cbuf); + + cmaxoffno = PageGetMaxOffsetNumber(cpage); + + /* Iterate over lists */ + for (coffno = FirstOffsetNumber; coffno <= cmaxoffno; coffno = OffsetNumberNext(coffno)) + { + IvfflatList list = (IvfflatList) PageGetItem(cpage, PageGetItemId(cpage, coffno)); + + startPages[coffno - FirstOffsetNumber] = list->startPage; + } + + listInfo.blkno = blkno; + blkno = IvfflatPageGetOpaque(cpage)->nextblkno; + + UnlockReleaseBuffer(cbuf); + + for (coffno = FirstOffsetNumber; coffno <= cmaxoffno; coffno = OffsetNumberNext(coffno)) + { + BlockNumber searchPage = startPages[coffno - FirstOffsetNumber]; + BlockNumber insertPage = InvalidBlockNumber; + + /* Iterate over entry pages */ + while (BlockNumberIsValid(searchPage)) + { + Buffer buf; + Page page; + GenericXLogState *state; + OffsetNumber offno; + OffsetNumber maxoffno; + OffsetNumber deletable[MaxOffsetNumber]; + int ndeletable; + + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, searchPage, RBM_NORMAL, bas); + + /* + * ambulkdelete cannot delete entries from pages that are + * pinned by other backends + * + * https://www.postgresql.org/docs/current/index-locking.html + */ + LockBufferForCleanup(buf); + + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + + maxoffno = PageGetMaxOffsetNumber(page); + ndeletable = 0; + + /* Find deleted tuples */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) + { + IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offno)); + ItemPointer htup = &(itup->t_tid); + + if (callback(htup, 
callback_state)) + { + deletable[ndeletable++] = offno; + stats->tuples_removed++; + } + else + stats->num_index_tuples++; + } + + /* Set to first free page */ + /* Must be set before searchPage is updated */ + if (!BlockNumberIsValid(insertPage) && ndeletable > 0) + insertPage = searchPage; + + searchPage = IvfflatPageGetOpaque(page)->nextblkno; + + if (ndeletable > 0) + { + /* Delete tuples */ + PageIndexMultiDelete(page, deletable, ndeletable); + GenericXLogFinish(state); + } + else + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); + } + + /* + * Update after all tuples deleted. + * + * We don't add or delete items from lists pages, so offset won't + * change. + */ + if (BlockNumberIsValid(insertPage)) + { + listInfo.offno = coffno; + IvfflatUpdateList(index, listInfo, insertPage, InvalidBlockNumber, InvalidBlockNumber, MAIN_FORKNUM); + } + } + } + + FreeAccessStrategy(bas); + + return stats; +} + +/* + * Clean up after a VACUUM operation + */ +IndexBulkDeleteResult * +ivfflatvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + Relation rel = info->index; + + if (info->analyze_only) + return stats; + + /* stats is NULL if ambulkdelete not called */ + /* OK to return NULL if index not changed */ + if (stats == NULL) + return NULL; + + stats->num_pages = RelationGetNumberOfBlocks(rel); + + return stats; +} diff --git a/external/pgvector/src/vector.c b/external/pgvector/src/vector.c new file mode 100644 index 00000000000..d3ebedb6d31 --- /dev/null +++ b/external/pgvector/src/vector.c @@ -0,0 +1,1147 @@ +#include "postgres.h" + +#include + +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "hnsw.h" +#include "ivfflat.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "port.h" /* for strtof() */ +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/numeric.h" +#include "vector.h" + +#if PG_VERSION_NUM >= 160000 +#include "varatt.h" +#endif + +#if PG_VERSION_NUM >= 
120000 +#include "common/shortest_dec.h" +#include "utils/float.h" +#else +#include +#endif + +#if PG_VERSION_NUM < 130000 +#define TYPALIGN_DOUBLE 'd' +#define TYPALIGN_INT 'i' +#endif + +#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1) +#define CreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1)) + +PG_MODULE_MAGIC; + +/* + * Initialize index options and variables + */ +PGDLLEXPORT void _PG_init(void); +void +_PG_init(void) +{ + HnswInit(); + IvfflatInit(); +} + +/* + * Ensure same dimensions + */ +static inline void +CheckDims(Vector * a, Vector * b) +{ + if (a->dim != b->dim) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("different vector dimensions %d and %d", a->dim, b->dim))); +} + +/* + * Ensure expected dimensions + */ +static inline void +CheckExpectedDim(int32 typmod, int dim) +{ + if (typmod != -1 && typmod != dim) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("expected %d dimensions, not %d", typmod, dim))); +} + +/* + * Ensure valid dimensions + */ +static inline void +CheckDim(int dim) +{ + if (dim < 1) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("vector must have at least 1 dimension"))); + + if (dim > VECTOR_MAX_DIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM))); +} + +/* + * Ensure finite elements + */ +static inline void +CheckElement(float value) +{ + if (isnan(value)) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("NaN not allowed in vector"))); + + if (isinf(value)) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("infinite value not allowed in vector"))); +} + +/* + * Allocate and initialize a new vector + */ +Vector * +InitVector(int dim) +{ + Vector *result; + int size; + + size = VECTOR_SIZE(dim); + result = (Vector *) palloc0(size); + SET_VARSIZE(result, size); + result->dim = dim; + + return result; +} + +/* + * Check for whitespace, since array_isspace() is static + */ 
+static inline bool +vector_isspace(char ch) +{ + if (ch == ' ' || + ch == '\t' || + ch == '\n' || + ch == '\r' || + ch == '\v' || + ch == '\f') + return true; + return false; +} + +/* + * Check state array + */ +static float8 * +CheckStateArray(ArrayType *statearray, const char *caller) +{ + if (ARR_NDIM(statearray) != 1 || + ARR_DIMS(statearray)[0] < 1 || + ARR_HASNULL(statearray) || + ARR_ELEMTYPE(statearray) != FLOAT8OID) + elog(ERROR, "%s: expected state array", caller); + return (float8 *) ARR_DATA_PTR(statearray); +} + +#if PG_VERSION_NUM < 120003 +static pg_noinline void +float_overflow_error(void) +{ + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: overflow"))); +} + +static pg_noinline void +float_underflow_error(void) +{ + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: underflow"))); +} +#endif + +/* + * Convert textual representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_in); +Datum +vector_in(PG_FUNCTION_ARGS) +{ + char *str = PG_GETARG_CSTRING(0); + int32 typmod = PG_GETARG_INT32(2); + float x[VECTOR_MAX_DIM]; + int dim = 0; + char *pt; + char *stringEnd; + Vector *result; + char *lit = pstrdup(str); + + while (vector_isspace(*str)) + str++; + + if (*str != '[') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed vector literal: \"%s\"", lit), + errdetail("Vector contents must start with \"[\"."))); + + str++; + pt = strtok(str, ","); + stringEnd = pt; + + while (pt != NULL && *stringEnd != ']') + { + if (dim == VECTOR_MAX_DIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM))); + + while (vector_isspace(*pt)) + pt++; + + /* Check for empty string like float4in */ + if (*pt == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + 
+ /* Use strtof like float4in to avoid a double-rounding problem */ + x[dim] = strtof(pt, &stringEnd); + CheckElement(x[dim]); + dim++; + + if (stringEnd == pt) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + + while (vector_isspace(*stringEnd)) + stringEnd++; + + if (*stringEnd != '\0' && *stringEnd != ']') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + + pt = strtok(NULL, ","); + } + + if (stringEnd == NULL || *stringEnd != ']') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed vector literal: \"%s\"", lit), + errdetail("Unexpected end of input."))); + + stringEnd++; + + /* Only whitespace is allowed after the closing brace */ + while (vector_isspace(*stringEnd)) + stringEnd++; + + if (*stringEnd != '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed vector literal: \"%s\"", lit), + errdetail("Junk after closing right brace."))); + + /* Ensure no consecutive delimiters since strtok skips */ + for (pt = lit + 1; *pt != '\0'; pt++) + { + if (pt[-1] == ',' && *pt == ',') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed vector literal: \"%s\"", lit))); + } + + if (dim < 1) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("vector must have at least 1 dimension"))); + + pfree(lit); + + CheckExpectedDim(typmod, dim); + + result = InitVector(dim); + for (int i = 0; i < dim; i++) + result->x[i] = x[i]; + + PG_RETURN_POINTER(result); +} + +/* + * Convert internal representation to textual representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_out); +Datum +vector_out(PG_FUNCTION_ARGS) +{ + Vector *vector = PG_GETARG_VECTOR_P(0); + int dim = vector->dim; + char *buf; + char *ptr; + int n; + +#if PG_VERSION_NUM < 120000 + int ndig = FLT_DIG + extra_float_digits; + + if (ndig < 
1) + ndig = 1; + +#define FLOAT_SHORTEST_DECIMAL_LEN (ndig + 10) +#endif + + /* + * Need: + * + * dim * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for + * float_to_shortest_decimal_bufn + * + * dim - 1 bytes for separator + * + * 3 bytes for [, ], and \0 + */ + buf = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN * dim + 2); + ptr = buf; + + *ptr = '['; + ptr++; + for (int i = 0; i < dim; i++) + { + if (i > 0) + { + *ptr = ','; + ptr++; + } + +#if PG_VERSION_NUM >= 120000 + n = float_to_shortest_decimal_bufn(vector->x[i], ptr); +#else + n = sprintf(ptr, "%.*g", ndig, vector->x[i]); +#endif + ptr += n; + } + *ptr = ']'; + ptr++; + *ptr = '\0'; + + PG_FREE_IF_COPY(vector, 0); + PG_RETURN_CSTRING(buf); +} + +/* + * Print vector - useful for debugging + */ +void +PrintVector(char *msg, Vector * vector) +{ + char *out = DatumGetPointer(DirectFunctionCall1(vector_out, PointerGetDatum(vector))); + + elog(INFO, "%s = %s", msg, out); + pfree(out); +} + +/* + * Convert type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_typmod_in); +Datum +vector_typmod_in(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + if (n != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid type modifier"))); + + if (*tl < 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions for type vector must be at least 1"))); + + if (*tl > VECTOR_MAX_DIM) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions for type vector cannot exceed %d", VECTOR_MAX_DIM))); + + PG_RETURN_INT32(*tl); +} + +/* + * Convert external binary representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_recv); +Datum +vector_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int32 typmod = PG_GETARG_INT32(2); + Vector *result; + int16 dim; + int16 unused; + + dim = pq_getmsgint(buf, sizeof(int16)); + 
unused = pq_getmsgint(buf, sizeof(int16)); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + if (unused != 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("expected unused to be 0, not %d", unused))); + + result = InitVector(dim); + for (int i = 0; i < dim; i++) + { + result->x[i] = pq_getmsgfloat4(buf); + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert internal representation to the external binary representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_send); +Datum +vector_send(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint(&buf, vec->dim, sizeof(int16)); + pq_sendint(&buf, vec->unused, sizeof(int16)); + for (int i = 0; i < vec->dim; i++) + pq_sendfloat4(&buf, vec->x[i]); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Convert vector to vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector); +Datum +vector(PG_FUNCTION_ARGS) +{ + Vector *arg = PG_GETARG_VECTOR_P(0); + int32 typmod = PG_GETARG_INT32(1); + + CheckExpectedDim(typmod, arg->dim); + + PG_RETURN_POINTER(arg); +} + +/* + * Convert array to vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(array_to_vector); +Datum +array_to_vector(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + int32 typmod = PG_GETARG_INT32(1); + Vector *result; + int16 typlen; + bool typbyval; + char typalign; + Datum *elemsp; + bool *nullsp; + int nelemsp; + + if (ARR_NDIM(array) > 1) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("array must be 1-D"))); + + if (ARR_HASNULL(array) && array_contains_nulls(array)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array must not contain nulls"))); + + get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign); + deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, &nullsp, &nelemsp); + + CheckDim(nelemsp); + CheckExpectedDim(typmod, nelemsp); + + 
result = InitVector(nelemsp); + + if (ARR_ELEMTYPE(array) == INT4OID) + { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetInt32(elemsp[i]); + } + else if (ARR_ELEMTYPE(array) == FLOAT8OID) + { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat8(elemsp[i]); + } + else if (ARR_ELEMTYPE(array) == FLOAT4OID) + { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat4(elemsp[i]); + } + else if (ARR_ELEMTYPE(array) == NUMERICOID) + { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i])); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("unsupported array type"))); + } + + /* Check elements */ + for (int i = 0; i < result->dim; i++) + CheckElement(result->x[i]); + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to float4[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_float4); +Datum +vector_to_float4(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + Datum *datums; + ArrayType *result; + + datums = (Datum *) palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) + datums[i] = Float4GetDatum(vec->x[i]); + + /* Use TYPALIGN_INT for float4 */ + result = construct_array(datums, vec->dim, FLOAT4OID, sizeof(float4), true, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Get the L2 distance between vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l2_distance); +Datum +l2_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + float distance = 0.0; + float diff; + + CheckDims(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + { + diff = ax[i] - bx[i]; + distance += diff * diff; + } + + PG_RETURN_FLOAT8(sqrt((double) distance)); +} + +/* + * Get the L2 squared distance between vectors + * This saves a sqrt calculation + */ +PGDLLEXPORT 
PG_FUNCTION_INFO_V1(vector_l2_squared_distance); +Datum +vector_l2_squared_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + float distance = 0.0; + float diff; + + CheckDims(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + { + diff = ax[i] - bx[i]; + distance += diff * diff; + } + + PG_RETURN_FLOAT8((double) distance); +} + +/* + * Get the inner product of two vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(inner_product); +Datum +inner_product(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + float distance = 0.0; + + CheckDims(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + distance += ax[i] * bx[i]; + + PG_RETURN_FLOAT8((double) distance); +} + +/* + * Get the negative inner product of two vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_negative_inner_product); +Datum +vector_negative_inner_product(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + float distance = 0.0; + + CheckDims(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + distance += ax[i] * bx[i]; + + PG_RETURN_FLOAT8((double) distance * -1); +} + +/* + * Get the cosine distance between two vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(cosine_distance); +Datum +cosine_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + float distance = 0.0; + float norma = 0.0; + float normb = 0.0; + double similarity; + + CheckDims(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + { + distance += ax[i] * bx[i]; + norma += ax[i] * ax[i]; + normb += bx[i] * bx[i]; + } + + /* Use sqrt(a * b) over sqrt(a) * sqrt(b) */ + similarity = (double) distance / sqrt((double) norma * 
(double) normb); + +#ifdef _MSC_VER + /* /fp:fast may not propagate NaN */ + if (isnan(similarity)) + PG_RETURN_FLOAT8(NAN); +#endif + + /* Keep in range */ + if (similarity > 1) + similarity = 1.0; + else if (similarity < -1) + similarity = -1.0; + + PG_RETURN_FLOAT8(1.0 - similarity); +} + +/* + * Get the distance for spherical k-means + * Currently uses angular distance since needs to satisfy triangle inequality + * Assumes inputs are unit vectors (skips norm) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_spherical_distance); +Datum +vector_spherical_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + float dp = 0.0; + double distance; + + CheckDims(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + dp += ax[i] * bx[i]; + + distance = (double) dp; + + /* Prevent NaN with acos with loss of precision */ + if (distance > 1) + distance = 1; + else if (distance < -1) + distance = -1; + + PG_RETURN_FLOAT8(acos(distance) / M_PI); +} + +/* + * Get the L1 distance between vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l1_distance); +Datum +l1_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + float distance = 0.0; + + CheckDims(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + distance += fabsf(ax[i] - bx[i]); + + PG_RETURN_FLOAT8((double) distance); +} + +/* + * Get the dimensions of a vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_dims); +Datum +vector_dims(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + + PG_RETURN_INT32(a->dim); +} + +/* + * Get the L2 norm of a vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_norm); +Datum +vector_norm(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + float *ax = a->x; + double norm = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + norm += (double) ax[i] * 
(double) ax[i]; + + PG_RETURN_FLOAT8(sqrt(norm)); +} + +/* + * Add vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_add); +Datum +vector_add(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) + rx[i] = ax[i] + bx[i]; + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) + { + if (isinf(rx[i])) + float_overflow_error(); + } + + PG_RETURN_POINTER(result); +} + +/* + * Subtract vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_sub); +Datum +vector_sub(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) + rx[i] = ax[i] - bx[i]; + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) + { + if (isinf(rx[i])) + float_overflow_error(); + } + + PG_RETURN_POINTER(result); +} + +/* + * Multiply vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_mul); +Datum +vector_mul(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) + rx[i] = ax[i] * bx[i]; + + /* Check for overflow and underflow */ + for (int i = 0, imax = a->dim; i < imax; i++) + { + if (isinf(rx[i])) + float_overflow_error(); + + if (rx[i] == 0 && !(ax[i] == 0 || bx[i] == 0)) + float_underflow_error(); + } + + PG_RETURN_POINTER(result); +} + +/* + * Internal helper to compare vectors + */ +int 
+vector_cmp_internal(Vector * a, Vector * b) +{ + CheckDims(a, b); + + for (int i = 0; i < a->dim; i++) + { + if (a->x[i] < b->x[i]) + return -1; + + if (a->x[i] > b->x[i]) + return 1; + } + return 0; +} + +/* + * Less than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_lt); +Datum +vector_lt(PG_FUNCTION_ARGS) +{ + Vector *a = (Vector *) PG_GETARG_VECTOR_P(0); + Vector *b = (Vector *) PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) < 0); +} + +/* + * Less than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_le); +Datum +vector_le(PG_FUNCTION_ARGS) +{ + Vector *a = (Vector *) PG_GETARG_VECTOR_P(0); + Vector *b = (Vector *) PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) <= 0); +} + +/* + * Equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_eq); +Datum +vector_eq(PG_FUNCTION_ARGS) +{ + Vector *a = (Vector *) PG_GETARG_VECTOR_P(0); + Vector *b = (Vector *) PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) == 0); +} + +/* + * Not equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_ne); +Datum +vector_ne(PG_FUNCTION_ARGS) +{ + Vector *a = (Vector *) PG_GETARG_VECTOR_P(0); + Vector *b = (Vector *) PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) != 0); +} + +/* + * Greater than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_ge); +Datum +vector_ge(PG_FUNCTION_ARGS) +{ + Vector *a = (Vector *) PG_GETARG_VECTOR_P(0); + Vector *b = (Vector *) PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) >= 0); +} + +/* + * Greater than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_gt); +Datum +vector_gt(PG_FUNCTION_ARGS) +{ + Vector *a = (Vector *) PG_GETARG_VECTOR_P(0); + Vector *b = (Vector *) PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) > 0); +} + +/* + * Compare vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_cmp); +Datum +vector_cmp(PG_FUNCTION_ARGS) +{ + Vector *a = (Vector *) PG_GETARG_VECTOR_P(0); + Vector *b = (Vector *) PG_GETARG_VECTOR_P(1); + + 
PG_RETURN_INT32(vector_cmp_internal(a, b)); +} + +/* + * Accumulate vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_accum); +Datum +vector_accum(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + Vector *newval = PG_GETARG_VECTOR_P(1); + float8 *statevalues; + int16 dim; + bool newarr; + float8 n; + Datum *statedatums; + float *x = newval->x; + ArrayType *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "vector_accum"); + dim = STATE_DIMS(statearray); + newarr = dim == 0; + + if (newarr) + dim = newval->dim; + else + CheckExpectedDim(dim, newval->dim); + + n = statevalues[0] + 1.0; + + statedatums = CreateStateDatums(dim); + statedatums[0] = Float8GetDatum(n); + + if (newarr) + { + for (int i = 0; i < dim; i++) + statedatums[i + 1] = Float8GetDatum((double) x[i]); + } + else + { + for (int i = 0; i < dim; i++) + { + double v = statevalues[i + 1] + x[i]; + + /* Check for overflow */ + if (isinf(v)) + float_overflow_error(); + + statedatums[i + 1] = Float8GetDatum(v); + } + } + + /* Use float8 array like float4_accum */ + result = construct_array(statedatums, dim + 1, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(statedatums); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Combine vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_combine); +Datum +vector_combine(PG_FUNCTION_ARGS) +{ + ArrayType *statearray1 = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *statearray2 = PG_GETARG_ARRAYTYPE_P(1); + float8 *statevalues1; + float8 *statevalues2; + float8 n; + float8 n1; + float8 n2; + int16 dim; + Datum *statedatums; + ArrayType *result; + + /* Check arrays before using */ + statevalues1 = CheckStateArray(statearray1, "vector_combine"); + statevalues2 = CheckStateArray(statearray2, "vector_combine"); + + n1 = statevalues1[0]; + n2 = statevalues2[0]; + + if (n1 == 0.0) + { + n = n2; + dim = STATE_DIMS(statearray2); + statedatums = CreateStateDatums(dim); + for (int i = 1; i <= 
dim; i++) + statedatums[i] = Float8GetDatum(statevalues2[i]); + } + else if (n2 == 0.0) + { + n = n1; + dim = STATE_DIMS(statearray1); + statedatums = CreateStateDatums(dim); + for (int i = 1; i <= dim; i++) + statedatums[i] = Float8GetDatum(statevalues1[i]); + } + else + { + n = n1 + n2; + dim = STATE_DIMS(statearray1); + CheckExpectedDim(dim, STATE_DIMS(statearray2)); + statedatums = CreateStateDatums(dim); + for (int i = 1; i <= dim; i++) + { + double v = statevalues1[i] + statevalues2[i]; + + /* Check for overflow */ + if (isinf(v)) + float_overflow_error(); + + statedatums[i] = Float8GetDatum(v); + } + } + + statedatums[0] = Float8GetDatum(n); + + result = construct_array(statedatums, dim + 1, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(statedatums); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Average vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_avg); +Datum +vector_avg(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + float8 *statevalues; + float8 n; + uint16 dim; + Vector *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "vector_avg"); + n = statevalues[0]; + + /* SQL defines AVG of no values to be NULL */ + if (n == 0.0) + PG_RETURN_NULL(); + + /* Create vector */ + dim = STATE_DIMS(statearray); + CheckDim(dim); + result = InitVector(dim); + for (int i = 0; i < dim; i++) + { + result->x[i] = statevalues[i + 1] / n; + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} diff --git a/external/pgvector/src/vector.h b/external/pgvector/src/vector.h new file mode 100644 index 00000000000..e649471eaaa --- /dev/null +++ b/external/pgvector/src/vector.h @@ -0,0 +1,23 @@ +#ifndef VECTOR_H +#define VECTOR_H + +#define VECTOR_MAX_DIM 16000 + +#define VECTOR_SIZE(_dim) (offsetof(Vector, x) + sizeof(float)*(_dim)) +#define DatumGetVector(x) ((Vector *) PG_DETOAST_DATUM(x)) +#define PG_GETARG_VECTOR_P(x) DatumGetVector(PG_GETARG_DATUM(x)) +#define 
PG_RETURN_VECTOR_P(x) PG_RETURN_POINTER(x) + +typedef struct Vector +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int16 dim; /* number of dimensions */ + int16 unused; + float x[FLEXIBLE_ARRAY_MEMBER]; +} Vector; + +Vector *InitVector(int dim); +void PrintVector(char *msg, Vector * vector); +int vector_cmp_internal(Vector * a, Vector * b); + +#endif diff --git a/external/pgvector/test/expected/btree.out b/external/pgvector/test/expected/btree.out new file mode 100644 index 00000000000..d8b6da5690e --- /dev/null +++ b/external/pgvector/test/expected/btree.out @@ -0,0 +1,17 @@ +SET enable_seqscan = off; +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t (val); +SELECT * FROM t WHERE val = '[1,2,3]'; + val +--------- + [1,2,3] +(1 row) + +SELECT * FROM t ORDER BY val LIMIT 1; + val +--------- + [0,0,0] +(1 row) + +DROP TABLE t; diff --git a/external/pgvector/test/expected/cast.out b/external/pgvector/test/expected/cast.out new file mode 100644 index 00000000000..48242619b9a --- /dev/null +++ b/external/pgvector/test/expected/cast.out @@ -0,0 +1,59 @@ +SELECT ARRAY[1,2,3]::vector; + array +--------- + [1,2,3] +(1 row) + +SELECT ARRAY[1.0,2.0,3.0]::vector; + array +--------- + [1,2,3] +(1 row) + +SELECT ARRAY[1,2,3]::float4[]::vector; + array +--------- + [1,2,3] +(1 row) + +SELECT ARRAY[1,2,3]::float8[]::vector; + array +--------- + [1,2,3] +(1 row) + +SELECT ARRAY[1,2,3]::numeric[]::vector; + array +--------- + [1,2,3] +(1 row) + +SELECT '{NULL}'::real[]::vector; +ERROR: array must not contain nulls +SELECT '{NaN}'::real[]::vector; +ERROR: NaN not allowed in vector +SELECT '{Infinity}'::real[]::vector; +ERROR: infinite value not allowed in vector +SELECT '{-Infinity}'::real[]::vector; +ERROR: infinite value not allowed in vector +SELECT '{}'::real[]::vector; +ERROR: vector must have at least 1 dimension +SELECT '{{1}}'::real[]::vector; +ERROR: array must be 1-D +SELECT 
'[1,2,3]'::vector::real[]; + float4 +--------- + {1,2,3} +(1 row) + +SELECT array_agg(n)::vector FROM generate_series(1, 16001) n; +ERROR: vector cannot have more than 16000 dimensions +SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n; +ERROR: vector cannot have more than 16000 dimensions +-- ensure no error +SELECT ARRAY[1,2,3] = ARRAY[1,2,3]; + ?column? +---------- + t +(1 row) + diff --git a/external/pgvector/test/expected/copy.out b/external/pgvector/test/expected/copy.out new file mode 100644 index 00000000000..36d4620db31 --- /dev/null +++ b/external/pgvector/test/expected/copy.out @@ -0,0 +1,16 @@ +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE TABLE t2 (val vector(3)); +\copy t TO 'results/data.bin' WITH (FORMAT binary) +\copy t2 FROM 'results/data.bin' WITH (FORMAT binary) +SELECT * FROM t2 ORDER BY val; + val +--------- + [0,0,0] + [1,1,1] + [1,2,3] + +(4 rows) + +DROP TABLE t; +DROP TABLE t2; diff --git a/external/pgvector/test/expected/functions.out b/external/pgvector/test/expected/functions.out new file mode 100644 index 00000000000..28406885a09 --- /dev/null +++ b/external/pgvector/test/expected/functions.out @@ -0,0 +1,210 @@ +SELECT '[1,2,3]'::vector + '[4,5,6]'; + ?column? +---------- + [5,7,9] +(1 row) + +SELECT '[3e38]'::vector + '[3e38]'; +ERROR: value out of range: overflow +SELECT '[1,2,3]'::vector - '[4,5,6]'; + ?column? +------------ + [-3,-3,-3] +(1 row) + +SELECT '[-3e38]'::vector - '[3e38]'; +ERROR: value out of range: overflow +SELECT '[1,2,3]'::vector * '[4,5,6]'; + ?column? 
+----------- + [4,10,18] +(1 row) + +SELECT '[1e37]'::vector * '[1e37]'; +ERROR: value out of range: overflow +SELECT '[1e-37]'::vector * '[1e-37]'; +ERROR: value out of range: underflow +SELECT vector_dims('[1,2,3]'); + vector_dims +------------- + 3 +(1 row) + +SELECT round(vector_norm('[1,1]')::numeric, 5); + round +--------- + 1.41421 +(1 row) + +SELECT vector_norm('[3,4]'); + vector_norm +------------- + 5 +(1 row) + +SELECT vector_norm('[0,1]'); + vector_norm +------------- + 1 +(1 row) + +SELECT vector_norm('[3e37,4e37]')::real; + vector_norm +------------- + 5e+37 +(1 row) + +SELECT l2_distance('[0,0]', '[3,4]'); + l2_distance +------------- + 5 +(1 row) + +SELECT l2_distance('[0,0]', '[0,1]'); + l2_distance +------------- + 1 +(1 row) + +SELECT l2_distance('[1,2]', '[3]'); +ERROR: different vector dimensions 2 and 1 +SELECT l2_distance('[3e38]', '[-3e38]'); + l2_distance +------------- + Infinity +(1 row) + +SELECT inner_product('[1,2]', '[3,4]'); + inner_product +--------------- + 11 +(1 row) + +SELECT inner_product('[1,2]', '[3]'); +ERROR: different vector dimensions 2 and 1 +SELECT inner_product('[3e38]', '[3e38]'); + inner_product +--------------- + Infinity +(1 row) + +SELECT cosine_distance('[1,2]', '[2,4]'); + cosine_distance +----------------- + 0 +(1 row) + +SELECT cosine_distance('[1,2]', '[0,0]'); + cosine_distance +----------------- + NaN +(1 row) + +SELECT cosine_distance('[1,1]', '[1,1]'); + cosine_distance +----------------- + 0 +(1 row) + +SELECT cosine_distance('[1,0]', '[0,2]'); + cosine_distance +----------------- + 1 +(1 row) + +SELECT cosine_distance('[1,1]', '[-1,-1]'); + cosine_distance +----------------- + 2 +(1 row) + +SELECT cosine_distance('[1,2]', '[3]'); +ERROR: different vector dimensions 2 and 1 +SELECT cosine_distance('[1,1]', '[1.1,1.1]'); + cosine_distance +----------------- + 0 +(1 row) + +SELECT cosine_distance('[1,1]', '[-1.1,-1.1]'); + cosine_distance +----------------- + 2 +(1 row) + +SELECT cosine_distance('[3e38]', 
'[3e38]'); + cosine_distance +----------------- + NaN +(1 row) + +SELECT l1_distance('[0,0]', '[3,4]'); + l1_distance +------------- + 7 +(1 row) + +SELECT l1_distance('[0,0]', '[0,1]'); + l1_distance +------------- + 1 +(1 row) + +SELECT l1_distance('[1,2]', '[3]'); +ERROR: different vector dimensions 2 and 1 +SELECT l1_distance('[3e38]', '[-3e38]'); + l1_distance +------------- + Infinity +(1 row) + +SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]']) v; + avg +----------- + [2,3.5,5] +(1 row) + +SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]', NULL]) v; + avg +----------- + [2,3.5,5] +(1 row) + +SELECT avg(v) FROM unnest(ARRAY[]::vector[]) v; + avg +----- + +(1 row) + +SELECT avg(v) FROM unnest(ARRAY['[1,2]'::vector, '[3]']) v; +ERROR: expected 2 dimensions, not 1 +SELECT avg(v) FROM unnest(ARRAY['[3e38]'::vector, '[3e38]']) v; + avg +--------- + [3e+38] +(1 row) + +SELECT vector_avg(array_agg(n)) FROM generate_series(1, 16002) n; +ERROR: vector cannot have more than 16000 dimensions +SELECT sum(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]']) v; + sum +---------- + [4,7,10] +(1 row) + +SELECT sum(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]', NULL]) v; + sum +---------- + [4,7,10] +(1 row) + +SELECT sum(v) FROM unnest(ARRAY[]::vector[]) v; + sum +----- + +(1 row) + +SELECT sum(v) FROM unnest(ARRAY['[1,2]'::vector, '[3]']) v; +ERROR: different vector dimensions 2 and 1 +SELECT sum(v) FROM unnest(ARRAY['[3e38]'::vector, '[3e38]']) v; +ERROR: value out of range: overflow diff --git a/external/pgvector/test/expected/hnsw_cosine.out b/external/pgvector/test/expected/hnsw_cosine.out new file mode 100644 index 00000000000..2d3e3ef41cf --- /dev/null +++ b/external/pgvector/test/expected/hnsw_cosine.out @@ -0,0 +1,9 @@ +SET enable_seqscan = off; +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val vector_cosine_ops); +INSERT INTO t (val) VALUES 
('[1,2,4]'); +-- SELECT * FROM t ORDER BY val <=> '[3,3,3]'; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2; +DROP TABLE t; diff --git a/external/pgvector/test/expected/hnsw_ip.out b/external/pgvector/test/expected/hnsw_ip.out new file mode 100644 index 00000000000..cd2685c0232 --- /dev/null +++ b/external/pgvector/test/expected/hnsw_ip.out @@ -0,0 +1,16 @@ +SET enable_seqscan = off; +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val vector_ip_ops); +INSERT INTO t (val) VALUES ('[1,2,4]'); +SELECT * FROM t ORDER BY val <#> '[3,3,3]'; + val +--------- + [1,2,4] + [1,2,3] + [1,1,1] + [0,0,0] +(4 rows) + +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2; +DROP TABLE t; diff --git a/external/pgvector/test/expected/hnsw_l2.out b/external/pgvector/test/expected/hnsw_l2.out new file mode 100644 index 00000000000..42096610a5f --- /dev/null +++ b/external/pgvector/test/expected/hnsw_l2.out @@ -0,0 +1,36 @@ +SET enable_seqscan = off; +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val vector_l2_ops); +INSERT INTO t (val) VALUES ('[1,2,4]'); +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + val +--------- + [1,2,3] + [1,2,4] + [1,1,1] + [0,0,0] +(4 rows) + +SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector); + val +--------- + [0,0,0] + [1,1,1] + [1,2,3] + [1,2,4] +(4 rows) + +SELECT COUNT(*) FROM t; + count +------- + 5 +(1 row) + +TRUNCATE t; +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + val +----- +(0 rows) + +DROP TABLE t; diff --git a/external/pgvector/test/expected/hnsw_options.out b/external/pgvector/test/expected/hnsw_options.out new file mode 100644 index 00000000000..eeba3e568df --- /dev/null +++ 
b/external/pgvector/test/expected/hnsw_options.out @@ -0,0 +1,26 @@ +CREATE TABLE t (val vector(3)); +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 1); +ERROR: value 1 out of bounds for option "m" +DETAIL: Valid values are between "2" and "100". +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 101); +ERROR: value 101 out of bounds for option "m" +DETAIL: Valid values are between "2" and "100". +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (ef_construction = 3); +ERROR: value 3 out of bounds for option "ef_construction" +DETAIL: Valid values are between "4" and "1000". +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (ef_construction = 1001); +ERROR: value 1001 out of bounds for option "ef_construction" +DETAIL: Valid values are between "4" and "1000". +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 16, ef_construction = 31); +ERROR: ef_construction must be greater than or equal to 2 * m +SHOW hnsw.ef_search; + hnsw.ef_search +---------------- + 40 +(1 row) + +SET hnsw.ef_search = 0; +ERROR: 0 is outside the valid range for parameter "hnsw.ef_search" (1 .. 1000) +SET hnsw.ef_search = 1001; +ERROR: 1001 is outside the valid range for parameter "hnsw.ef_search" (1 .. 
1000) +DROP TABLE t; diff --git a/external/pgvector/test/expected/hnsw_unlogged.out b/external/pgvector/test/expected/hnsw_unlogged.out new file mode 100644 index 00000000000..bc000a018fa --- /dev/null +++ b/external/pgvector/test/expected/hnsw_unlogged.out @@ -0,0 +1,14 @@ +SET enable_seqscan = off; +CREATE UNLOGGED TABLE t (val vector(3)); +NOTICE: change unlogged table to logged table,because unlogged table not supports Master-Slave mode +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val vector_l2_ops); +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + val +--------- + [1,2,3] + [1,1,1] + [0,0,0] +(3 rows) + +DROP TABLE t; diff --git a/external/pgvector/test/expected/input.out b/external/pgvector/test/expected/input.out new file mode 100644 index 00000000000..102ca51b033 --- /dev/null +++ b/external/pgvector/test/expected/input.out @@ -0,0 +1,129 @@ +SELECT '[1,2,3]'::vector; + vector +--------- + [1,2,3] +(1 row) + +SELECT '[-1,-2,-3]'::vector; + vector +------------ + [-1,-2,-3] +(1 row) + +SELECT '[1.,2.,3.]'::vector; + vector +--------- + [1,2,3] +(1 row) + +SELECT ' [ 1, 2 , 3 ] '::vector; + vector +--------- + [1,2,3] +(1 row) + +SELECT '[1.23456]'::vector; + vector +----------- + [1.23456] +(1 row) + +SELECT '[hello,1]'::vector; +ERROR: invalid input syntax for type vector: "[hello,1]" +LINE 1: SELECT '[hello,1]'::vector; + ^ +SELECT '[NaN,1]'::vector; +ERROR: NaN not allowed in vector +LINE 1: SELECT '[NaN,1]'::vector; + ^ +SELECT '[Infinity,1]'::vector; +ERROR: infinite value not allowed in vector +LINE 1: SELECT '[Infinity,1]'::vector; + ^ +SELECT '[-Infinity,1]'::vector; +ERROR: infinite value not allowed in vector +LINE 1: SELECT '[-Infinity,1]'::vector; + ^ +SELECT '[1.5e38,-1.5e38]'::vector; + vector +-------------------- + [1.5e+38,-1.5e+38] +(1 row) + +SELECT '[1.5e+38,-1.5e+38]'::vector; + vector +-------------------- + [1.5e+38,-1.5e+38] +(1 row) + +SELECT '[1.5e-38,-1.5e-38]'::vector; + 
vector +-------------------- + [1.5e-38,-1.5e-38] +(1 row) + +SELECT '[4e38,1]'::vector; +ERROR: infinite value not allowed in vector +LINE 1: SELECT '[4e38,1]'::vector; + ^ +SELECT '[1,2,3'::vector; +ERROR: malformed vector literal: "[1,2,3" +LINE 1: SELECT '[1,2,3'::vector; + ^ +DETAIL: Unexpected end of input. +SELECT '[1,2,3]9'::vector; +ERROR: malformed vector literal: "[1,2,3]9" +LINE 1: SELECT '[1,2,3]9'::vector; + ^ +DETAIL: Junk after closing right brace. +SELECT '1,2,3'::vector; +ERROR: malformed vector literal: "1,2,3" +LINE 1: SELECT '1,2,3'::vector; + ^ +DETAIL: Vector contents must start with "[". +SELECT ''::vector; +ERROR: malformed vector literal: "" +LINE 1: SELECT ''::vector; + ^ +DETAIL: Vector contents must start with "[". +SELECT '['::vector; +ERROR: malformed vector literal: "[" +LINE 1: SELECT '['::vector; + ^ +DETAIL: Unexpected end of input. +SELECT '[,'::vector; +ERROR: malformed vector literal: "[," +LINE 1: SELECT '[,'::vector; + ^ +DETAIL: Unexpected end of input. 
+SELECT '[]'::vector; +ERROR: vector must have at least 1 dimension +LINE 1: SELECT '[]'::vector; + ^ +SELECT '[1,]'::vector; +ERROR: invalid input syntax for type vector: "[1,]" +LINE 1: SELECT '[1,]'::vector; + ^ +SELECT '[1a]'::vector; +ERROR: invalid input syntax for type vector: "[1a]" +LINE 1: SELECT '[1a]'::vector; + ^ +SELECT '[1,,3]'::vector; +ERROR: malformed vector literal: "[1,,3]" +LINE 1: SELECT '[1,,3]'::vector; + ^ +SELECT '[1, ,3]'::vector; +ERROR: invalid input syntax for type vector: "[1, ,3]" +LINE 1: SELECT '[1, ,3]'::vector; + ^ +SELECT '[1,2,3]'::vector(2); +ERROR: expected 2 dimensions, not 3 +SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::vector[]); + unnest +--------- + [1,2,3] + [4,5,6] +(2 rows) + +SELECT '{"[1,2,3]"}'::vector(2)[]; +ERROR: expected 2 dimensions, not 3 diff --git a/external/pgvector/test/expected/ivfflat_cosine.out b/external/pgvector/test/expected/ivfflat_cosine.out new file mode 100644 index 00000000000..2106e7a96d3 --- /dev/null +++ b/external/pgvector/test/expected/ivfflat_cosine.out @@ -0,0 +1,9 @@ +SET enable_seqscan = off; +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val vector_cosine_ops) WITH (lists = 1); +INSERT INTO t (val) VALUES ('[1,2,4]'); +-- SELECT * FROM t ORDER BY val <=> '[3,3,3]'; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2; +DROP TABLE t; diff --git a/external/pgvector/test/expected/ivfflat_ip.out b/external/pgvector/test/expected/ivfflat_ip.out new file mode 100644 index 00000000000..96fef9d044f --- /dev/null +++ b/external/pgvector/test/expected/ivfflat_ip.out @@ -0,0 +1,16 @@ +SET enable_seqscan = off; +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val vector_ip_ops) WITH (lists = 1); +INSERT INTO t 
(val) VALUES ('[1,2,4]'); +SELECT * FROM t ORDER BY val <#> '[3,3,3]'; + val +--------- + [1,2,4] + [1,2,3] + [1,1,1] + [0,0,0] +(4 rows) + +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2; +DROP TABLE t; diff --git a/external/pgvector/test/expected/ivfflat_l2.out b/external/pgvector/test/expected/ivfflat_l2.out new file mode 100644 index 00000000000..bef78ae6979 --- /dev/null +++ b/external/pgvector/test/expected/ivfflat_l2.out @@ -0,0 +1,39 @@ +SET enable_seqscan = off; +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 1); +INSERT INTO t (val) VALUES ('[1,2,4]'); +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + val +--------- + [1,2,3] + [1,2,4] + [1,1,1] + [0,0,0] +(4 rows) + +SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector); + val +--------- + [0,0,0] + [1,1,1] + [1,2,3] + [1,2,4] +(4 rows) + +SELECT COUNT(*) FROM t; + count +------- + 5 +(1 row) + +TRUNCATE t; +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + val +----- +(0 rows) + +DROP TABLE t; diff --git a/external/pgvector/test/expected/ivfflat_options.out b/external/pgvector/test/expected/ivfflat_options.out new file mode 100644 index 00000000000..128402fc987 --- /dev/null +++ b/external/pgvector/test/expected/ivfflat_options.out @@ -0,0 +1,14 @@ +CREATE TABLE t (val vector(3)); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 0); +ERROR: value 0 out of bounds for option "lists" +DETAIL: Valid values are between "1" and "32768". +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 32769); +ERROR: value 32769 out of bounds for option "lists" +DETAIL: Valid values are between "1" and "32768". 
+SHOW ivfflat.probes; + ivfflat.probes +---------------- + 1 +(1 row) + +DROP TABLE t; diff --git a/external/pgvector/test/expected/ivfflat_unlogged.out b/external/pgvector/test/expected/ivfflat_unlogged.out new file mode 100644 index 00000000000..f7d25178225 --- /dev/null +++ b/external/pgvector/test/expected/ivfflat_unlogged.out @@ -0,0 +1,14 @@ +SET enable_seqscan = off; +CREATE UNLOGGED TABLE t (val vector(3)); +NOTICE: change unlogged table to logged table,because unlogged table not supports Master-Slave mode +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 1); +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + val +--------- + [1,2,3] + [1,1,1] + [0,0,0] +(3 rows) + +DROP TABLE t; diff --git a/external/pgvector/test/perl/PostgresNode.pm b/external/pgvector/test/perl/PostgresNode.pm new file mode 100644 index 00000000000..32ffb95fa99 --- /dev/null +++ b/external/pgvector/test/perl/PostgresNode.pm @@ -0,0 +1,8 @@ +use PostgreSQL::Test::Cluster; + +sub get_new_node +{ + return PostgreSQL::Test::Cluster->new(@_); +} + +1; diff --git a/external/pgvector/test/perl/TestLib.pm b/external/pgvector/test/perl/TestLib.pm new file mode 100644 index 00000000000..1cb2a826e3c --- /dev/null +++ b/external/pgvector/test/perl/TestLib.pm @@ -0,0 +1,3 @@ +use PostgreSQL::Test::Utils; + +1; diff --git a/external/pgvector/test/sql/btree.sql b/external/pgvector/test/sql/btree.sql new file mode 100644 index 00000000000..232b2881811 --- /dev/null +++ b/external/pgvector/test/sql/btree.sql @@ -0,0 +1,10 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t (val); + +SELECT * FROM t WHERE val = '[1,2,3]'; +SELECT * FROM t ORDER BY val LIMIT 1; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/cast.sql b/external/pgvector/test/sql/cast.sql new file mode 100644 index 00000000000..c73ab0750b3 
--- /dev/null +++ b/external/pgvector/test/sql/cast.sql @@ -0,0 +1,17 @@ +SELECT ARRAY[1,2,3]::vector; +SELECT ARRAY[1.0,2.0,3.0]::vector; +SELECT ARRAY[1,2,3]::float4[]::vector; +SELECT ARRAY[1,2,3]::float8[]::vector; +SELECT ARRAY[1,2,3]::numeric[]::vector; +SELECT '{NULL}'::real[]::vector; +SELECT '{NaN}'::real[]::vector; +SELECT '{Infinity}'::real[]::vector; +SELECT '{-Infinity}'::real[]::vector; +SELECT '{}'::real[]::vector; +SELECT '{{1}}'::real[]::vector; +SELECT '[1,2,3]'::vector::real[]; +SELECT array_agg(n)::vector FROM generate_series(1, 16001) n; +SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n; + +-- ensure no error +SELECT ARRAY[1,2,3] = ARRAY[1,2,3]; diff --git a/external/pgvector/test/sql/copy.sql b/external/pgvector/test/sql/copy.sql new file mode 100644 index 00000000000..28200901c57 --- /dev/null +++ b/external/pgvector/test/sql/copy.sql @@ -0,0 +1,12 @@ +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); + +CREATE TABLE t2 (val vector(3)); + +\copy t TO 'results/data.bin' WITH (FORMAT binary) +\copy t2 FROM 'results/data.bin' WITH (FORMAT binary) + +SELECT * FROM t2 ORDER BY val; + +DROP TABLE t; +DROP TABLE t2; diff --git a/external/pgvector/test/sql/functions.sql b/external/pgvector/test/sql/functions.sql new file mode 100644 index 00000000000..914df36b880 --- /dev/null +++ b/external/pgvector/test/sql/functions.sql @@ -0,0 +1,51 @@ +SELECT '[1,2,3]'::vector + '[4,5,6]'; +SELECT '[3e38]'::vector + '[3e38]'; +SELECT '[1,2,3]'::vector - '[4,5,6]'; +SELECT '[-3e38]'::vector - '[3e38]'; +SELECT '[1,2,3]'::vector * '[4,5,6]'; +SELECT '[1e37]'::vector * '[1e37]'; +SELECT '[1e-37]'::vector * '[1e-37]'; + +SELECT vector_dims('[1,2,3]'); + +SELECT round(vector_norm('[1,1]')::numeric, 5); +SELECT vector_norm('[3,4]'); +SELECT vector_norm('[0,1]'); +SELECT vector_norm('[3e37,4e37]')::real; + +SELECT l2_distance('[0,0]', '[3,4]'); +SELECT l2_distance('[0,0]', 
'[0,1]'); +SELECT l2_distance('[1,2]', '[3]'); +SELECT l2_distance('[3e38]', '[-3e38]'); + +SELECT inner_product('[1,2]', '[3,4]'); +SELECT inner_product('[1,2]', '[3]'); +SELECT inner_product('[3e38]', '[3e38]'); + +SELECT cosine_distance('[1,2]', '[2,4]'); +SELECT cosine_distance('[1,2]', '[0,0]'); +SELECT cosine_distance('[1,1]', '[1,1]'); +SELECT cosine_distance('[1,0]', '[0,2]'); +SELECT cosine_distance('[1,1]', '[-1,-1]'); +SELECT cosine_distance('[1,2]', '[3]'); +SELECT cosine_distance('[1,1]', '[1.1,1.1]'); +SELECT cosine_distance('[1,1]', '[-1.1,-1.1]'); +SELECT cosine_distance('[3e38]', '[3e38]'); + +SELECT l1_distance('[0,0]', '[3,4]'); +SELECT l1_distance('[0,0]', '[0,1]'); +SELECT l1_distance('[1,2]', '[3]'); +SELECT l1_distance('[3e38]', '[-3e38]'); + +SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]']) v; +SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]', NULL]) v; +SELECT avg(v) FROM unnest(ARRAY[]::vector[]) v; +SELECT avg(v) FROM unnest(ARRAY['[1,2]'::vector, '[3]']) v; +SELECT avg(v) FROM unnest(ARRAY['[3e38]'::vector, '[3e38]']) v; +SELECT vector_avg(array_agg(n)) FROM generate_series(1, 16002) n; + +SELECT sum(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]']) v; +SELECT sum(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]', NULL]) v; +SELECT sum(v) FROM unnest(ARRAY[]::vector[]) v; +SELECT sum(v) FROM unnest(ARRAY['[1,2]'::vector, '[3]']) v; +SELECT sum(v) FROM unnest(ARRAY['[3e38]'::vector, '[3e38]']) v; diff --git a/external/pgvector/test/sql/hnsw_cosine.sql b/external/pgvector/test/sql/hnsw_cosine.sql new file mode 100644 index 00000000000..43db44e5d1f --- /dev/null +++ b/external/pgvector/test/sql/hnsw_cosine.sql @@ -0,0 +1,13 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val vector_cosine_ops); + +INSERT INTO t (val) VALUES ('[1,2,4]'); + +-- SELECT * FROM t ORDER BY val <=> '[3,3,3]'; +-- 
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/hnsw_ip.sql b/external/pgvector/test/sql/hnsw_ip.sql new file mode 100644 index 00000000000..006cd09ab51 --- /dev/null +++ b/external/pgvector/test/sql/hnsw_ip.sql @@ -0,0 +1,12 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val vector_ip_ops); + +INSERT INTO t (val) VALUES ('[1,2,4]'); + +SELECT * FROM t ORDER BY val <#> '[3,3,3]'; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/hnsw_l2.sql b/external/pgvector/test/sql/hnsw_l2.sql new file mode 100644 index 00000000000..70bb50a51d0 --- /dev/null +++ b/external/pgvector/test/sql/hnsw_l2.sql @@ -0,0 +1,16 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val vector_l2_ops); + +INSERT INTO t (val) VALUES ('[1,2,4]'); + +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; +SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector); +SELECT COUNT(*) FROM t; + +TRUNCATE t; +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/hnsw_options.sql b/external/pgvector/test/sql/hnsw_options.sql new file mode 100644 index 00000000000..7b9662fffba --- /dev/null +++ b/external/pgvector/test/sql/hnsw_options.sql @@ -0,0 +1,13 @@ +CREATE TABLE t (val vector(3)); +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 1); +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 101); +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (ef_construction = 3); +CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (ef_construction = 1001); +CREATE 
INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 16, ef_construction = 31); + +SHOW hnsw.ef_search; + +SET hnsw.ef_search = 0; +SET hnsw.ef_search = 1001; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/hnsw_unlogged.sql b/external/pgvector/test/sql/hnsw_unlogged.sql new file mode 100644 index 00000000000..2efcc955467 --- /dev/null +++ b/external/pgvector/test/sql/hnsw_unlogged.sql @@ -0,0 +1,9 @@ +SET enable_seqscan = off; + +CREATE UNLOGGED TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val vector_l2_ops); + +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/input.sql b/external/pgvector/test/sql/input.sql new file mode 100644 index 00000000000..9f5809c9281 --- /dev/null +++ b/external/pgvector/test/sql/input.sql @@ -0,0 +1,28 @@ +SELECT '[1,2,3]'::vector; +SELECT '[-1,-2,-3]'::vector; +SELECT '[1.,2.,3.]'::vector; +SELECT ' [ 1, 2 , 3 ] '::vector; +SELECT '[1.23456]'::vector; +SELECT '[hello,1]'::vector; +SELECT '[NaN,1]'::vector; +SELECT '[Infinity,1]'::vector; +SELECT '[-Infinity,1]'::vector; +SELECT '[1.5e38,-1.5e38]'::vector; +SELECT '[1.5e+38,-1.5e+38]'::vector; +SELECT '[1.5e-38,-1.5e-38]'::vector; +SELECT '[4e38,1]'::vector; +SELECT '[1,2,3'::vector; +SELECT '[1,2,3]9'::vector; +SELECT '1,2,3'::vector; +SELECT ''::vector; +SELECT '['::vector; +SELECT '[,'::vector; +SELECT '[]'::vector; +SELECT '[1,]'::vector; +SELECT '[1a]'::vector; +SELECT '[1,,3]'::vector; +SELECT '[1, ,3]'::vector; +SELECT '[1,2,3]'::vector(2); + +SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::vector[]); +SELECT '{"[1,2,3]"}'::vector(2)[]; diff --git a/external/pgvector/test/sql/ivfflat_cosine.sql b/external/pgvector/test/sql/ivfflat_cosine.sql new file mode 100644 index 00000000000..2a84f426539 --- /dev/null +++ b/external/pgvector/test/sql/ivfflat_cosine.sql @@ -0,0 +1,13 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val vector(3)); +INSERT 
INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val vector_cosine_ops) WITH (lists = 1); + +INSERT INTO t (val) VALUES ('[1,2,4]'); + +-- SELECT * FROM t ORDER BY val <=> '[3,3,3]'; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/ivfflat_ip.sql b/external/pgvector/test/sql/ivfflat_ip.sql new file mode 100644 index 00000000000..b2f73bcd384 --- /dev/null +++ b/external/pgvector/test/sql/ivfflat_ip.sql @@ -0,0 +1,12 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val vector_ip_ops) WITH (lists = 1); + +INSERT INTO t (val) VALUES ('[1,2,4]'); + +SELECT * FROM t ORDER BY val <#> '[3,3,3]'; +-- SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/ivfflat_l2.sql b/external/pgvector/test/sql/ivfflat_l2.sql new file mode 100644 index 00000000000..c309519e6f2 --- /dev/null +++ b/external/pgvector/test/sql/ivfflat_l2.sql @@ -0,0 +1,16 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 1); + +INSERT INTO t (val) VALUES ('[1,2,4]'); + +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; +SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector); +SELECT COUNT(*) FROM t; + +TRUNCATE t; +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/ivfflat_options.sql b/external/pgvector/test/sql/ivfflat_options.sql new file mode 100644 index 00000000000..aa909a58360 --- /dev/null +++ b/external/pgvector/test/sql/ivfflat_options.sql @@ -0,0 +1,7 @@ +CREATE TABLE t (val 
vector(3)); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 0); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 32769); + +SHOW ivfflat.probes; + +DROP TABLE t; diff --git a/external/pgvector/test/sql/ivfflat_unlogged.sql b/external/pgvector/test/sql/ivfflat_unlogged.sql new file mode 100644 index 00000000000..fb890066c10 --- /dev/null +++ b/external/pgvector/test/sql/ivfflat_unlogged.sql @@ -0,0 +1,9 @@ +SET enable_seqscan = off; + +CREATE UNLOGGED TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 1); + +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + +DROP TABLE t; diff --git a/external/pgvector/test/t/001_ivfflat_wal.pl b/external/pgvector/test/t/001_ivfflat_wal.pl new file mode 100644 index 00000000000..b19eb4087db --- /dev/null +++ b/external/pgvector/test/t/001_ivfflat_wal.pl @@ -0,0 +1,99 @@ +# Based on postgres/contrib/bloom/t/001_wal.pl + +# Test generic xlog record work for ivfflat index replication. +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $dim = 32; + +my $node_primary; +my $node_replica; + +# Run few queries on both primary and replica and check their results match. +sub test_index_replay +{ + my ($test_name) = @_; + + # Wait for replica to catch up + my $applname = $node_replica->name; + my $caughtup_query = "SELECT pg_current_wal_lsn() <= replay_lsn FROM pg_stat_replication WHERE application_name = '$applname';"; + $node_primary->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for replica 1 to catch up"; + + my @r = (); + for (1 .. 
$dim) + { + push(@r, rand()); + } + my $sql = join(",", @r); + + my $queries = qq( + SET enable_seqscan = off; + SELECT * FROM tst ORDER BY v <-> '[$sql]' LIMIT 10; + ); + + # Run test queries and compare their result + my $primary_result = $node_primary->safe_psql("postgres", $queries); + my $replica_result = $node_replica->safe_psql("postgres", $queries); + + is($primary_result, $replica_result, "$test_name: query result matches"); + return; +} + +# Use ARRAY[random(), random(), random(), ...] over +# SELECT array_agg(random()) FROM generate_series(1, $dim) +# to generate different values for each row +my $array_sql = join(",", ('random()') x $dim); + +# Initialize primary node +$node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1); +if ($dim > 32) +{ + # TODO use wal_keep_segments for Postgres < 13 + $node_primary->append_conf('postgresql.conf', qq(wal_keep_size = 1GB)); +} +if ($dim > 1500) +{ + $node_primary->append_conf('postgresql.conf', qq(maintenance_work_mem = 128MB)); +} +$node_primary->start; +my $backup_name = 'my_backup'; + +# Take backup +$node_primary->backup($backup_name); + +# Create streaming replica linking to primary +$node_replica = get_new_node('replica'); +$node_replica->init_from_backup($node_primary, $backup_name, has_streaming => 1); +$node_replica->start; + +# Create ivfflat index on primary +$node_primary->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node_primary->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));"); +$node_primary->safe_psql("postgres", + "INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 100000) i;" +); +$node_primary->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops);"); + +# Test that queries give same result +test_index_replay('initial'); + +# Run 10 cycles of table modification. Run test queries after each modification. +for my $i (1 .. 
10) +{ + $node_primary->safe_psql("postgres", "DELETE FROM tst WHERE i = $i;"); + test_index_replay("delete $i"); + $node_primary->safe_psql("postgres", "VACUUM tst;"); + test_index_replay("vacuum $i"); + my ($start, $end) = (100001 + ($i - 1) * 10000, 100000 + $i * 10000); + $node_primary->safe_psql("postgres", + "INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series($start, $end) i;" + ); + test_index_replay("insert $i"); +} + +done_testing(); diff --git a/external/pgvector/test/t/002_ivfflat_vacuum.pl b/external/pgvector/test/t/002_ivfflat_vacuum.pl new file mode 100644 index 00000000000..d930444a671 --- /dev/null +++ b/external/pgvector/test/t/002_ivfflat_vacuum.pl @@ -0,0 +1,44 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $dim = 3; + +my @r = (); +for (1 .. $dim) +{ + my $v = int(rand(1000)) + 1; + push(@r, "i % $v"); +} +my $array_sql = join(", ", @r); + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table and index +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 100000) i;" +); +$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops);"); + +# Get size +my $size = $node->safe_psql("postgres", "SELECT pg_total_relation_size('tst_v_idx');"); + +# Delete all, vacuum, and insert same data +$node->safe_psql("postgres", "DELETE FROM tst;"); +$node->safe_psql("postgres", "VACUUM tst;"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 100000) i;" +); + +# Check size +my $new_size = $node->safe_psql("postgres", "SELECT pg_total_relation_size('tst_v_idx');"); +is($size, $new_size, "size does not change"); + +done_testing(); diff --git a/external/pgvector/test/t/003_ivfflat_build_recall.pl 
b/external/pgvector/test/t/003_ivfflat_build_recall.pl new file mode 100644 index 00000000000..de9609381e4 --- /dev/null +++ b/external/pgvector/test/t/003_ivfflat_build_recall.pl @@ -0,0 +1,128 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $node; +my @queries = (); +my @expected; +my $limit = 20; + +sub test_recall +{ + my ($probes, $min, $operator) = @_; + my $correct = 0; + my $total = 0; + + my $explain = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET ivfflat.probes = $probes; + EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit; + )); + like($explain, qr/Index Scan using idx on tst/); + + for my $i (0 .. $#queries) + { + my $actual = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET ivfflat.probes = $probes; + SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit; + )); + my @actual_ids = split("\n", $actual); + my %actual_set = map { $_ => 1 } @actual_ids; + + my @expected_ids = split("\n", $expected[$i]); + + foreach (@expected_ids) + { + if (exists($actual_set{$_})) + { + $correct++; + } + $total++; + } + } + + cmp_ok($correct / $total, ">=", $min, $operator); +} + +# Initialize node +$node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector(3));"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 100000) i;" +); + +# Generate queries +for (1 .. 20) +{ + my $r1 = rand(); + my $r2 = rand(); + my $r3 = rand(); + push(@queries, "[$r1,$r2,$r3]"); +} + +# Check each index type +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops"); + +for my $i (0 .. 
$#operators) +{ + my $operator = $operators[$i]; + my $opclass = $opclasses[$i]; + + # Get exact results + @expected = (); + foreach (@queries) + { + my $res = $node->safe_psql("postgres", "SELECT i FROM tst ORDER BY v $operator '$_' LIMIT $limit;"); + push(@expected, $res); + } + + # Build index serially + $node->safe_psql("postgres", qq( + SET max_parallel_maintenance_workers = 0; + CREATE INDEX idx ON tst USING ivfflat (v $opclass); + )); + + # Test approximate results + if ($operator ne "<#>") + { + # TODO Fix test (uniform random vectors all have similar inner product) + test_recall(1, 0.71, $operator); + test_recall(10, 0.95, $operator); + } + # Account for equal distances + test_recall(100, 0.9925, $operator); + + $node->safe_psql("postgres", "DROP INDEX idx;"); + + # Build index in parallel + my ($ret, $stdout, $stderr) = $node->psql("postgres", qq( + SET client_min_messages = DEBUG; + SET min_parallel_table_scan_size = 1; + CREATE INDEX idx ON tst USING ivfflat (v $opclass); + )); + is($ret, 0, $stderr); + like($stderr, qr/using \d+ parallel workers/); + + # Test approximate results + if ($operator ne "<#>") + { + # TODO Fix test (uniform random vectors all have similar inner product) + test_recall(1, 0.71, $operator); + test_recall(10, 0.95, $operator); + } + # Account for equal distances + test_recall(100, 0.9925, $operator); + + $node->safe_psql("postgres", "DROP INDEX idx;"); +} + +done_testing(); diff --git a/external/pgvector/test/t/004_ivfflat_centers.pl b/external/pgvector/test/t/004_ivfflat_centers.pl new file mode 100644 index 00000000000..4c125dd73f9 --- /dev/null +++ b/external/pgvector/test/t/004_ivfflat_centers.pl @@ -0,0 +1,38 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector(3));"); 
+$node->safe_psql("postgres", + "INSERT INTO tst SELECT i, '[1,2,3]' FROM generate_series(1, 10) i;" +); + +sub test_centers +{ + my ($lists, $min) = @_; + + my ($ret, $stdout, $stderr) = $node->psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops) WITH (lists = $lists);"); + is($ret, 0, $stderr); +} + +# Test no error for duplicate centers +test_centers(5); +test_centers(10); + +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i, '[4,5,6]' FROM generate_series(1, 10) i;" +); + +# Test no error for duplicate centers +test_centers(10); + +done_testing(); diff --git a/external/pgvector/test/t/005_ivfflat_query_recall.pl b/external/pgvector/test/t/005_ivfflat_query_recall.pl new file mode 100644 index 00000000000..1edebb3b64a --- /dev/null +++ b/external/pgvector/test/t/005_ivfflat_query_recall.pl @@ -0,0 +1,44 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i int4 primary key, v vector(3));"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 100000) i;" +); + +# Check each index type +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops"); + +for my $i (0 .. $#operators) +{ + my $operator = $operators[$i]; + my $opclass = $opclasses[$i]; + + # Add index + $node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v $opclass);"); + + # Test 100% recall + for (1 .. 
20) + { + my $id = int(rand() * 100000); + my $query = $node->safe_psql("postgres", "SELECT v FROM tst WHERE i = $id;"); + my $res = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SELECT v FROM tst ORDER BY v <-> '$query' LIMIT 1; + )); + is($res, $query); + } +} + +done_testing(); diff --git a/external/pgvector/test/t/006_ivfflat_lists.pl b/external/pgvector/test/t/006_ivfflat_lists.pl new file mode 100644 index 00000000000..9812f507d88 --- /dev/null +++ b/external/pgvector/test/t/006_ivfflat_lists.pl @@ -0,0 +1,33 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (v vector(3));"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 100000) i;" +); + +$node->safe_psql("postgres", "CREATE INDEX lists50 ON tst USING ivfflat (v vector_l2_ops) WITH (lists = 50);"); +$node->safe_psql("postgres", "CREATE INDEX lists100 ON tst USING ivfflat (v vector_l2_ops) WITH (lists = 100);"); + +# Test prefers more lists +my $res = $node->safe_psql("postgres", "EXPLAIN SELECT v FROM tst ORDER BY v <-> '[0.5,0.5,0.5]' LIMIT 10;"); +like($res, qr/lists100/); +unlike($res, qr/lists50/); + +# Test errors with too much memory +my ($ret, $stdout, $stderr) = $node->psql("postgres", + "CREATE INDEX lists10000 ON tst USING ivfflat (v vector_l2_ops) WITH (lists = 10000);" +); +like($stderr, qr/memory required is/); + +done_testing(); diff --git a/external/pgvector/test/t/007_ivfflat_inserts.pl b/external/pgvector/test/t/007_ivfflat_inserts.pl new file mode 100644 index 00000000000..dd7a95d7c84 --- /dev/null +++ b/external/pgvector/test/t/007_ivfflat_inserts.pl @@ -0,0 +1,57 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $dim = 768; + +my 
$array_sql = join(",", ('random()') x $dim); + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table and index +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (v vector($dim));"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT ARRAY[$array_sql] FROM generate_series(1, 10000) i;" +); +$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops);"); + +$node->pgbench( + "--no-vacuum --client=5 --transactions=100", + 0, + [qr{actually processed}], + [qr{^$}], + "concurrent INSERTs", + { + "007_ivfflat_inserts" => "INSERT INTO tst SELECT ARRAY[$array_sql] FROM generate_series(1, 10) i;" + } +); + +sub idx_scan +{ + # Stats do not update instantaneously + # https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-STATS-VIEWS + sleep(1); + $node->safe_psql("postgres", "SELECT idx_scan FROM pg_stat_user_indexes WHERE indexrelid = 'tst_v_idx'::regclass;"); +} + +my $expected = 10000 + 5 * 100 * 10; + +my $count = $node->safe_psql("postgres", "SELECT COUNT(*) FROM tst;"); +is($count, $expected); +is(idx_scan(), 0); + +$count = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET ivfflat.probes = 100; + SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1)) t; +)); +is($count, $expected); +is(idx_scan(), 1); + +done_testing(); diff --git a/external/pgvector/test/t/008_aggregates.pl b/external/pgvector/test/t/008_aggregates.pl new file mode 100644 index 00000000000..04658900916 --- /dev/null +++ b/external/pgvector/test/t/008_aggregates.pl @@ -0,0 +1,49 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (r1 real, r2 real, r3 real, v vector(3));"); 
+$node->safe_psql("postgres", qq( + INSERT INTO tst SELECT r1, r2, r3, ARRAY[r1, r2, r3] FROM ( + SELECT random() + 1.01 AS r1, random() + 2.01 AS r2, random() + 3.01 AS r3 FROM generate_series(1, 1000000) t + ) i; +)); + +sub test_aggregate +{ + my ($agg) = @_; + + # Test value + my $res = $node->safe_psql("postgres", "SELECT $agg(v) FROM tst;"); + like($res, qr/\[1\.5/); + like($res, qr/,2\.5/); + like($res, qr/,3\.5/); + + # Test matches real for avg + # Cannot test sum since sum(real) varies between calls + if ($agg eq 'avg') + { + my $r1 = $node->safe_psql("postgres", "SELECT $agg(r1)::float4 FROM tst;"); + my $r2 = $node->safe_psql("postgres", "SELECT $agg(r2)::float4 FROM tst;"); + my $r3 = $node->safe_psql("postgres", "SELECT $agg(r3)::float4 FROM tst;"); + is($res, "[$r1,$r2,$r3]"); + } + + # Test explain + my $explain = $node->safe_psql("postgres", "EXPLAIN SELECT $agg(v) FROM tst;"); + like($explain, qr/Partial Aggregate/); +} + +test_aggregate('avg'); +test_aggregate('sum'); + +done_testing(); diff --git a/external/pgvector/test/t/009_storage.pl b/external/pgvector/test/t/009_storage.pl new file mode 100644 index 00000000000..b5aa649a79b --- /dev/null +++ b/external/pgvector/test/t/009_storage.pl @@ -0,0 +1,34 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $dim = 1024; + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (v1 vector(1024), v2 vector(1024), v3 vector(1024));"); + +# Test insert succeeds +$node->safe_psql("postgres", + "INSERT INTO tst SELECT array_agg(n), array_agg(n), array_agg(n) FROM generate_series(1, $dim) n" +); + +# Change storage to PLAIN +$node->safe_psql("postgres", "ALTER TABLE tst ALTER COLUMN v1 SET STORAGE PLAIN"); +$node->safe_psql("postgres", "ALTER TABLE tst ALTER COLUMN v2 SET STORAGE PLAIN"); +$node->safe_psql("postgres", 
"ALTER TABLE tst ALTER COLUMN v3 SET STORAGE PLAIN"); + +# Test insert fails +my ($ret, $stdout, $stderr) = $node->psql("postgres", + "INSERT INTO tst SELECT array_agg(n), array_agg(n), array_agg(n) FROM generate_series(1, $dim) n" +); +like($stderr, qr/row is too big/); + +done_testing(); diff --git a/external/pgvector/test/t/010_hnsw_wal.pl b/external/pgvector/test/t/010_hnsw_wal.pl new file mode 100644 index 00000000000..36c0dc58c78 --- /dev/null +++ b/external/pgvector/test/t/010_hnsw_wal.pl @@ -0,0 +1,99 @@ +# Based on postgres/contrib/bloom/t/001_wal.pl + +# Test generic xlog record work for hnsw index replication. +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $dim = 32; + +my $node_primary; +my $node_replica; + +# Run few queries on both primary and replica and check their results match. +sub test_index_replay +{ + my ($test_name) = @_; + + # Wait for replica to catch up + my $applname = $node_replica->name; + my $caughtup_query = "SELECT pg_current_wal_lsn() <= replay_lsn FROM pg_stat_replication WHERE application_name = '$applname';"; + $node_primary->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for replica 1 to catch up"; + + my @r = (); + for (1 .. $dim) + { + push(@r, rand()); + } + my $sql = join(",", @r); + + my $queries = qq( + SET enable_seqscan = off; + SELECT * FROM tst ORDER BY v <-> '[$sql]' LIMIT 10; + ); + + # Run test queries and compare their result + my $primary_result = $node_primary->safe_psql("postgres", $queries); + my $replica_result = $node_replica->safe_psql("postgres", $queries); + + is($primary_result, $replica_result, "$test_name: query result matches"); + return; +} + +# Use ARRAY[random(), random(), random(), ...] 
over +# SELECT array_agg(random()) FROM generate_series(1, $dim) +# to generate different values for each row +my $array_sql = join(",", ('random()') x $dim); + +# Initialize primary node +$node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1); +if ($dim > 32) +{ + # TODO use wal_keep_segments for Postgres < 13 + $node_primary->append_conf('postgresql.conf', qq(wal_keep_size = 1GB)); +} +if ($dim > 1500) +{ + $node_primary->append_conf('postgresql.conf', qq(maintenance_work_mem = 128MB)); +} +$node_primary->start; +my $backup_name = 'my_backup'; + +# Take backup +$node_primary->backup($backup_name); + +# Create streaming replica linking to primary +$node_replica = get_new_node('replica'); +$node_replica->init_from_backup($node_primary, $backup_name, has_streaming => 1); +$node_replica->start; + +# Create hnsw index on primary +$node_primary->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node_primary->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));"); +$node_primary->safe_psql("postgres", + "INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 1000) i;" +); +$node_primary->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops);"); + +# Test that queries give same result +test_index_replay('initial'); + +# Run 10 cycles of table modification. Run test queries after each modification. +for my $i (1 .. 
10) +{ + $node_primary->safe_psql("postgres", "DELETE FROM tst WHERE i = $i;"); + test_index_replay("delete $i"); + $node_primary->safe_psql("postgres", "VACUUM tst;"); + test_index_replay("vacuum $i"); + my ($start, $end) = (1001 + ($i - 1) * 100, 1000 + $i * 100); + $node_primary->safe_psql("postgres", + "INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series($start, $end) i;" + ); + test_index_replay("insert $i"); +} + +done_testing(); diff --git a/external/pgvector/test/t/011_hnsw_vacuum.pl b/external/pgvector/test/t/011_hnsw_vacuum.pl new file mode 100644 index 00000000000..10c301f67b4 --- /dev/null +++ b/external/pgvector/test/t/011_hnsw_vacuum.pl @@ -0,0 +1,54 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $dim = 3; + +my @r = (); +for (1 .. $dim) +{ + my $v = int(rand(1000)) + 1; + push(@r, "i % $v"); +} +my $array_sql = join(", ", @r); + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table and index +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 10000) i;" +); +$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops);"); + +# Get size +my $size = $node->safe_psql("postgres", "SELECT pg_total_relation_size('tst_v_idx');"); + +# Delete all, vacuum, and insert same data +$node->safe_psql("postgres", "DELETE FROM tst;"); +$node->safe_psql("postgres", "VACUUM tst;"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 10000) i;" +); + +# Check size +# May increase some due to different levels +my $new_size = $node->safe_psql("postgres", "SELECT pg_total_relation_size('tst_v_idx');"); +cmp_ok($new_size, "<=", $size * 1.02, "size does not increase too much"); + +# Delete all but one 
+$node->safe_psql("postgres", "DELETE FROM tst WHERE i != 123;"); +$node->safe_psql("postgres", "VACUUM tst;"); +my $res = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SELECT i FROM tst ORDER BY v <-> '[0,0,0]' LIMIT 10; +)); +is($res, 123); + +done_testing(); diff --git a/external/pgvector/test/t/012_hnsw_build_recall.pl b/external/pgvector/test/t/012_hnsw_build_recall.pl new file mode 100644 index 00000000000..e9074c6bda6 --- /dev/null +++ b/external/pgvector/test/t/012_hnsw_build_recall.pl @@ -0,0 +1,93 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $node; +my @queries = (); +my @expected; +my $limit = 20; + +sub test_recall +{ + my ($min, $operator) = @_; + my $correct = 0; + my $total = 0; + + my $explain = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit; + )); + like($explain, qr/Index Scan/); + + for my $i (0 .. $#queries) + { + my $actual = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit; + )); + my @actual_ids = split("\n", $actual); + my %actual_set = map { $_ => 1 } @actual_ids; + + my @expected_ids = split("\n", $expected[$i]); + + foreach (@expected_ids) + { + if (exists($actual_set{$_})) + { + $correct++; + } + $total++; + } + } + + cmp_ok($correct / $total, ">=", $min, $operator); +} + +# Initialize node +$node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector(3));"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 10000) i;" +); + +# Generate queries +for (1 .. 
20) +{ + my $r1 = rand(); + my $r2 = rand(); + my $r3 = rand(); + push(@queries, "[$r1,$r2,$r3]"); +} + +# Check each index type +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops"); + +for my $i (0 .. $#operators) +{ + my $operator = $operators[$i]; + my $opclass = $opclasses[$i]; + + # Get exact results + @expected = (); + foreach (@queries) + { + my $res = $node->safe_psql("postgres", "SELECT i FROM tst ORDER BY v $operator '$_' LIMIT $limit;"); + push(@expected, $res); + } + + # Add index + $node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v $opclass);"); + + my $min = $operator eq "<#>" ? 0.80 : 0.99; + test_recall($min, $operator); +} + +done_testing(); diff --git a/external/pgvector/test/t/013_hnsw_insert_recall.pl b/external/pgvector/test/t/013_hnsw_insert_recall.pl new file mode 100644 index 00000000000..d0c24f87c4a --- /dev/null +++ b/external/pgvector/test/t/013_hnsw_insert_recall.pl @@ -0,0 +1,108 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $node; +my @queries = (); +my @expected; +my $limit = 20; + +sub test_recall +{ + my ($min, $operator) = @_; + my $correct = 0; + my $total = 0; + + my $explain = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit; + )); + like($explain, qr/Index Scan/); + + for my $i (0 .. 
$#queries) + { + my $actual = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit; + )); + my @actual_ids = split("\n", $actual); + my %actual_set = map { $_ => 1 } @actual_ids; + + my @expected_ids = split("\n", $expected[$i]); + + foreach (@expected_ids) + { + if (exists($actual_set{$_})) + { + $correct++; + } + $total++; + } + } + + cmp_ok($correct / $total, ">=", $min, $operator); +} + +# Initialize node +$node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i serial, v vector(3));"); + +# Generate queries +for (1 .. 20) +{ + my $r1 = rand(); + my $r2 = rand(); + my $r3 = rand(); + push(@queries, "[$r1,$r2,$r3]"); +} + +# Check each index type +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops"); + +for my $i (0 .. $#operators) +{ + my $operator = $operators[$i]; + my $opclass = $opclasses[$i]; + + # Add index + $node->safe_psql("postgres", "CREATE INDEX idx ON tst USING hnsw (v $opclass);"); + + # Use concurrent inserts + $node->pgbench( + "--no-vacuum --client=10 --transactions=1000", + 0, + [qr{actually processed}], + [qr{^$}], + "concurrent INSERTs", + { + "013_hnsw_insert_recall_$opclass" => "INSERT INTO tst (v) VALUES (ARRAY[random(), random(), random()]);" + } + ); + + # Get exact results + @expected = (); + foreach (@queries) + { + my $res = $node->safe_psql("postgres", qq( + SET enable_indexscan = off; + SELECT i FROM tst ORDER BY v $operator '$_' LIMIT $limit; + )); + push(@expected, $res); + } + + my $min = $operator eq "<#>" ? 
0.80 : 0.99; + test_recall($min, $operator); + + $node->safe_psql("postgres", "DROP INDEX idx;"); + $node->safe_psql("postgres", "TRUNCATE tst;"); +} + +done_testing(); diff --git a/external/pgvector/test/t/014_hnsw_inserts.pl b/external/pgvector/test/t/014_hnsw_inserts.pl new file mode 100644 index 00000000000..f69bcd624f9 --- /dev/null +++ b/external/pgvector/test/t/014_hnsw_inserts.pl @@ -0,0 +1,74 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +# Ensures elements and neighbors on both same and different pages +my $dim = 1900; + +my $array_sql = join(",", ('random()') x $dim); + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table and index +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (v vector($dim));"); +$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops);"); + +sub idx_scan +{ + # Stats do not update instantaneously + # https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-STATS-VIEWS + sleep(1); + $node->safe_psql("postgres", "SELECT idx_scan FROM pg_stat_user_indexes WHERE indexrelid = 'tst_v_idx'::regclass;"); +} + +for my $i (1 .. 
20) +{ + $node->pgbench( + "--no-vacuum --client=10 --transactions=1", + 0, + [qr{actually processed}], + [qr{^$}], + "concurrent INSERTs", + { + "014_hnsw_inserts_$i" => "INSERT INTO tst VALUES (ARRAY[$array_sql]);" + } + ); + + my $count = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1)) t; + )); + is($count, 10); + + $node->safe_psql("postgres", "TRUNCATE tst;"); +} + +$node->pgbench( + "--no-vacuum --client=20 --transactions=5", + 0, + [qr{actually processed}], + [qr{^$}], + "concurrent INSERTs", + { + "014_hnsw_inserts" => "INSERT INTO tst SELECT ARRAY[$array_sql] FROM generate_series(1, 10) i;" + } +); + +my $count = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET hnsw.ef_search = 1000; + SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1)) t; +)); +# Elements may lose all incoming connections with the HNSW algorithm +# Vacuuming can fix this if one of the elements neighbors is deleted +cmp_ok($count, ">=", 997); + +is(idx_scan(), 21); + +done_testing(); diff --git a/external/pgvector/test/t/015_hnsw_duplicates.pl b/external/pgvector/test/t/015_hnsw_duplicates.pl new file mode 100644 index 00000000000..7e11deed37f --- /dev/null +++ b/external/pgvector/test/t/015_hnsw_duplicates.pl @@ -0,0 +1,58 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (v vector(3));"); + +sub insert_vectors +{ + for my $i (1 .. 
20) + { + $node->safe_psql("postgres", "INSERT INTO tst VALUES ('[1,1,1]');"); + } +} + +sub test_duplicates +{ + my $res = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET hnsw.ef_search = 1; + SELECT COUNT(*) FROM (SELECT * FROM tst ORDER BY v <-> '[1,1,1]') t; + )); + is($res, 10); +} + +# Test duplicates with build +insert_vectors(); +$node->safe_psql("postgres", "CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops);"); +test_duplicates(); + +# Reset +$node->safe_psql("postgres", "TRUNCATE tst;"); + +# Test duplicates with inserts +insert_vectors(); +test_duplicates(); + +# Test fallback path for inserts +$node->pgbench( + "--no-vacuum --client=5 --transactions=100", + 0, + [qr{actually processed}], + [qr{^$}], + "concurrent INSERTs", + { + "015_hnsw_duplicates" => "INSERT INTO tst VALUES ('[1,1,1]');" + } +); + +done_testing(); diff --git a/external/pgvector/test/t/016_hnsw_vacuum_recall.pl b/external/pgvector/test/t/016_hnsw_vacuum_recall.pl new file mode 100644 index 00000000000..1cc267d9fcf --- /dev/null +++ b/external/pgvector/test/t/016_hnsw_vacuum_recall.pl @@ -0,0 +1,97 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $node; +my @queries = (); +my @expected; +my $limit = 20; + +sub test_recall +{ + my ($min, $ef_search, $test_name) = @_; + my $correct = 0; + my $total = 0; + + my $explain = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET hnsw.ef_search = $ef_search; + EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v <-> '$queries[0]' LIMIT $limit; + )); + like($explain, qr/Index Scan/); + + for my $i (0 .. 
$#queries) + { + my $actual = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET hnsw.ef_search = $ef_search; + SELECT i FROM tst ORDER BY v <-> '$queries[$i]' LIMIT $limit; + )); + my @actual_ids = split("\n", $actual); + my %actual_set = map { $_ => 1 } @actual_ids; + + my @expected_ids = split("\n", $expected[$i]); + + foreach (@expected_ids) + { + if (exists($actual_set{$_})) + { + $correct++; + } + $total++; + } + } + + cmp_ok($correct / $total, ">=", $min, $test_name); +} + +# Initialize node +$node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector(3));"); +$node->safe_psql("postgres", "ALTER TABLE tst SET (autovacuum_enabled = false);"); +$node->safe_psql("postgres", + "INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 10000) i;" +); + +# Add index +$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops) WITH (m = 4, ef_construction = 8);"); + +# Delete data +$node->safe_psql("postgres", "DELETE FROM tst WHERE i > 2500;"); + +# Generate queries +for (1 .. 
20) +{ + my $r1 = rand(); + my $r2 = rand(); + my $r3 = rand(); + push(@queries, "[$r1,$r2,$r3]"); +} + +# Get exact results +@expected = (); +foreach (@queries) +{ + my $res = $node->safe_psql("postgres", qq( + SET enable_indexscan = off; + SELECT i FROM tst ORDER BY v <-> '$_' LIMIT $limit; + )); + push(@expected, $res); +} + +test_recall(0.20, $limit, "before vacuum"); +test_recall(0.95, 100, "before vacuum"); + +# TODO Test concurrent inserts with vacuum +$node->safe_psql("postgres", "VACUUM tst;"); + +test_recall(0.95, $limit, "after vacuum"); + +done_testing(); diff --git a/external/pgvector/test/t/017_ivfflat_insert_recall.pl b/external/pgvector/test/t/017_ivfflat_insert_recall.pl new file mode 100644 index 00000000000..c2e320cc15a --- /dev/null +++ b/external/pgvector/test/t/017_ivfflat_insert_recall.pl @@ -0,0 +1,117 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $node; +my @queries = (); +my @expected; +my $limit = 20; + +sub test_recall +{ + my ($probes, $min, $operator) = @_; + my $correct = 0; + my $total = 0; + + my $explain = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET ivfflat.probes = $probes; + EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit; + )); + like($explain, qr/Index Scan using idx on tst/); + + for my $i (0 .. 
$#queries) + { + my $actual = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET ivfflat.probes = $probes; + SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit; + )); + my @actual_ids = split("\n", $actual); + my %actual_set = map { $_ => 1 } @actual_ids; + + my @expected_ids = split("\n", $expected[$i]); + + foreach (@expected_ids) + { + if (exists($actual_set{$_})) + { + $correct++; + } + $total++; + } + } + + cmp_ok($correct / $total, ">=", $min, $operator); +} + +# Initialize node +$node = get_new_node('node'); +$node->init; +$node->start; + +# Create table +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i serial, v vector(3));"); + +# Generate queries +for (1 .. 20) +{ + my $r1 = rand(); + my $r2 = rand(); + my $r3 = rand(); + push(@queries, "[$r1,$r2,$r3]"); +} + +# Check each index type +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops"); + +for my $i (0 .. 
$#operators) +{ + my $operator = $operators[$i]; + my $opclass = $opclasses[$i]; + + # Add index + $node->safe_psql("postgres", "CREATE INDEX idx ON tst USING ivfflat (v $opclass);"); + + # Use concurrent inserts + $node->pgbench( + "--no-vacuum --client=10 --transactions=1000", + 0, + [qr{actually processed}], + [qr{^$}], + "concurrent INSERTs", + { + "017_ivfflat_insert_recall_$opclass" => "INSERT INTO tst (v) SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 10) i;" + } + ); + + # Get exact results + @expected = (); + foreach (@queries) + { + my $res = $node->safe_psql("postgres", qq( + SET enable_indexscan = off; + SELECT i FROM tst ORDER BY v $operator '$_' LIMIT $limit; + )); + push(@expected, $res); + } + + # Test approximate results + if ($operator ne "<#>") + { + # TODO Fix test (uniform random vectors all have similar inner product) + test_recall(1, 0.71, $operator); + test_recall(10, 0.95, $operator); + } + # Account for equal distances + test_recall(100, 0.9925, $operator); + + $node->safe_psql("postgres", "DROP INDEX idx;"); + $node->safe_psql("postgres", "TRUNCATE tst;"); +} + +done_testing(); diff --git a/external/pgvector/test/t/018_ivfflat_deletes.pl b/external/pgvector/test/t/018_ivfflat_deletes.pl new file mode 100644 index 00000000000..a0ea0e63391 --- /dev/null +++ b/external/pgvector/test/t/018_ivfflat_deletes.pl @@ -0,0 +1,43 @@ +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +my $dim = 3; + +my $array_sql = join(",", ('random()') x $dim); + +# Initialize node +my $node = get_new_node('node'); +$node->init; +$node->start; + +# Create table and index +$node->safe_psql("postgres", "CREATE EXTENSION vector;"); +$node->safe_psql("postgres", "CREATE TABLE tst (i serial, v vector($dim));"); +$node->safe_psql("postgres", + "INSERT INTO tst (v) SELECT ARRAY[$array_sql] FROM generate_series(1, 10000) i;" +); +$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops);"); + +# 
Delete data +$node->safe_psql("postgres", "DELETE FROM tst WHERE i % 100 != 0;"); + +my $exp = $node->safe_psql("postgres", qq( + SET enable_indexscan = off; + SELECT i FROM tst ORDER BY v <-> '[0,0,0]'; +)); + +# Run twice to make sure correct tuples marked as dead +for (1 .. 2) +{ + my $res = $node->safe_psql("postgres", qq( + SET enable_seqscan = off; + SET ivfflat.probes = 100; + SELECT i FROM tst ORDER BY v <-> '[0,0,0]'; + )); + is($res, $exp); +} + +done_testing(); diff --git a/external/pgvector/vector.control b/external/pgvector/vector.control new file mode 100644 index 00000000000..709170387e0 --- /dev/null +++ b/external/pgvector/vector.control @@ -0,0 +1,4 @@ +comment = 'vector data type and ivfflat and hnsw access methods' +default_version = '0.5.1' +module_pathname = '$libdir/vector' +relocatable = true diff --git a/pgreplay/.gitignore b/pgreplay/.gitignore new file mode 100644 index 00000000000..0d8297059ab --- /dev/null +++ b/pgreplay/.gitignore @@ -0,0 +1,33 @@ +# Object files +*.o +*.obj + +# Libraries +*.lib +*.a + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# build products +pgreplay +pgreplay.exe +Makefile +# config.h +config.log +config.status + +# test subdirectory +test/ + +# other versioning systems +.svn +CVS + +# M4 cache +autom4te.cache/ +.vscode +devtest \ No newline at end of file diff --git a/pgreplay/CHANGELOG b/pgreplay/CHANGELOG new file mode 100644 index 00000000000..77782d3c532 --- /dev/null +++ b/pgreplay/CHANGELOG @@ -0,0 +1,98 @@ +Version 1.4.0 + +Enhancements: +- Add a replay option "-n" that provides a "dry run" without actually + connecting to the database. + Patch by Manuel Vives. + +Bugfixes: +- Fix Makefile to call "install" portably. + It used to call "$(INSTALL) -D", which works on some, but not all systems + (MAC OS X is an example where it doesn't work). + Report by Jeff Doering. +- Fix crash after replay of DEALLOCATE ALL. 
+ This caused a crash the next time prepared statements were deleted, for + example at the end of the database session. + Reported by Wan Shen Lim. + +Version 1.3.0 Feb 20 2017 + +Enhancements: +- Accept CPPFLAGS from configure in Makefile.in. + Patch by Marko Kreen. +- Add command line option -X to specify extra connect string fragment. + Needed to specify unusual connect string options. + Patch by Marko Kreen. +- Introduce replay filter options with "-D database" and "-U username" + to filter for a database or user during parsing. + Patch by Gilles Darold. + +Version 1.2.0 Aug 17 2012 + +Enhancements: +- Introduce replay option "-j" to jump ahead when all connections are idle. + This can speed up replay. The statistics will not include the skipped time, + but delay warnings will work as expected. + Idea and original patch by John Lumby. + +Bugfixes: +- Fix failure to parse string constants like E'\\' where the backslash before + a quotation mark is backslash-quoted. + Bug discovered by Jeff Frost. + +Version 1.1.0 Feb 09 2012 + +Enhancements: +- Respect environment variable DESTDIR in Makefile for RPM packagers. +- Improve execution delay reporting by introducing more intelligent time + steps when a report is written; every 10 seconds is way too spammy. +- Add documentation for interaction with pgFouine to the README. + +Bugfixes: +- Fix incorrect assumption that everything that starts with a dollar + sign is a dollar quoted string. This used to trigger bogus "end of dollar + quote not found" error messages when $n placeholders are used in PREPARE + statements. Discovered and fixed by Todd Owen. +- When pgreplay waited for a response on a connection because it needed to + send the next command, it used to sleep for one millisecond before + polling the socket again. This proved to be too long on busy systems, + where replay started to lag behind. Now pgreplay will not sleep, + but keep polling until the response is there. 
+ +Version 1.0.0 Jun 03 2011 + +Bugfixes: +- Fix a connection and memory leak introduced by the new handling of FATAL + connection errors in 0.9.1. + Discovered by Denis Kostin. + +Version 0.9.1 Feb 26 2011 + +Enhancements: +- Calculate parse and execution statistics and display them at the end + of the run. + +Bugfixes: +- Use "=" instead of "==" in "test" comparisons in configure. + This improves portability. +- Change replay file timestamp to seconds after midnight of 2000-01-01 + in local time. This makes the replay file format independent of time zones + and avoids problems with mktime(3) implementations that don't like + the UNIX epoch. +- Ignore string literals in filter_bad_statements during log file parsing. + This keeps the function from getting confused by the contents of the + string. Discovered by Josh Berkus. +- Correctly handle prepared statements without parameters. + Discovered by Grigorij Lipin. +- Fix a corner case bug in read_log_line that can cause data corruption + when parsing a stderr log. Discovered by Grigorij Lipin. +- Skip memory dumps in stderr log caused by "out of memory" errors + instead of gagging on them. Discovered by Grigorij Lipin. +- Don't gag if a connection attempt results in a FATAL error during replay. + This can for example happen if max_connections has been exceeded or if a + non-existent user is specified with "trust" authentication. + Discovered by Grigorij Lipin. 
+ +Version 0.9.0 Mar 19 2010 + +- first release diff --git a/pgreplay/Dockerfile b/pgreplay/Dockerfile new file mode 100644 index 00000000000..a0c78e6f613 --- /dev/null +++ b/pgreplay/Dockerfile @@ -0,0 +1,16 @@ +FROM ubuntu:22.10 + +RUN TZ=UTC +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +RUN apt-get update && \ + apt-get install --no-install-recommends -y tzdata make gcc libc6-dev postgresql-14 libpq-dev postgresql-doc-14 git ca-certificates && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /root +RUN git clone https://github.com/laurenz/pgreplay.git +WORKDIR /root/pgreplay +RUN ./configure --with-postgres=/usr/bin +RUN make +RUN make install +RUN ln -s /root/pgreplay/pgreplay /usr/local/bin + diff --git a/pgreplay/LICENSE b/pgreplay/LICENSE new file mode 100644 index 00000000000..8185f6d8b2f --- /dev/null +++ b/pgreplay/LICENSE @@ -0,0 +1,19 @@ +Portions Copyright (c) 2017-2023, CYBERTEC PostgreSQL International GmbH +Portions Copyright (c) 2010-2017, Magistrat der Stadt Wien + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement +is hereby granted, provided that the above copyright notice and this paragraph +and the following two paragraphs appear in all copies. + +IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR +DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING +LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, +EVEN IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, +AND THE COPYRIGHT HOLDER HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, +SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
diff --git a/pgreplay/Makefile.in b/pgreplay/Makefile.in new file mode 100644 index 00000000000..c6d784c460a --- /dev/null +++ b/pgreplay/Makefile.in @@ -0,0 +1,64 @@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +CC = @CC@ +EXTRA_OBJS = @EXTRA_OBJS@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ +bindir = @bindir@ +datarootdir = @datarootdir@ +datadir = @datadir@ +mandir = @mandir@ +docdir = @docdir@ +htmldir = @htmldir@ +INSTALL = @INSTALL@ +VERSION = @PACKAGE_VERSION@ +EXE = pgreplay +OBJS = parse.o replayitem.o main.o replayfile.o database.o $(EXTRA_OBJS) +HEADERS = config.h pgreplay.h + +.PHONY: clean distclean tarball test install install_bin install_man install_html + +.SUFFIXES: .o .c .h + +$(EXE): $(OBJS) $(HEADERS) + $(CC) -o $@ $(CFLAGS) $(CPPFLAGS) $(OBJS) $(LDFLAGS) $(LIBS) + +$(OBJS): $(HEADERS) + +clean: + rm -f $(OBJS) $(EXE) + +distclean: clean + rm -rf Makefile config.h config.h.in~ config.log config.cache config.status autom4te.cache test/runtest.sh pgreplay-$(VERSION).tar* + +tarball: distclean + if tar --version 2>/dev/null | grep -q GNU; then : ;else echo "this requires GNU tar" 1>&2; exit 1; fi + ln -s "`pwd`" /tmp/pgreplay-$(VERSION) + tar -cf pgreplay-$(VERSION).tar -h -C /tmp --exclude .svn --exclude CVS --exclude pgreplay-$(VERSION).tar pgreplay-$(VERSION) --exclude test/testrun.c + rm /tmp/pgreplay-$(VERSION) + gzip -9 pgreplay-$(VERSION).tar + +$(EXE).html: $(EXE).1 + groff -Thtml -mman $< > $@ + +test: $(EXE) test/runtest.sh + cd test && ./runtest.sh + +install: install_bin install_man + +install_bin: $(EXE) + $(INSTALL) -d $(DESTDIR)$(bindir) + $(INSTALL) -m 0755 $< $(DESTDIR)$(bindir)/$< + +install_man: $(EXE).1 + $(INSTALL) -d $(DESTDIR)$(mandir)/man1 + $(INSTALL) -m 0644 $< $(DESTDIR)$(mandir)/man1/$< + +install_html: $(EXE).html + $(INSTALL) -d $(DESTDIR)$(htmldir) + $(INSTALL) -m 0644 $< $(DESTDIR)$(htmldir)/$< + +.c.o: + $(CC) -c $(CFLAGS) -DVERSION='"$(VERSION)"' $< diff --git 
a/pgreplay/README.md b/pgreplay/README.md new file mode 100644 index 00000000000..ef6a19b4b3c --- /dev/null +++ b/pgreplay/README.md @@ -0,0 +1,334 @@ +pgreplay - record and replay real-life database workloads +========================================================= + +pgreplay reads a PostgreSQL log file (*not* a WAL file), extracts the +SQL statements and executes them in the same order and with the original +timing against a PostgreSQL database. + +If the execution of statements gets behind schedule, warning messages +are issued that indicate that the server cannot handle the load in a +timely fashion. + +A final report gives you a useful statistical analysis of your workload +and its execution. + +The idea is to replay a real-world database workload as exactly as possible. + +This is useful for performance tests, particularly in the following +situations: +- You want to compare the performance of your PostgreSQL application + on different hardware or different operating systems. +- You want to upgrade your database and want to make sure that the new + database version does not suffer from performance regressions that + affect you. + +Moreover, pgreplay can give you some feeling as to how your application +*might* scale by allowing you to try to replay the workload at a higher +speed (if that is possible; see +[implementation details](#implementation-details) below). +Be warned, though, that 500 users working at double speed is not really +the same as 1000 users working at normal speed. + +While pgreplay will find out if your database application will encounter +performance problems, it does not provide a lot of help in the analysis of +the cause of these problems. Combine pgreplay with a specialized analysis +program like [pgBadger](https://pgbadger.darold.net/) for that. 
+ +As an additional feature, pgreplay lets you split the replay in two +parts: you can parse the log file and create a "replay file", which +contains just the statements to be replayed and is hopefully much +smaller than the original log file. +Such a replay file can then be run against a database. + +pgreplay is written by Laurenz Albe and is inspired by "Playr" +which never made it out of Beta. + +Installation +============ + +pgreplay needs PostgreSQL 8.0 or better. + +It is supposed to compile without warnings and run on all platforms +supported by PostgreSQL. +Since I only got to test it on Linux, AIX, FreeBSD and Windows, there may be +problems with other platforms. I am interested in reports and fixes for +these platforms. +On Windows, only the MinGW build environment is supported (I have no +other compiler). That means that there is currently no 64-bit build +for Windows (but a 32-bit executable should work fine anywhere). + +To build pgreplay, you will need the `pg_config` utility. If you installed +PostgreSQL using installation packages, you will probably have to install +the development package that contains `pg_config` and the header files. + +If `pg_config` is on the `PATH`, the installation process will look like this: + +- unpack the tarball +- `./configure` +- `make` +- `make test` (optional, described below) +- `make install` (as superuser) + +If your PostgreSQL installation is in a nonstandard directory, you +will have to use the `--with-postgres=` +option of `configure`. + +Unless you link it statically, pgreplay requires the PostgreSQL client +shared library on the system where it is run. 
+ +The following utilities are only necessary if you intend to develop pgreplay: +- autoconf 2.62 or better to generate `configure` +- GNU tar to `make tarball` (unless you want to roll it by hand) +- groff to make the HTML documentation with `make html` + +Docker +------ + +The `Dockerfile` provided with the software can be used as a starting +point for creating a container that runs pgreplay. Adapt it as necessary. + +Here are commands to build and run the container: + +``` +# build the image +docker build -t laurenz/pgreplay -f Dockerfile . + +# and run it +docker run --rm -ti -v $(pwd):/app -w /app laurenz/pgreplay pgreplay -h +``` + +Testing +------- + +You can run a test on pgreplay before installing by running `make test`. +This will parse sample log files and check that the result is as +expected. + +Then an attempt is made to replay the log files and check if that +works as expected. For this you need psql installed and a PostgreSQL server +running (on this or another machine) so that the following command +will succeed: + + psql -U postgres -d postgres -l + +You can set up the `PGPORT` and `PGHOST` environment variables and a password +file for the user if necessary. + +There have to be login roles named `hansi` and `postgres` in the database, +and both users must be able to connect without a password. Only `postgres` +will be used to run actual SQL statements. The regression test will create +a table `runtest` and use it, and it will drop the table when it is done. + +Usage +===== + +First, you will need to record your real-life workload. 
+For that, set the following parameters in `postgresql.conf`: + +- `log_min_messages = error` (or more) + (if you know that you have no cancel requests, `log` will do) +- `log_min_error_statement = log` (or more) +- `log_connections = on` +- `log_disconnections = on` +- `log_line_prefix = '%m|%u|%d|%c|'` (if you don't use CSV logging) +- `log_statement = 'all'` +- `lc_messages` must be set to English (the encoding does not matter) +- `bytea_output = escape` (from version 9.0 on, only if you want to replay + the log on 8.4 or earlier) + +It is highly recommended that you use CSV logging, because anything that +the PostgreSQL server or any loaded modules write to standard error will +be written to the stderr log and might confuse the parser. + +Then let your users have their way with the database. + +Make sure that you have a `pg_dumpall` of the database cluster from the time +of the start of your log file (or use the `-b` option with the time of your +backup). Alternatively, you can use point-in-time-recovery to clone your +database at the appropriate time. + +When you are done, restore the database (in the "before" state) to the +machine where you want to perform the load test and run pgreplay against +that database. + +Try to create a scenario as similar to your production system as +possible (except for the change you want to test, of course). For example, +if your clients connect over the network, run pgreplay on a different +machine from where the database server is running. + +Since passwords are not logged (and pgreplay consequently has no way of +knowing them), you have two options: either change `pg_hba.conf` on the +test database to allow `trust` authentication or (if that is unacceptable) +create a password file as described by the PostgreSQL documentation. +Alternatively, you can change the passwords of all application users +to one single password that you supply to pgreplay with the `-W` option. 
+ +Limitations +=========== + +pgreplay can only replay what is logged by PostgreSQL. +This leads to some limitations: + +- `COPY` statements will not be replayed, because the copy data are not logged. + I could have supported `COPY TO` statements, but that would have imposed a + requirement that the directory structure on the replay system must be + identical to the original machine. + And if your application runs on the same machine as your database and they + interact on the file system, pgreplay will probably not help you much + anyway. +- Fast-path API function calls are not logged and will not be replayed. + Unfortunately, this includes the Large Object API. +- Since the log file is always written in the database encoding (which you + can specify with the `-E` switch of pgreplay), all `SET client_encoding` + statements will be ignored. +- If your cluster contains databases with different encoding, the log file + will have mixed encoding as well. You cannot use pgreplay well in such + an environment, because many statements against databases whose + encoding does not match the `-E` switch will fail. +- Since the preparation time of prepared statements is not logged (unless + `log_min_messages` is `debug2` or more), these statements will be prepared + immediately before they are first executed during replay. +- All parameters of prepared statements are logged as strings, no matter + what type was originally specified during bind. + This can cause errors during replay with expressions like `$1 + $2`, + which will cause the error `operator is not unique: unknown + unknown`. + +While pgreplay makes sure that commands are sent to the server in the +order in which they were originally executed, there is no way to guarantee +that they will be executed in the same order during replay: Network +delay, processor contention and other factors may cause a later command +to "overtake" an earlier one. 
While this does not matter if the +commands don't affect each other, it can lead to SQL statements hitting +locks unexpectedly, causing replay to deadlock and "hang". +This is particularly likely if many different sessions change the same data +repeatedly in short intervals. + +You can work around this problem by canceling the waiting statement with +pg_cancel_backend. Replay should continue normally after that. + +Implementation details +====================== + +pgreplay will track the "session ID" associated with each log entry (the +session ID uniquely identifies a database connection). +For each new session ID, a new database connection will be opened during +replay. Each statement will be sent on the corresponding connection, so +transactions are preserved and concurrent sessions cannot get in each +other's way. + +The order of statements in the log file is strictly preserved, so there +cannot be any race conditions caused by different execution speeds on +separate connections. On the other hand, that means that long running +queries on one connection may stall execution on concurrent connections, +but that's all you can get if you want to reproduce the exact same +workload on a system that behaves differently. + +As an example, consider this (simplified) log file: + + session 1|connect + session 2|connect + session 1|statement: BEGIN + session 1|statement: SELECT something(1) + session 2|statement: BEGIN + session 2|statement: SELECT something(2) + session 1|statement: SELECT something(3) + session 2|statement: ROLLBACK + session 2|disconnect + session 1|statement: COMMIT + session 2|disconnect + +This will cause two database connections to be opened, so the `ROLLBACK` in +session 2 will not affect session 1. +If `SELECT something(2)` takes longer than expected (longer than it did in +the original), that will not stall the execution of `SELECT something(3)` +because it runs on a different connection. 
The `ROLLBACK`, however, has to +wait for the completion of the long statement. Since the order of statements +is preserved, the `COMMIT` on session 1 will have to wait until the `ROLLBACK` +on session 2 has started (but it does not have to wait for the completion of +the `ROLLBACK`). + +pgreplay is implemented in C and makes heavy use of asynchronous command +processing (which is the reason why it is implemented in C). +This way a single process can handle many concurrent connections, which +makes it possible to get away without multithreading or multiprocessing. + +This avoids the need for synchronization and many portability problems. +But since TINSTAAFL, the choice of C brings along its own portability +problems. Go figure. + +Replay file format +------------------ + +The replay file is a binary file, integer numbers are stored in network +byte order. + +Each record in the replay file corresponds to one database operation +and is constructed as follows: +- 4-byte `unsigned int`: log file timestamp in seconds since 2000-01-01 +- 4-byte `unsigned int`: fractional part of log file timestamp in microseconds +- 8-byte `unsigned int`: session id +- 1-byte `unsigned int`: type of the database action: + - 0 is connect + - 1 is disconnect + - 2 is simple statement execution + - 3 is statement preparation + - 4 is execution of a prepared statement + - 5 is cancel request +- The remainder of the record is specific to the action, strings are stored + with a preceding 4-byte unsigned int that contains the length. + Read the source for details. +- Each record is terminated by a new-line character (byte 0x0A). + + +Using for polardb +====== +- make sure pg_stat_statements, plpgsql, and system_stats are installed. For system_stats installation instructions, refer to https://github.com/EnterpriseDB/system_stats. 
+```sql +postgres=# create extension plpgsql; +postgres=# create extension pg_stat_statements; +postgres=# create extension system_stats; +postgres=# \dx + List of installed extensions + Name | Version | Schema | Description +--------------------+---------+------------+----------------------------------------------------------- + pg_stat_statements | 1.6 | public | track execution statistics of all SQL statements executed + plpgsql | 1.0 | pg_catalog | PL/pgSQL procedural language + system_stats | 1.0 | public | EnterpriseDB system statistics for PostgreSQL +(3 rows) +``` + +- postgres.conf configure +```sh +# log setting for gpreplay +log_min_messages = error +# (if you know that you have no cancel requests, log will do) +log_min_error_statement = log +log_connections = on +log_disconnections = on +log_line_prefix = '%t|%u|%d|%c|' +log_statement = 'all' +# lc_messages must be set to English (the encoding does not matter) +bytea_output = escape +# (from version 9.0 on, only if you want to replay the log on 8.4 or earlier) + +polar_enable_log_search_path = true +polar_enable_log_parameter_type = true +``` +- run read audit log from $PGDATA/log/replay_xxx.log and replay sql to databse, reporting monitor info every 3 seconds. +```sh +./pgreplay -P -m 3 -h 127.0.0.1 -p 5432 -W benchmarksql $PGDATA/log/replay_xxx.log +``` +Support +======= + +If you have a problem or question, the preferred option is to [open an +issue](https://github.com/laurenz/pgreplay/issues). +This requires a GitHub account. + +Professional support can be bought from +[CYBERTEC PostgreSQL International GmbH](https://www.cybertec-postgresql.com/). + +TODO list +========= + +Nothing currently. Tell me if you have good ideas. diff --git a/pgreplay/TODO b/pgreplay/TODO new file mode 100644 index 00000000000..37e54e61adb --- /dev/null +++ b/pgreplay/TODO @@ -0,0 +1,7 @@ +This is a list of things that could be improved. +Help is welcome! + +- Use "single-row mode" with PQsetSingleRowMode. 
+ This would reduce the memory requirements for large result sets. + + Noted by Thomas Dziedzic in #15. diff --git a/pgreplay/config.guess b/pgreplay/config.guess new file mode 100755 index 00000000000..31e01efec3e --- /dev/null +++ b/pgreplay/config.guess @@ -0,0 +1,1463 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2017 Free Software Foundation, Inc. + +timestamp='2017-11-07' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. +# +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# +# Please send patches to . + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. 
+ +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2017 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. 
+ +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case "${UNAME_SYSTEM}" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. 
+ LIBC=gnu + + eval $set_cc_for_build + cat <<-EOF > $dummy.c + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || \ + echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown + ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. + case "${UNAME_MACHINE_ARCH}" in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? 
+ os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. + case "${UNAME_MACHINE_ARCH}" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}${abi}" + exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} + exit ;; + *:MidnightBSD:*:*) + echo ${UNAME_MACHINE}-unknown-midnightbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:Sortix:*:*) + echo ${UNAME_MACHINE}-unknown-sortix + exit ;; + *:Redox:*:*) + echo ${UNAME_MACHINE}-unknown-redox + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + 
esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? 
+ trap '' 0 + exit $exitcode ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH=i386 + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH=x86_64 + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo 
powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) 
HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = hppa2.0w ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | 
convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + 
*:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case ${UNAME_PROCESSOR} in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + *:MINGW64*:*) + echo ${UNAME_MACHINE}-pc-mingw64 + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + *:MSYS*:*) + echo ${UNAME_MACHINE}-pc-msys + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + aarch64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) 
UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + else + echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + cris:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + crisv32:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + e2k:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + frv:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + hexagon:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:Linux:*:*) + echo ${UNAME_MACHINE}-pc-linux-${LIBC} + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + k1om:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test 
x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + ;; + mips64el:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + openrisc*:Linux:*:*) + echo or1k-unknown-linux-${LIBC} + exit ;; + or32:Linux:*:* | or1k*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-${LIBC} + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; + PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; + *) echo hppa-unknown-linux-${LIBC} ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-${LIBC} + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-${LIBC} + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-${LIBC} + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-${LIBC} + exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux-${LIBC} + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-${LIBC} + exit ;; + x86_64:Linux:*:*) + echo ${UNAME_MACHINE}-pc-linux-${LIBC} + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. 
+ echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. 
+ case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configure will decide that + # this is a cross-build. + echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. 
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + 
RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ echo i586-pc-haiku + exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + eval $set_cc_for_build + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # Avoid executing cc on OS X 10.9, as it ships with a stub + # that puts up a graphical alert prompting to install + # developer tools. Any system running Mac OS X 10.7 or + # later (Darwin 11 and later) is required to have a 64-bit + # processor. This is not true of the ARM version of Darwin + # that Apple uses in portable devices. 
+ UNAME_PROCESSOR=x86_64 + fi + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; + NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = 386; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; + i*86:AROS:*:*) + echo ${UNAME_MACHINE}-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo ${UNAME_MACHINE}-unknown-esx + exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; +esac + +echo "$0: unable to guess system type" >&2 + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-functions 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/pgreplay/config.h.in b/pgreplay/config.h.in new file mode 100644 index 00000000000..a665811518b --- /dev/null +++ b/pgreplay/config.h.in @@ -0,0 +1,138 @@ +/* config.h.in. Generated from configure.in by autoheader. */ + +/* Define if building universal (internal helper macro) */ +#undef AC_APPLE_UNIVERSAL_BUILD + +/* Define to 1 if you have the header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the `gettimeofday' function. */ +#undef HAVE_GETTIMEOFDAY + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the `pq' library (-lpq). */ +#undef HAVE_LIBPQ + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NETINET_IN_H + +/* Define to 1 if you have the `select' function. */ +#undef HAVE_SELECT + +/* Define to 1 if you have the `setenv' function. */ +#undef HAVE_SETENV + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strchr' function. */ +#undef HAVE_STRCHR + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the `strtoul' function. */ +#undef HAVE_STRTOUL + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/time.h> header file. */ +#undef HAVE_SYS_TIME_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of `unsigned int', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_INT + +/* The size of `unsigned long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG + +/* The size of `unsigned short', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_SHORT + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */ +#undef TIME_WITH_SYS_TIME + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +# undef WORDS_BIGENDIAN +# endif +#endif + +/* Enable large inode numbers on Mac OS X 10.5. 
*/ +#ifndef _DARWIN_USE_64_BIT_INODE +# define _DARWIN_USE_64_BIT_INODE 1 +#endif + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define for large files, on AIX-style hosts. */ +#undef _LARGE_FILES + +/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/types.h>, + <stdint.h>, or <inttypes.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +#undef _UINT32_T + +/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/types.h>, + <stdint.h>, or <inttypes.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +#undef _UINT64_T + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +#undef size_t + +/* Define to `int' if <sys/types.h> does not define. */ +#undef ssize_t + +/* Define to the type of an unsigned integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +#undef uint32_t + +/* Define to the type of an unsigned integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +#undef uint64_t diff --git a/pgreplay/config.sub b/pgreplay/config.sub new file mode 100755 index 00000000000..00f68b8e5f3 --- /dev/null +++ b/pgreplay/config.sub @@ -0,0 +1,1848 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2017 Free Software Foundation, Inc. + +timestamp='2017-11-23' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/>. 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches to <config-patches@gnu.org>. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS + +Canonicalize a configuration name. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." 
+ +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2017 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. +maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + android-linux) + os=-linux-android + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. 
We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze*) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*178) + os=-lynxos178 + ;; + -lynx*5) + os=-lynxos5 + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arceb \ + | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ + | avr | avr32 \ + | ba \ + | be32 | be64 \ + | bfin \ + | c4x | c8051 | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i860 | i960 | ia16 | ia64 \ + | ip2k | iq2000 \ + | k1om \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | 
mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 | nios2eb | nios2el \ + | ns16k | ns32k \ + | open8 | or1k | or1knd | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ + | pyramid \ + | riscv32 | riscv64 \ + | rl78 | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | visium \ + | wasm32 \ + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + leon|leon[3-9]) + basic_machine=sparc-$basic_machine + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + xgate) + basic_machine=$basic_machine-unknown + os=-none + ;; + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse 
beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | aarch64-* | aarch64_be-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | ba-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | c8051-* | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | e2k-* | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | hexagon-* \ + | i*86-* | i860-* | i960-* | ia16-* | ia64-* \ + | ip2k-* | iq2000-* \ + | k1om-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ + | microblaze-* | microblazeel-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa32r6-* | mipsisa32r6el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64r6-* | mipsisa64r6el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipsr5900-* 
| mipsr5900el-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* | nios2eb-* | nios2el-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ + | or1k*-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ + | pyramid-* \ + | riscv32-* | riscv64-* \ + | rl78-* | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ + | tahoe-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile*-* \ + | tron-* \ + | ubicom32-* \ + | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ + | vax-* \ + | visium-* \ + | wasm32-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + asmjs) + basic_machine=asmjs-unknown + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; 
+ cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2*) + basic_machine=m68k-bull + os=-sysv3 + ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + os=$os"spe" + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + 
basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + 
os=-sysv + ;; + microblaze*) + basic_machine=microblaze-xilinx + ;; + mingw64) + basic_machine=x86_64-pc + os=-mingw64 + ;; + mingw32) + basic_machine=i686-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + moxiebox) + basic_machine=moxie-unknown + os=-moxiebox + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + msys) + basic_machine=i686-pc + os=-msys + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + nacl) + basic_machine=le32-unknown + os=-nacl + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem + ;; + 
nsr-tandem) + basic_machine=nsr-tandem + ;; + nsx-tandem) + basic_machine=nsx-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc | ppcbe) basic_machine=powerpc-unknown + ;; + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) 
basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + os=-rdos + ;; + rdos32) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + 
basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tile*) + basic_machine=$basic_machine-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + wasm32) + basic_machine=wasm32-unknown + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + x64) + basic_machine=x86_64-pc + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases that might get confused + # with valid system types. + # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # Now accept the basic system types. + # The portable systems comes first. 
+ # Each alternative MUST end in a * to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* | -plan9* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \ + | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-musl* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -pikeos*) + # Until real need of OS specific support for + # particular features comes up, bare metal + # configurations are quite functional. + case $basic_machine in + arm*) + os=-eabi + ;; + *) + os=-elf + ;; + esac + ;; + -nacl*) + ;; + -ios) + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. 
+ os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + c8051-*) + os=-elf + ;; + hexagon-*) + os=-elf + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + pru-*) + os=-elf + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-functions 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/pgreplay/configure b/pgreplay/configure new file mode 100755 index 00000000000..60358dfbb7b --- /dev/null +++ b/pgreplay/configure @@ -0,0 +1,5806 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for pgreplay 1.4.0. +# +# Report bugs to . +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. 
Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org and +$0: https://github.com/laurenz/pgreplay/issues about your +$0: system, including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='pgreplay' +PACKAGE_TARNAME='pgreplay' +PACKAGE_VERSION='1.4.0' +PACKAGE_STRING='pgreplay 1.4.0' +PACKAGE_BUGREPORT='https://github.com/laurenz/pgreplay/issues' +PACKAGE_URL='http://laurenz.github.io/pgreplay/' + +ac_unique_file="parse.c" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='LTLIBOBJS +LIBOBJS +POW_LIB +EGREP +GREP +CPP +EXTRA_OBJS +ac_prefix_program +pgbindir +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +build_os +build_vendor +build_cpu +build +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +with_postgres +enable_largefile +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. 
+# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures pgreplay 1.4.0 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/pgreplay] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +System types: + --build=BUILD configure for building on BUILD [guessed] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of pgreplay 1.4.0:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --disable-largefile omit support for large files + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-postgres=DIR specify location of pg_config for your PostgreSQL + installation + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. 
-L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +pgreplay home page: . +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. 
+ if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +pgreplay configure 1.4.0 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" 
>&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} +( $as_echo "## --------------------------------------------------------- ## +## Report this to https://github.com/laurenz/pgreplay/issues ## +## --------------------------------------------------------- ##" + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. 
+ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type + +# ac_fn_c_find_uintX_t LINENO BITS VAR +# ------------------------------------ +# Finds an unsigned integer type with width BITS, setting cache variable VAR +# accordingly. +ac_fn_c_find_uintX_t () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uint$2_t" >&5 +$as_echo_n "checking for uint$2_t... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + # Order is important - never check a type that is potentially smaller + # than half of the expected target width. + for ac_type in uint$2_t 'unsigned int' 'unsigned long int' \ + 'unsigned long long int' 'unsigned short int' 'unsigned char'; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !((($ac_type) -1 >> ($2 / 2 - 1)) >> ($2 / 2 - 1) == 3)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + case $ac_type in #( + uint$2_t) : + eval "$3=yes" ;; #( + *) : + eval "$3=\$ac_type" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + if eval test \"x\$"$3"\" = x"no"; then : + +else + break +fi + done +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_find_uintX_t + +# ac_fn_c_compute_int LINENO EXPR VAR INCLUDES +# -------------------------------------------- +# Tries to find the compile-time value of EXPR in a program that includes +# INCLUDES, setting VAR accordingly. Returns whether the value could be +# computed +ac_fn_c_compute_int () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if test "$cross_compiling" = yes; then + # Depending upon the size, compute the lo and hi bounds. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) >= 0)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_lo=0 ac_mid=0 + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) <= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=$ac_mid; break +else + as_fn_arith $ac_mid + 1 && ac_lo=$as_val + if test $ac_lo -le $ac_mid; then + ac_lo= ac_hi= + break + fi + as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) < 0)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=-1 ac_mid=-1 + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) >= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_lo=$ac_mid; break +else + as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val + if test $ac_mid -le $ac_hi; then + ac_lo= ac_hi= + break + fi + as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + done +else + ac_lo= ac_hi= +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +# Binary search between lo and hi bounds. +while test "x$ac_lo" != "x$ac_hi"; do + as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +static int test_array [1 - 2 * !(($2) <= $ac_mid)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_hi=$ac_mid +else + as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +done +case $ac_lo in #(( +?*) eval "$3=\$ac_lo"; ac_retval=0 ;; +'') ac_retval=1 ;; +esac + else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +static long int longval () { return $2; } +static unsigned long int ulongval () { return $2; } +#include +#include +int +main () +{ + + FILE *f = fopen ("conftest.val", "w"); + if (! f) + return 1; + if (($2) < 0) + { + long int i = longval (); + if (i != ($2)) + return 1; + fprintf (f, "%ld", i); + } + else + { + unsigned long int i = ulongval (); + if (i != ($2)) + return 1; + fprintf (f, "%lu", i); + } + /* Do not output a trailing newline, as this causes \r\n confusion + on some platforms. */ + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + echo >>conftest.val; read $3 &5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by pgreplay $as_me 1.4.0, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_config_headers="$ac_config_headers config.h" + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. 
+ + +# Make sure we can run config.sub. +$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +$as_echo_n "checking build system type... " >&6; } +if ${ac_cv_build+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +$as_echo "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + + +# Checks for programs. +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. 
+# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. 
+ : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in gcc cc + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in gcc cc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. 
+ break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... " >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +# add -Wall to CFLAGS for gcc +if test "$GCC" = yes; then CFLAGS="-Wall $CFLAGS"; fi + +# check if ld supports -rpath +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the linker supports -rpath" >&5 +$as_echo_n "checking whether the linker supports -rpath... " >&6; } +save_LDFLAGS="$LDFLAGS" +LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/lib" + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +main() {} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + rpath=yes +else + rpath=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LDFLAGS="$save_LDFLAGS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $rpath" >&5 +$as_echo "$rpath" >&6; } + + +# Check whether --with-postgres was given. +if test "${with_postgres+set}" = set; then : + withval=$with_postgres; if ! test -x "$with_postgres"/pg_config; then + echo "*** pg_config not found in '$with_postgres'" 1>&2 + exit 1 + fi + pgbindir=`"$with_postgres"/pg_config --bindir` + + if test $build_os = mingw32; then pglibdir="$pgbindir"; else pglibdir=`"$with_postgres"/pg_config --libdir`; fi + pgincludedir=`"$with_postgres"/pg_config --includedir` + LDFLAGS="$LDFLAGS -L$pglibdir" + if test $rpath = yes; then LDFLAGS="$LDFLAGS -Wl,-rpath,$pglibdir"; fi + CFLAGS="$CFLAGS -I$pgincludedir" + PATH=`"$with_postgres"/pg_config --bindir`:$PATH +fi + + +# Install with PostgreSQL by default +if test "x$prefix" = xNONE; then + $as_echo_n "checking for prefix by " >&6 + # Extract the first word of "pg_config", so it can be a program name with args. +set dummy pg_config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_prefix_program+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_prefix_program in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_prefix_program="$ac_prefix_program" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_prefix_program="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_prefix_program=$ac_cv_path_ac_prefix_program +if test -n "$ac_prefix_program"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_prefix_program" >&5 +$as_echo "$ac_prefix_program" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + if test -n "$ac_prefix_program"; then + prefix=`$as_dirname -- "$ac_prefix_program" || +$as_expr X"$ac_prefix_program" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_prefix_program" : 'X\(//\)[^/]' \| \ + X"$ac_prefix_program" : 'X\(//\)$' \| \ + X"$ac_prefix_program" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$ac_prefix_program" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + prefix=`$as_dirname -- "$prefix" || +$as_expr X"$prefix" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$prefix" : 'X\(//\)[^/]' \| \ + X"$prefix" : 'X\(//\)$' \| \ + X"$prefix" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$prefix" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + fi +fi + + +# Checks for libraries. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for PQsendPrepare in -lpq" >&5 +$as_echo_n "checking for PQsendPrepare in -lpq... " >&6; } +if ${ac_cv_lib_pq_PQsendPrepare+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpq $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char PQsendPrepare (); +int +main () +{ +return PQsendPrepare (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pq_PQsendPrepare=yes +else + ac_cv_lib_pq_PQsendPrepare=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pq_PQsendPrepare" >&5 +$as_echo "$ac_cv_lib_pq_PQsendPrepare" >&6; } +if test "x$ac_cv_lib_pq_PQsendPrepare" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBPQ 1 +_ACEOF + + LIBS="-lpq $LIBS" + +else + as_fn_error $? " +*** cannot locate PostgreSQL client library +*** required is version 8.0 or better +" "$LINENO" 5 +fi + + +# special Windows settings +if test $build_os = mingw32; then + EXTRA_OBJS=windows.o + + # assume socks library is present + # we have a problem using AC_CHECK_LIB because the function is __stdcall + LIBS="$LIBS -lwsock32" +fi + +# Checks for header files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether time.h and sys/time.h may both be included" >&5 +$as_echo_n "checking whether time.h and sys/time.h may both be included... " >&6; } +if ${ac_cv_header_time+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include + +int +main () +{ +if ((struct tm *) 0) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_time=yes +else + ac_cv_header_time=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_time" >&5 +$as_echo "$ac_cv_header_time" >&6; } +if test $ac_cv_header_time = yes; then + +$as_echo "#define TIME_WITH_SYS_TIME 1" >>confdefs.h + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. 
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? 
"C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in fcntl.h netinet/in.h stdint.h stdlib.h string.h sys/time.h unistd.h inttypes.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +ac_fn_c_check_header_compile "$LINENO" "libpq-fe.h" "ac_cv_header_libpq_fe_h" "/* dummy */ +" +if test "x$ac_cv_header_libpq_fe_h" = xyes; then : + +else + as_fn_error $? "PostgreSQL header files not found" "$LINENO" 5 +fi + + + +# Checks for typedefs, structures, and compiler characteristics. +# Check whether --enable-largefile was given. 
+if test "${enable_largefile+set}" = set; then : + enableval=$enable_largefile; +fi + +if test "$enable_largefile" != no; then + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for special C compiler options needed for large files" >&5 +$as_echo_n "checking for special C compiler options needed for large files... " >&6; } +if ${ac_cv_sys_largefile_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_sys_largefile_CC=no + if test "$GCC" != yes; then + ac_save_CC=$CC + while :; do + # IRIX 6.2 and later do not support large files by default, + # so use the C compiler's -n32 option if that helps. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF + if ac_fn_c_try_compile "$LINENO"; then : + break +fi +rm -f core conftest.err conftest.$ac_objext + CC="$CC -n32" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_largefile_CC=' -n32'; break +fi +rm -f core conftest.err conftest.$ac_objext + break + done + CC=$ac_save_CC + rm -f conftest.$ac_ext + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_CC" >&5 +$as_echo "$ac_cv_sys_largefile_CC" >&6; } + if test "$ac_cv_sys_largefile_CC" != no; then + CC=$CC$ac_cv_sys_largefile_CC + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _FILE_OFFSET_BITS value needed for large files" >&5 +$as_echo_n "checking for _FILE_OFFSET_BITS value needed for large files... 
" >&6; } +if ${ac_cv_sys_file_offset_bits+:} false; then : + $as_echo_n "(cached) " >&6 +else + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_file_offset_bits=no; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define _FILE_OFFSET_BITS 64 +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 
1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_file_offset_bits=64; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cv_sys_file_offset_bits=unknown + break +done +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_file_offset_bits" >&5 +$as_echo "$ac_cv_sys_file_offset_bits" >&6; } +case $ac_cv_sys_file_offset_bits in #( + no | unknown) ;; + *) +cat >>confdefs.h <<_ACEOF +#define _FILE_OFFSET_BITS $ac_cv_sys_file_offset_bits +_ACEOF +;; +esac +rm -rf conftest* + if test $ac_cv_sys_file_offset_bits = unknown; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _LARGE_FILES value needed for large files" >&5 +$as_echo_n "checking for _LARGE_FILES value needed for large files... " >&6; } +if ${ac_cv_sys_large_files+:} false; then : + $as_echo_n "(cached) " >&6 +else + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_large_files=no; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define _LARGE_FILES 1 +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. 
*/ +#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_sys_large_files=1; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cv_sys_large_files=unknown + break +done +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_large_files" >&5 +$as_echo "$ac_cv_sys_large_files" >&6; } +case $ac_cv_sys_large_files in #( + no | unknown) ;; + *) +cat >>confdefs.h <<_ACEOF +#define _LARGE_FILES $ac_cv_sys_large_files +_ACEOF +;; +esac +rm -rf conftest* + fi + + +fi + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" +if test "x$ac_cv_type_size_t" = xyes; then : + +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned int +_ACEOF + +fi + +ac_fn_c_check_type "$LINENO" "ssize_t" "ac_cv_type_ssize_t" "$ac_includes_default" +if test "x$ac_cv_type_ssize_t" = xyes; then : + +else + +cat >>confdefs.h <<_ACEOF +#define ssize_t int +_ACEOF + +fi + +ac_fn_c_find_uintX_t "$LINENO" "64" "ac_cv_c_uint64_t" +case $ac_cv_c_uint64_t in #( + no|yes) ;; #( + *) + +$as_echo "#define _UINT64_T 1" >>confdefs.h + + +cat >>confdefs.h <<_ACEOF +#define uint64_t $ac_cv_c_uint64_t +_ACEOF +;; + esac + +ac_fn_c_find_uintX_t "$LINENO" "32" "ac_cv_c_uint32_t" +case $ac_cv_c_uint32_t in #( + no|yes) ;; #( + *) + +$as_echo "#define _UINT32_T 1" >>confdefs.h + + +cat >>confdefs.h <<_ACEOF +#define uint32_t $ac_cv_c_uint32_t +_ACEOF +;; + esac + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned int" >&5 +$as_echo_n "checking size of unsigned int... 
" >&6; } +if ${ac_cv_sizeof_unsigned_int+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned int))" "ac_cv_sizeof_unsigned_int" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_unsigned_int" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned int) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_int=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_int" >&5 +$as_echo "$ac_cv_sizeof_unsigned_int" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_INT $ac_cv_sizeof_unsigned_int +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned long" >&5 +$as_echo_n "checking size of unsigned long... 
" >&6; } +if ${ac_cv_sizeof_unsigned_long+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long))" "ac_cv_sizeof_unsigned_long" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_unsigned_long" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned long) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_long=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_long" >&5 +$as_echo "$ac_cv_sizeof_unsigned_long" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_LONG $ac_cv_sizeof_unsigned_long +_ACEOF + + +# The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned short" >&5 +$as_echo_n "checking size of unsigned short... 
" >&6; } +if ${ac_cv_sizeof_unsigned_short+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned short))" "ac_cv_sizeof_unsigned_short" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_unsigned_short" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (unsigned short) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_unsigned_short=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_unsigned_short" >&5 +$as_echo "$ac_cv_sizeof_unsigned_short" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_UNSIGNED_SHORT $ac_cv_sizeof_unsigned_short +_ACEOF + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5 +$as_echo_n "checking whether byte ordering is bigendian... " >&6; } +if ${ac_cv_c_bigendian+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_c_bigendian=unknown + # See if we're dealing with a universal compiler. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifndef __APPLE_CC__ + not a universal capable compiler + #endif + typedef int dummy; + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + + # Check for potential -arch flags. It is not universal unless + # there are at least two -arch flags with different values. 
+ ac_arch= + ac_prev= + for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do + if test -n "$ac_prev"; then + case $ac_word in + i?86 | x86_64 | ppc | ppc64) + if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then + ac_arch=$ac_word + else + ac_cv_c_bigendian=universal + break + fi + ;; + esac + ac_prev= + elif test "x$ac_word" = "x-arch"; then + ac_prev=arch + fi + done +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + if test $ac_cv_c_bigendian = unknown; then + # See if sys/param.h defines the BYTE_ORDER macro. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + +int +main () +{ +#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \ + && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \ + && LITTLE_ENDIAN) + bogus endian macros + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + # It does; now see whether it defined to BIG_ENDIAN or not. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + +int +main () +{ +#if BYTE_ORDER != BIG_ENDIAN + not big endian + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_bigendian=yes +else + ac_cv_c_bigendian=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + fi + if test $ac_cv_c_bigendian = unknown; then + # See if defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris). + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +int +main () +{ +#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN) + bogus endian macros + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + # It does; now see whether it defined to _BIG_ENDIAN or not. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + +int +main () +{ +#ifndef _BIG_ENDIAN + not big endian + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_bigendian=yes +else + ac_cv_c_bigendian=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + fi + if test $ac_cv_c_bigendian = unknown; then + # Compile a test program. + if test "$cross_compiling" = yes; then : + # Try to guess by grepping values from an object file. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +short int ascii_mm[] = + { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 }; + short int ascii_ii[] = + { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 }; + int use_ascii (int i) { + return ascii_mm[i] + ascii_ii[i]; + } + short int ebcdic_ii[] = + { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 }; + short int ebcdic_mm[] = + { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 }; + int use_ebcdic (int i) { + return ebcdic_mm[i] + ebcdic_ii[i]; + } + extern int foo; + +int +main () +{ +return use_ascii (foo) == use_ebcdic (foo); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then + ac_cv_c_bigendian=yes + fi + if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then + if test "$ac_cv_c_bigendian" = unknown; then + ac_cv_c_bigendian=no + else + # finding both strings is unlikely to happen, but who knows? + ac_cv_c_bigendian=unknown + fi + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ + + /* Are we little or big endian? From Harbison&Steele. 
*/ + union + { + long int l; + char c[sizeof (long int)]; + } u; + u.l = 1; + return u.c[sizeof (long int) - 1] == 1; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_c_bigendian=no +else + ac_cv_c_bigendian=yes +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5 +$as_echo "$ac_cv_c_bigendian" >&6; } + case $ac_cv_c_bigendian in #( + yes) + $as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h +;; #( + no) + ;; #( + universal) + +$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h + + ;; #( + *) + as_fn_error $? "unknown endianness + presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;; + esac + + +# Checks for library functions. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working strtod" >&5 +$as_echo_n "checking for working strtod... " >&6; } +if ${ac_cv_func_strtod+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_func_strtod=no +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +$ac_includes_default +#ifndef strtod +double strtod (); +#endif +int +main() +{ + { + /* Some versions of Linux strtod mis-parse strings with leading '+'. */ + char *string = " +69"; + char *term; + double value; + value = strtod (string, &term); + if (value != 69 || term != (string + 4)) + return 1; + } + + { + /* Under Solaris 2.4, strtod returns the wrong value for the + terminating character under some conditions. 
*/ + char *string = "NaN"; + char *term; + strtod (string, &term); + if (term != string && *(term - 1) == 0) + return 1; + } + return 0; +} + +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_func_strtod=yes +else + ac_cv_func_strtod=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_strtod" >&5 +$as_echo "$ac_cv_func_strtod" >&6; } +if test $ac_cv_func_strtod = no; then + case " $LIBOBJS " in + *" strtod.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strtod.$ac_objext" + ;; +esac + +ac_fn_c_check_func "$LINENO" "pow" "ac_cv_func_pow" +if test "x$ac_cv_func_pow" = xyes; then : + +fi + +if test $ac_cv_func_pow = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pow in -lm" >&5 +$as_echo_n "checking for pow in -lm... " >&6; } +if ${ac_cv_lib_m_pow+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pow (); +int +main () +{ +return pow (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_m_pow=yes +else + ac_cv_lib_m_pow=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_pow" >&5 +$as_echo "$ac_cv_lib_m_pow" >&6; } +if test "x$ac_cv_lib_m_pow" = xyes; then : + POW_LIB=-lm +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot find library containing definition of pow" >&5 +$as_echo "$as_me: WARNING: cannot find library containing definition of pow" >&2;} +fi + +fi + +fi + +for ac_func in gettimeofday select setenv strchr strtoul +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + +ac_config_files="$ac_config_files Makefile test/runtest.sh" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. 
+# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. 
Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. 
## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by pgreplay $as_me 1.4.0, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Report bugs to . +pgreplay home page: ." 
+ +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +pgreplay config.status 1.4.0 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? 
"ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "test/runtest.sh") CONFIG_FILES="$CONFIG_FILES test/runtest.sh" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. 
+if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? 
"could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi + ;; + + + esac + +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. 
+# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + +chmod u+x test/runtest.sh diff --git a/pgreplay/configure.in b/pgreplay/configure.in new file mode 100644 index 00000000000..949e17c3a30 --- /dev/null +++ b/pgreplay/configure.in @@ -0,0 +1,79 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([2.62]) +AC_INIT([pgreplay], [1.4.0], [https://github.com/laurenz/pgreplay/issues], [pgreplay], [http://laurenz.github.io/pgreplay/]) +AC_CONFIG_SRCDIR([parse.c]) +AC_CONFIG_HEADERS([config.h]) +AC_CANONICAL_BUILD + +# Checks for programs. 
+AC_PROG_INSTALL +AC_PROG_CC([gcc cc]) +# add -Wall to CFLAGS for gcc +if test "$GCC" = yes; then CFLAGS="-Wall $CFLAGS"; fi + +# check if ld supports -rpath +AC_MSG_CHECKING([whether the linker supports -rpath]) +save_LDFLAGS="$LDFLAGS" +LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/lib" +AC_LINK_IFELSE([AC_LANG_SOURCE([[main() {}]])], [rpath=yes], [rpath=no]) +LDFLAGS="$save_LDFLAGS" +AC_MSG_RESULT([$rpath]) + +AC_ARG_WITH([postgres], + [AS_HELP_STRING([--with-postgres=DIR], + [specify location of pg_config for your PostgreSQL installation])], + [if ! test -x "$with_postgres"/pg_config; then + echo "*** pg_config not found in '$with_postgres'" 1>&2 + exit 1 + fi + pgbindir=`"$with_postgres"/pg_config --bindir` + AC_SUBST([pgbindir]) + if test $build_os = mingw32; then pglibdir="$pgbindir"; else pglibdir=`"$with_postgres"/pg_config --libdir`; fi + pgincludedir=`"$with_postgres"/pg_config --includedir` + LDFLAGS="$LDFLAGS -L$pglibdir" + if test $rpath = yes; then LDFLAGS="$LDFLAGS -Wl,-rpath,$pglibdir"; fi + CFLAGS="$CFLAGS -I$pgincludedir" + PATH=`"$with_postgres"/pg_config --bindir`:$PATH]) + +# Install with PostgreSQL by default +AC_PREFIX_PROGRAM(pg_config) + +# Checks for libraries. +AC_CHECK_LIB([pq], [PQsendPrepare], [], [AC_MSG_ERROR([ +*** cannot locate PostgreSQL client library +*** required is version 8.0 or better +])]) + +# special Windows settings +if test $build_os = mingw32; then + AC_SUBST([EXTRA_OBJS], [windows.o]) + # assume socks library is present + # we have a problem using AC_CHECK_LIB because the function is __stdcall + LIBS="$LIBS -lwsock32" +fi + +# Checks for header files. +AC_HEADER_TIME +AC_CHECK_HEADERS([fcntl.h netinet/in.h stdint.h stdlib.h string.h sys/time.h unistd.h inttypes.h]) +AC_CHECK_HEADER([libpq-fe.h], [], [AC_MSG_ERROR([PostgreSQL header files not found])], [/* dummy */]) + +# Checks for typedefs, structures, and compiler characteristics. 
+AC_SYS_LARGEFILE +AC_TYPE_SIZE_T +AC_TYPE_SSIZE_T +AC_TYPE_UINT64_T +AC_TYPE_UINT32_T +AC_CHECK_SIZEOF(unsigned int) +AC_CHECK_SIZEOF(unsigned long) +AC_CHECK_SIZEOF(unsigned short) +AC_C_BIGENDIAN + +# Checks for library functions. +AC_FUNC_STRTOD +AC_CHECK_FUNCS([gettimeofday select setenv strchr strtoul]) + +AC_CONFIG_FILES([Makefile test/runtest.sh]) +AC_OUTPUT +chmod u+x test/runtest.sh diff --git a/pgreplay/database.c b/pgreplay/database.c new file mode 100644 index 00000000000..3f7184d65cd --- /dev/null +++ b/pgreplay/database.c @@ -0,0 +1,1224 @@ +#include "pgreplay.h" +#include "uthash.h" + +#include +#include +#include +#include +#ifdef HAVE_SYS_SELECT_H +# include +#else +# include +# include +#endif +#ifdef TIME_WITH_SYS_TIME +# include +# include +#else +# ifdef HAVE_SYS_TIME_H +# include +# else +# include +# endif +#endif +#ifdef WINDOWS +# include +#endif + +/* + * Utility macros to calculate with struct timeval. + * These are already defined on BSD type systems. + */ + +#ifndef timeradd +# define timeradd(a, b, result) \ + do { \ + (result)->tv_sec = (a)->tv_sec + (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec + (b)->tv_usec; \ + if ((result)->tv_usec >= 1000000) { \ + ++(result)->tv_sec; \ + (result)->tv_usec -= 1000000; \ + } \ + } while (0) +#endif + +#ifndef timersub +# define timersub(a, b, result) \ + do { \ + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ + if ((result)->tv_usec < 0) { \ + --(result)->tv_sec; \ + (result)->tv_usec += 1000000; \ + } \ + } while (0) +#endif + +extern int monitor_connect_finish(void); + +/* connect string */ +static char *conn_string; + +/* speed factor for replay */ +static double replay_factor; + +/* possible stati a connection can have */ +typedef enum { + idle = 0, + conn_wait_write, + conn_wait_read, + wait_write, + wait_read, + closed +} connstatus; + +/* prepare hash table */ +typedef struct prepare_item{ + char *name; /* key */ + int id; + 
UT_hash_handle hh; /* makes this structure hashable */ +}prepare_item; + +/* linked list element for list of open connections */ +struct dbconn { + uint64_t session_id; + PGconn *db_conn; + int socket; + connstatus status; + struct timeval session_start; + struct timeval stmt_start; + char *errmsg; + char *search_path; + prepare_item *prepare_hash; + struct dbconn *next; +}; +typedef struct dbconn dbconn; + +/* linked list of open connections */ +static struct dbconn *connections = NULL; + +/* linked list of open connections */ +PGconn *monitor_conn = NULL; + +/* remember timestamp of program start and stop */ +static struct timeval start_time; +static struct timeval stop_time; + +/* remember timestamp of first statement */ +static struct timeval first_stmt_time; +static struct timeval last_stmt_time; + +/* maximum seconds behind schedule */ +static time_t secs_behind = 0; + +/* time skipped instead of sleeping through it */ +static struct timeval jump_total = {0, 0}; + +/* statistics */ +static struct timeval stat_exec = {0, 0}; /* SQL statement execution time */ +static struct timeval stat_session = {0, 0}; /* session duration total */ +static struct timeval stat_longstmt = {0, 0}; /* session duration total */ +static unsigned long stat_stmt = 0; /* number of SQL statements */ +static unsigned long stat_prep = 0; /* number of preparations */ +static unsigned long stat_errors = 0; /* unsuccessful SQL statements and preparations */ +static unsigned long stat_actions = 0; /* client-server interactions */ +static unsigned long stat_statements = 0; /* number of concurrent statements */ +static unsigned long stat_stmtmax = 0; /* maximum concurrent statements */ +static unsigned long stat_sesscnt = 0; /* total number of sessions */ +static unsigned long stat_sessions = 0; /* number of concurrent sessions */ +static unsigned long stat_sessmax = 0; /* maximum concurrent sessions */ +static unsigned long stat_hist[5] = {0, 0, 0, 0, 0}; /* duration histogram */ +static 
unsigned long old_stat_hist[5] = {0, 0, 0, 0, 0}; /* segment duration histogram */ + +static PGresult* old_result = NULL; +static unsigned long old_stat_stmt = 0; +static unsigned long old_stat_errors = 0; + +#define NUM_DELAY_STEPS 11 + +/* steps for execution delay reports */ +static struct { + int seconds; + char *display; + short int shown; +} delay_steps[NUM_DELAY_STEPS] = { + {10, "10 seconds", 0}, + {30, "30 seconds", 0}, + {60, "1 minute", 0}, + {180, "3 minutes", 0}, + {600, "10 minutes", 0}, + {1800, "30 minutes", 0}, + {3600, "1 hour", 0}, + {7200, "2 hours", 0}, + {21600, "6 hours", 0}, + {43200, "12 hours", 0}, + {86400, "1 day", 0} +}; + +/* processes (ignores) notices from the server */ +static void ignore_notices(void *arg, const PGresult *res) { +} + +/* encapsulates "select" call and error handling */ + +static int do_select(int n, fd_set *rfds, fd_set *wfds, fd_set *xfds, struct timeval *timeout) { + int rc; + + rc = select(n, rfds, wfds, xfds, timeout); +#ifdef WINDOWS + if (SOCKET_ERROR == rc) { + win_perror("Error in select()", 1); + rc = -1; + } +#else + if (-1 == rc) { + perror("Error in select()"); + } +#endif + + return rc; +} + +/* checks if a certain socket can be read or written without blocking */ + +static int poll_socket(int socket, int for_read, char * const errmsg_prefix) { + fd_set fds; + struct timeval zero = { 0, 0 }; + + FD_ZERO(&fds); + FD_SET(socket, &fds); + return do_select(socket + 1, for_read ? &fds : NULL, for_read ? 
NULL : &fds, NULL, &zero); +} + +/* sleep routine that should work on all platforms */ + +static int do_sleep(struct timeval *delta) { + debug(2, "Napping for %lu.%06lu seconds\n", (unsigned long)delta->tv_sec, (unsigned long)delta->tv_usec); +#ifdef WINDOWS + Sleep((DWORD)delta->tv_sec * 1000 + (DWORD)(delta->tv_usec / 1000)); + return 0; +#else + return do_select(0, NULL, NULL, NULL, delta); +#endif +} + +/* +set search_path of the connection + */ +static int set_search_path(const char * search_path, PGconn * conn){ + char * set_path = malloc(strlen(search_path) + strlen("set search_path = ;") + 1); + PGresult* res; + sprintf(set_path,"set search_path = %s;",search_path); + res = PQexec(conn, set_path); + if(PQresultStatus(res) != PGRES_COMMAND_OK){ + fprintf(stderr, "set_search_path: Query execution failed search: %s\n", PQerrorMessage(conn)); + } + PQclear(res); + free(set_path); + return 0; +} + +/* add prepare cmd */ +static int set_prepare_cmd(const char * prepare_cmd, PGconn * conn){ + PGresult* res = PQexec(conn, prepare_cmd); + if(PQresultStatus(res) != PGRES_COMMAND_OK){ + fprintf(stderr, "set_prepare_cmd: Query execution failed search: %s\n", PQerrorMessage(conn)); + } + PQclear(res); + return 0; +} + +static void print_replay_statistics(int dry_run) { + int hours, minutes; + double seconds, runtime, session_time, busy_time; + struct timeval delta; + unsigned long histtotal = + stat_hist[0] + stat_hist[1] + stat_hist[2] + stat_hist[3] + stat_hist[4]; + + if (dry_run) { + fprintf(sf, "\nReplay statistics (dry run)\n"); + fprintf(sf, "===========================\n\n"); + + /* calculate lengh of the recorded workload */ + timersub(&last_stmt_time, &first_stmt_time, &delta); + hours = delta.tv_sec / 3600; + delta.tv_sec -= hours * 3600; + minutes = delta.tv_sec / 60; + delta.tv_sec -= minutes * 60; + seconds = delta.tv_usec / 1000000.0 + delta.tv_sec; + + fprintf(sf, "Duration of recorded workload:"); + if (hours > 0) { + fprintf(sf, " %d hours", hours); + 
} + if (minutes > 0) { + fprintf(sf, " %d minutes", minutes); + } + fprintf(sf, " %.3f seconds\n", seconds); + fprintf(sf, "Calls to the server: %lu\n", stat_actions); + } else { + fprintf(sf, "\nReplay statistics\n"); + fprintf(sf, "=================\n\n"); + + /* calculate total run time */ + timersub(&stop_time, &start_time, &delta); + runtime = delta.tv_usec / 1000000.0 + delta.tv_sec; + /* calculate hours and minutes, subtract from delta */ + hours = delta.tv_sec / 3600; + delta.tv_sec -= hours * 3600; + minutes = delta.tv_sec / 60; + delta.tv_sec -= minutes * 60; + seconds = delta.tv_usec / 1000000.0 + delta.tv_sec; + /* calculate total busy time */ + busy_time = stat_exec.tv_usec / 1000000.0 + stat_exec.tv_sec; + /* calculate total session time */ + session_time = stat_session.tv_usec / 1000000.0 + stat_session.tv_sec; + + fprintf(sf, "Speed factor for replay: %.3f\n", replay_factor); + fprintf(sf, "Total run time:"); + if (hours > 0) { + fprintf(sf, " %d hours", hours); + } + if (minutes > 0) { + fprintf(sf, " %d minutes", minutes); + } + fprintf(sf, " %.3f seconds\n", seconds); + fprintf(sf, "Maximum lag behind schedule: %lu seconds\n", (unsigned long) secs_behind); + fprintf(sf, "Calls to the server: %lu\n", stat_actions); + if (runtime > 0.0) { + fprintf(sf, "(%.3f calls per second)\n", stat_actions / runtime); + } + } + + fprintf(sf, "Total number of connections: %lu\n", stat_sesscnt); + fprintf(sf, "Maximum number of concurrent connections: %lu\n", stat_sessmax); + if (!dry_run && runtime > 0.0) { + fprintf(sf, "Average number of concurrent connections: %.3f\n", session_time / runtime); + } + if (!dry_run && session_time > 0.0) { + fprintf(sf, "Average session idle percentage: %.3f%%\n", 100.0 * (session_time - busy_time) / session_time); + } + + fprintf(sf, "SQL statements executed: %lu\n", stat_stmt - stat_prep); + if (!dry_run && stat_stmt > stat_prep) { + fprintf(sf, "(%lu or %.3f%% of these completed with error)\n", + stat_errors, (100.0 * 
stat_errors) / (stat_stmt - stat_prep)); + fprintf(sf, "Maximum number of concurrent SQL statements: %lu\n", stat_stmtmax); + if (runtime > 0.0) { + fprintf(sf, "Average number of concurrent SQL statements: %.3f\n", busy_time / runtime); + } + fprintf(sf, "Average SQL statement duration: %.3f seconds\n", busy_time / stat_stmt); + fprintf(sf, "Maximum SQL statement duration: %.3f seconds\n", + stat_longstmt.tv_sec + stat_longstmt.tv_usec / 1000000.0); + fprintf(sf, "Statement duration histogram:\n"); + fprintf(sf, " 0 to 0.02 seconds: %.3f%%\n", 100.0 * stat_hist[0] / histtotal); + fprintf(sf, " 0.02 to 0.1 seconds: %.3f%%\n", 100.0 * stat_hist[1] / histtotal); + fprintf(sf, " 0.1 to 0.5 seconds: %.3f%%\n", 100.0 * stat_hist[2] / histtotal); + fprintf(sf, " 0.5 to 2 seconds: %.3f%%\n", 100.0 * stat_hist[3] / histtotal); + fprintf(sf, " over 2 seconds: %.3f%%\n", 100.0 * stat_hist[4] / histtotal); + } +} + +int database_consumer_init(const char *ignore, const char *host, int port, const char *passwd, double factor) { + int conn_string_len = 12; /* port and '\0' */ + const char *p; + char *p1; + + debug(3, "Entering database_consumer_init%s\n", ""); + + /* get time of program start */ + if (-1 == gettimeofday(&start_time, NULL)) { + perror("Error calling gettimeofday"); + return 0; + } + + replay_factor = factor; + + /* calculate length of connect string */ + if (host) { + conn_string_len += 8; + for (p=host; '\0'!=*p; ++p) { + if (('\'' == *p) || ('\\' == *p)) { + conn_string_len += 2; + } else { + ++conn_string_len; + } + } + } + if (passwd) { + conn_string_len += 12; + for (p=passwd; '\0'!=*p; ++p) { + if (('\'' == *p) || ('\\' == *p)) { + conn_string_len += 2; + } else { + ++conn_string_len; + } + } + } + + if (extra_connstr) + conn_string_len += strlen(extra_connstr); + + if (NULL == (conn_string = malloc(conn_string_len))) { + fprintf(stderr, "Cannot allocate %d bytes of memory\n", conn_string_len); + return 0; + } + /* write the port to the connection string if 
it is set */ + if (-1 == port) { + conn_string[0] = '\0'; + } else { + if (sprintf(conn_string, "port=%d", port) < 0) { + perror("Error writing connect string:"); + free(conn_string); + return 0; + } + } + for (p1=conn_string; '\0'!=*p1; ++p1) { + /* places p1 at the end of the string */ + } + + /* append host if necessary */ + if (host) { + *(p1++) = ' '; + *(p1++) = 'h'; + *(p1++) = 'o'; + *(p1++) = 's'; + *(p1++) = 't'; + *(p1++) = '='; + *(p1++) = '\''; + for (p=host; '\0'!=*p; ++p) { + if (('\'' == *p) || ('\\' == *p)) { + *(p1++) = '\\'; + } + *(p1++) = *p; + } + *(p1++) = '\''; + *p1 = '\0'; + } + + /* append password if necessary */ + if (passwd) { + *(p1++) = ' '; + *(p1++) = 'p'; + *(p1++) = 'a'; + *(p1++) = 's'; + *(p1++) = 's'; + *(p1++) = 'w'; + *(p1++) = 'o'; + *(p1++) = 'r'; + *(p1++) = 'd'; + *(p1++) = '='; + *(p1++) = '\''; + for (p=passwd; '\0'!=*p; ++p) { + if (('\'' == *p) || ('\\' == *p)) { + *(p1++) = '\\'; + } + *(p1++) = *p; + } + *(p1++) = '\''; + *p1 = '\0'; + } + + if (extra_connstr) { + *(p1++) = ' '; + strcpy(p1, extra_connstr); + } + + debug(2, "Database connect string: \"%s\"\n", conn_string); + + debug(3, "Leaving database_consumer_init%s\n", ""); + return 1; +} + +void database_consumer_finish(int dry_run) { + debug(3, "Entering database_consumer_finish%s\n", ""); + + free(conn_string); + + if (NULL != connections) { + fprintf(stderr, "Error: not all database connections closed\n"); + } + + if (-1 == gettimeofday(&stop_time, NULL)) { + perror("Error calling gettimeofday"); + } else if (sf) { + print_replay_statistics(dry_run); + } + + debug(3, "Leaving database_consumer_finish%s\n", ""); +} + +int database_consumer(replay_item *item) { + const uint64_t session_id = replay_get_session_id(item); + const replay_type type = replay_get_type(item); + int all_idle = 1, rc = 0, j; + struct dbconn *conn = connections, *found_conn = NULL, *prev_conn = NULL; + struct timeval target_time, now, delta; + const struct timeval *stmt_time; + static 
int fstmtm_set = 0; /* have we already collected first_statement_time */ + double d; + time_t i; + char *connstr, *p1, errbuf[256]; + const char *user, *database, *p; + PGcancel *cancel_request; + PGresult *result; + ExecStatusType result_status; + // const char* search_path,*params_typename,*source_text; + + debug(3, "Entering database_consumer%s\n", ""); + + /* loop through open connections and do what can be done */ + while ((-1 != rc) && (NULL != conn)) { + /* if we find the connection for the current statement, remember it */ + if (session_id == conn->session_id) { + found_conn = conn; + } + + /* handle each connection according to status */ + switch(conn->status) { + case idle: + case closed: + break; /* nothing to do */ + + case conn_wait_read: + case conn_wait_write: + /* in connection process */ + /* check if socket is still busy */ + switch (poll_socket(conn->socket, (conn_wait_read == conn->status), "Error polling socket during connect")) { + case 0: + /* socket still busy */ + debug(2, "Socket for session 0x" UINT64_FORMAT " busy for %s during connect\n", conn->session_id, (conn_wait_write == conn->status) ? 
"write" : "read"); + all_idle = 0; + break; + case 1: + /* socket not busy, continue connect process */ + switch(PQconnectPoll(conn->db_conn)) { + case PGRES_POLLING_WRITING: + conn->status = conn_wait_write; + all_idle = 0; + break; + case PGRES_POLLING_READING: + conn->status = conn_wait_read; + all_idle = 0; + break; + case PGRES_POLLING_OK: + debug(2, "Connection for session 0x" UINT64_FORMAT " established\n", conn->session_id); + conn->status = idle; + + /* get session start time */ + if (-1 == gettimeofday(&(conn->session_start), NULL)) { + perror("Error calling gettimeofday"); + rc = -1; + } + + /* count total and concurrent sessions */ + ++stat_sesscnt; + if (++stat_sessions > stat_sessmax) { + stat_sessmax = stat_sessions; + } + + break; + case PGRES_POLLING_FAILED: + /* If the connection fails because of a + FATAL error from the server, mark + connection "closed" and keep going. + The same thing probably happened in the + original run. + PostgreSQL logs no disconnection for this. 
+ */ + p1 = PQerrorMessage(conn->db_conn); + if (0 == strncmp(p1, "FATAL: ", 8)) { + p1 += 8; + if (NULL == (conn->errmsg = malloc(strlen(p1) + 1))) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)(strlen(p1) + 1)); + rc = -1; + } else { + debug(2, "Connection for session 0x" UINT64_FORMAT " failed with FATAL error: %s\n", + conn->session_id, p1); + strcpy(conn->errmsg, p1); + conn->status = closed; + PQfinish(conn->db_conn); + } + + break; + } + /* else fall through */ + default: + fprintf(stderr, "Connection for session 0x" UINT64_FORMAT " failed: %s\n", conn->session_id, PQerrorMessage(conn->db_conn)); + rc = -1; + PQfinish(conn->db_conn); + } + break; + default: + /* error happened in select() */ + rc = -1; + } + break; + + case wait_write: + /* check if the socket is writable */ + switch (poll_socket(conn->socket, 0, "Error polling socket for write")) { + case 0: + /* socket still busy */ + debug(2, "Session 0x" UINT64_FORMAT " busy writing data\n", conn->session_id); + all_idle = 0; + break; + case 1: + /* try PQflush again */ + debug(2, "Session 0x" UINT64_FORMAT " flushing data\n", conn->session_id); + switch (PQflush(conn->db_conn)) { + case 0: + /* finished flushing all data */ + conn->status = wait_read; + all_idle = 0; + break; + case 1: + /* more data to flush */ + all_idle = 0; + break; + default: + fprintf(stderr, "Error flushing to database: %s\n", PQerrorMessage(conn->db_conn)); + rc = -1; + } + break; + default: + /* error in select() */ + rc = -1; + } + break; + + case wait_read: + /* check if the socket is readable */ + switch (poll_socket(conn->socket, 1, "Error polling socket for read")) { + case 0: + /* socket still busy */ + debug(2, "Session 0x" UINT64_FORMAT " waiting for data\n", conn->session_id); + all_idle = 0; + break; + case 1: + /* read input from connection */ + if (! 
PQconsumeInput(conn->db_conn)) { + fprintf(stderr, "Error reading from database: %s\n", PQerrorMessage(conn->db_conn)); + rc = -1; + } else { + /* check if we are done reading */ + if (PQisBusy(conn->db_conn)) { + /* more to read */ + all_idle = 0; + } else { + /* read and discard all results */ + while (NULL != (result = PQgetResult(conn->db_conn))) { + /* count statements and errors for statistics */ + ++stat_stmt; + result_status = PQresultStatus(result); + debug(2, "Session 0x" UINT64_FORMAT " got got query response (%s)\n", + conn->session_id, + (PGRES_TUPLES_OK == result_status) ? "PGRES_TUPLES_OK" : + ((PGRES_COMMAND_OK == result_status) ? "PGRES_COMMAND_OK" : + ((PGRES_FATAL_ERROR == result_status) ? "PGRES_FATAL_ERROR" : + ((PGRES_NONFATAL_ERROR == result_status) ? "PGRES_NONFATAL_ERROR" : + ((PGRES_EMPTY_QUERY == result_status) ? "PGRES_EMPTY_QUERY" : "unexpected status"))))); + + if ((PGRES_EMPTY_QUERY != result_status) + && (PGRES_COMMAND_OK != result_status) + && (PGRES_TUPLES_OK != result_status) + && (PGRES_NONFATAL_ERROR != result_status)) + { + ++stat_errors; + } + + PQclear(result); + } + + /* one less concurrent statement */ + --stat_statements; + + conn->status = idle; + + /* remember execution time for statistics */ + if (-1 == gettimeofday(&delta, NULL)) { + perror("Error calling gettimeofday"); + rc = -1; + } else { + /* subtract statement start time */ + timersub(&delta, &(conn->stmt_start), &delta); + + /* add to duration histogram */ + if (0 == delta.tv_sec) { + if (20000 >= delta.tv_usec) { + ++stat_hist[0]; + } else if (100000 >= delta.tv_usec) { + ++stat_hist[1]; + } else if (500000 >= delta.tv_usec) { + ++stat_hist[2]; + } else { + ++stat_hist[3]; + } + } else if (2 > delta.tv_sec) { + ++stat_hist[3]; + } else { + ++stat_hist[4]; + } + + /* remember longest statement */ + if ((delta.tv_sec > stat_longstmt.tv_sec) + || ((delta.tv_sec == stat_longstmt.tv_sec) + && (delta.tv_usec > stat_longstmt.tv_usec))) + { + stat_longstmt.tv_sec = 
delta.tv_sec; + stat_longstmt.tv_usec = delta.tv_usec; + } + + /* add to total */ + timeradd(&stat_exec, &delta, &stat_exec); + } + } + } + break; + default: + /* error during select() */ + rc = -1; + } + break; + } + + if (! found_conn) { + /* remember previous item in list, useful for removing an item */ + prev_conn = conn; + } + + conn = conn->next; + } + + /* make sure we found a connection above (except for connect items) */ + if (1 == rc) { + if ((pg_connect == type) && (NULL != found_conn)) { + fprintf(stderr, "Error: connection for session 0x" UINT64_FORMAT " already exists\n", replay_get_session_id(item)); + rc = -1; + } else if ((pg_connect != type) && (NULL == found_conn)) { + fprintf(stderr, "Error: no connection found for session 0x" UINT64_FORMAT "\n", replay_get_session_id(item)); + rc = -1; + } + } + + /* time when the statement originally ran */ + stmt_time = replay_get_time(item); + last_stmt_time.tv_sec = stmt_time->tv_sec; + last_stmt_time.tv_usec = stmt_time->tv_usec; + + /* set first_stmt_time if it is not yet set */ + if (! 
fstmtm_set) { + first_stmt_time.tv_sec = stmt_time->tv_sec; + first_stmt_time.tv_usec = stmt_time->tv_usec; + + fstmtm_set = 1; + } + + /* get current time */ + if (-1 != rc) { + if (-1 == gettimeofday(&now, NULL)) { + fprintf(stderr, "Error: gettimeofday failed\n"); + rc = -1; + } + } + + /* determine if statement should already be consumed, sleep if necessary */ + if (-1 != rc) { + /* calculate "target time" when item should be replayed: + statement time - first statement time + program start time - skipped time + ------------------------------------- + replay factor */ + + /* timestamp of the statement */ + target_time.tv_sec = stmt_time->tv_sec; + target_time.tv_usec = stmt_time->tv_usec; + + /* subtract time of first statement */ + timersub(&target_time, &first_stmt_time, &target_time); + + /* subtract skipped time */ + if (jump_enabled) { + timersub(&target_time, &jump_total, &target_time); + } + + /* divide by replay_factor */ + if (replay_factor != 1.0) { + /* - divide the seconds part by the factor + - divide the microsecond part by the factor and add the + fractional part (times 10^6) of the previous division + - if the result exceeds 10^6, subtract the excess and + add its 10^6th to the seconds part. 
*/ + d = target_time.tv_sec / replay_factor; + target_time.tv_sec = d; + target_time.tv_usec = target_time.tv_usec / replay_factor + + (d - target_time.tv_sec) * 1000000.0; + i = target_time.tv_usec / 1000000; + target_time.tv_usec -= i * 1000000; + target_time.tv_sec += i; + } + + /* add program start time */ + timeradd(&target_time, &start_time, &target_time); + + /* warn if we fall behind too much */ + if (secs_behind < now.tv_sec - target_time.tv_sec) { + secs_behind = now.tv_sec - target_time.tv_sec; + for (j=0; j now.tv_sec) || + ((target_time.tv_sec == now.tv_sec) && (target_time.tv_usec > now.tv_usec))) && + all_idle) { + /* sleep or jump if all is idle and the target time is in the future */ + + /* calculate time to sleep or jump (delta = target_time - now) */ + timersub(&target_time, &now, &delta); + + if (jump_enabled) { + /* add the sleep time to jump_total */ + timeradd(&jump_total, &delta, &jump_total); + debug(2, "Skipping %lu.%06lu seconds\n", (unsigned long)delta.tv_sec, (unsigned long)delta.tv_usec); + /* then consume item */ + rc = 1; + } else { + /* sleep */ + if (-1 == do_sleep(&delta)) { + rc = -1; + } else { + /* then consume item */ + rc = 1; + } + } + } else if (((target_time.tv_sec < now.tv_sec) || + ((target_time.tv_sec == now.tv_sec) && (target_time.tv_usec <= now.tv_usec))) && + ((pg_connect == type) || + ((pg_disconnect == type) && (closed == found_conn->status)) || + ((pg_cancel == type) && (wait_read == found_conn->status)) || + (idle == found_conn->status))) { + /* if the item is due and its connection is idle, consume it */ + /* cancel items will also be consumed if the connection is waiting for a resonse */ + rc = 1; + } else if (found_conn && (closed == found_conn->status)) { + fprintf(stderr, "Connection 0x" UINT64_FORMAT " failed with FATAL error: %s\n", + found_conn->session_id, found_conn->errmsg); + rc = -1; + } + } + + /* send statement */ + if (1 == rc) { + /* count for statistics */ + ++stat_actions; + + switch (type) { + 
case pg_connect: + debug(2, "Starting database connection for session 0x" UINT64_FORMAT "\n", replay_get_session_id(item)); + + /* allocate a connect string */ + user = replay_get_user(item); + database = replay_get_database(item); + if (NULL == (connstr = malloc(strlen(conn_string) + 2 * strlen(user) + 2 * strlen(database) + 18))) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)strlen(conn_string) + 2 * strlen(user) + 2 * strlen(database) + 18); + rc = -1; + } else { + /* append user and password */ + strcpy(connstr, conn_string); + p1 = connstr + strlen(connstr); + *(p1++) = ' '; + *(p1++) = 'u'; + *(p1++) = 's'; + *(p1++) = 'e'; + *(p1++) = 'r'; + *(p1++) = '='; + *(p1++) = '\''; + for (p=user; '\0'!=*p; ++p) { + if (('\'' == *p) || ('\\' == *p)) { + *(p1++) = '\\'; + } + *(p1++) = *p; + } + *(p1++) = '\''; + *(p1++) = ' '; + *(p1++) = 'd'; + *(p1++) = 'b'; + *(p1++) = 'n'; + *(p1++) = 'a'; + *(p1++) = 'm'; + *(p1++) = 'e'; + *(p1++) = '='; + *(p1++) = '\''; + for (p=database; '\0'!=*p; ++p) { + if (('\'' == *p) || ('\\' == *p)) { + *(p1++) = '\\'; + } + *(p1++) = *p; + } + *(p1++) = '\''; + *p1 = '\0'; + + /* allocate a struct dbconn */ + if (NULL == (found_conn = malloc(sizeof(struct dbconn)))) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)sizeof(struct dbconn)); + rc = -1; + } else { + /* initialize a connection */ + if (NULL == (found_conn->db_conn = PQconnectStart(connstr))) { + fprintf(stderr, "Cannot allocate memory for database connection\n"); + rc = -1; + free(found_conn); + } else { + if (CONNECTION_BAD == PQstatus(found_conn->db_conn)) { + fprintf(stderr, "Error: connection to database failed: %s\n", PQerrorMessage(found_conn->db_conn)); + rc = -1; + PQfinish(found_conn->db_conn); + free(found_conn); + } else { + if (-1 == (found_conn->socket = PQsocket(found_conn->db_conn))) { + fprintf(stderr, "Error: cannot get socket for database connection\n"); + rc = -1; + PQfinish(found_conn->db_conn); + 
free(found_conn); + } else { + /* set values in struct dbconn */ + + found_conn->session_id = replay_get_session_id(item); + found_conn->status = conn_wait_write; + found_conn->errmsg = NULL; + found_conn->next = connections; + found_conn->search_path = malloc(POLARDBlEN); + found_conn->prepare_hash = NULL; + connections = found_conn; + + /* do not display notices */ + PQsetNoticeReceiver(found_conn->db_conn, ignore_notices, NULL); + } + } + } + } + + /* free connection sting */ + free(connstr); + } + break; + case pg_disconnect: + /* dead connections need not be closed */ + if (closed == found_conn->status) { + debug(2, "Removing closed session 0x" UINT64_FORMAT "\n", replay_get_session_id(item)); + } else { + debug(2, "Disconnecting database connection for session 0x" UINT64_FORMAT "\n", replay_get_session_id(item)); + + PQfinish(found_conn->db_conn); + + /* remember session duration for statistics */ + if (-1 == gettimeofday(&delta, NULL)) { + perror("Error calling gettimeofday"); + rc = -1; + } else { + /* subtract session start time */ + timersub(&delta, &(found_conn->session_start), &delta); + + /* add to total */ + timeradd(&stat_session, &delta, &stat_session); + } + + /* one less concurrent session */ + --stat_sessions; + } + + /* remove struct dbconn from linked list */ + if (prev_conn) { + prev_conn->next = found_conn->next; + } else { + connections = found_conn->next; + } + if (found_conn->errmsg) { + free(found_conn->errmsg); + } + if (found_conn->search_path) { + free(found_conn->search_path); + } + free(found_conn); + + break; + case pg_execute: + debug(2, "Sending simple statement on session 0x" UINT64_FORMAT "\n", replay_get_session_id(item)); + if(polardb_audit){ + /* set search before execute sql every times */ + char * search_path; + search_path = replay_get_search_path(item); + if(!search_path) { + fprintf(stderr, "Error not have search_path statement: %s\n", search_path); + }else{ + if(strcmp(search_path, found_conn->search_path) != 0){ + 
set_search_path(search_path, found_conn->db_conn); + strcpy(found_conn->search_path, search_path); + } + } + debug(1,"search_path is %s\n",search_path); + if(search_path) free(search_path); + + /* check out whether prepare exist,create it if not exist */ + char * params_typename; + params_typename = replay_get_prepare_params_typename(item); + if(params_typename){ + prepare_item * s; + char * tmp = strchr(params_typename,','); + *tmp = '\0'; + debug(1,"params_typename is %s\n",params_typename); + HASH_FIND_STR(found_conn->prepare_hash, params_typename, s); + if(!s){ + char* source_text; + source_text = replay_get_prepare_source_text(item); + if(!source_text){ + fprintf(stderr, "prepare cmd is err in statement: %s\n", source_text); + } + debug(1,"source_text is %s\n",source_text); + set_prepare_cmd(source_text, found_conn->db_conn); + s = (prepare_item*)malloc(sizeof(prepare_item)); + s->name = (char*)malloc(strlen(params_typename)+1); + // s->id = 1; + strcpy(s->name, params_typename); + HASH_ADD_KEYPTR(hh, found_conn->prepare_hash, s->name, strlen(s->name), s); + if(source_text) free(source_text); + } + } + if(params_typename) free(params_typename); + } + debug(1,"replay_get_statement(item) is %s\n",replay_get_statement(item)); + + if (! PQsendQuery(found_conn->db_conn, replay_get_statement(item))) { + fprintf(stderr, "Error sending simple statement: %s\n", PQerrorMessage(found_conn->db_conn)); + rc = -1; + } else { + found_conn->status = wait_write; + } + break; + case pg_prepare: + debug(2, "Sending prepare request on session 0x" UINT64_FORMAT "\n", replay_get_session_id(item)); + + /* count preparations for statistics */ + ++stat_prep; + + if (! 
PQsendPrepare( + found_conn->db_conn, + replay_get_name(item), + replay_get_statement(item), + 0, + NULL)) { + fprintf(stderr, "Error sending prepare request: %s\n", PQerrorMessage(found_conn->db_conn)); + rc = -1; + } else { + found_conn->status = wait_write; + } + break; + case pg_exec_prepared: + debug(2, "Sending prepared statement execution on session 0x" UINT64_FORMAT "\n", replay_get_session_id(item)); + + if (! PQsendQueryPrepared( + found_conn->db_conn, + replay_get_name(item), + replay_get_valuecount(item), + replay_get_values(item), + NULL, + NULL, + 0)) { + fprintf(stderr, "Error sending prepared statement execution: %s\n", PQerrorMessage(found_conn->db_conn)); + rc = -1; + } else { + found_conn->status = wait_write; + } + break; + case pg_cancel: + debug(2, "Sending cancel request on session 0x" UINT64_FORMAT "\n", replay_get_session_id(item)); + + if (NULL == (cancel_request = PQgetCancel(found_conn->db_conn))) { + fprintf(stderr, "Error creating cancel request\n"); + rc = -1; + } else { + if (! 
PQcancel(cancel_request, errbuf, 256)) { + fprintf(stderr, "Error sending cancel request: %s\n", errbuf); + rc = -1; + } + /* free cancel request */ + PQfreeCancel(cancel_request); + } + /* status remains unchanged */ + break; + } + + replay_free(item); + } + + /* try to flush the statement if necessary */ + if ((1 == rc) && (pg_disconnect != type) && (wait_write == found_conn->status)) { + switch (PQflush(found_conn->db_conn)) { + case 0: + /* complete request sent */ + found_conn->status = wait_read; + break; + case 1: + debug(2, "Session 0x" UINT64_FORMAT " needs to flush again\n", found_conn->session_id); + break; + default: + fprintf(stderr, "Error flushing to database: %s\n", PQerrorMessage(found_conn->db_conn)); + rc = -1; + } + + /* get statement start time */ + if (-1 == gettimeofday(&(found_conn->stmt_start), NULL)) { + perror("Error calling gettimeofday"); + rc = -1; + } + + /* count concurrent statements */ + if (++stat_statements > stat_stmtmax) { + stat_stmtmax = stat_statements; + } + } + + debug(3, "Leaving database_consumer%s\n", ""); + return rc; +} + +int database_consumer_dry_run(replay_item *item) { + const replay_type type = replay_get_type(item); + const struct timeval *stmt_time; + static int fstmt_set_dr = 0; + + debug(3, "Entering database_consumer_dry_run%s\n", ""); + + /* time when the statement originally ran */ + stmt_time = replay_get_time(item); + last_stmt_time.tv_sec = stmt_time->tv_sec; + last_stmt_time.tv_usec = stmt_time->tv_usec; + + /* set first_stmt_time if it is not yet set */ + if (! 
fstmt_set_dr) { + first_stmt_time.tv_sec = stmt_time->tv_sec; + first_stmt_time.tv_usec = stmt_time->tv_usec; + + fstmt_set_dr = 1; + } + + /* gather statistics */ + ++stat_actions; + + switch (type) { + case pg_connect: + ++stat_sesscnt; + if (++stat_sessions > stat_sessmax) { + stat_sessmax = stat_sessions; + } + break; + case pg_disconnect: + --stat_sessions; + break; + case pg_execute: + case pg_exec_prepared: + ++stat_stmt; + break; + case pg_prepare: + ++stat_prep; + break; + case pg_cancel: + break; + } + + replay_free(item); + debug(3, "Leaving database_consumer_dry_run%s\n", ""); + + return 1; +} + +/* +initailize connect for getting monitor info + */ +int monitor_connect_init(const char *host, int port, const char *passwd) { + // 建立连接 + // create connect to db server + char conn_info[100] = {'\0'}; + sprintf(conn_info,"dbname=postgres user=polardb password=%s hostaddr=%s port=%d ", passwd, host, port); + + monitor_conn = PQconnectdb(conn_info); + + // 检查连接是否成功 + // check if connect was success. 
+ if (PQstatus(monitor_conn) != CONNECTION_OK) { + fprintf(stderr, "Connection to database failed: %s", PQerrorMessage(monitor_conn)); + PQfinish(monitor_conn); + return 1; + } + return 0; +} + +/* +get once monitor info + */ +int monitor_connect_execute(const char* sql) { + + // execute sql + static int print_title = 0; + PGresult *result = PQexec(monitor_conn, sql); + if(!print_title){ + printf("cpu_use mem_use read_count write_count read_bytes write_bytes disk_space active_conn total_conn load5 load10 tps qps stat_correct stat_errors time0 time1 time2 time3 time4\n"); + print_title++; + } + + // check if sql execution was success + if (PQresultStatus(result) != PGRES_TUPLES_OK) { + fprintf(stderr, "Query execution failed: %s", PQerrorMessage(monitor_conn)); + PQclear(result); + PQfinish(monitor_conn); + return 1; + } + + if(old_result){ + printf("%s ",PQgetvalue(result, 0, 2)); + printf("%s ",PQgetvalue(result, 1, 2)); + printf("%ld ",atol(PQgetvalue(result, 2, 2)) - atol(PQgetvalue(old_result, 2, 2))); + printf("%ld ",atol(PQgetvalue(result, 3, 2)) - atol(PQgetvalue(old_result, 3, 2))); + printf("%ld ",atol(PQgetvalue(result, 4, 2)) - atol(PQgetvalue(old_result, 4, 2))); + printf("%ld ",atol(PQgetvalue(result, 5, 2)) - atol(PQgetvalue(old_result, 5, 2))); + printf("%s ",PQgetvalue(result, 6, 2)); + printf("%s ",PQgetvalue(result, 7, 2)); + printf("%s ",PQgetvalue(result, 8, 2)); + printf("%s ",PQgetvalue(result, 9, 2)); + printf("%s ",PQgetvalue(result, 10, 2)); + printf("%ld ",atol(PQgetvalue(result, 11, 2)) - atol(PQgetvalue(old_result, 11, 2))); + printf("%ld ",atol(PQgetvalue(result, 12, 2)) - atol(PQgetvalue(old_result, 12, 2))); + + printf("%lu ", (stat_stmt - stat_errors) - (old_stat_stmt - old_stat_errors) ); + printf("%lu ", stat_errors - old_stat_errors); + printf("%lu ", stat_hist[0] - old_stat_hist[0]); + printf("%lu ", stat_hist[1] - old_stat_hist[1]); + printf("%lu ", stat_hist[2] - old_stat_hist[2]); + printf("%lu ", stat_hist[3] - 
old_stat_hist[3]); + printf("%lu ", stat_hist[4] - old_stat_hist[4]); + printf("\n"); + } + + if(old_result){ + PQclear(old_result); + } + old_result = result; + old_stat_errors = stat_errors; + old_stat_stmt = stat_stmt; + memcpy(old_stat_hist, stat_hist, sizeof(stat_hist)); + + return 0; +} + +// close connect of monitor info +int monitor_connect_finish() { + // Release resources + PQclear(old_result); + PQfinish(monitor_conn); + + return 0; +} diff --git a/pgreplay/install-sh b/pgreplay/install-sh new file mode 100755 index 00000000000..6781b987bdb --- /dev/null +++ b/pgreplay/install-sh @@ -0,0 +1,520 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2009-04-28.21; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +nl=' +' +IFS=" "" $nl" + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit=${DOITPROG-} +if test -z "$doit"; then + doit_exec=exec +else + doit_exec=$doit +fi + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_glob='?' +initialize_posix_glob=' + test "$posix_glob" != "?" || { + if (set -f) 2>/dev/null; then + posix_glob= + else + posix_glob=: + fi + } +' + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +no_target_directory= + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. 
+ + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *' '* | *' +'* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) dst_arg=$2 + shift;; + + -T) no_target_directory=true;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + trap '(exit $?); exit' 1 2 13 15 + + # Set umask so as not to create temps with too-generous modes. 
+ # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names starting with `-'. + case $src in + -*) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + + dst=$dst_arg + # Protect names starting with `-'. + case $dst in + -*) dst=./$dst;; + esac + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dst=$dstdir/`basename "$src"` + dstdir_status=0 + else + # Prefer dirname, but fall back on a substitute if dirname fails. + dstdir=` + (dirname "$dst") 2>/dev/null || + expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$dst" : 'X\(//\)[^/]' \| \ + X"$dst" : 'X\(//\)$' \| \ + X"$dst" : 'X\(/\)' \| . 2>/dev/null || + echo X"$dst" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q' + ` + + test -d "$dstdir" + dstdir_status=$? 
+ fi + fi + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 + + if (umask $mkdir_umask && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writeable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + ls_ld_tmpdir=`ls -ld "$tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/d" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. 
+ rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + -*) prefix='./';; + *) prefix='';; + esac + + eval "$initialize_posix_glob" + + oIFS=$IFS + IFS=/ + $posix_glob set -f + set fnord $dstdir + shift + $posix_glob set +f + IFS=$oIFS + + prefixes= + + for d + do + test -z "$d" && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. 
+ (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + + eval "$initialize_posix_glob" && + $posix_glob set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + $posix_glob set +f && + + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/pgreplay/main.c b/pgreplay/main.c new file mode 100644 index 00000000000..e21934d7f0e --- /dev/null +++ b/pgreplay/main.c @@ -0,0 +1,466 @@ +#include "pgreplay.h" + +#include +#include +#include +#include +#include +#ifdef WINDOWS +# include +#endif + +/* from getopt */ +extern char *optarg; + +int debug_level = 0; + +/* destination of statistics output */ +FILE *sf; + +/* if 1, backslash will escape the following single quote in string literal */ +int backslash_quote = 0; + +/* if 1, replay will skip idle intervals instead of sleeping */ +int jump_enabled = 0; + +/* extra connect options specified with the -X option */ +char *extra_connstr; +/* indicates whether input file is from polardb */ +int polardb_audit = 0; + +char monitor_sql[] = + " select 1 as id,'cpu_use' as monitor_name ,100 - split_part((select pg_sys_cpu_usage_info())::text,',', 4 )::decimal as num union" + " select 2,'mem_use' as monitor_name , (( split_part(replace((select pg_sys_memory_info())::text,'(','') , ',',1)::decimal - " + " split_part(replace((select pg_sys_memory_info())::text,'(','') , ',',7)::decimal )" + " / split_part(replace((select pg_sys_memory_info())::text,'(','') , ',',1)::decimal)::decimal(4,2)*100 as num union" + " select 3,'read_count' monitor_name, sum(io_count) num from (select split_part(pg_sys_io_analysis_info()::text,',',2)::bigint as io_count) as a union " + " select 4,'write_count' monitor_name, sum(io_count) num from (select split_part(pg_sys_io_analysis_info()::text,',',3)::bigint as io_count) as a union " + " select 5,'read_bytes' monitor_name, sum(io_count) num from (select split_part(pg_sys_io_analysis_info()::text,',',4)::bigint as io_count) as a union " + 
" select 6,'write_bytes' monitor_name, sum(io_count) num from (select split_part(pg_sys_io_analysis_info()::text,',',5)::bigint as io_count) as a union" + " select 7,'disk_space' monitor_name, sum(disk_space / 1024 / 1024) num from (select split_part(pg_sys_disk_info()::text,',',7)::bigint as disk_space) as a union" + " select 8,'active_conn' as monitor_name, count(query) as num from pg_stat_activity where state = 'active' and backend_type = 'client backend' union" + " select 9,'total_conn' as monitor_name, count(query) as num from pg_stat_activity union" + " select 10,'load5_value' monitor_name, split_part(pg_sys_load_avg_info()::text,',',2)::decimal as num union" + " select 11,'load10_value' monitor_name, split_part(pg_sys_load_avg_info()::text,',',3)::decimal as num union" + " select 12,'tps' as monitor_name,split_part(txid_current_snapshot()::text,':',1)::integer as num union" + " select 13,'qps' as monitor_name,sum(calls) as num from pg_stat_statements order by id;"; + +/* wrapper for setenv, returns 0 on success and -1 on error */ +static int do_setenv(const char *name, const char *value) { + int rc; + +#ifdef WINDOWS + if (0 == SetEnvironmentVariable(name, value)) { + win_perror("Error setting environment variable", 0); + rc = -1; + } else { + rc = 0; + } +#else + if (-1 == (rc = setenv(name, value, 1))) { + fprintf(stderr, "Error setting environment variable\n"); + } +#endif + + return rc; +} + +static void version(FILE *f) { + fprintf(f, "pgreplay %s\n", VERSION); +} + +static void help(FILE *f) { + fprintf(f, "\n"); + version(f); + fprintf(f, "==============\n"); + fprintf(f, "\nUsage: pgreplay [] [] []\n"); + fprintf(f, " pgreplay -f [] [-o ] []\n"); + fprintf(f, " pgreplay -r [] []\n\n"); + fprintf(f, " The first form parses a PostgreSQL log file and replays the\n"); + fprintf(f, "statements against a database.\n"); + fprintf(f, " The second form parses a PostgreSQL log file and writes the\n"); + fprintf(f, "contents to a \"replay file\" that can be 
replayed with -r.\n"); + fprintf(f, " The third form replays a file generated with -f.\n\n"); + fprintf(f, "Parse options:\n"); + fprintf(f, " -c (assume CSV logfile)\n"); + fprintf(f, " -P (assume Polardb11 adult logfile)\n"); + fprintf(f, " -m (print monitor info,only support for polardb)\n"); + fprintf(f, " -b (start time for parsing logfile)\n"); + fprintf(f, " -e (end time for parsing logfile)\n"); + fprintf(f, " -q ( \\' in string literal is a single quote)\n\n"); + fprintf(f, " -D (database name to use as filter for parsing logfile)\n"); + fprintf(f, " -U (username to use as filter for parsing logfile)\n"); + fprintf(f, "Replay options:\n"); + fprintf(f, " -h \n"); + fprintf(f, " -p \n"); + fprintf(f, " -W (must be the same for all users)\n"); + fprintf(f, " -s (speed factor for replay)\n"); + fprintf(f, " -E (server encoding)\n"); + fprintf(f, " -j (skip idle time during replay)\n"); + fprintf(f, " -X (extra libpq connect options)\n\n"); + fprintf(f, " -n (dry-run, will replay file without running queries)\n\n"); + fprintf(f, "Debugging:\n"); + fprintf(f, " -d (level between 1 and 3)\n"); + fprintf(f, " -v (prints version and exits)\n"); +} + +int main(int argc, char **argv) { + int arg, parse_only = 0, replay_only = 0, port = -1, csv = 0, + parse_opt = 0, replay_opt = 0, rc = 0, dry_run = 0, monitor_gap = 0 ; + double factor = 1.0; + char *host = NULL, *encoding = NULL, *endptr, *passwd = NULL, + *outfilename = NULL, *infilename = NULL, + *database_only = NULL, *username_only = NULL, *tmp = NULL, + start_time[24] = { '\0' }, end_time[24] = { '\0' }; + const char *errmsg; + unsigned long portnr = 0l, debug = 0l, length; + replay_item_provider *provider; + replay_item_provider_init *provider_init; + replay_item_provider_finish *provider_finish; + replay_item_consumer *consumer; + replay_item_consumer_init *consumer_init; + replay_item_consumer_finish *consumer_finish; + replay_item *item = NULL; + const struct timeval *tmp_time; + struct timeval 
monitor_time; + + /* initialize errno to avoid bogus error messages */ + errno = 0; + + /* parse arguments */ + opterr = 0; + while (-1 != (arg = getopt(argc, argv, "vfro:h:p:W:s:E:d:cb:e:qjnX:D:U:Pm:"))) { + switch (arg) { + case 'v': + version(stdout); + return 0; + break; + case 'f': + parse_only = 1; + if (replay_only) { + fprintf(stderr, "Error: options -p and -r are mutually exclusive\n"); + help(stderr); + return 1; + } + break; + case 'r': + replay_only = 1; + if (parse_only) { + fprintf(stderr, "Error: options -p and -r are mutually exclusive\n"); + help(stderr); + return 1; + } + break; + case 'o': + outfilename = ('\0' == *optarg) ? NULL : optarg; + break; + case 'h': + replay_opt = 1; + + host = ('\0' == *optarg) ? NULL : optarg; + break; + case 'p': + replay_opt = 1; + + portnr = strtoul(optarg, &endptr, 0); + if (('\0' == *optarg) || ('\0' != *endptr)) { + fprintf(stderr, "Not a valid port number: \"%s\"\n", optarg); + help(stderr); + return 1; + } + if ((portnr < 1) || (65535 < portnr)) { + fprintf(stderr, "Port number must be between 1 and 65535\n"); + help(stderr); + return 1; + } + port = (int)portnr; + break; + case 'W': + replay_opt = 1; + + passwd = ('\0' == *optarg) ? NULL : optarg; + break; + case 's': + replay_opt = 1; + + factor = strtod(optarg, &endptr); + if (('\0' == *optarg) || ('\0' != *endptr)) { + fprintf(stderr, "Not a valid floating point number: \"%s\"\n", optarg); + help(stderr); + return 1; + } + if (0 != errno) { + perror("Error converting speed factor"); + help(stderr); + return 1; + } + if (factor <= 0.0) { + fprintf(stderr, "Factor must be greater than 0\n"); + help(stderr); + return 1; + } + break; + case 'E': + replay_opt = 1; + + encoding = ('\0' == *optarg) ? 
NULL : optarg; + break; + case 'd': + debug = strtoul(optarg, &endptr, 0); + if (('\0' == *optarg) || ('\0' != *endptr)) { + fprintf(stderr, "Not a valid debug level: \"%s\"\n", optarg); + help(stderr); + return 1; + } + if ((debug < 0) || (3 < debug)) { + fprintf(stderr, "Debug level must be between 0 and 3\n"); + help(stderr); + return 1; + } + debug_level = (int)debug; + break; + case 'c': + parse_opt = 1; + + csv = 1; + break; + case 'P': + parse_opt = 1; + polardb_audit = 1; + break; + case 'm': + tmp = ('\0' == *optarg) ? NULL : optarg; + monitor_gap = atoi(tmp); + break; + case 'b': + parse_opt = 1; + + if (NULL == (errmsg = parse_time(optarg, NULL))) { + strncpy(start_time, optarg, 23); + } else { + fprintf(stderr, "Error in begin timestamp: %s\n", errmsg); + help(stderr); + return 1; + } + break; + case 'e': + parse_opt = 1; + + if (NULL == (errmsg = parse_time(optarg, NULL))) { + strncpy(end_time, optarg, 23); + } else { + fprintf(stderr, "Error in end timestamp: %s\n", errmsg); + help(stderr); + return 1; + } + break; + case 'q': + backslash_quote = 1; + break; + case 'j': + replay_opt = 1; + + jump_enabled = 1; + break; + case 'n': + replay_opt = 1; + + dry_run = 1; + break; + case 'X': + replay_opt = 1; + + extra_connstr = optarg; + break; + case 'D': + parse_opt = 1; + + if (NULL == database_only) { + length = strlen(optarg) + 3; + database_only = malloc(length); + if (NULL != database_only) + strcpy(database_only, "\\"); + } else { + length = strlen(database_only) + strlen(optarg) + 2; + database_only = realloc(database_only, length); + } + if (NULL == database_only) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", length); + return 1; + } + + strcat(database_only, optarg); + strcat(database_only, "\\"); + break; + case 'U': + parse_opt = 1; + + if (NULL == username_only) { + length = strlen(optarg) + 3; + username_only = malloc(length); + if (NULL != username_only) + strcpy(username_only, "\\"); + } else { + length = 
strlen(username_only) + strlen(optarg) + 2; + username_only = realloc(username_only, length); + } + if (NULL == username_only) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", length); + return 1; + } + + strcat(username_only, optarg); + strcat(username_only, "\\"); + break; + case '?': + if (('?' == optopt) || ('h' == optopt)) { + help(stdout); + return 0; + } else { + fprintf(stderr, "Error: unknown option -%c\n", optopt); + help(stderr); + return 1; + } + break; + } + } + + if (optind + 1 < argc) { + fprintf(stderr, "More than one argument given\n"); + help(stderr); + return 1; + } + + if (optind + 1 == argc) { + infilename = argv[optind]; + } + + if (parse_only && replay_opt) { + fprintf(stderr, "Error: cannot specify replay option with -f\n"); + help(stderr); + return 1; + } + + if (replay_only && parse_opt) { + fprintf(stderr, "Error: cannot specify parse option with -r\n"); + help(stderr); + return 1; + } + + if (NULL != outfilename) { + if (! parse_only) { + fprintf(stderr, "Error: option -o is only allowed with -f\n"); + help(stderr); + return 1; + } + } + + if(polardb_audit){ + if(parse_only || replay_only){ + fprintf(stderr,"only support fisrt pattern for polardb (combined parse and replay)"); + help(stderr); + return 1; + } + } + + /* set default encoding */ + if (NULL != encoding) { + if (-1 == do_setenv("PGCLIENTENCODING", encoding)) { + return 1; + } + } + + /* figure out destination for statistics output */ + if (parse_only && (NULL == outfilename)) { + sf = stderr; /* because replay file will go to stdout */ + } else { + sf = stdout; + } + + /* configure main loop */ + + if (replay_only) { + provider_init = &file_provider_init; + provider = &file_provider; + provider_finish = &file_provider_finish; + } else { + provider_init = &parse_provider_init; + provider = &parse_provider; + provider_finish = &parse_provider_finish; + } + + if (parse_only) { + consumer_init = &file_consumer_init; + consumer_finish = &file_consumer_finish; + 
consumer = &file_consumer; + } else { + consumer_init = &database_consumer_init; + consumer_finish = &database_consumer_finish; + if (0 == dry_run) { + consumer = &database_consumer; + } else { + consumer = &database_consumer_dry_run; + } + } + + /* main loop */ + + if (! (*provider_init)( + infilename, + csv, + (('\0' == start_time[0]) ? NULL : start_time), + (('\0' == end_time[0]) ? NULL : end_time), + database_only, + username_only + )) + { + rc = 1; + } + + if ((0 == rc) && (*consumer_init)(outfilename, host, port, passwd, factor)) { + /* try to get first item */ + if (! (item = (*provider)())) { + rc = 1; + } + } else { + rc = 1; + } + + tmp_time = replay_get_time(item); + monitor_time.tv_sec = tmp_time->tv_sec; + monitor_time.tv_usec = tmp_time->tv_usec; + if(polardb_audit && monitor_gap) + monitor_connect_init(host, port, passwd); + + while ((0 == rc) && (end_item != item)) { + + int n = (*consumer)(item); + + switch (n) { + case 0: /* item not consumed */ + break; + case 1: /* item consumed */ + if (! 
(item = (*provider)())) { + rc = 1; + } + break; + default: /* error occurred */ + rc = 1; + } + if(polardb_audit && monitor_gap && replay_get_time(item)->tv_sec - monitor_time.tv_sec >= monitor_gap){ + monitor_connect_execute(monitor_sql); + monitor_time.tv_sec = replay_get_time(item)->tv_sec; + } + } + + /* no statistics output if there was an error */ + if (1 == rc) { + sf = NULL; + } + + (*provider_finish)(); + (*consumer_finish)(dry_run); + monitor_connect_finish(); + + return rc; +} diff --git a/pgreplay/parse.c b/pgreplay/parse.c new file mode 100644 index 00000000000..735436d9fb5 --- /dev/null +++ b/pgreplay/parse.c @@ -0,0 +1,1654 @@ +#include "pgreplay.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef TIME_WITH_SYS_TIME +# include +# include +#else +# ifdef HAVE_SYS_TIME_H +# include +# else +# include +# endif +#endif + +extern void parse_provider_finish(void); +extern replay_item * parse_provider(void); + +/* long enough to contain the beginning of a log line */ +#define BUFLEN 1024 +/* separates log line entries */ +#define SEPCHAR '|' + +/* types of log entries */ +typedef enum { + log_debug5, + log_debug4, + log_debug3, + log_debug2, + log_debug1, + log_info, + log_notice, + log_warning, + log_error, + log_log, + log_fatal, + log_panic, + log_unknown +} log_type; + +/* type for functions parsing the next log entry */ +typedef int (parse_log_entry_func)(struct timeval *, char *, char *, uint64_t *, log_type *, char **, char **); +/* functions for parsing stderr and CSV log entries */ +static parse_log_entry_func parse_errlog_entry; +static parse_log_entry_func parse_csvlog_entry; +static parse_log_entry_func * const parse_log_entry[2] = { + &parse_errlog_entry, + &parse_csvlog_entry +}; + +/* used to remember prepared statements */ +struct prep_stmt { + char *name; + struct prep_stmt *next; +}; +/* used to remember "open" connections */ +struct connection { + uint64_t session_id; + struct 
connection *next; + struct prep_stmt *statements; +}; +/* hash structure for "open" connections */ +static struct connection * open_conn[256] = { NULL }; + + + +/* indicates whether we are parsing a CSV file */ +static int csv; + +/* start and end timestamp for parsing log entries */ +static const char *start_time, *end_time; +/* database and username filters for parsing log entries */ +static const char *database_only, *username_only; +/* file which we parse */ +static int infile; +/* line number for error messages */ +static unsigned long lineno = 0; +/* offset for time values (what mktime(3) makes of 2000-01-01 00:00:00) + used to make timestamps independent of local time and broken mktime */ +static time_t epoch; +/* time of the first and last statement that we parse */ +static struct timeval first_stmt_time, last_stmt_time; + +/* statistics */ +static unsigned long stat_simple = 0; /* simple statements */ +static unsigned long stat_copy = 0; /* copy statements */ +static unsigned long stat_param = 0; /* parametrized statements */ +static unsigned long stat_named = 0; /* different named statements */ +static unsigned long stat_execnamed = 0; /* named statement executions */ +static unsigned long stat_fastpath = 0; /* fast-path function calls */ +static unsigned long stat_cancel = 0; /* cancel requests */ + +/* a version of strcpy that handles overlapping strings well */ +static char *overlap_strcpy(char *dest, const char *src) { + register char c; + + while((c = *(src++))) { + *(dest++) = c; + } + *dest = '\0'; + + return dest; +} + +/* convert a string to a log_type */ +static log_type to_log_type(const char* s) { + /* compare in order of expected likelyhood for performance */ + if (! strcmp(s, "LOG")) { + return log_log; + } else if (! strcmp(s, "ERROR")) { + return log_error; + } else if (! strcmp(s, "STATEMENT")) { + return log_unknown; + } else if (! strcmp(s, "DETAIL")) { + return log_unknown; + } else if (! 
strcmp(s, "HINT")) { + return log_unknown; + } else if (! strcmp(s, "FATAL")) { + return log_fatal; + } else if (! strcmp(s, "WARNING")) { + return log_warning; + } else if (! strcmp(s, "NOTICE")) { + return log_notice; + } else if (! strcmp(s, "INFO")) { + return log_info; + } else if (! strcmp(s, "PANIC")) { + return log_panic; + } else if (! strcmp(s, "DEBUG1")) { + return log_debug1; + } else if (! strcmp(s, "DEBUG2")) { + return log_debug2; + } else if (! strcmp(s, "DEBUG3")) { + return log_debug3; + } else if (! strcmp(s, "DEBUG4")) { + return log_debug4; + } else if (! strcmp(s, "DEBUG5")) { + return log_debug5; + } else { + return log_unknown; + } +} + +/* Parses a timestamp (ignoring the time zone part). + If "dest" is not null, the parsed time will be returned there. + Return value is NULL on success, else an error message */ + +const char * parse_time(const char *source, struct timeval *dest) { + int i; + static struct tm tm; /* initialize with zeros */ + char s[24] = { '\0' }; /* modifiable copy of source */ + static char errmsg[BUFLEN]; + /* format of timestamp part */ + static char format[] = "nnnn-nn-nn nn:nn:nn"; + + /* check timestamp for validity */ + if (!source) { + strcpy(errmsg, "NULL passed as timestamp string"); + return errmsg; + } + + if (strlen(source) < strlen(format)) { + sprintf(errmsg, "timestamp string is less than %lu characters long", (unsigned long)strlen(format)); + return errmsg; + } + + if (strlen(source) >= BUFLEN) { + sprintf(errmsg, "timestamp string is more than %d characters long", BUFLEN-1); + return errmsg; + } + + for (i=0; i '9')) { + sprintf(errmsg, "character %d in timestamp string is '%c', expected digit", i+1, source[i]); + return errmsg; + } else + s[i] = source[i]; + break; + default: + if (source[i] != format[i]) { + sprintf(errmsg, "character %d in timestamp string is '%c', expected '%c'", i+1, source[i], format[i]); + return errmsg; + } else + s[i] = '\0'; /* tokenize parts */ + } + } + + /* parse time into 
'tm' */ + tm.tm_year = atoi(s) - 1900; + tm.tm_mon = atoi(s + 5) - 1; + tm.tm_mday = atoi(s + 8); + tm.tm_hour = atoi(s + 11); + tm.tm_min = atoi(s + 14); + tm.tm_sec = atoi(s + 17); + tm.tm_isdst = 0; /* ignore daylight savings time */ + + if (dest) { + dest->tv_sec = mktime(&tm) - epoch; + dest->tv_usec = atoi(s + 20) * 1000; + } + + return NULL; +} + +static char * parse_session(const char *source, uint64_t *dest) { + char s[BUFLEN]; /* modifiable copy of source */ + static char errmsg[BUFLEN]; + char *s1 = NULL, c; + uint32_t part1, part2; + int i; + + /* check input for validity */ + if (!source) { + strcpy(errmsg, "NULL passed as session id string"); + return errmsg; + } + + if (strlen(source) > BUFLEN -1) { + sprintf(errmsg, "session id string is more than %d characters long", BUFLEN); + return errmsg; + } + + for (i=0; i<=strlen(source); ++i) { + c = source[i]; + if (('.' == c) && (! s1)) { + s[i] = '\0'; + s1 = s + i + 1; + } else if (((c < '0') || (c > '9')) && ((c < 'a') || (c > 'f')) && ('\0' != c)) { + sprintf(errmsg, "character %d in session id string is '%c', expected hex digit", i+1, c); + return errmsg; + } else + s[i] = c; + } + + if (! 
s1) { + strcpy(errmsg, "Missing \".\" in session id string"); + return errmsg; + } + + if ((strlen(s) > 8) || (strlen(s1) > 8)) { + strcpy(errmsg, "none of the parts of a session id string may be longer than 8 hex digits"); + return errmsg; + } + + /* convert both parts */ + sscanf(s, UINT32_FORMAT, &part1); + sscanf(s1, UINT32_FORMAT, &part2); + *dest = (((uint64_t)part1) << 32) + part2; + + return NULL; +} + +/* reads one log entry from the input file + the result is a malloc'ed string that must be freed + a return value of NULL means that there was an error */ + +static char * read_log_line() { + char *line, buf[BUFLEN] = { '\0' }, *p; + int len, escaped = 0, nl_found = 0, line_size = 0, i, l; + ssize_t bytes_read; + /* this will contain stuff we have read from the file but not used yet */ + static char peekbuf[BUFLEN] = { '\0' }; + static int peeklen = 0; + + debug(3, "Entering read_log_line, current line number %lu\n", lineno+1); + + /* pre-allocate the result to length 1 */ + if (NULL == (line = malloc(1))) { + fprintf(stderr, "Cannot allocate 1 byte of memory\n"); + return NULL; + } + *line = '\0'; + + while (! 
nl_found) { + /* if there were any chars left from the last invokation, use them first */ + len = peeklen; + if (len) { + strcpy(buf, peekbuf); + peekbuf[0] = '\0'; + } + peeklen = 0; + + /* read from file until buf is full (at most) */ + if (len < BUFLEN - 1) { + if (-1 == (bytes_read = read(infile, buf + len, BUFLEN - 1 - len))) { + perror("Error reading from input file"); + return NULL; + } + len += bytes_read; + buf[len] = '\0'; + } + + /* if there is still nothing, we're done */ + if (0 == len) { + debug(2, "Encountered EOF%s\n", ""); + debug(3, "Leaving read_log_line%s\n", ""); + return line; + } + + /* search the string for unescaped newlines */ + for (p=buf; *p!='\0'; ++p) { + if (csv && ('"' == *p)) { + escaped = !escaped; + } + /* keep up with line count */ + lineno += ('\n' == *p); + + /* look for unescaped newline */ + if (!escaped && ('\n' == *p)) { + /* if a newline is found, truncate the string + and prepend the rest to peekbuf */ + l = len - (++p - buf); + /* right shift peekbuf by l */ + for (i=peeklen; i>=0; --i) { + peekbuf[l+i] = peekbuf[i]; + } + strncpy(peekbuf, p, l); + *p = '\0'; + peeklen += len - (p - buf); + len = p - buf; + if (csv) { + /* for a CSV file, this must be the end of the log entry */ + nl_found = 1; + break; /* out from the for loop */ + } else { + /* in a stderr log file, we must check for a + continuation line (newline + tab) */ + /* first, make sure there is something to peek at */ + if (0 == peeklen) { + /* try to read one more byte from the file */ + if (-1 == (bytes_read = read(infile, peekbuf, 1))) { + perror("Error reading from input file"); + return NULL; + } + if (0 == bytes_read) { + /* EOF means end of log entry */ + nl_found = 1; + break; /* out from the for loop */ + } else { + peeklen = bytes_read; + peekbuf[peeklen] = '\0'; + } + } + /* then check for a continuation tab */ + if ('\t' == *peekbuf) { + /* continuation line, remove tab + and copy peekbuf back to buf */ + strncpy(p--, peekbuf + 1, BUFLEN - 1 - 
len); + if (peeklen > BUFLEN - len) { + overlap_strcpy(peekbuf, peekbuf + (BUFLEN - len)); + peeklen = peeklen - BUFLEN + len; + len = BUFLEN - 1; + } else { + *peekbuf = '\0'; + len += peeklen - 1; + peeklen = 0; + } + buf[len] = '\0'; + } else { + /* end of log entry reached */ + nl_found = 1; + break; /* out from the for loop */ + } + } + } + } + /* extend result line and append buf */ + line_size += len; + if (NULL == (p = realloc(line, line_size+1))) { + fprintf(stderr, "Cannot allocate %d bytes of memory\n", line_size); + free(line); line = NULL; + return NULL; + } + line = p; + strcat(line, buf); + *buf = '\0'; + len = 0; + } + + /* remove trailing newline in result if present */ + if ('\n' == line[line_size - 1]) { + line[line_size - 1] = '\0'; + } + + debug(3, "Leaving read_log_line%s\n", ""); + + return line; +} + +/* parses the next stderr log entry (and maybe a detail message after that) + timestamp, user, database, session ID, log message type, log message + and detail message are returned in the respective parameters + "message" and "detail" are malloc'ed if they are not NULL + return values: -1 (error), 0 (end-of-file), or 1 (success) */ + +static int parse_errlog_entry(struct timeval *time, char *user, char *database, uint64_t *session_id, log_type *type, char **message, char **detail) { + char *line = NULL, *part2, *part3, *part4, *part5, *part6; + const char *errmsg; + int i, skip_line = 0; + static int dump_found = 0; + /* if not NULL, contains the next log entry to parse */ + static char* keepline = NULL; + + debug(3, "Entering parse_errlog_entry%s\n", ""); + + /* initialize message and detail with NULL */ + *message = NULL; + *detail = NULL; + + /* use cached line or read next line from log file */ + if (keepline) { + line = keepline; + keepline = NULL; + } else { + /* read lines until we are between start_time and end_time */ + do { + if (line) { + free(line); + } + if (NULL == (line = read_log_line())) { + return -1; + } + + /* is it the 
start of a memory dump? */ + if (0 == strncmp(line, "TopMemoryContext: ", 18)) { + fprintf(stderr, "Found memory dump in line %lu\n", lineno); + dump_found = 1; + skip_line = 1; + } else { + /* if there is a dump and the line starts blank, + assume the line is part of the dump + */ + if (dump_found && (' ' == *line)) { + skip_line = 1; + } else { + skip_line = 0; + } + } + } while (('\0' != *line) + && (skip_line + || (start_time && (strncmp(line, start_time, 23) < 0)))); + } + + /* check for EOF */ + if (('\0' == *line) || (end_time && (strncmp(line, end_time, 23) > 0))) { + free(line); + debug(3, "Leaving parse_errlog_entry%s\n", ""); + return 0; + } + + /* split line on | in six pieces: time, user, database, session ID, log entry type, rest */ + if (NULL == (part2 = strchr(line, SEPCHAR))) { + fprintf(stderr, "Error parsing line %lu: no \"%c\" found - log_line_prefix may be wrong\n", lineno, SEPCHAR); + free(line); + return -1; + } else { + *(part2++) = '\0'; + } + + if (NULL == (part3 = strchr(part2, SEPCHAR))) { + fprintf(stderr, "Error parsing line %lu: second \"%c\" not found - log_line_prefix may be wrong\n", lineno, SEPCHAR); + free(line); + return -1; + } else { + *(part3++) = '\0'; + } + + if (NULL == (part4 = strchr(part3, SEPCHAR))) { + fprintf(stderr, "Error parsing line %lu: third \"%c\" not found - log_line_prefix may be wrong\n", lineno, SEPCHAR); + free(line); + return -1; + } else { + *(part4++) = '\0'; + } + + if (NULL == (part5 = strchr(part4, SEPCHAR))) { + fprintf(stderr, "Error parsing line %lu: fourth \"%c\" not found - log_line_prefix may be wrong\n", lineno, SEPCHAR); + free(line); + return -1; + } else { + *(part5++) = '\0'; + } + + if (NULL == (part6 = strstr(part5, ": "))) { + fprintf(stderr, "Error parsing line %lu: log message does not begin with a log type\n", lineno); + free(line); + return -1; + } else { + *part6 = '\0'; + part6 += 3; + } + + /* first part is the time, parse it into parameter */ + if ((errmsg = parse_time(line, 
time))) { + fprintf(stderr, "Error parsing line %lu: %s\n", lineno, errmsg); + free(line); + return -1; + } + + /* second part is the username, copy to parameter */ + if (NAMELEN < strlen(part2)) { + fprintf(stderr, "Error parsing line %lu: username exceeds %d characters\n", lineno, NAMELEN); + free(line); + return -1; + } else { + strcpy(user, part2); + } + + /* third part is the database, copy to parameter */ + if (NAMELEN < strlen(part3)) { + fprintf(stderr, "Error parsing line %lu: database name exceeds %d characters\n", lineno, NAMELEN); + free(line); + return -1; + } else { + strcpy(database, part3); + } + + /* fourth part is the session ID, copy to parameter */ + if ((errmsg = parse_session(part4, session_id))) { + fprintf(stderr, "Error parsing line %lu: %s\n", lineno, errmsg); + free(line); + return -1; + } + + /* fifth part is the log type, copy to parameter */ + *type = to_log_type(part5); + + /* sixth part is the log message */ + overlap_strcpy(line, part6); + *message = line; + + /* read the next log entry so that we can peek at it */ + line = NULL; + do { + if (NULL != line) { + free(line); + } + if (NULL == (line = read_log_line())) { + free(*message); + *message = NULL; + return -1; + } + + /* is it the start of a memory dump? 
*/ + if (0 == strncmp(line, "TopMemoryContext: ", 18)) { + fprintf(stderr, "Found memory dump in line %lu\n", lineno); + dump_found = 1; + skip_line = 1; + } else { + /* if there is a dump and the line starts blank, + assume the line is part of the dump + */ + if (dump_found && (' ' == *line)) { + skip_line = 1; + } else { + skip_line = 0; + } + } + } while (('\0' != *line) && skip_line); + + if ('\0' == *line) { + /* EOF, that's ok */ + keepline = line; + } else { + /* skip four | to the fifth part */ + part2 = line; + for (i=0; i<4; ++i) { + if (NULL == (part2 = strchr(part2, SEPCHAR))) { + fprintf(stderr, "Error parsing line %lu: only %d \"%c\" found - log_line_prefix may be wrong\n", lineno, i, SEPCHAR); + free(*message); + free(line); + *message = NULL; + return -1; + } else { + ++part2; + } + } + + /* check if it is a DETAIL */ + if (strncmp(part2, "DETAIL: ", 9)) { + /* if not, remember the line for the next pass */ + keepline = line; + } else { + debug(2, "Found a DETAIL message%s\n", ""); + + /* set the return parameter to the detail message */ + overlap_strcpy(line, part2 + 9); + *detail = line; + } + } + + debug(3, "Leaving parse_errlog_entry%s\n", ""); + return 1; +} + +/* parses the next CSV log entry + timestamp, user, database, session ID, log message type, log message + and detail message are returned in the respective parameters + "message" is malloc'ed, "detail" not + return values: -1 (error), 0 (end-of-file), or 1 (success) */ + +static int parse_csvlog_entry(struct timeval *time, char *user, char *database, uint64_t *session_id, log_type *type, char **message, char **detail) { + char *line = NULL, *part[16], *p1, *p2; + const char *errmsg; + int i, escaped = 0; + + debug(3, "Entering parse_csvlog_entry%s\n", ""); + + /* initialize message and detail with NULL */ + *message = NULL; + *detail = NULL; + + /* read next line after start timestamp from log file */ + do { + if (line) { + free(line); + } + if (NULL == (line = read_log_line())) { + 
return -1; + } + } while (('\0' != *line) + && (start_time && (strncmp(line, start_time, 23) < 0))); + + /* check for EOF */ + if (('\0' == *line) || (end_time && (strncmp(line, end_time, 23) > 0))) { + free(line); + debug(3, "Leaving parse_errlog_entry%s\n", ""); + return 0; + } + + /* parse first 15 parts from the CSV record */ + part[0] = p1 = line; + for (i=1; i<16; ++i) { + p2 = p1; + /* copy p1 to p2 until we hit an unescaped comma, + remove escaping double quotes */ + while (escaped || (',' != *p1)) { + switch (*p1) { + case '\0': + fprintf(stderr, "Error parsing line %lu: comma number %d not found (or unmatched quotes)\n", lineno, i); + free(line); + return -1; + case '"': + /* don't copy the first double quote */ + if (!escaped && (p1 != part[i-1])) { + *(p2++) = '"'; + } + ++p1; + escaped = !escaped; + break; + default: + *(p2++) = *(p1++); + } + } + *p2 = '\0'; + part[i] = ++p1; + } + + /* first part is the time, parse it into parameter */ + if ((errmsg = parse_time(part[0], time))) { + fprintf(stderr, "Error parsing line %lu: %s\n", lineno, errmsg); + free(line); + return -1; + } + + /* second part is the username, copy to parameter */ + if (NAMELEN < strlen(part[1])) { + fprintf(stderr, "Error parsing line %lu: username exceeds %d characters\n", lineno, NAMELEN); + free(line); + return -1; + } else { + strcpy(user, part[1]); + } + + /* third part is the database, copy to parameter */ + if (NAMELEN < strlen(part[2])) { + fprintf(stderr, "Error parsing line %lu: database name exceeds %d characters\n", lineno, NAMELEN); + free(line); + return -1; + } else { + strcpy(database, part[2]); + } + + /* sixth part is the session ID, copy to parameter */ + if ((errmsg = parse_session(part[5], session_id))) { + fprintf(stderr, "Error parsing line %lu: %s\n", lineno, errmsg); + free(line); + return -1; + } + + /* twelfth part is the log type, copy to parameter */ + *type = to_log_type(part[11]); + + /* fourteenth part is the message, assign to output parameter */ + 
overlap_strcpy(line, part[13]); + *message = line; + + /* detail is the fifteenth part of the line, if not empty */ + *detail = part[14]; + if ('\0' == **detail) { + *detail = NULL; + } + + debug(3, "Leaving parse_csvlog_entry%s\n", ""); + return 1; +} + +/* add (malloc) the prepared statement name to the list of + prepared statements for the connection + returns 0 if the statement already existed, 1 if it was added and -1 if there was an error */ + +static int add_pstmt(struct connection * conn, char const *name) { + struct prep_stmt *pstmt = conn->statements; + int rc; + + debug(3, "Entering add_pstmt for statement \"%s\"\n", name); + + if ('\0' == *name) { + /* the empty statement will never be stored, but should be prepared */ + rc = 1; + + /* count for statistics */ + ++stat_param; + } else { + while (pstmt && strcmp(pstmt->name, name)) { + pstmt = pstmt->next; + } + + if (pstmt) { + /* statement already prepared */ + debug(2, "Prepared statement is already in list%s\n", ""); + rc = 0; + } else { + debug(2, "Adding prepared statement to list%s\n", ""); + /* add statement name to linked list */ + if (NULL == (pstmt = malloc(sizeof(struct prep_stmt)))) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)sizeof(struct prep_stmt)); + return -1; + } + if (NULL == (pstmt->name = malloc(strlen(name) + 1))) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)strlen(name) + 1); + free(pstmt); + return -1; + } + strcpy(pstmt->name, name); + pstmt->next = conn->statements; + conn->statements = pstmt; + rc = 1; + + /* count for statistics */ + ++stat_named; + } + /* count for statistics */ + ++stat_execnamed; + } + + debug(3, "Leaving add_pstmt%s\n", ""); + return rc; +} + +/* remove (free) the prepared statement name to the list of + prepared statements for the connection */ + +static void remove_pstmt(struct connection * conn, char const *name) { + struct prep_stmt *pstmt = conn->statements, *pstmt2 = NULL; + + debug(3, 
"Entering remove_pstmt for statement \"%s\"\n", name); + + while (pstmt && strcmp(pstmt->name, name)) { + pstmt2 = pstmt; /*remember previous */ + pstmt = pstmt->next; + } + + if (pstmt) { + if (pstmt2) { + pstmt2->next = pstmt->next; + } else { + conn->statements = pstmt->next; + } + free(pstmt->name); + free(pstmt); + } else { + debug(2, "Prepared statement not found%s\n", ""); + } + + debug(3, "Leaving remove_pstmt%s\n", ""); + return; +} + +static void remove_all_pstmts(struct connection * conn) { + struct prep_stmt *pstmt = conn->statements, *pstmt2 = NULL; + + debug(3, "Entering remove_all_pstmts%s\n", ""); + + while(pstmt) { + pstmt2 = pstmt; + pstmt = pstmt->next; + free(pstmt2->name); + free(pstmt2); + } + conn->statements = NULL; + + debug(3, "Leaving remove_all_pstmts%s\n", ""); + return; +} + +/* remove all "COPY" and "SET client_encoding" statements; + for DEALLOCATE statements, try to remove prepared statement */ + +/* maximum number of tokens we need to analyze a statement */ +#define MAX_TOKENS 3 + +static int filter_bad_statements(char *line, struct connection *conn) { + char *statement = line, *p = line, token[MAX_TOKENS][NAMELEN + 1], + *q = NULL, *quote, *h; + int comment_depth, tokens = 0, ok = 1, i, nameindex, quotelen; + + debug(3, "Entering filter_bad_statements%s\n", ""); + + for (i=0; i 0) { + /* count parsed simple statements */ + ++stat_simple; + + /* remove statements that won't work */ + + if (! strcmp("copy", token[0])) { + fprintf(stderr, "Warning: COPY statement ignored in line %lu\n", lineno); + /* replace statement with blanks */ + while (statement < p) { + *(statement++) = ' '; + } + + /* count for statistics */ + ++stat_copy; + } else if ((tokens > 1) && (! strcmp("set", token[0])) && (! strcmp("client_encoding", token[1]))) { + fprintf(stderr, "Warning: \"SET client_encoding\" statement ignored in line %lu\n", lineno); + /* replace statement with blanks */ + while (statement < p) { + *(statement++) = ' '; + } + } else if (! 
strcmp("deallocate", token[0])) { + /* there coule be a "prepare" in the second token, should be ignored */ + if (strcmp("prepare", token[1])) { + nameindex = 1; + } else { + nameindex = 2; + } + if (strcmp("all", token[nameindex])) { + /* deallocate single statement */ + debug(2, "Deallocating prepared statement \"%s\"\n", token[nameindex]); + remove_pstmt(conn, token[nameindex]); + } else { + /* deallocate all prepared statements */ + debug(2, "Deallocating all prepared statements%s\n", ""); + remove_all_pstmts(conn); + } + } + } + + /* break out of loop if end-of-line is reached */ + if ('\0' == *p) { + break; + } + + /* else prepare for next statement */ + statement = ++p; + for (i=0; i *(p+1)) || (('9' < *(p+1))))) { + /* dollar quoted string constant; skip to end */ + quote = p++; + while (('$' != *p) && ('\0' != *p)) { + ++p; + } + if ('\0' == *p) { + fprintf(stderr, "Error: end of dollar quote not found in line %lu\n", lineno); + ok = 0; + } else { + quotelen = p - quote; + *p = '\0'; + h = p; + do { + h = strstr(++h, quote); + } while ((NULL != h) && ('$' != *(h + quotelen))); + *p = '$'; + if (NULL == h) { + fprintf(stderr, "Error: end of dollar quoted string found in line %lu\n", lineno); + ok = 0; + } else { + p = h + (quotelen + 1); + } + } + } else if (('-' == *p) && ('-' == p[1])) { + /* comment; skip to end of line or statement */ + while (('\n' != *p) && ('\0' != *p)) { + ++p; + } + } else if (('/' == *p) && ('*' == p[1])) { + /* comment, skip to matching end */ + p += 2; + comment_depth = 1; /* comments can be nested */ + while (0 != comment_depth) { + if ('\0' == *p) { + fprintf(stderr, "Error: comment not closed in line %lu\n", lineno); + ok = 0; + break; + } else if (('*' == *p) && ('/' == p[1])) { + --comment_depth; + p += 2; + } else if (('/' == *p) && ('*' == p[1])) { + ++comment_depth; + p += 2; + } else { + ++p; + } + } + } else if ('"' == *p) { + /* quoted identifier, copy to token if necessary */ + if (tokens < MAX_TOKENS) { + q = 
token[tokens]; + } + while (1) { + ++p; + if ('\0' == *p) { + fprintf(stderr, "Error: quoted identifier not closed in line %lu\n", lineno); + ok = 0; + break; + } else if ('"' == *p) { + if ('"' == p[1]) { + /* double " means a single " in a quoted identifier */ + if ((tokens < MAX_TOKENS) && (q - token[tokens] < NAMELEN)) { + *(q++) = '"'; + } + ++p; + } else { + /* end of token */ + if (tokens < MAX_TOKENS) { + *q = '\0'; + ++tokens; + } + ++p; + break; + } + } else { + /* normal character, copy to token */ + if ((tokens < MAX_TOKENS) && (q - token[tokens] < NAMELEN)) { + *(q++) = *p; + } + } + } + } else if ((('A' <= *p) && ('Z' >= *p)) + || (('a' <= *p) && ('z' >= *p)) + || (127 < (unsigned char)(*p)) /* consider > 127 as letter */ + || ('_' == *p)) { + /* normal identifier, copy to token if necessary */ + if (tokens < MAX_TOKENS) { + q = token[tokens]; + } + while ((('A' <= *p) && ('Z' >= *p)) + || (('a' <= *p) && ('z' >= *p)) + || (('0' <= *p) && ('9' >= *p)) + || (127 < (unsigned char)(*p)) /* consider > 127 as letter */ + || ('_' == *p) || ('$' == *p)) { + if ((tokens < MAX_TOKENS) && (q - token[tokens] < NAMELEN)) { + /* convert to lowercase */ + *(q++) = *p + ('a' - 'A') * ((*p >= 'A') && (*p <= 'Z')); + } + ++p; + } + *q = '\0'; + ++tokens; + } else { + /* everything else is considered unimportant */ + ++p; + } + } + + debug(3, "Leaving filter_bad_statements%s\n", ""); + return ok; +} + +/* check if there is a connection for this session_id + if not, a new connection replay_item is generated and returned in r + return values: found or created hash entry for success, NULL for failure */ + +static struct connection *add_connection(replay_item **r, struct timeval *time, const char *user, const char *database, uint64_t session_id) { + unsigned char hash; + struct connection *conn; + + hash = hash_session(session_id); + conn = open_conn[hash]; + + debug(3, "Entering add_connection for session 0x" UINT64_FORMAT "\n", session_id); + + while (conn && 
(conn->session_id != session_id)) { + conn = conn->next; + } + + if (conn) { + /* session already exists */ + *r = NULL; + } else { + /* session doesn't exist yet; create it and add it to hash table */ + if (NULL == (*r = replay_create_connect(time, session_id, user, database))) { + /* something went wrong */ + return NULL; + } else { + if ((conn = malloc(sizeof(struct connection)))) { + conn->next = open_conn[hash]; + conn->statements = NULL; + conn->session_id = session_id; + open_conn[hash] = conn; + } else { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)sizeof(struct connection)); + replay_free(*r); + return NULL; + } + } + } + + debug(3, "Leaving add_connection%s\n", ""); + return conn; +} + +/* remove a connection from the hash structure + returns 1 for success, 0 for failure */ + +static int remove_connection(uint64_t session_id) { + unsigned char hash; + struct connection *conn, *conn2 = NULL; + + hash = hash_session(session_id); + conn = open_conn[hash]; + + debug(3, "Entering remove_connection for session 0x" UINT64_FORMAT "\n", session_id); + + while (conn && (conn->session_id != session_id)) { + conn2 = conn; /* remember previous */ + conn = conn->next; + } + + if (conn) { + remove_all_pstmts(conn); + if (conn2) { + conn2->next = conn->next; + } else { + open_conn[hash] = conn->next; + } + free(conn); + } + + debug(3, "Leaving remove_connection%s\n", ""); + + return (NULL != conn); +} + +/* tokenizes the "line" into single arguments, pointers to which + are stored in "result" (which is malloc'ed and must be freed) + returns the number of arguments and -1 if there is an error + Note that the strings that "result" points to are *not* malloc'ed, + but part of "line" */ + +static int parse_bind_args(char *** const result, char *line) { + int count = 0; + char *p, *p1; + + *result = NULL; + + debug(3, "Entering parse_bind_args%s\n", ""); + + /* count the dollar signs in the line = upper limit for number of arguments */ + for 
(p=line; '\0'!=*p; ++p) { + if ('$' == *p) { + ++count; + } + } + /* if there is none, we are done */ + if (! count) { + return 0; + } + + /* allocate result */ + if (NULL == (*result = calloc(count, sizeof(char **)))) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)sizeof(char **)); + return -1; + } + + /* reset count - now we will really count */ + count = 0; + + /* loop until end of the line */ + p = line; + while ('\0' != *p) { + /* expect "$ = " */ + if ('$' != *(p++)) { + fprintf(stderr, "Error: no dollar sign found in argument %d in line %lu\n", count, lineno); + free(*result); + *result = NULL; + return -1; + } + while (('0' <= *p) && ('9' >= *p)) { + ++p; + } + if ((' ' != p[0]) || ('=' != p[1]) || (' ' != p[2])) { + fprintf(stderr, "Error: bad format in argument %d in line %lu\n", count, lineno); + free(*result); + *result = NULL; + return -1; + } + p += 3; + + if ('\'' == *p) { + /* normal argument in single quotes */ + (*result)[count] = p1 = ++p; + + /* eliminate double single quotes and search for ending single quote */ + while (*p1) { + if ('\0' == *p) { + fprintf(stderr, "Error: unexpected end of line in argument %d in line %lu\n", count, lineno); + free(*result); + *result = NULL; + return -1; + } + if ('\'' == *p) { + /* single quote encountered */ + if ('\'' == p[1]) { + /* double single quote, skip one */ + *(p1++) = '\''; + ++p; + } else { + /* end of argument */ + *p1 = '\0'; + } + } else { + /* normal character */ + *(p1++) = *p; + } + ++p; + } + } else { + /* must me NULL argument */ + if (('N' == p[0]) && ('U' == p[1]) && ('L' == p[2]) && ('L' == p[3])) { + (*result)[count] = NULL; + p += 4; + } else { + fprintf(stderr, "Error: expected NULL in argument %d in line %lu\n", count, lineno); + free(*result); + *result = NULL; + return -1; + } + } + + /* skip ", " if present */ + if ((',' == p[0]) && (' ' == p[1])) { + p += 2; + } + + ++count; + } + + debug(2, "Parsed %d arguments\n", count); + debug(3, "Leaving 
parse_bind_args%s\n", ""); + + return count; +} + +static void print_parse_statistics() { + int hours, minutes; + double seconds; + struct timeval delta; + + fprintf(sf, "\nParse statistics\n"); + fprintf(sf, "================\n\n"); + fprintf(sf, "Log lines read: %lu\n", lineno); + fprintf(sf, "Total SQL statements processed: %lu\n", stat_simple + stat_param + stat_execnamed); + fprintf(sf, "Simple SQL statements processed: %lu\n", stat_simple); + if (stat_copy) { + fprintf(sf, "(includes %lu ignored copy statements)\n", stat_copy); + } + fprintf(sf, "Parametrized SQL statements processed: %lu\n", stat_param); + fprintf(sf, "Named prepared SQL statements executions processed: %lu\n", stat_execnamed); + if (stat_named) { + fprintf(sf, "Different named prepared SQL statements processed: %lu\n", stat_named); + fprintf(sf, "(average reuse count %.3f)\n", (double)(stat_execnamed - stat_named) / stat_named); + } + fprintf(sf, "Cancel requests processed: %lu\n", stat_cancel); + fprintf(sf, "Fast-path function calls ignored: %lu\n", stat_fastpath); + + /* calculate lengh of the recorded workload */ + timersub(&last_stmt_time, &first_stmt_time, &delta); + hours = delta.tv_sec / 3600; + delta.tv_sec -= hours * 3600; + minutes = delta.tv_sec / 60; + delta.tv_sec -= minutes * 60; + seconds = delta.tv_usec / 1000000.0 + delta.tv_sec; + + fprintf(sf, "Duration of recorded workload:"); + if (hours > 0) { + fprintf(sf, " %d hours", hours); + } + if (minutes > 0) { + fprintf(sf, " %d minutes", minutes); + } + fprintf(sf, " %.3f seconds\n", seconds); +} + +int parse_provider_init(const char *in, int parse_csv, const char *begin, const char *end, const char *db_only, const char *usr_only) { + static struct tm tm; /* initialize with zeros */ + int rc = 1; + + debug(3, "Entering parse_provider_init%s\n", ""); + + if (NULL == in) { + infile = 0; + } else { + if (-1 == (infile = open(in, O_RDONLY))) { + perror("Error opening input file"); + rc = 0; + } + } + + csv = parse_csv; + 
start_time = begin; + end_time = end; + database_only = db_only; + username_only = usr_only; + + /* initialize epoch with 2000-01-01 00:00:00 */ + tm.tm_year = 2000 - 1900; + tm.tm_mon = 1 - 1; + tm.tm_mday = 1; + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + tm.tm_isdst = 0; /* ignore daylight savings time */ + epoch = mktime(&tm); + + debug(3, "Leaving parse_provider_init%s\n", ""); + + return rc; +} + +void parse_provider_finish() { + debug(3, "Entering parse_provider_finish%s\n", ""); + + if (0 != infile) { + if (close(infile)) { + perror("Error closing input file:"); + } + } + + if (sf) { + print_parse_statistics(); + } + + debug(3, "Leaving parse_provider_finish%s\n", ""); +} + +/* the replay_item is malloc'ed and must be freed with replay_free() + will return NULL if an error occurred */ + +replay_item * parse_provider() { + replay_item *r = NULL; + char *message = NULL, *detail = NULL, *statement = NULL, *namep = NULL, name[NAMELEN + 1], **args, user[NAMELEN + 1], database[NAMELEN + 1], quote_name[NAMELEN + 3]; + log_type logtype; + uint64_t session_id; + int count, i; + /* possible stati: -1 = error, 0 = looking for log line, 1 = interesting line found + 2 = using cached value, 3 = EOF */ + int status = 0; + replay_type type = -1; /* -1 is an impossible value */ + struct timeval time; + /* remember time from last parsed line */ + static struct timeval oldtime = { 0, 0 }; + struct connection *conn = NULL; + /* queue of up to two replay items */ + static replay_item *queue[2] = { NULL, NULL }; + static int first_stmt_time_set = 0; + + debug(3, "Entering parse_provider%s\n", ""); + + if (queue[0]) { + /* if there is something in the queue, return it */ + debug(2, "Queue is not empty, returning top element%s\n", ""); + r = queue[0]; + queue[0] = queue[1]; + queue[1] = NULL; + + status = 2; + } + + /* read a log entry until we find an interesting one */ + while (0 == status) { + int n = 0; + + n = (*parse_log_entry[csv])(&time, user, database, 
&session_id, &logtype, &message, &detail); + + switch (n) { + case 0: + /* EOF encountered */ + status = 3; + break; + case 1: + memset(quote_name, '\0', NAMELEN + 3); + if ('\0' != *database) { + quote_name[0] = '\\'; + strcat(quote_name, database); + strcat(quote_name, "\\"); + } + if ((NULL != database_only) && (NULL == strstr(database_only, quote_name))) { + debug(2, "Database \"%s\" does not match filter, skipped log entry\n", database); + free(message); + if (! csv && detail) { + free(detail); + } + break; + } + memset(quote_name, '\0', NAMELEN + 3); + if ('\0' != *user) { + quote_name[0] = '\\'; + strcat(quote_name, user); + strcat(quote_name, "\\"); + } + if ((NULL != username_only) && (NULL == strstr(username_only, quote_name))) { + debug(2, "User \"%s\" does not match filter, skipped log entry\n", user); + free(message); + if (! csv && detail) { + free(detail); + } + break; + } + + /* check line prefix to determine type */ + if ((log_log == logtype) && (! strncmp(message, "connection authorized: ", 23))) { + debug(2, "Connection 0x" UINT64_FORMAT " found\n", session_id); + type = pg_connect; + statement = message + 23; + status = 1; + } else if ((log_log == logtype) && (! strncmp(message, "disconnection: ", 15))) { + debug(2, "Disconnection found%s\n", ""); + type = pg_disconnect; + statement = message + 15; + status = 1; + } else if ((log_log == logtype) && (! strncmp(message, "statement: ", 11))) { + debug(2, "Simple statement found%s\n", ""); + type = pg_execute; + statement = message + 11; + status = 1; + } else if ((log_log == logtype) && (! strncmp(message, "execute ", 8))) { + debug(2, "Prepared statement execution found%s\n", ""); + type = pg_exec_prepared; + namep = message + 8; + if ((statement = strchr(namep, ':'))) { + /* split in name and statement */ + *(statement++) = '\0'; + ++statement; + /* check for unnamed statement, change name to empty string */ + if (! 
strcmp(namep, "")) { + *namep = '\0'; + } + if(polardb_audit){ + type = pg_execute; + } + status = 1; + } else { + fprintf(stderr, "Error: missing statement name in line %lu\n", lineno); + status = -1; + } + } else if ((log_error == logtype) && (! strncmp(message, "canceling statement due to user request", 39))) { + debug(2, "Cancel request found%s\n", ""); + type = pg_cancel; + status = 1; + } else if ((log_log == logtype) && (! strncmp(message, "fastpath function call: ", 24))) { + free(message); + if (! csv && detail) { + free(detail); + } + fprintf(stderr, "Warning: fast-path function call ignored in line %lu\n", lineno); + + /* count for statistics */ + ++stat_fastpath; + } else { + free(message); + if (! csv && detail) { + free(detail); + } + debug(2, "Skipped log entry%s\n", ""); + } + break; + default: + /* something went wrong */ + status = -1; + } + } + + /* if everything is ok so far, search for a connection in our list + add one if there is none so far, store the "connect" replay_item in r */ + if (1 == status) { + /* we need a connection in any case */ + if ((conn = add_connection(&r, &time, user, database, session_id))) { + if (r && (pg_connect != type)) { + debug(2, "Automatically generated connection%s\n", ""); + } + if ((!r) && (pg_connect == type)) { + /* if the connection already existed and we read + a connect line, that is an error */ + fprintf(stderr, "Error: duplicate session ID 0x" UINT64_FORMAT "\n", session_id); + status = -1; + } + } else { + /* error occurred */ + status = -1; + } + } + + /* if everything is ok so far, process line according to type + result will be stored in queue[0] and queue[1] */ + if (1 == status) { + switch (type) { + case pg_connect: + /* we are status! */ + break; + case pg_disconnect: + if (NULL == (queue[0] = replay_create_disconnect(&time, session_id))) { + status = -1; + } + /* remove connection from hash structure */ + if (! 
remove_connection(session_id)) { + /* can't happen */ + fprintf(stderr, "Error: cannot remove connection " UINT64_FORMAT " from hash table\n", session_id); + status = -1; + } + break; + case pg_execute: + if (filter_bad_statements(statement, conn)) { + if (NULL == (queue[0] = replay_create_execute(&time, session_id, statement))) { + status = -1; + } + } else { + status = -1; + } + break; + case pg_exec_prepared: + /* we don't need to filter, since it can only be a single statement + and neither COPY nor SET can be prepared statements */ + + /* make a persistent copy of the statement name */ + strcpy(name, namep); + + /* see if this is a new statement */ + switch (add_pstmt(conn, name)) { + case 1: /* new */ + if (NULL == (queue[0] = replay_create_prepare(&time, session_id, name, statement))) { + status = -1; + } + /* the detail message is read later */ + break; + case 0: /* statement already exists */ + /* the detail message is read later */ + break; + default: /* error */ + status = -1; + } + break; + case pg_cancel: + if (NULL == (queue[0] = replay_create_cancel(&time, session_id))) { + status = -1; + } + + /* count for statistics */ + ++stat_cancel; + + break; + default: + /* can't happen */ + fprintf(stderr, "Error: impossible type parsing line %lu\n", lineno); + status = -1; + } + } + + /* read and process the DETAIL message for a prepared statement */ + if ((1 == status) && (pg_exec_prepared == type)) { + + if (! 
detail) { + /* no DETAIL message --> statement has no parameters */ + debug(2, "Prepared statement \"%s\" has no bind arguments\n", name); + if (NULL == (queue[1] = replay_create_exec_prepared(&time, session_id, name, 0, NULL))) { + status = -1; + } + } else if (strncmp(detail, "parameters: ", 12)) { + fprintf(stderr, "Error: no parameters for prepared statement at line %lu\n", lineno); + status = -1; + } else { + debug(2, "Reading bind arguments for prepared statement \"%s\"\n", name); + statement = detail + 12; + if (-1 == (count = parse_bind_args(&args, statement))) { + status = -1; + } else { + if (NULL == (queue[1] = replay_create_exec_prepared(&time, session_id, name, count, args))) { + status = -1; + } + free(args); + } + } + } + + if (! csv && detail) { + free(detail); + } + if (message) { + free(message); + } + + /* if EOF, close all connections that are still open */ + if (3 == status) { + /* search for entry in connection hash table */ + for (i=0; i<256; ++i) { + if (open_conn[i]) { + /* entry found, create disconnect item */ + debug(2, "End-of-file encountered, creating disconnect item for session 0x" UINT64_FORMAT "\n", open_conn[i]->session_id); + if (NULL == (r = replay_create_disconnect(&oldtime, open_conn[i]->session_id))) { + status = -1; + } + remove_connection(open_conn[i]->session_id); + break; + } + } + + if ((! r) && (3 == status)) { + debug(2, "End-of-file encountered, signal end%s\n", ""); + r = end_item; + } + } + + if (1 == status) { + /* condense queue */ + if (! queue[0]) { + queue[0] = queue[1]; + queue[1] = NULL; + } + if (! r) { + r = queue[0]; + queue[0] = queue[1]; + queue[1] = NULL; + } + if (! queue[0]) { + queue[0] = queue[1]; + queue[1] = NULL; + } + + /* remember time */ + oldtime.tv_sec = replay_get_time(r)->tv_sec; + oldtime.tv_usec = replay_get_time(r)->tv_usec; + + last_stmt_time.tv_sec = replay_get_time(r)->tv_sec; + last_stmt_time.tv_usec = replay_get_time(r)->tv_usec; + if (! 
first_stmt_time_set) { + first_stmt_time.tv_sec = replay_get_time(r)->tv_sec; + first_stmt_time.tv_usec = replay_get_time(r)->tv_usec; + + first_stmt_time_set = 1; + } + } + + if (-1 == status) { + /* free items in queue */ + if (r) { + replay_free(r); + r = NULL; + } + if (queue[0]) { + replay_free(queue[0]); + queue[0] = NULL; + } + if (queue[1]) { + replay_free(queue[1]); + queue[1] = NULL; + } + } + + if (r && (1 <= debug_level) && (end_item != r)) { + replay_print_debug(r); + } + + debug(3, "Leaving parse_provider%s\n", ""); + return r; +} \ No newline at end of file diff --git a/pgreplay/pgreplay.1 b/pgreplay/pgreplay.1 new file mode 100644 index 00000000000..af155e90b29 --- /dev/null +++ b/pgreplay/pgreplay.1 @@ -0,0 +1,203 @@ +.TH pgreplay 1 "" "Jun 2011" "PostgreSQL Utilities" +.SH NAME +pgreplay \- PostgreSQL log file replayer for performance tests +.SH SYNOPSIS +\fBpgreplay\fP [\fIparse options\fR] [\fIreplay options\fR] +[\fB-d\fR \fIlevel\fR] [\fIinfile\fR] +.br +\fBpgreplay\fP \fB-f\fP [\fIparse options\fR] [\fB-o\fP \fIoutfile\fR] +[\fB-d\fR \fIlevel\fR] [\fIinfile\fR] +.br +\fBpgreplay\fP \fB-r\fP [\fIreplay options\fR] [\fB-d\fR \fIlevel\fR] +[\fIinfile\fR] +.SH DESCRIPTION +\fBpgreplay\fR reads a PostgreSQL log file (\fInot\fR a WAL file), extracts +the SQL statements and executes them in the same order and relative time +against a PostgreSQL database cluster. +A final report gives you a useful statistical analysis of your workload +and its execution. +.P +In the first form, the log file \fIinfile\fR is replayed at the time it is +read. +.P +With the \fB-f\fR option, \fBpgreplay\fR will not execute the statements, but +write them to a \(oqreplay file\(cq \fIoutfile\fR that can be replayed with +the third form. +.P +With the \fB-r\fP option, \fBpgreplay\fR will execute the statements in the +replay file \fIinfile\fR that was created by the second form. 
+.P +If the execution of statements gets behind schedule, warning messages +are issued that indicate that the server cannot handle the load in a +timely fashion. +The idea is to replay a real-world database workload as exactly as possible. +.P +To create a log file that can be parsed by \fBpgreplay\fR, you need to set the +following parameters in \fBpostgresql.conf\fR: +.IP +\fBlog_min_messages=error\fR (or more) +.br +\fBlog_min_error_statement=log\fR (or more) +.br +\fBlog_connections=on\fR +.br +\fBlog_disconnections=on\fR +.br +\fBlog_line_prefix=\(aq%m|%u|%d|%c|\(aq\fR (if you don\(aqt use CSV logging) +.br +\fBlog_statement=\(aqall\(aq\fR +.br +\fBlc_messages\fR must be set to English (encoding does not matter) +.br +\fBbytea_output=escape\fR +(from version 9.0 on, only if you want to replay the log on 8.4 or earlier) +.P +The database cluster against which you replay the SQL statements must be +a clone of the database cluster that generated the logs from the time +\fIimmediately before\fR the logs were generated. +.P +\fBpgreplay\fR is useful for performance tests, particularly in the following +situations: +.TP 4 +* +You want to compare the performance of your PostgreSQL application +on different hardware or different operating systems. +.TP 4 +* +You want to upgrade your database and want to make sure that the new +database version does not suffer from performance regressions that +affect you. +.P +Moreover, \fBpgreplay\fR can give you some feeling as to how your application +\fImight\fR scale by allowing you to try to replay the workload at a higher +speed. Be warned, though, that 500 users working at double speed is not really +the same as 1000 users working at normal speed. +.SH OPTIONS +.SS Parse options: +.TP +\fB-c\fR +Specifies that the log file is in \(aqcsvlog\(aq format (highly recommended) +and not in \(aqstderr\(aq format. +.TP +\fB-b\fR \fItimestamp\fR +Only log entries greater or equal to that timestamp will be parsed. 
+The format is \fBYYYY-MM-DD HH:MM:SS.FFF\fR like in the log file. +An optional time zone part will be ignored. +.TP +\fB-e\fR \fItimestamp\fR +Only log entries less or equal to that timestamp will be parsed. +The format is \fBYYYY-MM-DD HH:MM:SS.FFF\fR like in the log file. +An optional time zone part will be ignored. +.TP +\fB-q\fR +Specifies that a backslash in a simple string literal will escape +the following single quote. +This depends on configuration options like +\fBstandard_conforming_strings\fR and is the default for server +version 9.0 and less. +.TP +\fB-D\fR \fIdatabase\fR +Only log entries related to the specified database will be parsed +(this option can be specified multiple times for more than one database). +.TP +\fB-U\fR \fIusername\fR +Only log entries related to the specified username will be parsed +(this option can be specified multiple times for more than one user). +.SS Replay options: +.TP +\fB-h\fR \fIhostname\fR +Host name where the target database cluster is running (or directory where +the UNIX socket can be found). Defaults to local connections. +.br +This works just like the \fB-h\fR option of \fBpsql\fR. +.TP +\fB-p\fR \fIport\fR +TCP port where the target database cluster can be reached. +.TP +\fB-W\fR \fIpassword\fR +By default, \fBpgreplay\fR assumes that the target database cluster +is configured for \fItrust\fR authentication. With the \fB-W\fR option +you can specify a password that will be used for all users in the cluster. +.TP +\fB-s\fR \fIfactor\fR +Speed factor for replay, by default 1. This can be any valid positive +floating point number. A \fIfactor\fR less than 1 will replay the workload +in \(oqslow motion\(cq, while a \fIfactor\fR greater than 1 means +\(oqfast forward\(cq. +.TP +\fB-E\fR \fIencoding\fR +Specifies the encoding of the log file, which will be used as client +encoding during replay. If it is omitted, your default client encoding will +be used. 
+.TP +\fB-j\fR +If all connections are idle, jump ahead to the next request instead of +sleeping. This will speed up replay. Execution delays will still be reported +correctly, but replay statistics will not contain the idle time. +.TP +\fB-n\fR +Dry run mode. No connections to the server are made. +Useful for checking if the replay file is corrupt or to get statistics +about the replay file (number of statements, original duration, ...) +.TP +\fB-X\fR \fIoptions\fR +Extra connection options for replay connections. These must be libpq +connection options specified in the format \(oqoption=value [...]\(cq. +.SS Output options: +.TP +\fB-o\fP \fIoutfile\fR +specifies the replay file where the statements will be written +for later replay. +.SS Debug options: +.TP +\fB-d\fR \fIlevel\fR +Specifies the trace level (between 1 and 3). Increasing levels will produce +more detailed information about what \fBpgreplay\fR is doing. +.TP +\fB-v\fR +Prints the program version and exits. +.SH ENVIRONMENT +.TP +\fBPGHOST\fR +Specifies the default value for the \fB-h\fR option. +.TP +\fBPGPORT\fR +Specifies the default value for the \fB-p\fR option. +.TP +\fBPGCLIENTENCODING\fR +Specifies the default value for the \fB-E\fR option. +.SH LIMITATIONS +\fBpgreplay\fR can only replay what is logged by PostgreSQL. +This leads to some limitations: +.TP 4 +* +\fBCOPY\fR statements will not be replayed, because the copy data are not +logged. +.TP 4 +* +Fast-path API function calls are not logged and will not be replayed. +Unfortunately, this includes the Large Object API. +.TP 4 +* +Since the log file is always in the server encoding (which you can specify +with the \fB-E\fR switch of \fBpgreplay\fR), all +\fBSET client_encoding\fR statements will be ignored. +.TP 4 +* +Since the preparation time of prepared statements is not logged (unless +\fBlog_min_messages\fR is \fBdebug2\fR or more), these statements will be +prepared immediately before they are first executed during replay. 
+.TP 4 +* +Because the log file contains only text, query parameters and return values +will always be in text and never in binary format. If you use binary mode to, +say, transfer large binary data, \fBpgreplay\fR can cause significantly more +network traffic than the original run. +.TP 4 +* +Sometimes, if a connection takes longer to complete, the session ID +unexpectedly changes in the PostgreSQL log file. This causes \fBpgreplay\fR +to treat the session as two different ones, resulting in an additional +connection. This is arguably a bug in PostgreSQL. +.SH AUTHOR +Written by Laurenz Albe \fB\fR. diff --git a/pgreplay/pgreplay.h b/pgreplay/pgreplay.h new file mode 100644 index 00000000000..cda6b070e47 --- /dev/null +++ b/pgreplay/pgreplay.h @@ -0,0 +1,187 @@ +#ifndef _PGREPLAY_H +#define _PGREPLAY_H 1 + +#include "config.h" + +/* safeguard against broken config.h */ +#ifndef SIZEOF_UNSIGNED_INT +# error SIZEOF_UNSIGNED_INT not defined in config.h. Please execute 'configure' first! +#endif + +#if defined(WIN32) || defined(WIN64) +# ifndef WINDOWS +# define WINDOWS +# endif +#endif + +#include +#include +#include + +/* maximum length of a name in PostgreSQL */ +#define NAMELEN 64 +#define POLARDBlEN 256 +/* types for replay items */ +typedef enum { + pg_connect = 0, + pg_disconnect, + pg_execute, + pg_prepare, + pg_exec_prepared, + pg_cancel +} replay_type; + +struct replay_item { + struct timeval time; + uint64_t session_id; + replay_type type; + uint16_t count; + char **data; +}; + +/* one "command" parsed from a log file to be replayed + the definition is in replay_item.c */ +typedef struct replay_item replay_item; + +typedef int (replay_item_provider_init)(const char *, int, const char *, const char *, const char *, const char *); +typedef replay_item *(replay_item_provider)(); +typedef void (replay_item_provider_finish)(); + +typedef int (replay_item_consumer_init)(const char *, const char *, int, const char *, double); +typedef int 
(replay_item_consumer)(replay_item *); +typedef void (replay_item_consumer_finish)(int); + +/* hash value for session ID is computed as low byte of background PID */ +#define hash_session(x) (unsigned char)(x & 0xFF); + +/* printf/scanf formats for various data types */ +#if SIZEOF_UNSIGNED_INT == 4 +# define UINT32_FORMAT "%x" +#else +# define UINT32_FORMAT "%hx" +#endif + +#ifdef HAVE_INTTYPES_H +# include +# define UINT64_FORMAT "%" PRIx64 +#else +/* fall back to guessing */ +# ifdef WINDOWS +# define UINT64_FORMAT "%I64x" +# else +# if SIZEOF_UNSIGNED_LONG == 8 +# define UINT64_FORMAT "%lx" +# else +# define UINT64_FORMAT "%llx" +# endif +# endif +#endif + +/*********************/ +/* defined in main.c */ +/*********************/ + +extern int debug_level; + +/* destination of statistics output */ +extern FILE *sf; + +/* if 1, backslash will escape the following single quote in string literal */ +extern int backslash_quote; + +/* if 1, replay will skip idle intervals instead of sleeping */ +extern int jump_enabled; + +/* extra connect options specified with the -X option */ +extern char *extra_connstr; + +/* print debug messages */ +#define debug(level, format, ...) 
{ \ + if (level <= debug_level) { \ + fprintf (stderr, format, __VA_ARGS__); \ + fflush(stderr); \ + } \ +} + +/***************************/ +/* defined in replayitem.c */ +/***************************/ + +/* functions to create replay items */ +extern replay_item *replay_create_connect(const struct timeval *time, uint64_t session_id, const char *user, const char *database); +extern replay_item *replay_create_disconnect(const struct timeval *time, uint64_t session_id); +extern replay_item *replay_create_execute(const struct timeval *time, uint64_t session_id, const char *statement); +extern replay_item *replay_create_prepare(const struct timeval *time, uint64_t session_id, const char *statement, const char *name); +extern replay_item *replay_create_exec_prepared(const struct timeval *time, uint64_t session_id, const char *name, uint16_t count, char * const *values); +extern replay_item *replay_create_cancel(const struct timeval *time, uint64_t session_id); + +/* free mamory of a replay_item */ +extern void replay_free(replay_item *r); + +/* get attributes of a replay item */ +extern replay_type replay_get_type(const replay_item *r); +extern uint64_t replay_get_session_id(const replay_item *r); +extern const struct timeval * replay_get_time(const replay_item *r); +extern const char * replay_get_statement(const replay_item *r); +extern char * replay_get_search_path(const replay_item *r); +extern char * replay_get_prepare_params_typename(const replay_item *r); +extern char * replay_get_prepare_source_text(const replay_item *r); +extern const char * replay_get_name(const replay_item *r); +extern const char * replay_get_user(const replay_item *r); +extern const char * replay_get_database(const replay_item *r); +extern int replay_get_valuecount(const replay_item *r); +extern const char * const * replay_get_values(const replay_item *r); + +/* dump a replay item at debug level 3 */ +extern void replay_print_debug(const replay_item *r); + +/* special replay_item that 
signals end-of-file */ +extern replay_item * const end_item; + +/**********************/ +/* defined in parse.c */ +/**********************/ + +/* parse a timestamp (excluding time zone) */ +extern const char * parse_time(const char *, struct timeval *); + +extern replay_item_provider parse_provider; +extern replay_item_provider_init parse_provider_init; +extern replay_item_provider_finish parse_provider_finish; + +/***************************/ +/* defined in replayfile.c */ +/***************************/ + +extern replay_item_provider file_provider; +extern replay_item_provider_init file_provider_init; +extern replay_item_provider_finish file_provider_finish; + +extern replay_item_consumer file_consumer; +extern replay_item_consumer_init file_consumer_init; +extern replay_item_consumer_finish file_consumer_finish; + +/*************************/ +/* defined in database.c */ +/*************************/ + +extern replay_item_consumer database_consumer; +extern replay_item_consumer database_consumer_dry_run; +extern replay_item_consumer_init database_consumer_init; +extern replay_item_consumer_finish database_consumer_finish; + +#ifdef WINDOWS +/************************/ +/* defined in windows.c */ +/************************/ + +extern void win_perror(const char *prefix, int is_network_error); +#endif + +#endif + +#define MONITOR_RES_LEN 32 +extern int polardb_audit; +extern int monitor_connect_init(const char *host, int port, const char *passwd); +extern int monitor_connect_execute(const char* sql); +extern int monitor_connect_finish(); \ No newline at end of file diff --git a/pgreplay/pgreplay.html b/pgreplay/pgreplay.html new file mode 100644 index 00000000000..73c1cfd42db --- /dev/null +++ b/pgreplay/pgreplay.html @@ -0,0 +1,486 @@ + + + + + + + + + +pgreplay + + + + +

pgreplay

+ +NAME
+SYNOPSIS
+DESCRIPTION
+OPTIONS
+ENVIRONMENT
+LIMITATIONS
+AUTHOR
+ +
+ + +

NAME + +

+ + +

pgreplay +− PostgreSQL log file replayer for performance +tests

+ +

SYNOPSIS + +

+ + + +

pgreplay +[parse options] [replay options] [-d +level] [infile]
+pgreplay -f
[parse options] [-o +outfile] [-d level] [infile] +
+pgreplay -r
[replay options] [-d +level] [infile]

+ +

DESCRIPTION + +

+ + + +

pgreplay +reads a PostgreSQL log file (not a WAL file), +extracts the SQL statements and executes them in the same +order and relative time against a PostgreSQL database +cluster. A final report gives you a useful statistical +analysis of your workload and its execution.

+ +

In the first +form, the log file infile is replayed at the time it +is read.

+ +

With the +-f option, pgreplay will not execute the +statements, but write them to a ‘replay file’ +outfile that can be replayed with the third form.

+ +

With the +-r option, pgreplay will execute the +statements in the replay file infile that was created +by the second form.

+ +

If the +execution of statements gets behind schedule, warning +messages are issued that indicate that the server cannot +handle the load in a timely fashion. The idea is to replay a +real-world database workload as exactly as possible.

+ +

To create a log +file that can be parsed by pgreplay, you need to set +the following parameters in postgresql.conf:

+ + +

log_min_messages=error +(or more)
+log_min_error_statement=log
(or more)
+log_connections=on
+log_disconnections=on
+log_line_prefix='%m|%u|%d|%c|'
(if you don't use CSV +logging)
+log_statement='all'
+lc_messages
must be set to English (encoding does not +matter)
+bytea_output=escape
(from version 9.0 on, only if you +want to replay the log on 8.4 or earlier)

+ +

The database +cluster against which you replay the SQL statements must be +a clone of the database cluster that generated the logs from +the time immediately before the logs were +generated.

+ + +

pgreplay +is useful for performance tests, particularly in the +following situations:

+ + + + + + + + + + + + +
+ + +

*

+ + +

You want to compare the performance of your PostgreSQL +application on different hardware or different operating +systems.

+ + +

*

+ + +

You want to upgrade your database and want to make sure +that the new database version does not suffer from +performance regressions that affect you.

+ +

Moreover, +pgreplay can give you some feeling as to how your +application might scale by allowing you to try to +replay the workload at a higher speed. Be warned, though, +that 500 users working at double speed is not really the +same as 1000 users working at normal speed.

+ +

OPTIONS + +

+ + +

Parse +options:

+ + + + + + + +
+ + +

-c

+ + +

Specifies that the log file is +in 'csvlog' format (highly recommended) and not in 'stderr' +format.

+ +

-b timestamp

+ +

Only log entries greater than or +equal to that timestamp will be parsed. The format is +YYYY-MM-DD HH:MM:SS.FFF like in the log file. An +optional time zone part will be ignored.

+ +

-e timestamp

+ +

Only log entries less than or equal +to that timestamp will be parsed. The format is +YYYY-MM-DD HH:MM:SS.FFF like in the log file. An +optional time zone part will be ignored.

+ + + + + + + +
+ + +

-q

+ + +

Specifies that a backslash in a simple string literal +will escape the following single quote. This depends on +configuration options like +standard_conforming_strings and is the default for +server version 9.0 and earlier.

+ +

-D database

+ +

Only log entries related to the +specified database will be parsed (this option can be +specified multiple times for more than one database).

+ +

-U username

+ +

Only log entries related to the +specified username will be parsed (this option can be +specified multiple times for more than one user).

+ +

Replay +options:
+-h
hostname

+ +

Host name where the target +database cluster is running (or directory where the UNIX +socket can be found). Defaults to local connections.
+This works just like the -h option of +psql.

+ +

-p port

+ +

TCP port where the target +database cluster can be reached.

+ +

-W password

+ +

By default, pgreplay +assumes that the target database cluster is configured for +trust authentication. With the -W option you +can specify a password that will be used for all users in +the cluster.

+ +

-s factor

+ +

Speed factor for replay, by +default 1. This can be any valid positive floating point +number. A factor less than 1 will replay the workload +in ‘slow motion’, while a factor greater +than 1 means ‘fast forward’.

+ +

-E encoding

+ +

Specifies the encoding of the +log file, which will be used as client encoding during +replay. If it is omitted, your default client encoding will +be used.

+ + + + + + + + + + + + +
+ + +

-j

+ + +

If all connections are idle, jump ahead to the next +request instead of sleeping. This will speed up replay. +Execution delays will still be reported correctly, but +replay statistics will not contain the idle time.

+ + +

-n

+ + +

Dry run mode. No connections to the server are made. +Useful for checking if the replay file is corrupt or to get +statistics about the replay file (number of statements, +original duration, ...)

+ +

-X options

+ +

Extra connection options for +replay connections. These must be libpq connection options +specified in the format ‘option=value +[...]’.

+ +

Output +options:
+-o
outfile

+ +

Specifies the replay file where +the statements will be written for later replay.

+ +

Debug +options:
+-d
level

+ +

Specifies the trace level +(between 1 and 3). Increasing levels will produce more +detailed information about what pgreplay is +doing.

+ + + + + + + + +
+ + +

-v

+ + +

Prints the program version and exits.

+
+ +

ENVIRONMENT + +

+ + + + + + + + + + + + + + + +
+ + +

PGHOST

+ + +

Specifies the default value for +the -h option.

+
+ + +

PGPORT

+ + +

Specifies the default value for the -p +option.

+
+ +

PGCLIENTENCODING

+ +

Specifies the default value for +the -E option.

+ +

LIMITATIONS + +

+ + + +

pgreplay +can only replay what is logged by PostgreSQL. This leads to +some limitations:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

*

+ + +

COPY statements will not be replayed, because the +copy data are not logged.

+ + +

*

+ + +

Fast-path API function calls are not logged and will not +be replayed. Unfortunately, this includes the Large Object +API.

+ + +

*

+ + +

Since the log file is always in the server encoding +(which you can specify with the -E switch of +pgreplay), all SET client_encoding statements +will be ignored.

+ + +

*

+ + +

Since the preparation time of prepared statements is not +logged (unless log_min_messages is debug2 or +more), these statements will be prepared immediately before +they are first executed during replay.

+ + +

*

+ + +

Because the log file contains only text, query +parameters and return values will always be in text and +never in binary format. If you use binary mode to, say, +transfer large binary data, pgreplay can cause +significantly more network traffic than the original +run.

+ + +

*

+ + +

Sometimes, if a connection takes longer to complete, the +session ID unexpectedly changes in the PostgreSQL log file. +This causes pgreplay to treat the session as two +different ones, resulting in an additional connection. This +is arguably a bug in PostgreSQL.

+ +

AUTHOR + +

+ + +

Written by +Laurenz Albe <laurenz.albe@cybertec.at>.

+
+ + diff --git a/pgreplay/replayfile.c b/pgreplay/replayfile.c new file mode 100644 index 00000000000..f74fb809119 --- /dev/null +++ b/pgreplay/replayfile.c @@ -0,0 +1,437 @@ +#include "pgreplay.h" + +#include +#include +#include +#include +#include +#include +#ifdef WINDOWS +# include +# include +# define FILE_MODE S_IRUSR | S_IWUSR +#else +# ifdef HAVE_NETINET_IN_H +# include +# endif +# define FILE_MODE S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH +#endif +#include +#include +#include + +/* input or output file */ +static int filed=0; + +/* functions to convert 64-bit integers between host and network byte order */ +#ifndef htonll +# ifdef WORDS_BIGENDIAN +# define htonll(x) (x) +# define ntohll(x) (x) +# else +# define htonll(x) ((((uint64_t)htonl(x)) << 32) + htonl(x >> 32)) +# define ntohll(x) ((((uint64_t)ntohl(x)) << 32) + ntohl(x >> 32)) +# endif +#endif + +/* this length indicates a null value */ +#define NULL_VALUE 0x80000000 + +/* wrapper functions for read and write */ +static int do_write(const void * const buf, size_t count) { + int rc = write(filed, buf, count); + + if (-1 == rc) { + perror("Error writing to output file"); + return 0; + } else if (count != rc) { + fprintf(stderr, "Error: not all bytes written to output file\n"); + return 0; + } + + return 1; +} + +static int do_read(void *buf, size_t count, int *eof_indicator) { + int rc = read(filed, buf, count); + + if (eof_indicator) { + *eof_indicator = 0; + } + + if (-1 == rc) { + perror("Error reading from input file"); + return 0; + } else if (eof_indicator && (0 == rc)) { + *eof_indicator = 1; + } else if (count != rc) { + fprintf(stderr, "Error: unexpected end of file on input file\n"); + return 0; + } + + return 1; +} + +/* write a string to the output file */ +static int write_string(char const * const s) { + uint32_t u32, len; + + /* write length + NULL indicator (4 byte) */ + if (NULL == s) { + len = NULL_VALUE; + } else { + len = strlen(s); + } + u32 = htonl(len); + if (! 
do_write(&u32, 4)) { + return 0; + } else if (NULL != s) { + /* write string */ + if (! do_write(s, len)) { + return 0; + } + } + + return 1; +} + +/* malloc and read a string from the input file */ +static int read_string(char ** const s) { + uint32_t u32, len; + + /* read length (4 byte) */ + if (! do_read(&u32, 4, NULL)) { + return 0; + } + len = ntohl(u32); + if (NULL_VALUE == len) { + *s = NULL; + } else { + /* allocate the string */ + if (! (*s = malloc(len + 1))) { + fprintf(stderr, "Cannot allocate %d bytes of memory\n", len + 1); + return 0; + } else { + /* read string */ + if (! do_read(*s, len, NULL)) { + return 0; + } + (*s)[len] = '\0'; + } + } + + return 1; +} + +int file_provider_init(const char *infile, int cvs, const char *begin_time, const char *end_time, const char *db_only, const char *usr_only) { + int rc = 1; + debug(3, "Entering file_provider_init%s\n", ""); + + if (NULL == infile) { + filed = 0; +#ifdef WINDOWS + setmode(filed, O_BINARY); +#endif + } else { + if (-1 == (filed = open(infile, O_RDONLY +#ifdef WINDOWS + | O_BINARY +#endif + ))) { + perror("Error opening input file:"); + rc = 0; + } + } + + debug(3, "Leaving file_provider_init%s\n", ""); + + return rc; +} + +void file_provider_finish() { + debug(3, "Entering file_provider_finish%s\n", ""); + + if (0 != filed) { + if (close(filed)) { + perror("Error closing input file:"); + } + } + + debug(3, "Leaving file_provider_finish%s\n", ""); +} + +replay_item * file_provider() { + replay_item *r = NULL; + uint16_t u16; + uint32_t u32; + uint64_t u64, session_id = 0; + struct timeval tv; + replay_type type = -1; + int ok = 1, i = 0, eof; + unsigned long count; + char *user, *database, *statement, *name, **values, nl; + + debug(3, "Entering file_provider%s\n", ""); + + /* read timestamp (8 byte) */ + if (! do_read(&u32, 4, &eof)) { + ok = 0; + } else { + /* handle expected end-of-file condition */ + if (eof) { + return end_item; + } + + tv.tv_sec = ntohl(u32); + if (! 
do_read(&u32, 4, NULL)) { + ok = 0; + } else { + tv.tv_usec = ntohl(u32); + } + } + + /* read session_id (8 byte) */ + if (ok && do_read(&u64, 8, NULL)) { + session_id = ntohll(u64); + } else { + ok = 0; + } + + /* read type (1 byte) */ + if (ok) { + u16 = 0; + if (! do_read((char *)(&u16) + 1, 1, NULL)) { + ok = 0; + } else { + type = ntohs(u16); + if ((type < pg_connect) || (type > pg_cancel)) { + fprintf(stderr, "Error: unknown type %u encountered\n", type); + ok = 0; + } + } + } + + /* read type specific stuff */ + if (ok) { + switch (type) { + case pg_connect: + if (read_string(&user)) { + if (read_string(&database)) { + r = replay_create_connect(&tv, session_id, user, database); + free(database); + } + free(user); + } + break; + case pg_disconnect: + r = replay_create_disconnect(&tv, session_id); + break; + case pg_execute: + if (read_string(&statement)) { + r = replay_create_execute(&tv, session_id, statement); + free(statement); + } + break; + case pg_prepare: + if (read_string(&statement)) { + if (read_string(&name)) { + r = replay_create_prepare(&tv, session_id, name, statement); + free(name); + } + free(statement); + } + break; + case pg_exec_prepared: + /* read statement name */ + if (read_string(&name)) { + /* number of bind arguments (2 byte) */ + if (do_read(&u16, 2, NULL)) { + count = ntohs(u16); + if (NULL == (values = calloc(count, sizeof(char *)))) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", count * sizeof(char *)); + } else { + /* read bind values */ + while (i < count) { + if (read_string(values + i)) { + ++i; + } else { + break; + } + } + if (i == count) { + r = replay_create_exec_prepared(&tv, session_id, name, count, values); + } + while (--i >= 0) { + if (values[i]) { + free(values[i]); + } + } + free(values); + } + } + free(name); + } + break; + case pg_cancel: + r = replay_create_cancel(&tv, session_id); + break; + } + } + + /* read new-line at the end of the record */ + if (r && do_read(&nl, 1, NULL) && ('\n' != nl)) { + 
fprintf(stderr, "Error: missing new-line at end of line\n"); + if (r) { + replay_free(r); + r = NULL; + } + } + + if (r && (1 <= debug_level) && (end_item != r)) { + replay_print_debug(r); + } + + debug(3, "Leaving file_provider%s\n", ""); + + return r; +} + +int file_consumer_init(const char *outfile, const char *host, int port, const char *passwd, double factor) { + debug(3, "Entering file_consumer_init%s\n", ""); + + if ((NULL == outfile) || ('\0' == outfile[0]) + || (('-' == outfile[0]) && ('\0' == outfile[1]))) { + filed = 1; +#ifdef WINDOWS + /* set stdout to binary mode */ + setmode(filed, O_BINARY); +#endif + } else { + if (-1 == (filed = open(outfile, O_WRONLY | O_CREAT | O_TRUNC +#ifdef WINDOWS + | O_BINARY +#endif + , FILE_MODE))) { + perror("Error opening output file:"); + return 0; + } + } + + debug(3, "Leaving file_consumer_init%s\n", ""); + return 1; +} + +void file_consumer_finish(int dry_run) { + debug(3, "Entering file_consumer_finish%s\n", ""); + + if (1 != filed) { + if (close(filed)) { + perror("Error closing output file:"); + } + } + + debug(3, "Leaving file_consumer_finish%s\n", ""); +} + +int file_consumer(replay_item *item) { + const struct timeval *tv = replay_get_time(item); + uint16_t count; + const replay_type type = replay_get_type(item); + uint16_t u16, i; + uint32_t u32; + uint64_t u64; + int rc = 1; + const char * const *values; + + debug(3, "Entering file_consumer%s\n", ""); + + /* write timestamp (8 byte) */ + u32 = htonl(tv->tv_sec); + if (! do_write(&u32, 4)) { + rc = -1; + } else { + u32 = htonl(tv->tv_usec); + if (! do_write(&u32, 4)) { + rc = -1; + } + } + + /* write session_id (8 byte) */ + if (1 == rc) { + u64 = htonll(replay_get_session_id(item)); + if (! do_write(&u64, 8)) { + rc = -1; + } + } + + /* write type (1 byte) */ + if (1 == rc) { + u16 = htons((uint16_t) type); + if (! 
do_write((char *)(&u16) + 1, 1)) { + rc = -1; + } + } + + /* write type specific stuff */ + if (1 == rc) { + switch (type) { + case pg_connect: + if (! write_string(replay_get_user(item))) { + rc = -1; + } else if (! write_string(replay_get_database(item))) { + rc = -1; + } + break; + case pg_disconnect: + break; + case pg_execute: + if (! write_string(replay_get_statement(item))) { + rc = -1; + } + break; + case pg_prepare: + if (! write_string(replay_get_statement(item))) { + rc = -1; + } else if (! write_string(replay_get_name(item))) { + rc = -1; + } + break; + case pg_exec_prepared: + count = replay_get_valuecount(item); + /* write statement name */ + if (! write_string(replay_get_name(item))) { + rc = -1; + } else { + /* write count (2 byte) */ + u16 = htons(count); + if (! do_write(&u16, 2)) { + rc = -1; + } else { + /* write values */ + values = replay_get_values(item); + for (i=0; i +#include +#include +#include +#include + +/* special replay_item that signals end-of-file */ +static replay_item end_replay_item = {{0, 0}, 0, -1, 0, NULL}; +replay_item * const end_item = &end_replay_item; + +/* create common part of a replay_item */ +static replay_item *replay_create(const struct timeval *time, uint64_t session_id, replay_type type, uint16_t count) { + replay_item *r; + + r = malloc(sizeof(struct replay_item)); + if (NULL == r) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)sizeof(struct replay_item)); + return NULL; + } + r->time.tv_sec = time->tv_sec; + r->time.tv_usec = time->tv_usec; + r->session_id = session_id; + r->type = type; + r->count = count; + if (0 == count) { + r->data = NULL; + } else { + r->data = calloc(count, sizeof(char *)); + if (NULL == r->data) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)count * sizeof(char *)); + free(r); + return NULL; + } + } + + return r; +} + +replay_item *replay_create_connect(const struct timeval *time, uint64_t session_id, const char *user, const char 
*database) { + replay_item *r; + + debug(3, "Entering replay_create_connect%s\n", ""); + + r = replay_create(time, session_id, pg_connect, 2); + if (NULL == r) { + return NULL; + } + + (r->data)[0] = malloc(strlen(user) + 1); + if (NULL == (r->data)[0]) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)strlen(user) + 1); + free(r->data); + free(r); + return NULL; + } + strcpy((r->data)[0], user); + + (r->data)[1] = malloc(strlen(database) + 1); + if (NULL == (r->data)[1]) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)strlen(user) + 1); + free((r->data)[0]); + free(r->data); + free(r); + return NULL; + } + strcpy((r->data)[1], database); + + debug(3, "Leaving replay_create_connect%s\n", ""); + return r; +} + +replay_item *replay_create_disconnect(const struct timeval *time, uint64_t session_id) { + replay_item *r; + + debug(3, "Entering replay_create_disconnect%s\n", ""); + + r = replay_create(time, session_id, pg_disconnect, 0); + if (NULL == r) { + return NULL; + } + debug(3, "Leaving replay_create_disconnect%s\n", ""); + return r; +} + +replay_item *replay_create_execute(const struct timeval *time, uint64_t session_id, const char *statement) { + replay_item *r; + + debug(3, "Entering replay_create_execute%s\n", ""); + + r = replay_create(time, session_id, pg_execute, 1); + if (NULL == r) { + return NULL; + } + + (r->data)[0] = malloc(strlen(statement) + 1); + if (NULL == (r->data)[0]) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)strlen(statement) + 1); + free(r->data); + free(r); + return NULL; + } + strcpy((r->data)[0], statement); + + debug(3, "Leaving replay_create_execute%s\n", ""); + return r; +} + +replay_item *replay_create_prepare(const struct timeval *time, uint64_t session_id, const char *name, const char *statement) { + replay_item *r; + + debug(3, "Entering replay_create_prepare%s\n", ""); + + r = replay_create(time, session_id, pg_prepare, 2); + if (NULL == r) { + 
return NULL; + } + + (r->data)[0] = malloc(strlen(statement) + 1); + if (NULL == (r->data)[0]) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)strlen(statement) + 1); + free(r->data); + free(r); + return NULL; + } + strcpy((r->data)[0], statement); + + (r->data)[1] = malloc(strlen(name) + 1); + if (NULL == (r->data)[1]) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)strlen(name) + 1); + free((r->data)[0]); + free(r->data); + free(r); + return NULL; + } + strcpy((r->data)[1], name); + + debug(3, "Leaving replay_create_prepare%s\n", ""); + return r; +} + +replay_item *replay_create_exec_prepared(const struct timeval *time, uint64_t session_id, const char*name, uint16_t count, char * const *values) { + replay_item *r; + int i; + + debug(3, "Entering replay_create_exec_prepared%s\n", ""); + + r = replay_create(time, session_id, pg_exec_prepared, count + 1); + if (NULL == r) { + return NULL; + } + + (r->data)[0] = malloc(strlen(name) + 1); + if (NULL == (r->data)[0]) { + free(r->data); + free(r); + return NULL; + } + strcpy((r->data)[0], name); + + for (i=1; idata)[i] = malloc(strlen(values[i-1]) + 1); + if (NULL == (r->data)[i]) { + fprintf(stderr, "Cannot allocate %lu bytes of memory\n", (unsigned long)strlen(values[i-1]) + 1); + for (--i; i>=0; --i) { + if ((r->data)[i]) { + free((r->data)[i]); + } + } + free(r->data); + free(r); + return NULL; + } + strcpy((r->data)[i], values[i-1]); + } else { + (r->data)[i] = NULL; + } + } + + debug(3, "Leaving replay_create_exec_prepared%s\n", ""); + return r; +} + +replay_item *replay_create_cancel(const struct timeval *time, uint64_t session_id) { + replay_item *r; + + debug(3, "Entering replay_create_cancel%s\n", ""); + + r = replay_create(time, session_id, pg_cancel, 0); + if (NULL == r) { + return NULL; + } + debug(3, "Leaving replay_create_cancel%s\n", ""); + return r; +} + +void replay_free(replay_item *r) { + int i; + + debug(3, "Entering replay_free%s\n", ""); + + 
assert((pg_connect == r->type) || (pg_disconnect == r->type) || (pg_execute == r->type) || (pg_prepare == r->type) || (pg_exec_prepared == r->type) || (pg_cancel == r->type)); + + for (i=0; icount; ++i) { + if ((r->data)[i]) { + free((r->data)[i]); + } + } + if (r->count) { + free(r->data); + } + + free(r); + + debug(3, "Leaving replay_free%s\n", ""); +} + +replay_type replay_get_type(const replay_item *r) { + return r->type; +} + +uint64_t replay_get_session_id(const replay_item *r) { + return r->session_id; +} + +const struct timeval * replay_get_time(const replay_item *r) { + return &(r->time); +} + +const char * replay_get_statement(const replay_item *r) { + assert((pg_execute == r->type) || (pg_prepare == r->type)); + + return (r->data)[0]; +} + +/* get search_path, prepare params and prepare source text from statement of replay item */ +char * replay_get_statement_info(const replay_item *r, int count) { + assert((pg_execute == r->type) || (pg_prepare == r->type) || (pg_exec_prepared == r->type) ); + char* header ; + char* tailer ; + size_t len ; + char* res ; + + header = (r->data)[0]; + for(int i=0;itype) || (pg_exec_prepared == r->type)); + + return (pg_prepare == r->type) ? 
(r->data)[1] : (r->data)[0]; +} + +const char * replay_get_user(const replay_item *r) { + assert(pg_connect == r->type); + + return (r->data)[0]; +} + +const char * replay_get_database(const replay_item *r) { + assert(pg_connect == r->type); + + return (r->data)[1]; +} + +int replay_get_valuecount(const replay_item *r) { + assert(pg_exec_prepared == r->type); + + return r->count - 1; +} + +const char * const * replay_get_values(const replay_item *r) { + assert(pg_exec_prepared == r->type); + + return (const char * const *)((r->data) + 1); +} + +/* maximal part of a value for display */ +#define SAMPLE_SIZE 100 + +void replay_print_debug(const replay_item *r) { + replay_type type; + int i; + char valuepart[SAMPLE_SIZE+4], *p; + + valuepart[SAMPLE_SIZE] = '.'; + valuepart[SAMPLE_SIZE+1] = '.'; + valuepart[SAMPLE_SIZE+2] = '.'; + valuepart[SAMPLE_SIZE+3] = '\0'; + + debug(1, "---------------------------%s\n", ""); + debug(1, "Item: time = %lu.%06lu\n", (unsigned long)r->time.tv_sec, (unsigned long)r->time.tv_usec); + debug(1, " session id = 0x" UINT64_FORMAT "\n", r->session_id); + type = r->type; + debug(1, " type = %s\n", + (pg_connect == type) ? "connect" : + ((pg_disconnect == type) ? "disconnect" : + ((pg_execute == type) ? "execute" : + ((pg_prepare == type) ? "prepare" : + ((pg_exec_prepared == type) ? "exec_prepared" : + ((pg_cancel == type) ? 
"cancel" : "unknown") + ) + ) + ) + ) + ); + switch (type) { + case pg_connect: + debug(1, " user = %s\n", replay_get_user(r)); + debug(1, " database = %s\n", replay_get_database(r)); + case pg_disconnect: + case pg_cancel: + break; + case pg_prepare: + debug(1, " name = %s\n", replay_get_name(r)); + case pg_execute: + debug(1, " statement = %s\n", replay_get_statement(r)); + break; + case pg_exec_prepared: + debug(1, " name = %s\n", replay_get_name(r)); + for (i=0; i /* memcmp, memset, strlen */ +#include /* ptrdiff_t */ +#include /* exit */ + +#if defined(HASH_DEFINE_OWN_STDINT) && HASH_DEFINE_OWN_STDINT +/* This codepath is provided for backward compatibility, but I plan to remove it. */ +#warning "HASH_DEFINE_OWN_STDINT is deprecated; please use HASH_NO_STDINT instead" +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#elif defined(HASH_NO_STDINT) && HASH_NO_STDINT +#else +#include /* uint8_t, uint32_t */ +#endif + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. 
*/ +#if !defined(DECLTYPE) && !defined(NO_DECLTYPE) +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#endif +#elif defined(__MCST__) /* Elbrus C Compiler */ +#define DECLTYPE(x) (__typeof(x)) +#elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || defined(__WATCOMC__) +#define NO_DECLTYPE +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE(x) +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while (0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while (0) +#endif + +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ +#endif +#ifndef uthash_bzero +#define uthash_bzero(a,n) memset(a,'\0',n) +#endif +#ifndef uthash_strlen +#define uthash_strlen(s) strlen(s) +#endif + +#ifndef HASH_FUNCTION +#define HASH_FUNCTION(keyptr,keylen,hashv) HASH_JEN(keyptr, keylen, hashv) +#endif + +#ifndef HASH_KEYCMP +#define HASH_KEYCMP(a,b,n) memcmp(a,b,n) +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +#ifndef HASH_NONFATAL_OOM +#define HASH_NONFATAL_OOM 0 +#endif + +#if HASH_NONFATAL_OOM +/* malloc failures can be recovered from */ + +#ifndef uthash_nonfatal_oom +#define uthash_nonfatal_oom(obj) do {} while (0) /* non-fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) do { (oomed) = 1; } while (0) +#define IF_HASH_NONFATAL_OOM(x) x + +#else +/* malloc failures result in lost memory, hash tables are unusable */ + 
+#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") +#define IF_HASH_NONFATAL_OOM(x) + +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) +/* calculate the hash handle from element address elp */ +#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle*)(void*)(((char*)(elp)) + ((tbl)->hho))) + +#define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ +do { \ + struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ + unsigned _hd_bkt; \ + HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + (head)->hh.tbl->buckets[_hd_bkt].count++; \ + _hd_hh_item->hh_next = NULL; \ + _hd_hh_item->hh_prev = NULL; \ +} while (0) + +#define HASH_VALUE(keyptr,keylen,hashv) \ +do { \ + HASH_FUNCTION(keyptr, keylen, hashv); \ +} while (0) + +#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ +do { \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_bkt; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \ + } \ + } \ +} while (0) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ + } \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 
1UL : 0UL) +#define HASH_BLOOM_MAKE(tbl,oomed) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!(tbl)->bloom_bv) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ + } \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) + +#else +#define HASH_BLOOM_MAKE(tbl,oomed) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#define HASH_BLOOM_BYTELEN 0U +#endif + +#define HASH_MAKE_TABLE(hh,head,oomed) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc(sizeof(UT_hash_table)); \ + if (!(head)->hh.tbl) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ + if (!(head)->hh.tbl->buckets) { \ + HASH_RECORD_OOM(oomed); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + } else { \ + uthash_bzero((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl, 
oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (oomed) { \ + uthash_free((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + } \ + ) \ + } \ + } \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ +} while (0) + +#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ +} while (0) + +#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ +} while (0) + +#define HASH_APPEND_LIST(hh, head, add) \ +do { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail->next = (add); \ + (head)->hh.tbl->tail = &((add)->hh); \ +} while (0) + +#define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ +do { \ + do { \ + if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) { \ + break; \ + } \ + } while ((_hs_iter = 
HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ +} while (0) + +#ifdef NO_DECLTYPE +#undef HASH_AKBI_INNER_LOOP +#define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ +do { \ + char *_hs_saved_head = (char*)(head); \ + do { \ + DECLTYPE_ASSIGN(head, _hs_iter); \ + if (cmpfcn(head, add) > 0) { \ + DECLTYPE_ASSIGN(head, _hs_saved_head); \ + break; \ + } \ + DECLTYPE_ASSIGN(head, _hs_saved_head); \ + } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ +} while (0) +#endif + +#if HASH_NONFATAL_OOM + +#define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ +do { \ + if (!(oomed)) { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ + if (oomed) { \ + HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \ + HASH_DELETE_HH(hh, head, &(add)->hh); \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } else { \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + } \ + } else { \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } \ +} while (0) + +#else + +#define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ +do { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ +} while (0) + +#endif + + +#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ +do { \ + IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM( if 
(!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( } ) \ + } else { \ + void *_hs_iter = (head); \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn); \ + if (_hs_iter) { \ + (add)->hh.next = _hs_iter; \ + if (((add)->hh.prev = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) { \ + HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = (add); \ + } else { \ + (head) = (add); \ + } \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add); \ + } else { \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + } \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \ +} while (0) + +#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ +do { \ + unsigned _hs_hashv; \ + HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) + +#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \ + HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) + +#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ +do { \ + IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (const void*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( } ) \ + } else { \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \ +} 
while (0) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_hashv; \ + HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_TO_BKT(hashv,num_bkts,bkt) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1U)); \ +} while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. 
+ */ +#define HASH_DELETE(hh,head,delptr) \ + HASH_DELETE_HH(hh, head, &(delptr)->hh) + +#define HASH_DELETE_HH(hh,head,delptrhh) \ +do { \ + const struct UT_hash_handle *_hd_hh_del = (delptrhh); \ + if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } else { \ + unsigned _hd_bkt; \ + if (_hd_hh_del == (head)->hh.tbl->tail) { \ + (head)->hh.tbl->tail = HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \ + } \ + if (_hd_hh_del->prev != NULL) { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = _hd_hh_del->next; \ + } else { \ + DECLTYPE_ASSIGN(head, _hd_hh_del->next); \ + } \ + if (_hd_hh_del->next != NULL) { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = _hd_hh_del->prev; \ + } \ + HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh, head, "HASH_DELETE_HH"); \ +} while (0) + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ +do { \ + unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \ + HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \ +} while (0) +#define HASH_ADD_STR(head,strfield,add) \ +do { \ + unsigned _uthash_hastr_keylen = (unsigned)uthash_strlen((add)->strfield); \ + HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \ +} while (0) +#define HASH_REPLACE_STR(head,strfield,add,replaced) \ +do { \ + unsigned _uthash_hrstr_keylen = (unsigned)uthash_strlen((add)->strfield); \ + HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, replaced); \ +} while (0) +#define HASH_FIND_INT(head,findint,out) \ + HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) 
\ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_REPLACE_INT(head,intfield,add,replaced) \ + HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) +#define HASH_FIND_PTR(head,findptr,out) \ + HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ + HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#include /* fprintf, stderr */ +#define HASH_OOPS(...) do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head,where) \ +do { \ + struct UT_hash_handle *_thh; \ + if (head) { \ + unsigned _bkt_i; \ + unsigned _count = 0; \ + char *_prev; \ + for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) { \ + unsigned _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \ + (where), (void*)_thh->hh_prev, (void*)_prev); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \ + (where), (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev != (char*)_thh->prev) { \ + HASH_OOPS("%s: invalid prev %p, actual %p\n", \ + (where), (void*)_thh->prev, 
(void*)_prev); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) : NULL); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("%s: invalid app item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head,where) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ +#define HASH_BER(key,keylen,hashv) \ +do { \ + unsigned _hb_keylen = (unsigned)keylen; \ + const unsigned char *_hb_key = (const unsigned char*)(key); \ + (hashv) = 0; \ + while (_hb_keylen-- != 0U) { \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + } \ +} while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx + * (archive link: https://archive.is/Ivcan ) + */ +#define HASH_SAX(key,keylen,hashv) \ +do { \ + unsigned _sx_i; \ + const unsigned char *_hs_key = (const unsigned char*)(key); \ + hashv = 0; \ + for (_sx_i=0; _sx_i < keylen; _sx_i++) { \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + } \ +} while (0) +/* FNV-1a variation */ +#define HASH_FNV(key,keylen,hashv) \ +do { \ + unsigned _fn_i; \ + const unsigned char *_hf_key = (const unsigned char*)(key); \ + (hashv) = 2166136261U; \ + for (_fn_i=0; _fn_i < keylen; _fn_i++) { \ + hashv = hashv ^ 
_hf_key[_fn_i]; \ + hashv = hashv * 16777619U; \ + } \ +} while (0) + +#define HASH_OAT(key,keylen,hashv) \ +do { \ + unsigned _ho_i; \ + const unsigned char *_ho_key=(const unsigned char*)(key); \ + hashv = 0; \ + for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ +} while (0) + +#define HASH_JEN_MIX(a,b,c) \ +do { \ + a -= b; a -= c; a ^= ( c >> 13 ); \ + b -= c; b -= a; b ^= ( a << 8 ); \ + c -= a; c -= b; c ^= ( b >> 13 ); \ + a -= b; a -= c; a ^= ( c >> 12 ); \ + b -= c; b -= a; b ^= ( a << 16 ); \ + c -= a; c -= b; c ^= ( b >> 5 ); \ + a -= b; a -= c; a ^= ( c >> 3 ); \ + b -= c; b -= a; b ^= ( a << 10 ); \ + c -= a; c -= b; c ^= ( b >> 15 ); \ +} while (0) + +#define HASH_JEN(key,keylen,hashv) \ +do { \ + unsigned _hj_i,_hj_j,_hj_k; \ + unsigned const char *_hj_key=(unsigned const char*)(key); \ + hashv = 0xfeedbeefu; \ + _hj_i = _hj_j = 0x9e3779b9u; \ + _hj_k = (unsigned)(keylen); \ + while (_hj_k >= 12U) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12U; \ + } \ + hashv += (unsigned)(keylen); \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ 
\ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ + case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ + case 1: _hj_i += _hj_key[0]; /* FALLTHROUGH */ \ + default: ; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ +} while (0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,hashv) \ +do { \ + unsigned const char *_sfh_key=(unsigned const char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ + \ + unsigned _sfh_rem = _sfh_len & 3U; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabeu; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0U; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2U*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + break; \ + default: ; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= 
hashv << 25; \ + hashv += hashv >> 6; \ +} while (0) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ +do { \ + if ((head).hh_head != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ + } else { \ + (out) = NULL; \ + } \ + while ((out) != NULL) { \ + if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \ + if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0) { \ + break; \ + } \ + } \ + if ((out)->hh.hh_next != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ + } else { \ + (out) = NULL; \ + } \ + } \ +} while (0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,hh,addhh,oomed) \ +do { \ + UT_hash_bucket *_ha_head = &(head); \ + _ha_head->count++; \ + (addhh)->hh_next = _ha_head->hh_head; \ + (addhh)->hh_prev = NULL; \ + if (_ha_head->hh_head != NULL) { \ + _ha_head->hh_head->hh_prev = (addhh); \ + } \ + _ha_head->hh_head = (addhh); \ + if ((_ha_head->count >= ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) \ + && !(addhh)->tbl->noexpand) { \ + HASH_EXPAND_BUCKETS(addhh,(addhh)->tbl, oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (oomed) { \ + HASH_DEL_IN_BKT(head,addhh); \ + } \ + ) \ + } \ +} while (0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(head,delhh) \ +do { \ + UT_hash_bucket *_hd_head = &(head); \ + _hd_head->count--; \ + if (_hd_head->hh_head == (delhh)) { \ + _hd_head->hh_head = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_prev) { \ + (delhh)->hh_prev->hh_next = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_next) { \ + (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \ + } \ +} while (0) + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. 
Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(hh,tbl,oomed) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ + if (!_he_new_buckets) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero(_he_new_buckets, \ + sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ + (tbl)->ideal_chain_maxlen = \ + ((tbl)->num_items >> ((tbl)->log2_num_buckets+1U)) + \ + ((((tbl)->num_items & (((tbl)->num_buckets*2U)-1U)) != 0U) ? 
1U : 0U); \ + (tbl)->nonideal_items = 0; \ + for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) { \ + _he_thh = (tbl)->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh != NULL) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[_he_bkt]); \ + if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { \ + (tbl)->nonideal_items++; \ + if (_he_newbkt->count > _he_newbkt->expand_mult * (tbl)->ideal_chain_maxlen) { \ + _he_newbkt->expand_mult++; \ + } \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head != NULL) { \ + _he_newbkt->hh_head->hh_prev = _he_thh; \ + } \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free((tbl)->buckets, (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ + (tbl)->num_buckets *= 2U; \ + (tbl)->log2_num_buckets++; \ + (tbl)->buckets = _he_new_buckets; \ + (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ? \ + ((tbl)->ineff_expands+1U) : 0U; \ + if ((tbl)->ineff_expands > 1U) { \ + (tbl)->noexpand = 1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ + } \ +} while (0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head != NULL) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping != 0U) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p != NULL) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) { \ + _hs_psize++; \ + _hs_q = ((_hs_q->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + if (_hs_q == NULL) { \ + break; \ + } \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize != 0U) || ((_hs_qsize != 0U) && (_hs_q != NULL))) { \ + if (_hs_psize == 0U) { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + _hs_qsize--; \ + } else if ((_hs_qsize == 0U) || (_hs_q == NULL)) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) { \ + _hs_p = ((_hs_p->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ + } \ + _hs_psize--; \ + } else if ((cmpfcn( \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q)) \ + )) <= 0) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) { \ + _hs_p = ((_hs_p->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail != NULL ) { \ + _hs_tail->next = ((_hs_e != NULL) ? \ + ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e != NULL) { \ + _hs_e->prev = ((_hs_tail != NULL) ? 
\ + ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail != NULL) { \ + _hs_tail->next = NULL; \ + } \ + if (_hs_nmerges <= 1U) { \ + _hs_looping = 0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2U; \ + } \ + HASH_FSCK(hh, head, "HASH_SRT"); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt = NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if ((src) != NULL) { \ + for (_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh != NULL; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + IF_HASH_NONFATAL_OOM( int _hs_oomed = 0; ) \ + _dst_hh = (UT_hash_handle*)(void*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh != NULL) { \ + _last_elt_hh->next = _elt; \ + } \ + if ((dst) == NULL) { \ + DECLTYPE_ASSIGN(dst, _elt); \ + HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (_hs_oomed) { \ + uthash_nonfatal_oom(_elt); \ + (dst) = NULL; \ + continue; \ + } \ + ) \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + 
HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], hh_dst, _dst_hh, _hs_oomed); \ + (dst)->hh_dst.tbl->num_items++; \ + IF_HASH_NONFATAL_OOM( \ + if (_hs_oomed) { \ + HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \ + HASH_DELETE_HH(hh_dst, dst, _dst_hh); \ + _dst_hh->tbl = NULL; \ + uthash_nonfatal_oom(_elt); \ + continue; \ + } \ + ) \ + HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst, dst, "HASH_SELECT"); \ +} while (0) + +#define HASH_CLEAR(hh,head) \ +do { \ + if ((head) != NULL) { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } \ +} while (0) + +#define HASH_OVERHEAD(hh,head) \ + (((head) != NULL) ? ( \ + (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + sizeof(UT_hash_table) + \ + (HASH_BLOOM_BYTELEN))) : 0U) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ + (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) +#else +#define HASH_ITER(hh,head,el,tmp) \ +for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ + (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. 
(If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1u +#define HASH_BLOOM_SIGNATURE 0xb12220f2u + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. 
When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + uint8_t bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + const void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/pgreplay/windows.c b/pgreplay/windows.c new file mode 100644 index 00000000000..a311b4d3457 --- /dev/null +++ b/pgreplay/windows.c @@ -0,0 +1,48 @@ +#include "pgreplay.h" +#include <windows.h> +#include <stdio.h> + +/* gets the last error and prints an error message */ + +void win_perror(const char *prefix, int is_network_error) { + DWORD error_nr; + char *errmsg; + /* catalog of Windows socket error messages */ + static HMODULE sock_err_mod = NULL; + + /* get the message number */ + if (is_network_error) { + error_nr = WSAGetLastError(); + + if (NULL == sock_err_mod) { + /* try to load the Windows socket error message catalog */ + sock_err_mod = LoadLibraryEx( + "netmsg.dll", + NULL, + LOAD_LIBRARY_AS_DATAFILE + ); + } + } else { + error_nr = GetLastError(); + } + + /* get the error message text */ + if (FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER + | FORMAT_MESSAGE_IGNORE_INSERTS + | FORMAT_MESSAGE_FROM_SYSTEM + | ((is_network_error && sock_err_mod) ?
FORMAT_MESSAGE_FROM_HMODULE : 0), + sock_err_mod, + error_nr, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPSTR) &errmsg, + 0, + NULL)) { + fprintf(stderr, "%s: %s\n", prefix, errmsg); + + /* free the memory for the error message */ + LocalFree(errmsg); + } else { + fprintf(stderr, "%s: error number %ld\n", prefix, error_nr); + } +} diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index 875ef778b1d..ec087f261f3 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -47,11 +47,15 @@ */ #define prepared_queries POLAR_SESSION(prepared_queries) +/* POLAR: get log info for simple PREPARED query */ +LogPreparedInfo polar_log_prepared_info; + static void InitQueryHashTable(void); static ParamListInfo EvaluateParams(PreparedStatement *pstmt, List *params, const char *queryString, EState *estate); static Datum build_regtype_array(Oid *param_types, int num_params); +static void polar_fill_prepared_stmt_info(CachedPlanSource *psrc, const char *stmt_name); /* * Implements the 'PREPARE' utility statement. 
*/ @@ -95,7 +99,8 @@ PrepareQuery(PrepareStmt *stmt, const char *queryString, */ plansource = CreateCachedPlan(rawstmt, queryString, CreateCommandTag(stmt->query), - POLAR_SS_NOT_DEDICATED()); + POLAR_SS_NOT_DEDICATED(), + stmt->name); /* Transform list of TypeNames to array of type OIDs */ nargs = list_length(stmt->argtypes); @@ -245,6 +250,9 @@ ExecuteQuery(ExecuteStmt *stmt, IntoClause *intoClause, queryString, estate); } + /* POLAR: fill log info of a simple PREPARED stmt */ + polar_fill_prepared_stmt_info(entry->plansource, stmt->name); + /* Create a new portal to run the query in */ portal = CreateNewPortal(); /* Don't display the portal in pg_cursors, it is for internal use only */ @@ -692,6 +700,9 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, queryString, estate); } + /* POLAR: fill log info of a simple PREPARED stmt */ + polar_fill_prepared_stmt_info(entry->plansource, execstmt->name); + /* Replan if needed, and acquire a transient refcount */ /* * POLAR px: does not support create table as ... execute @@ -841,3 +852,32 @@ build_regtype_array(Oid *param_types, int num_params) result = construct_array(tmp_ary, num_params, REGTYPEOID, 4, true, 'i'); return PointerGetDatum(result); } + +/* + * POLAR: fill log info of a simple PREPARED statement. + */ +static void +polar_fill_prepared_stmt_info(CachedPlanSource *psrc, const char *stmt_name) +{ + if (polar_enable_log_parameter_type) + { + char *params_typename = polar_get_prepared_statement_params_typename(stmt_name); + if (params_typename) + { + /* + * POLAR: polar_params_typename is stored in ErrorContext. Normally, the + * allocated memory is freed manually after writing to log buffer. When + * error occurred, the allocated memory is freed automatically. 
+ */ + polar_log_prepared_info.params_typename = + MemoryContextStrdup(ErrorContext, params_typename); + pfree(params_typename); + } + + if (psrc->query_string) + { + polar_log_prepared_info.source_text = + MemoryContextStrdup(ErrorContext, psrc->query_string); + } + } +} \ No newline at end of file diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index c73012ee012..54fd92d118c 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1967,7 +1967,8 @@ _SPI_prepare_plan(const char *src, SPIPlanPtr plan) * Shared Server, here set polar_on_session_context = false. * This plansource is belong to backend context, not session context. */ - false); + false, + NULL); /* * Parameter datatypes are driven by parserSetup hook if provided, diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 7175a7cb775..02a5b3992de 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -89,8 +89,10 @@ #include "access/xlog_internal.h" #include "libpq/polar_network_stats.h" #include "parser/parse_param.h" +#include "parser/parse_type.h" #include "storage/polar_fd.h" #include "utils/polar_sql_time_stat.h" +#include "utils/syscache.h" #include "libpq/auth.h" #include "catalog/namespace.h" @@ -1048,6 +1050,10 @@ exec_simple_query(const char *query_string) */ drop_unnamed_stmt(); + /* POLAR: initialize prepared log info */ + polar_log_prepared_info.params_typename = NULL; + polar_log_prepared_info.source_text = NULL; + /* * Switch to appropriate context for constructing parsetrees. 
*/ @@ -1322,7 +1328,31 @@ */ if (check_log_statement(parsetree_list) && !(needs_mask && MyProc->issuper)) { - polar_audit_log.query_string = query_string; + StringInfoData processed_query_string; + initStringInfo(&processed_query_string); + + /* POLAR : log search_path info */ + if (polar_enable_log_search_path) + appendStringInfo(&processed_query_string, "/*polardb %s polardb*/ ", + namespace_search_path); + + /* POLAR: log a simple PREPARED statement info */ + if (polar_enable_log_parameter_type) + { + if (polar_log_prepared_info.params_typename) + appendStringInfo(&processed_query_string, "/*polardb %s polardb*/ ", + polar_log_prepared_info.params_typename); + + appendStringInfoString(&processed_query_string, query_string); + + if (polar_log_prepared_info.source_text) + appendStringInfo(&processed_query_string, " /*polardb %s polardb*/", + polar_log_prepared_info.source_text); + } + else + appendStringInfoString(&processed_query_string, query_string); + + polar_audit_log.query_string = processed_query_string.data; if (polar_enable_multi_syslogger) { ErrorData edata; @@ -1334,18 +1364,26 @@ edata.message = NULL; edata.needs_mask = needs_mask; - polar_write_audit_log(&edata, "statement: %s", query_string); + polar_write_audit_log(&edata, "statement: %s", processed_query_string.data); } else { ereport(LOG, - (errmsg("statement: %s", query_string), + (errmsg("statement: %s", processed_query_string.data), errhidestmt(true), polar_mark_audit_log(true), polar_mark_needs_mask(needs_mask), errdetail_execute(parsetree_list))); } was_logged = true; + + /* POLAR: free memory allocated in ErrorContext */ + if (polar_log_prepared_info.params_typename) + pfree(polar_log_prepared_info.params_typename); + if (polar_log_prepared_info.source_text) + pfree(polar_log_prepared_info.source_text); + + pfree(processed_query_string.data); } /* POLAR end */ @@ -1366,14 +1404,29 @@
errhidestmt(true))); break; case 2: - ereport(LOG, - (errmsg("duration: %s ms statement: %s", - msec_str, query_string), - errhidestmt(true), - polar_mark_slow_log(true), /* POLAR */ - polar_mark_needs_mask(needs_mask), - errdetail_execute(parsetree_list))); - break; + { + StringInfoData buf; + + initStringInfo(&buf); + + if (polar_enable_log_search_path) + appendStringInfo(&buf, "/*polardb %s polardb*/ ", namespace_search_path); + + ereport(LOG, + (errmsg("duration: %s ms statement: %s%s", + msec_str, + buf.data ? buf.data : "", + query_string), + errhidestmt(true), + polar_mark_slow_log(true), /* POLAR */ + polar_mark_needs_mask(needs_mask), + errdetail_execute(parsetree_list))); + + if (buf.data) + pfree(buf.data); + + break; + } } if (save_log_statement_stats) @@ -1511,7 +1564,7 @@ exec_parse_message(const char *query_string, /* string to execute */ * Create the CachedPlanSource before we do parse analysis, since it * needs to see the unmodified raw parse tree. */ - psrc = CreateCachedPlan(raw_parse_tree, query_string, commandTag, POLAR_SS_NOT_DEDICATED() && is_named); + psrc = CreateCachedPlan(raw_parse_tree, query_string, commandTag, POLAR_SS_NOT_DEDICATED() && is_named, stmt_name); /* * Set up a snapshot if parse analysis will need one. @@ -1571,7 +1624,7 @@ exec_parse_message(const char *query_string, /* string to execute */ /* Empty input string. This is legal. 
*/ raw_parse_tree = NULL; commandTag = NULL; - psrc = CreateCachedPlan(raw_parse_tree, query_string, commandTag, POLAR_SS_NOT_DEDICATED() && is_named); + psrc = CreateCachedPlan(raw_parse_tree, query_string, commandTag, POLAR_SS_NOT_DEDICATED() && is_named, NULL); querytree_list = NIL; } @@ -1643,14 +1696,28 @@ exec_parse_message(const char *query_string, /* string to execute */ errhidestmt(true))); break; case 2: + { + StringInfoData buf; + + initStringInfo(&buf); + + if (polar_enable_log_search_path) + appendStringInfo(&buf, "/*polardb %s polardb*/ ", namespace_search_path); + ereport(LOG, - (errmsg("duration: %s ms parse %s: %s", + (errmsg("duration: %s ms parse %s: %s%s", msec_str, *stmt_name ? stmt_name : "", + buf.data ? buf.data : "", query_string), polar_mark_slow_log(true), /* POLAR */ errhidestmt(true))); + + if (buf.data) + pfree(buf.data); + break; + } } if (save_log_statement_stats) @@ -2045,17 +2112,31 @@ exec_bind_message(StringInfo input_message) errhidestmt(true))); break; case 2: + { + StringInfoData buf; + + initStringInfo(&buf); + + if (polar_enable_log_search_path) + appendStringInfo(&buf, "/*polardb %s polardb*/ ", namespace_search_path); + ereport(LOG, - (errmsg("duration: %s ms bind %s%s%s: %s\nparams: %s", + (errmsg("duration: %s ms bind %s%s%s: %s%s\nparams: %s", msec_str, *stmt_name ? stmt_name : "", *portal_name ? "/" : "", *portal_name ? portal_name : "", + buf.data ? buf.data : "", psrc->query_string, params_string ? 
params_string : ""), polar_mark_slow_log(true), /* POLAR */ errhidestmt(true))); + + if (buf.data) + pfree(buf.data); + break; + } } /* POLAR: free params_string */ @@ -2093,10 +2174,12 @@ exec_execute_message(const char *portal_name, long max_rows) /* POLAR */ ListCell *stmt_item; + StringInfoData buf; bool needs_mask = false; bool to_log = false; int log_mode = false; char *params_string = NULL; + char *params_typename = NULL; /* Adjust destination to tell printtup.c what to do */ dest = whereToSendOutput; @@ -2276,6 +2359,15 @@ exec_execute_message(const char *portal_name, long max_rows) /* POLAR: get errmsg params string, we must free it in the last */ // It needs to malloc and free twice, not so efficiency. params_string = polar_get_errmsg_params(portalParams); + + initStringInfo(&buf); + if (polar_enable_log_search_path) + appendStringInfo(&buf, "/*polardb %s polardb*/ ", namespace_search_path); + + /* POLAR: get params typename, we must free it in the last */ + if (portalParams && (portalParams->numParams > 0) && + polar_enable_log_parameter_type) + params_typename = polar_get_prepared_statement_params_typename(prepStmtName); if (polar_enable_multi_syslogger) { @@ -2298,13 +2390,14 @@ exec_execute_message(const char *portal_name, long max_rows) /* POLAR: print audit log*/ polar_write_audit_log(&edata, - "%s %s%s%s: %s", + "%s %s%s%s: %s%s", execute_is_fetch ? "execute fetch from" : "execute", prepStmtName, *portal_name ? "/" : "", *portal_name ? portal_name : "", + buf.data ? buf.data : "", audit_sql_log ? audit_sql_log : sourceText); /* POLAR: free audit_sql_log */ @@ -2313,28 +2406,38 @@ exec_execute_message(const char *portal_name, long max_rows) } else { + /* POLAR: only log parameters typename when not binding value */ + if (params_typename) + appendStringInfo(&buf, "/*polardb %s polardb*/ ", params_typename); + polar_write_audit_log(&edata, - "%s %s%s%s: %s\nparams: %s", + "%s %s%s%s: %s%s\nparams: %s", execute_is_fetch ? 
"execute fetch from" : "execute", prepStmtName, *portal_name ? "/" : "", *portal_name ? portal_name : "", + buf.data ? buf.data : "", sourceText, params_string ? params_string : ""); } } else { + /* POLAR: only log parameters typename when not binding value */ + if (params_typename) + appendStringInfo(&buf, "/*polardb %s polardb*/ ", params_typename); + ereport(LOG, - (errmsg("%s %s%s%s: %s\nparams: %s", + (errmsg("%s %s%s%s: %s%s\nparams: %s", execute_is_fetch ? _("execute fetch from") : _("execute"), prepStmtName, *portal_name ? "/" : "", *portal_name ? portal_name : "", + buf.data ? buf.data : "", sourceText, params_string ? params_string : ""), errhidestmt(true), @@ -2342,6 +2445,8 @@ exec_execute_message(const char *portal_name, long max_rows) polar_mark_needs_mask(needs_mask))); } /* POLAR end */ + if (params_typename) + pfree(params_typename); was_logged = true; } @@ -2364,7 +2469,7 @@ exec_execute_message(const char *portal_name, long max_rows) break; case 2: ereport(LOG, - (errmsg("duration: %s ms %s %s%s%s: %s\nparams: %s", + (errmsg("duration: %s ms %s %s%s%s: %s%s\nparams: %s", msec_str, execute_is_fetch ? _("execute fetch from") : @@ -2372,6 +2477,7 @@ exec_execute_message(const char *portal_name, long max_rows) prepStmtName, *portal_name ? "/" : "", *portal_name ? portal_name : "", + buf.data ? buf.data : "", sourceText, params_string ? 
params_string : ""), errhidestmt(true), @@ -2385,6 +2491,9 @@ exec_execute_message(const char *portal_name, long max_rows) pfree(params_string); /* POLAR end */ + if (buf.data) + pfree(buf.data); + if (save_log_statement_stats) ShowUsage("EXECUTE MESSAGE STATISTICS"); @@ -5690,4 +5799,57 @@ polar_process_client_readwrite_cancel_interrupt(void) if (!ProcDiePending && QueryCancelPending && handle_cancel_request) ProcDiePending = true; } -/* POLAR end */ \ No newline at end of file +/* POLAR end */ + +/* + * POLAR: get parameters typename of prepared statements by using stmt name + */ +char * +polar_get_prepared_statement_params_typename(const char *stmt_name) +{ + int paramno; + PreparedStatement *pstmt; + CachedPlanSource *psrc; + StringInfoData result; + + /* named stmt */ + if (strcmp(stmt_name, "")) + { + pstmt = FetchPreparedStatement(stmt_name, false); + + /* Assert pstmt is not NULL */ + if (!pstmt) + return NULL; + + psrc = pstmt->plansource; + } + /* unnamed stmt */ + else + { + /* special-case the unnamed statement */ + psrc = unnamed_stmt_psrc; + } + + /* invalid case, just return NULL */ + if (!psrc || psrc->num_params <= 0) + return NULL; + + /* initialize result and pattern string */ + initStringInfo(&result); + + /* result is filled with ,,... 
*/ + appendStringInfoString(&result, psrc->stmt_name); + for (paramno = 0; paramno < psrc->num_params; paramno++) + { + Type type; + char *typename; + + type = typeidType(psrc->param_types[paramno]); + typename = typeTypeName(type); + ReleaseSysCache(type); + + appendStringInfo(&result, ",%s", typename); + } + + return result.data; +} \ No newline at end of file diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index dc5af577ac0..8a5278d8836 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -167,7 +167,8 @@ CachedPlanSource * CreateCachedPlan(RawStmt *raw_parse_tree, const char *query_string, const char *commandTag, - bool polar_on_session_context) + bool polar_on_session_context, + const char *stmt_name) { CachedPlanSource *plansource; MemoryContext source_context; @@ -239,6 +240,9 @@ CreateCachedPlan(RawStmt *raw_parse_tree, plansource->planId = 0; plansource->polar_on_session_context = polar_on_session_context; + /* POLAR: record stmt_name */ + plansource->stmt_name = stmt_name ? 
pstrdup(stmt_name) : NULL; + MemoryContextSwitchTo(oldcxt); return plansource; @@ -1470,6 +1474,8 @@ CopyCachedPlan(CachedPlanSource *plansource) newsource->resultDesc = NULL; newsource->context = source_context; + newsource->stmt_name = NULL; + if (plansource->polar_on_session_context) querytree_context = polar_session_alloc_set_context_create( source_context, diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index fba9f79bf22..fd7b80b36a4 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -79,6 +79,7 @@ #include "utils/ps_status.h" /* POLAR */ +#include "catalog/namespace.h" #include "utils/polar_backtrace.h" #include "utils/timestamp.h" @@ -3219,7 +3220,10 @@ send_message_to_server_log(ErrorData *edata) new_edata.sqlerrcode = edata->sqlerrcode; polar_audit_log.query_string = debug_query_string; - polar_write_audit_log(&new_edata, "statement: %s", debug_query_string); + if (polar_enable_log_search_path) + polar_write_audit_log(&new_edata, "statement: /*%s*/ %s", namespace_search_path, debug_query_string); + else + polar_write_audit_log(&new_edata, "statement: %s", debug_query_string); } } else diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 8824988bc32..779b864200f 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -848,6 +848,8 @@ int polar_max_auditlog_files; int polar_max_slowlog_files; int polar_max_logindex_files; int polar_trace_logindex_messages = LOG; +bool polar_enable_log_search_path = true; +bool polar_enable_log_parameter_type = true; /* * Forbidden functions names for non-superuser @@ -3845,6 +3847,27 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"polar_enable_log_search_path", PGC_SIGHUP, DEVELOPER_OPTIONS, + gettext_noop("enable log search_path to log file."), + NULL, + GUC_NO_SHOW_ALL | GUC_NO_RESET_ALL + }, + &polar_enable_log_search_path, + true, + NULL, NULL, NULL + }, + { + 
{"polar_enable_log_parameter_type", PGC_SIGHUP, LOGGING, + gettext_noop("Enable polar log SQL parameter type."), + NULL, + GUC_NO_SHOW_ALL | GUC_NO_RESET_ALL + }, + &polar_enable_log_parameter_type, + true, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL @@ -7875,7 +7898,7 @@ static struct config_string ConfigureNamesString[] = "pg_stat_statements,btree_gin,btree_gist,citext,cube,dict_int,earthdistance," "hstore,intagg,intarray,isn,ltree,pgcrypto,pgrowlocks,pg_prewarm,pg_trgm," "sslinfo,tablefunc,unaccent,fuzzystrmatch,pgstattuple,pg_buffercache,\"uuid-ossp\",bloom," - "smlar," + "smlar,vector," /* POLAR: extensions in src/pl dir */ "plpgsql,plperl," /* PostGIS */ diff --git a/src/include/commands/prepare.h b/src/include/commands/prepare.h index ffec029df40..26b99128394 100644 --- a/src/include/commands/prepare.h +++ b/src/include/commands/prepare.h @@ -33,6 +33,16 @@ typedef struct TimestampTz prepare_time; /* the time when the stmt was prepared */ } PreparedStatement; +/* + * POLAR: the data structure representing parameters typename and definition + * of a prepared statement. Currently, it only used to log info of a simple + * PREPARED statement. 
+ */ +typedef struct +{ + char *params_typename; + char *source_text; +} LogPreparedInfo; /* Utility statements PREPARE, EXECUTE, DEALLOCATE, EXPLAIN EXECUTE */ extern void PrepareQuery(PrepareStmt *stmt, const char *queryString, @@ -57,4 +67,5 @@ extern List *FetchPreparedStatementTargetList(PreparedStatement *stmt); extern void DropAllPreparedStatements(void); +extern LogPreparedInfo polar_log_prepared_info; #endif /* PREPARE_H */ diff --git a/src/include/postgres.h b/src/include/postgres.h index c1f2600731f..f134b542d4e 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -784,4 +784,5 @@ extern Datum Float8GetDatum(float8 X); #endif /* POLAR end */ +extern char *polar_get_prepared_statement_params_typename(const char *stmt_name); #endif /* POSTGRES_H */ diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index d53f0edc0a5..fa3a9ce04cb 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -531,6 +531,8 @@ extern int polar_dma_max_standby_wait_delay_size_mb; extern char *polar_partition_recursive_reloptions; extern bool polar_enable_dump_incorrect_checksum_xlog; extern bool polar_trace_heap_scan_flow; +extern bool polar_enable_log_search_path; +extern bool polar_enable_log_parameter_type; /* POLAR end */ diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index fdaaec90cdb..e77f0bd78ef 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ -121,6 +121,9 @@ typedef struct CachedPlanSource /* POLAR: Shared Server */ bool polar_on_session_context; + + /* POLAR: The stmt_name who created this plansource*/ + char *stmt_name; } CachedPlanSource; /* @@ -156,7 +159,8 @@ extern void ResetPlanCache(void); extern CachedPlanSource *CreateCachedPlan(struct RawStmt *raw_parse_tree, const char *query_string, const char *commandTag, - bool polar_on_session_context); + bool polar_on_session_context, + const char *stmt_name); extern CachedPlanSource *CreateOneShotCachedPlan(struct RawStmt 
*raw_parse_tree, const char *query_string, const char *commandTag); diff --git a/src/include/utils/px_sync_guc_name.h b/src/include/utils/px_sync_guc_name.h index 644ab723b0b..ff1108b3311 100644 --- a/src/include/utils/px_sync_guc_name.h +++ b/src/include/utils/px_sync_guc_name.h @@ -144,3 +144,5 @@ "polar_px_allow_pagemode_seqscan", "polar_px_allow_pagemode_bitmapscan", "polar_trace_heap_scan_flow", + "polar_enable_log_search_path", + "polar_enable_log_parameter_type",