Change the Oid generator.

To avoid locking on pg_class (required to safely call GetNewOidWithIndex or similar) and to be usable on a standby node, use the unused oids in the FirstBootstrapObjectId / FirstNormalObjectId range rather than real oids. For performance, always start with the biggest oid less than FirstNormalObjectId. This way the loop to find an unused oid will only happen once a single backend has created more than ~2.5k hypothetical indexes. For people needing to have thousands of hypothetical indexes at the same time, we also allow using the initial implementation that relies on real oids, which comes with all the limitations mentioned above, via the new hypopg.use_real_oids GUC.
HypoPG · Jan 22, 2021 · 2bd376d · 2bd376d
1 parent 0864fbe
commit 2bd376d
Show file tree

Hide file tree

Showing 6 changed files with 239 additions and 18 deletions.
diff --git a/docs/usage.rst b/docs/usage.rst
@@ -48,6 +48,40 @@ As you can see, hypopg version 1.1.0 is installed.  If you need to check using
 plain SQL, please refer to the `pg_extension table documentation
 <https://www.postgresql.org/docs/current/static/catalog-pg-extension.html>`_.
 
+Configuration
+-------------
+
+The following configuration parameters (GUCs) are available, and can be changed
+interactively:
+
+hypopg.enabled:
+  Defaults to ``on``.
+  Use this parameter to globally enable or disable HypoPG.  When HypoPG is
+  disabled, no hypothetical index will be used, but the defined hypothetical
+  indexes won't be removed.
+
+hypopg.use_real_oids:
+  Defaults to ``off``.
+  By default, HypoPG won't use "real" object identifiers, but will instead
+  borrow ones from the ~ 14000 / 16384 range (respectively the lowest unused
+  oid less than FirstNormalObjectId, and FirstNormalObjectId itself), which is
+  reserved by PostgreSQL for use in future releases.  This doesn't cause any
+  problem, as the free range is dynamically computed the first time a
+  connection uses HypoPG, and has the advantage of working on a standby server.
+  The drawback is that you can't have more than approximately 2500
+  hypothetical indexes at the same time, and creating a new hypothetical index
+  will become very slow once more than the maximum number of objects has been
+  created, until ``hypopg_reset()`` is called.
+
+  If those drawbacks are problematic, you can enable this parameter.  HypoPG
+  will then ask for a real object identifier, which requires obtaining more
+  locks and won't work on a standby, but allows using the full range of
+  object identifiers.
+
+  Note that switching this parameter doesn't require resetting the entries;
+  both kinds of entries can coexist at the same time.
+
+
 Create a hypothetical index
 ---------------------------
 

diff --git a/expected/hypopg.out b/expected/hypopg.out
@@ -162,3 +162,29 @@ SELECT hypopg_get_indexdef(indexrelid) FROM hypopg_create_index('create index on
  CREATE INDEX ON public.hypo USING btree (id DESC, id DESC, id DESC NULLS LAST, ((md5(val))::bpchar) bpchar_pattern_ops) WITH (fillfactor = 10) WHERE ((id < 1000) AND ((id + (1 % 2)) = 3))
 (1 row)
 
+-- Make sure the old Oid generator still works.  Test it while keeping existing
+-- entries, as both should be able to coexist.
+SET hypopg.use_real_oids = on;
+-- Should not use hypothetical index
+SELECT COUNT(*) FROM do_explain('SELECT * FROM hypo WHERE id = 1') e
+WHERE e ~ 'Index.*<\d+>btree_hypo.*';
+ count 
+-------
+     0
+(1 row)
+
+SELECT COUNT(*) AS nb
+FROM public.hypopg_create_index('CREATE INDEX ON hypo(id);');
+ nb 
+----
+  1
+(1 row)
+
+-- Should use hypothetical index
+SELECT COUNT(*) FROM do_explain('SELECT * FROM hypo WHERE id = 1') e
+WHERE e ~ 'Index.*<\d+>btree_hypo.*';
+ count 
+-------
+     1
+(1 row)
+
diff --git a/hypopg.c b/hypopg.c
@@ -17,11 +17,14 @@
 #include "postgres.h"
 #include "fmgr.h"
 
+#include "access/transam.h"
 #if PG_VERSION_NUM >= 110000
 #include "catalog/partition.h"
 #include "nodes/pg_list.h"
 #include "utils/lsyscache.h"
 #endif
+#include "executor/spi.h"
+#include "utils/elog.h"
 
 #include "include/hypopg.h"
 #include "include/hypopg_import.h"
@@ -33,8 +36,15 @@ PG_MODULE_MAGIC;
 
 bool		isExplain;
 bool		hypo_is_enabled;
+bool		hypo_use_real_oids;
 MemoryContext HypoMemoryContext;
 
+/*--- Private variables ---*/
+
+static Oid last_oid = InvalidOid;
+static Oid min_fake_oid = InvalidOid;
+static bool oid_wraparound = false;
+
 /*--- Functions --- */
 
 PGDLLEXPORT void _PG_init(void);
@@ -75,6 +85,7 @@ static void hypo_executorEnd_hook(QueryDesc *queryDesc);
 static ExecutorEnd_hook_type prev_ExecutorEnd_hook = NULL;
 
 
+static Oid hypo_get_min_fake_oid(void);
 static void hypo_get_relation_info_hook(PlannerInfo *root,
 										Oid relationObjectId,
 										bool inhparent,
@@ -125,6 +136,17 @@ _PG_init(void)
 							 NULL,
 							 NULL);
 
+	DefineCustomBoolVariable("hypopg.use_real_oids",
+							 "Use real oids rather than the range < 16384",
+							 NULL,
+							 &hypo_use_real_oids,
+							 false,
+							 PGC_USERSET,
+							 0,
+							 NULL,
+							 NULL,
+							 NULL);
+
 	EmitWarningsOnPlaceholders("hypopg");
 }
 
@@ -140,39 +162,114 @@ _PG_fini(void)
 }
 
 /*---------------------------------
- * Wrapper around GetNewRelFileNode
  * Return a new OID for an hypothetical index.
+ *
+ * To avoid locking on pg_class (required to safely call GetNewOidWithIndex or
+ * similar) and to be usable on a standby node, use the oids unused in the
+ * FirstBootstrapObjectId / FirstNormalObjectId range rather than real oids.
+ * For performance, always start with the biggest oid less than
+ * FirstNormalObjectId.  This way the loop to find an unused oid will only
+ * happen once a single backend has created more than ~2.5k hypothetical
+ * indexes.
+ *
+ * For people needing to have thousands of hypothetical indexes at the same
+ * time, we also allow using the initial implementation that relies on real
+ * oids, which comes with all the limitations mentioned above.
  */
 Oid
 hypo_getNewOid(Oid relid)
 {
-	Relation	pg_class;
-	Relation	relation;
-	Oid			newoid;
-	Oid			reltablespace;
-	char		relpersistence;
+	Oid			newoid = InvalidOid;
+
+	if (hypo_use_real_oids)
+	{
+		Relation	pg_class;
+		Relation	relation;
+		Oid			reltablespace;
+		char		relpersistence;
 
-	/* Open the relation on which we want a new OID */
-	relation = table_open(relid, AccessShareLock);
+		/* Open the relation on which we want a new OID */
+		relation = table_open(relid, AccessShareLock);
 
-	reltablespace = relation->rd_rel->reltablespace;
-	relpersistence = relation->rd_rel->relpersistence;
+		reltablespace = relation->rd_rel->reltablespace;
+		relpersistence = relation->rd_rel->relpersistence;
 
-	/* Close the relation and release the lock now */
-	table_close(relation, AccessShareLock);
+		/* Close the relation and release the lock now */
+		table_close(relation, AccessShareLock);
 
-	/* Open pg_class to aks a new OID */
-	pg_class = table_open(RelationRelationId, RowExclusiveLock);
+		/* Open pg_class to ask for a new OID */
+		pg_class = table_open(RelationRelationId, RowExclusiveLock);
 
-	/* ask for a new relfilenode */
-	newoid = GetNewRelFileNode(reltablespace, pg_class, relpersistence);
+		/* ask for a new relfilenode */
+		newoid = GetNewRelFileNode(reltablespace, pg_class, relpersistence);
 
-	/* Close pg_class and release the lock now */
-	table_close(pg_class, RowExclusiveLock);
+		/* Close pg_class and release the lock now */
+		table_close(pg_class, RowExclusiveLock);
+	}
+	else
+	{
+		/*
+		 * First, make sure we know what is the biggest oid smaller than
+		 * FirstNormalObjectId present in pg_class.  This can never change so
+		 * we cache the value.
+		 */
+		if (!OidIsValid(min_fake_oid))
+			min_fake_oid = hypo_get_min_fake_oid();
+
+		Assert(OidIsValid(min_fake_oid));
+
+		/* Make sure there's enough room to get one more Oid */
+		if (list_length(hypoIndexes) >= (FirstNormalObjectId - min_fake_oid))
+		{
+			ereport(ERROR,
+					(errmsg("hypopg: not more oid available"),
+					errhint("Remove hypothetical indexes "
+						"or enable hypopg.use_real_oids")));
+		}
 
+		while(!OidIsValid(newoid))
+		{
+			if (!OidIsValid(last_oid))
+				newoid = last_oid = min_fake_oid;
+			else
+				newoid = ++last_oid;
+
+			/* Check if we just exceeded the fake oids range */
+			if (newoid >= FirstNormalObjectId)
+			{
+				newoid = min_fake_oid;
+				last_oid = InvalidOid;
+				oid_wraparound = true;
+			}
+
+			/*
+			 * If we already used all available fake oids, we have to make sure
+			 * that the oid isn't used anymore.
+			 */
+			if (oid_wraparound)
+			{
+				if (hypo_get_index(newoid) != NULL)
+				{
+					/* We can't use this oid.  Reset newoid and start again */
+					newoid = InvalidOid;
+				}
+			}
+		}
+	}
+
+	Assert(OidIsValid(newoid));
 	return newoid;
 }
 
+/* Reset the state of the fake oid generator. */
+void
+hypo_reset_fake_oids(void)
+{
+	Assert(hypoIndexes == NIL);
+	last_oid = InvalidOid;
+	oid_wraparound = false;
+}
+
 /* This function setup the "isExplain" flag for next hooks.
  * If this flag is setup, we can add hypothetical indexes.
  */
@@ -342,6 +439,46 @@ hypo_executorEnd_hook(QueryDesc *queryDesc)
 		standard_ExecutorEnd(queryDesc);
 }
 
+/*
+ * Return the minimum usable oid in the FirstBootstrapObjectId -
+ * FirstNormalObjectId range.
+ */
+static Oid
+hypo_get_min_fake_oid(void)
+{
+	int			ret, nb;
+	Oid			oid = InvalidOid;
+
+	/*
+	 * Connect to SPI manager
+	 */
+	if ((ret = SPI_connect()) < 0)
+		/* internal error */
+		elog(ERROR, "SPI connect failure - returned %d", ret);
+
+	ret = SPI_execute("SELECT max(oid)"
+			" FROM pg_catalog.pg_class"
+			" WHERE oid < " CppAsString2(FirstNormalObjectId),
+			true, 1);
+	nb = SPI_processed;
+
+	if (ret != SPI_OK_SELECT || nb == 0)
+	{
+		SPI_finish();
+		elog(ERROR, "hypopg: could not find the minimum fake oid");
+	}
+
+	oid = atooid(SPI_getvalue(SPI_tuptable->vals[0],
+				 SPI_tuptable->tupdesc,
+				 1)) + 1;
+
+	/* release SPI related resources (and return to caller's context) */
+	SPI_finish();
+
+	Assert(OidIsValid(oid));
+	return oid;
+}
+
 /*
  * This function will execute the "hypo_injectHypotheticalIndex" for every
  * hypothetical index found for each relation if the isExplain flag is setup.

diff --git a/hypopg_index.c b/hypopg_index.c
@@ -311,6 +311,9 @@ hypo_index_reset(void)
 
 	list_free(hypoIndexes);
 	hypoIndexes = NIL;
+
+	hypo_reset_fake_oids();
+
 	return;
 }
 

diff --git a/include/hypopg.h b/include/hypopg.h
@@ -40,12 +40,18 @@
 #define LNEXT2(list, lc)	((lc)->next)
 #endif
 
+/* Backport of atooid macro */
+#if PG_VERSION_NUM < 100000
+#define atooid(x) ((Oid) strtoul((x), NULL, 10))
+#endif
+
 extern bool isExplain;
 
 /* GUC for enabling / disabling hypopg during EXPLAIN */
 extern bool hypo_is_enabled;
 extern MemoryContext HypoMemoryContext;
 
 Oid			hypo_getNewOid(Oid relid);
+void		hypo_reset_fake_oids(void);
 
 #endif
diff --git a/test/sql/hypopg.sql b/test/sql/hypopg.sql
@@ -100,3 +100,18 @@ WHERE e ~ 'Index.*<\d+>btree_hypo.*';
 
 -- Deparse an index DDL, with almost every possible pathcode
 SELECT hypopg_get_indexdef(indexrelid) FROM hypopg_create_index('create index on hypo using btree(id desc, id desc nulls first, id desc nulls last, cast(md5(val) as bpchar)  bpchar_pattern_ops) with (fillfactor = 10) WHERE id < 1000 AND id +1 %2 = 3');
+
+-- Make sure the old Oid generator still works.  Test it while keeping existing
+-- entries, as both should be able to coexist.
+SET hypopg.use_real_oids = on;
+
+-- Should not use hypothetical index
+SELECT COUNT(*) FROM do_explain('SELECT * FROM hypo WHERE id = 1') e
+WHERE e ~ 'Index.*<\d+>btree_hypo.*';
+
+SELECT COUNT(*) AS nb
+FROM public.hypopg_create_index('CREATE INDEX ON hypo(id);');
+
+-- Should use hypothetical index
+SELECT COUNT(*) FROM do_explain('SELECT * FROM hypo WHERE id = 1') e
+WHERE e ~ 'Index.*<\d+>btree_hypo.*';