/*-------------------------------------------------------------------------
 *
 * hypopg.c: Implementation of hypothetical indexes for PostgreSQL
 *
 * Some functions are imported from PostgreSQL source code; these are present
 * in hypopg_import.* files.
 *
 * This program is open source, licensed under the PostgreSQL license.
 * For license terms, see the LICENSE file.
 *
 * Copyright (C) 2015-2018: Julien Rouhaud
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "fmgr.h"
#include "miscadmin.h"
#if PG_VERSION_NUM >= 90300
#include "access/htup_details.h"
#endif
#if PG_VERSION_NUM >= 100000
#include "access/xact.h"
#endif
#include "commands/defrem.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "parser/parsetree.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
#include "include/hypopg.h"
#include "include/hypopg_analyze.h"
#include "include/hypopg_import.h"
#include "include/hypopg_index.h"
#include "include/hypopg_table.h"
PG_MODULE_MAGIC;
/*--- Macros ---*/
/*
 * True only when the current statement was recognized as a plain EXPLAIN
 * (isExplain) and the hypopg.enabled setting is on (hypo_is_enabled).
 */
#define HYPO_ENABLED() (isExplain && hypo_is_enabled)
/*
 * State threaded through hypo_query_walker.
 */
typedef struct hypoWalkerContext
{
/* set to true by hypo_query_walker once an EXPLAIN without ANALYZE is seen */
bool explain_found;
} hypoWalkerContext;
/*--- Variables exported ---*/
/* set by hypo_utility_hook for each utility command, cleared by
 * hypo_executorEnd_hook */
bool isExplain;
/* storage for the "hypopg.enabled" custom variable defined in _PG_init */
bool hypo_is_enabled;
/* memory context created in _PG_init as a child of TopMemoryContext */
MemoryContext HypoMemoryContext;
/*--- Variables not exported ---*/
static List *pending_invals = NIL; /* List of interesting OID for which we
received inval messages that need to be
processed. */
/*--- Functions --- */
/* Module load / unload entry points */
void _PG_init(void);
void _PG_fini(void);
/* SQL-callable function: drops every stored hypothetical object */
Datum hypopg_reset(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(hypopg_reset);
/*
 * ProcessUtility hook.  The parameter list follows the hook signature of the
 * PostgreSQL version being compiled against, hence the preprocessor
 * conditionals.
 */
static void
hypo_utility_hook(
#if PG_VERSION_NUM >= 100000
PlannedStmt *pstmt,
#else
Node *parsetree,
#endif
const char *queryString,
#if PG_VERSION_NUM >= 90300
ProcessUtilityContext context,
#endif
ParamListInfo params,
#if PG_VERSION_NUM >= 100000
QueryEnvironment *queryEnv,
#endif
#if PG_VERSION_NUM < 90300
bool isTopLevel,
#endif
DestReceiver *dest,
char *completionTag);
/* Hook that was installed before ours, if any; chained to from our hook */
static ProcessUtility_hook_type prev_utility_hook = NULL;
/* ExecutorEnd hook and the previously installed hook it chains to */
static void hypo_executorEnd_hook(QueryDesc *queryDesc);
static ExecutorEnd_hook_type prev_ExecutorEnd_hook = NULL;
/* get_relation_info hook and the previously installed hook it chains to */
static void hypo_get_relation_info_hook(PlannerInfo *root,
Oid relationObjectId,
bool inhparent,
RelOptInfo *rel);
static get_relation_info_hook_type prev_get_relation_info_hook = NULL;
/* get_relation_stats hook; the previous hook is saved and restored only */
static bool hypo_get_relation_stats_hook(PlannerInfo *root,
RangeTblEntry *rte,
AttrNumber attnum,
VariableStatData *vardata);
static get_relation_stats_hook_type prev_get_relation_stats_hook = NULL;
/* Helper detecting EXPLAIN (without ANALYZE) in a utility statement */
static bool hypo_query_walker(Node *node, hypoWalkerContext *context);
/* Relcache invalidation callback registered in _PG_init */
static void hypo_CacheRelCallback(Datum arg, Oid relid);
/*
 * Module load callback.
 *
 * Saves the currently installed hooks and installs the hypopg ones, resets
 * the module state, creates the module memory context, defines the
 * "hypopg.enabled" setting and registers the relcache invalidation callback.
 */
void
_PG_init(void)
{
/* Install hooks */
prev_utility_hook = ProcessUtility_hook;
ProcessUtility_hook = hypo_utility_hook;
prev_ExecutorEnd_hook = ExecutorEnd_hook;
ExecutorEnd_hook = hypo_executorEnd_hook;
prev_get_relation_info_hook = get_relation_info_hook;
get_relation_info_hook = hypo_get_relation_info_hook;
/*
 * prev_explain_get_index_name_hook and hypo_explain_get_index_name_hook are
 * not defined in this file (presumably in the hypopg index module).
 */
prev_explain_get_index_name_hook = explain_get_index_name_hook;
explain_get_index_name_hook = hypo_explain_get_index_name_hook;
prev_get_relation_stats_hook = get_relation_stats_hook;
get_relation_stats_hook = hypo_get_relation_stats_hook;
/* Start with no EXPLAIN in progress and no stored hypothetical object */
isExplain = false;
hypoIndexes = NIL;
#if PG_VERSION_NUM >= 100000
hypoTables = NULL;
#endif
/*
 * Module memory context, child of TopMemoryContext.  The size arguments
 * differ because ALLOCSET_DEFAULT_SIZES is only used for 9.6 and later.
 */
HypoMemoryContext = AllocSetContextCreate(TopMemoryContext,
"HypoPG context",
#if PG_VERSION_NUM >= 90600
ALLOCSET_DEFAULT_SIZES
#else
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE
#endif
);
/* Boolean setting backing hypo_is_enabled: defaults to true, PGC_USERSET */
DefineCustomBoolVariable("hypopg.enabled",
"Enable / Disable hypopg",
NULL,
&hypo_is_enabled,
true,
PGC_USERSET,
0,
NULL,
NULL,
NULL);
/* Get notified of relcache invalidations, see hypo_CacheRelCallback */
CacheRegisterRelcacheCallback(hypo_CacheRelCallback, (Datum) 0);
}
/*
 * Module unload callback: put back every hook that was saved by _PG_init.
 * The hooks are restored in the reverse order of their installation.
 */
void
_PG_fini(void)
{
    get_relation_stats_hook = prev_get_relation_stats_hook;
    explain_get_index_name_hook = prev_explain_get_index_name_hook;
    get_relation_info_hook = prev_get_relation_info_hook;
    ExecutorEnd_hook = prev_ExecutorEnd_hook;
    ProcessUtility_hook = prev_utility_hook;
}
/*---------------------------------
 * Return a new OID for a hypothetical index on the relation "relid".
 *
 * This is a wrapper around GetNewRelFileNode: the tablespace and persistence
 * of the given relation are read first, then pg_class is opened to ask for a
 * new relfilenode with those properties.  Both relations are closed, and
 * their locks released, before returning.
 */
Oid
hypo_getNewOid(Oid relid)
{
    Relation    target;
    Relation    catalog;
    Oid         tablespace;
    char        persistence;
    Oid         result;

    /* Fetch what we need from the target relation, then release it */
    target = heap_open(relid, AccessShareLock);
    tablespace = target->rd_rel->reltablespace;
    persistence = target->rd_rel->relpersistence;
    heap_close(target, AccessShareLock);

    /* pg_class is only held for the duration of the OID generation */
    catalog = heap_open(RelationRelationId, RowExclusiveLock);
    result = GetNewRelFileNode(tablespace, catalog, persistence);
    heap_close(catalog, RowExclusiveLock);

    return result;
}
/*
 * ProcessUtility hook: set up the "isExplain" flag for the next hooks.
 *
 * The utility statement is inspected with hypo_query_walker; isExplain is set
 * when it is an EXPLAIN that will not execute the query.  While the flag is
 * set, the planner hooks are allowed to add hypothetical objects.  Control is
 * then handed to the previously installed hook, or to
 * standard_ProcessUtility when there is none.  All arguments are forwarded
 * unchanged; the parameter list depends on the PostgreSQL version.
 *
 * The flag is normally cleared by hypo_executorEnd_hook.  If the command
 * raises an error that hook never runs, so the flag is also cleared here
 * before the error is re-thrown.  Otherwise a failed EXPLAIN would leave the
 * flag set, and a following regular query (which does not go through this
 * hook) would be planned with hypothetical objects.
 */
static void
hypo_utility_hook(
#if PG_VERSION_NUM >= 100000
                  PlannedStmt *pstmt,
#else
                  Node *parsetree,
#endif
                  const char *queryString,
#if PG_VERSION_NUM >= 90300
                  ProcessUtilityContext context,
#endif
                  ParamListInfo params,
#if PG_VERSION_NUM >= 100000
                  QueryEnvironment *queryEnv,
#endif
#if PG_VERSION_NUM < 90300
                  bool isTopLevel,
#endif
                  DestReceiver *dest,
                  char *completionTag)
{
    hypoWalkerContext hypo_context = { 0 };

    /* The walker's result is reported through hypo_context.explain_found */
    hypo_query_walker(
#if PG_VERSION_NUM >= 100000
                      (Node *) pstmt,
#else
                      parsetree,
#endif
                      &hypo_context);
    isExplain = hypo_context.explain_found;

    PG_TRY();
    {
        /*
         * Process pending invalidation.  For now, just do it if the current
         * query might try to access stored hypothetical objects.
         */
        if (isExplain && list_length(pending_invals) != 0)
            hypo_process_inval();

        if (prev_utility_hook)
            prev_utility_hook(
#if PG_VERSION_NUM >= 100000
                              pstmt,
#else
                              parsetree,
#endif
                              queryString,
#if PG_VERSION_NUM >= 90300
                              context,
#endif
                              params,
#if PG_VERSION_NUM >= 100000
                              queryEnv,
#endif
#if PG_VERSION_NUM < 90300
                              isTopLevel,
#endif
                              dest, completionTag);
        else
            standard_ProcessUtility(
#if PG_VERSION_NUM >= 100000
                                    pstmt,
#else
                                    parsetree,
#endif
                                    queryString,
#if PG_VERSION_NUM >= 90300
                                    context,
#endif
                                    params,
#if PG_VERSION_NUM >= 100000
                                    queryEnv,
#endif
#if PG_VERSION_NUM < 90300
                                    isTopLevel,
#endif
                                    dest, completionTag);
    }
    PG_CATCH();
    {
        /* ExecutorEnd will not run: do not leak the flag to later queries */
        isExplain = false;
        PG_RE_THROW();
    }
    PG_END_TRY();
}
/*
 * Detect if the current utility command is compatible with hypothetical
 * objects, i.e. an EXPLAIN that does not run the query (no ANALYZE).
 *
 * node:    the statement to inspect (may be NULL).
 * context: context->explain_found is set to true when a compatible EXPLAIN
 *          is found; it is never reset here.
 *
 * The "analyze" option is evaluated as a boolean, the last occurrence
 * winning, so that EXPLAIN (ANALYZE false) is treated as a plain EXPLAIN.
 *
 * On PostgreSQL 10 and later, when hypothetical tables exist, the explained
 * query and its CTEs are also checked, and an error is raised for UPDATE or
 * DELETE targeting a hypothetically partitioned table.
 *
 * hypo_utility_hook ignores the return value and only reads
 * context->explain_found.
 */
static bool
hypo_query_walker(Node *node, hypoWalkerContext *context)
{
    if (node == NULL)
        return false;

    switch (nodeTag(node))
    {
        case T_PlannedStmt:
            {
                Node       *stmt = ((PlannedStmt *) node)->utilityStmt;

                return query_or_expression_tree_walker(stmt, hypo_query_walker,
                                                       context,
                                                       QTW_IGNORE_RANGE_TABLE);
            }
        case T_ExplainStmt:
            {
                ExplainStmt *stmt = (ExplainStmt *) node;
                bool        analyze = false;
                ListCell   *lc;

                foreach(lc, stmt->options)
                {
                    DefElem    *opt = (DefElem *) lfirst(lc);

                    /* a missing value means true, as in "EXPLAIN ANALYZE" */
                    if (strcmp(opt->defname, "analyze") == 0)
                        analyze = defGetBoolean(opt);
                }

                /* the query will really be executed, stay out of the way */
                if (analyze)
                    return false;

                context->explain_found = true;
#if PG_VERSION_NUM >= 100000
                /*
                 * No point in looking for unhandled command type if there are
                 * no hypothetical partitions
                 */
                if (!hypoTables)
                    return true;

                return hypo_query_walker(stmt->query, context);
#else
                return true;
#endif
            }
#if PG_VERSION_NUM >= 100000
        case T_Query:
            {
                Query      *query = (Query *) node;

                Assert(context->explain_found);

                if (context->explain_found &&
                    (query->commandType == CMD_UPDATE ||
                     query->commandType == CMD_DELETE))
                {
                    RangeTblEntry *rte = rt_fetch(query->resultRelation,
                                                  query->rtable);

                    if (hypo_table_oid_is_hypothetical(rte->relid))
                        elog(ERROR, "hypopg: UPDATE and DELETE on hypothetically"
                             " partitioned tables are not supported");
                }

                /* CTE queries are checked explicitly */
                if (query->cteList)
                {
                    ListCell   *lc;

                    foreach(lc, query->cteList)
                    {
                        CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);

                        hypo_query_walker(cte->ctequery, context);
                    }
                }

                return query_or_expression_tree_walker(node, hypo_query_walker,
                                                       context,
                                                       QTW_IGNORE_RANGE_TABLE);
            }
#endif
        default:
            return false;
    }
}
/*
 * Callback for relcache inval messages.
 *
 * Detect if the given relid corresponds to something we should take care of.
 * For now, we only care about tables being dropped for which we have
 * hypothetical partitioning information, which requires removing the
 * relevant hypoTable entries.  At this point we can't detect whether the
 * inval message is due to the table being dropped, because any cache access
 * requires a valid transaction and we have no guarantee of being in one
 * here.  Instead, the OID is remembered in a deduplicated list, allocated in
 * HypoMemoryContext, that is processed later by hypo_process_inval.
 *
 * "arg" is unused.  This is a no-op before PostgreSQL 10.
 */
static void
hypo_CacheRelCallback(Datum arg, Oid relid)
{
#if PG_VERSION_NUM >= 100000
    MemoryContext callerContext;

    /* Not a relation we store hypothetical partitioning for: ignore it */
    if (!hypo_find_table(relid, true))
        return;

    /* The list must outlive the current context */
    callerContext = MemoryContextSwitchTo(HypoMemoryContext);
    pending_invals = list_append_unique_oid(pending_invals, relid);
    MemoryContextSwitchTo(callerContext);
#endif
}
/*
 * Process any relcache invalidation we previously received.
 *
 * Invalidations have to be processed asynchronously, because an entry must
 * only be removed if the message was due to the original table being
 * dropped.  We try to detect this case by comparing the relid's relname, if
 * it still exists, with the stored table name.  This requires a valid
 * snapshot, which may not be available at the moment the inval message is
 * received.
 *
 * Every pending OID whose relation is gone, or whose name no longer matches,
 * has its hypothetical table entry removed.  The pending list is emptied
 * before returning.  This is a no-op before PostgreSQL 10.
 */
void
hypo_process_inval(void)
{
#if PG_VERSION_NUM >= 100000
    ListCell   *lc;

    Assert(IsTransactionState());

    /* XXX: remove this if support for hypothetical indexes is added */
    if (!hypoTables)
    {
        /*
         * NOTE(review): the list cells are not freed on this path.  Left as
         * is because it is not visible from here whether the memory they
         * live in is still valid once hypoTables has been dropped.
         */
        pending_invals = NIL;
        return;
    }

    if (pending_invals == NIL)
        return;

    foreach(lc, pending_invals)
    {
        Oid         relid = lfirst_oid(lc);
        hypoTable  *entry = hypo_find_table(relid, false);
        char       *relname;
        bool        found = false;  /* stays false when nothing is removed */

        /*
         * The pending invalidations should be filtered and recorded after
         * removing an entry, and should always be processed before any
         * attempt to remove a hypothetical object, so we should always find
         * a hypoTable at this point.
         */
        Assert(entry);

        /* Don't dereference a missing entry in non-assert builds */
        if (entry == NULL)
            continue;

        /* NULL when the relation does not exist anymore */
        relname = get_rel_name(relid);

        if (!relname || (strcmp(relname, entry->tablename) != 0))
            found = hypo_table_remove(relid, NULL, true);

        if (found)
            elog(DEBUG1, "hypopg: hypo_process_inval removed table %s (%u)",
                 relname ? relname : "(null)", relid);
    }

    list_free(pending_invals);
    pending_invals = NIL;
#endif
}
/*
 * ExecutorEnd hook: clear the isExplain flag once a query is over, then hand
 * over to the previously installed hook, or to standard_ExecutorEnd when
 * there is none.
 */
static void
hypo_executorEnd_hook(QueryDesc *queryDesc)
{
    ExecutorEnd_hook_type next_hook;

    next_hook = prev_ExecutorEnd_hook ? prev_ExecutorEnd_hook
                                      : standard_ExecutorEnd;

    isExplain = false;
    next_hook(queryDesc);
}
/*
 * get_relation_info hook.
 *
 * When HYPO_ENABLED() holds, call hypo_injectHypotheticalIndex for every
 * stored hypothetical index that applies to the relation being planned.  On
 * PostgreSQL 10 and later, hypothetical partitioning is injected first when
 * the relation is hypothetically partitioned.
 *
 * The previously installed hook, if any, is always called afterwards.
 */
static void
hypo_get_relation_info_hook(PlannerInfo *root,
                            Oid relationObjectId,
                            bool inhparent,
                            RelOptInfo *rel)
{
    if (HYPO_ENABLED())
    {
        Relation    heaprel;
        char        relkind;
#if PG_VERSION_NUM >= 100000
        bool        is_hypo_part;

        /*
         * If this relation is a table we want to partition hypothetically,
         * inject the hypothetical partitioning.
         */
        is_hypo_part = hypo_table_oid_is_hypothetical(relationObjectId);
        if (is_hypo_part)
            hypo_injectHypotheticalPartitioning(root, relationObjectId, rel);
#endif

        heaprel = heap_open(relationObjectId, AccessShareLock);
        relkind = heaprel->rd_rel->relkind;

        /* Only plain tables (and materialized views on 9.3+) are handled */
        if (relkind == RELKIND_RELATION
#if PG_VERSION_NUM >= 90300
            || relkind == RELKIND_MATVIEW
#endif
            )
        {
            Oid         indexedRelid = relationObjectId;
            ListCell   *cell;

#if PG_VERSION_NUM >= 100000
            /*
             * Hypothetical indexes are looked up by root table oid.  For a
             * hypothetical partition the root table oid is already
             * relationObjectId.  For a real partition, climb up the append
             * relation infos stored in PlannerInfo until the parent is no
             * longer a member rel.
             */
            if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL && !is_hypo_part)
            {
                AppendRelInfo *appinfo;
                RelOptInfo *ancestor = rel;

                do
                {
                    appinfo = root->append_rel_array[ancestor->relid];
                    ancestor = find_base_rel(root, appinfo->parent_relid);
                } while (ancestor->reloptkind == RELOPT_OTHER_MEMBER_REL);

                indexedRelid = appinfo->parent_reloid;
            }
#endif

            foreach(cell, hypoIndexes)
            {
                hypoIndex  *hypoidx = (hypoIndex *) lfirst(cell);

                if (hypoidx->relid != indexedRelid)
                    continue;
#if PG_VERSION_NUM >= 100000
                /* nothing is injected on rels having a partition scheme */
                if (rel->part_scheme)
                    continue;
#endif
                /* matching hypothetical index: add it to the rel's indexlist */
                hypo_injectHypotheticalIndex(root, indexedRelid,
                                             inhparent, rel, heaprel, hypoidx);
            }
        }

        /* Close the relation but keep the lock, it might be reopened later */
        heap_close(heaprel, NoLock);
    }

    if (prev_get_relation_info_hook)
        prev_get_relation_info_hook(root, relationObjectId, inhparent, rel);
}
/*
 * get_relation_stats hook.
 *
 * Provide the statistics tuple of column "attnum" of the relation described
 * by "rte" in "vardata", for hypothetically partitioned tables and
 * hypothetical partitions.
 *
 * Returns true when vardata->statsTuple and vardata->freefunc have been set
 * by this function, false to let PostgreSQL fetch the statistics itself.
 * Always returns false before PostgreSQL 10.
 */
static bool
hypo_get_relation_stats_hook(PlannerInfo *root,
                             RangeTblEntry *rte,
                             AttrNumber attnum,
                             VariableStatData *vardata)
{
#if PG_VERSION_NUM < 100000
    return false;
#else
    Oid         poid = InvalidOid;
    hypoStatsKey key;
    hypoStatsEntry *entry;
    bool        found;

    /* Nothing to do if it's not a plain relation */
    if (rte->rtekind != RTE_RELATION)
        return false;

    /*
     * If this is a root table hypothetically partitioned, we have to retrieve
     * the pg_statistic row ourselves, even if no hypopg_analyze has been
     * performed yet, because postgres will search for an entry with
     * stainherit = true, which won't exist.
     *
     * NOTE(review): such a table is recognized here by security_barrier being
     * set with an empty values_lists; presumably those fields are set by the
     * hypothetical partitioning injection code -- confirm.
     */
    if (rte->security_barrier && (rte->values_lists == NIL))
    {
        vardata->statsTuple = SearchSysCache3(STATRELATTINH,
                                              ObjectIdGetDatum(rte->relid),
                                              Int16GetDatum(attnum),
                                              BoolGetDatum(false));
        vardata->freefunc = ReleaseSysCache;

        if (HeapTupleIsValid(vardata->statsTuple))
        {
            /* check if user has permission to read this column */
            vardata->acl_ok =
                (pg_class_aclcheck(rte->relid, GetUserId(),
                                   ACL_SELECT) == ACLCHECK_OK) ||
                (pg_attribute_aclcheck(rte->relid, attnum, GetUserId(),
                                       ACL_SELECT) == ACLCHECK_OK);
        }
        else
        {
            /* suppress any possible leakproofness checks later */
            vardata->acl_ok = true;
        }

        return true;
    }

    /* Fast exit if the local hash hasn't been created yet */
    if (!hypoStatsHash)
        return false;

    /* Nothing to do if it's not a hypothetical partition */
    if (!rte->values_lists)
        return false;

    /* At this point, we have a hypothetical partition. Get its oid */
    poid = linitial_oid(rte->values_lists);

    if (poid == InvalidOid)
        /* This should not happen */
        return false;

    /* Retrieve the pg_statistic stored row */
    memset(&key, 0, sizeof(hypoStatsKey));
    key.relid = poid;
    key.attnum = attnum;
    entry = hash_search(hypoStatsHash, &key, HASH_FIND, &found);

    /* XXX should we warn about possible very bad estimation? */
    if (!found)
        return false;

    /*
     * Hand out a private copy of the stored tuple.  heap_freetuple has
     * exactly the signature expected for freefunc, so no function pointer
     * cast is needed to release the copy.
     *
     * NOTE(review): vardata->acl_ok is not set on this path -- confirm that
     * this is intended.
     */
    vardata->statsTuple = heap_copytuple(entry->statsTuple);
    vardata->freefunc = heap_freetuple;

    return true;
#endif
}
/*
 * SQL-callable function: discard all stored hypothetical objects.
 *
 * Hypothetical indexes are always reset; hypothetical tables are also reset
 * on PostgreSQL 10 and later.  Returns void.
 */
Datum
hypopg_reset(PG_FUNCTION_ARGS)
{
    /* indexes first, then tables where supported */
    hypo_index_reset();
#if PG_VERSION_NUM >= 100000
    hypo_table_reset();
#endif

    PG_RETURN_VOID();
}