Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing my changes #116

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/backend/statistics/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ OBJS = \
dependencies.o \
extended_stats.o \
mcv.o \
mvdistinct.o
mvdistinct.o \
statistics_gram.o

statistics_gram.o: statistics_scanner.c

include $(top_srcdir)/src/backend/common.mk
28 changes: 17 additions & 11 deletions src/backend/statistics/dependencies.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clause
int ndependencies,
AttrNumber *list_attnums,
Bitmapset **estimatedclauses);
extern void statistic_scanner_init(const char *query_string);

static void
generate_dependencies_recurse(DependencyGenerator state, int index,
Expand Down Expand Up @@ -648,21 +649,26 @@ statext_dependencies_load(Oid mvoid, bool inh)
/*
* pg_dependencies_in - input routine for type pg_dependencies.
*
* pg_dependencies is real enough to be a table column, but it has no operations
* of its own, and disallows input too
* converts the dependencies from the external format in "string" to its
* internal format.
*/
Datum
pg_dependencies_in(PG_FUNCTION_ARGS)
{
/*
* pg_node_list stores the data in binary form and parsing text input is
* not needed, so disallow this.
*/
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot accept a value of type %s", "pg_dependencies")));

PG_RETURN_VOID(); /* keep compiler quiet */
char *str = PG_GETARG_CSTRING(0);
MVDependencies *mvdependencies;
int parse_rc;

statistic_scanner_init(str);
parse_rc = statistic_yyparse();
if (parse_rc != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("failed to parse a value of type %s", "pg_dependencies")));
statistic_scanner_finish();
mvdependencies = mvdependencies_parse_result;

PG_RETURN_MVNDistinct_P(statext_dependencies_serialize(mvdependencies));
}

/*
Expand Down
14 changes: 3 additions & 11 deletions src/backend/statistics/mcv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1469,21 +1469,13 @@ pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
/*
 * pg_mcv_list_in		- input routine for type pg_mcv_list.
 *
 * Converts the text form of a serialized MCV list into its byte
 * representation by simply delegating to byteain().
 */
Datum
pg_mcv_list_in(PG_FUNCTION_ARGS)
{
	/*
	 * byteain() is invoked with our own call info and already hands back a
	 * finished Datum, so pass that through unchanged instead of wrapping it
	 * in another return macro.
	 *
	 * NOTE(review): nothing here verifies that the decoded bytes actually
	 * form a valid MCV list; confirm that consumers validate the contents
	 * before trusting them.
	 */
	return byteain(fcinfo);
}


Expand Down
15 changes: 15 additions & 0 deletions src/backend/statistics/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,18 @@ backend_sources += files(
'mcv.c',
'mvdistinct.c',
)

# Generate statistics_scanner.c from the flex source statistics_scanner.l.
stats_scan = custom_target('statsscan',
input: 'statistics_scanner.l',
output: 'statistics_scanner.c',
command: flex_cmd,
)
generated_sources += stats_scan
# NOTE(review): statistics_gram.y ends with #include "statistics_scanner.c";
# compiling the scanner as its own backend source as well may define its
# symbols twice -- confirm which of the two is intended.
backend_sources += stats_scan

# Generate the parser from statistics_gram.y using the shared bison settings.
stats_gram = custom_target('stats_gram',
input: 'statistics_gram.y',
kwargs: bison_kw,
)
generated_sources += stats_gram.to_list()
backend_sources += stats_gram
25 changes: 18 additions & 7 deletions src/backend/statistics/mvdistinct.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
static int n_choose_k(int n, int k);
static int num_combinations(int n);

extern void statistic_scanner_init(const char *query_string);

/* size of the struct header fields (magic, type, nitems) */
#define SizeOfHeader (3 * sizeof(uint32))

Expand Down Expand Up @@ -333,17 +335,26 @@ statext_ndistinct_deserialize(bytea *data)
* pg_ndistinct_in
* input routine for type pg_ndistinct
*
* pg_ndistinct is real enough to be a table column, but it has no
* operations of its own, and disallows input (just like pg_node_tree).
* converts the distinct from the external format in "string" to its internal
* format.
*/
Datum
pg_ndistinct_in(PG_FUNCTION_ARGS)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot accept a value of type %s", "pg_ndistinct")));

PG_RETURN_VOID(); /* keep compiler quiet */
char *str = PG_GETARG_CSTRING(0);
MVNDistinct *mvndistinct;
int parse_rc;

statistic_scanner_init(str);
parse_rc = statistic_yyparse();
if (parse_rc != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("failed to parse a value of type %s", "pg_ndistinct")));
statistic_scanner_finish();
mvndistinct = mvndistinct_parse_result;

PG_RETURN_MVNDistinct_P(statext_ndistinct_serialize(mvndistinct));
}

/*
Expand Down
176 changes: 176 additions & 0 deletions src/backend/statistics/statistics_gram.y
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
%{
#include "postgres.h"

#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"

/*
 * Parse results.  Each is assigned by the action of the matching top-level
 * rule (ndistinct / dependencies) in this grammar; code that invokes the
 * parser picks the value up from here afterwards.
 */
MVNDistinct *mvndistinct_parse_result;
MVDependencies *mvdependencies_parse_result;
/*
 * Bison doesn't allocate anything that needs to live across parser calls,
 * so we can easily have it use palloc instead of malloc.  This prevents
 * memory leaks if we error out during parsing.  Note this only works with
 * bison >= 2.0.  However, in bison 1.875 the default is to use alloca()
 * if possible, so there's not really much problem anyhow, at least if
 * you're building with gcc.
 */
#define YYMALLOC palloc
#define YYFREE pfree

%}

/* The grammar is expected to be free of parser conflicts. */
%expect 0
/* Generated parser symbols get this prefix, e.g. statistic_yyparse(). */
%name-prefix="statistic_yy"


/* Semantic value types carried by tokens and nonterminals. */
%union {
/* scalar token values */
uint32 uintval;
double doubleval;

/* ndistinct results: the whole value and a single item */
MVNDistinct *ndistinct;
MVNDistinctItem *ndistinct_item;

/* dependencies results: the whole value and a single dependency */
MVDependencies *dependencies;
MVDependency *dependency;

/* attribute-number set and generic item lists built while parsing */
Bitmapset *bitmap;
List *list;
}

/* Non-keyword tokens */
%token <uintval> UCONST
%token <doubleval> DOUBLE
%token ARROW

/* nonterminals used for ndistinct input */
%type <ndistinct> ndistinct
%type <ndistinct_item> ndistinct_item
%type <list> ndistinct_item_list

/* nonterminals used for dependencies input */
%type <dependencies> dependencies
%type <list> dependency_item_list
%type <dependency> dependency_item

/* attribute-number list shared by both formats */
%type <bitmap> attrs

%%

/*
 * Start symbol: the input is either an ndistinct value or a dependencies
 * value.  The actions here are intentionally empty.
 *
 * NOTE(review): both forms are accepted no matter which type's input
 * function started the parse -- confirm that callers check they received
 * the kind of result they expect.
 */
extended_statistic:
ndistinct { } |
dependencies { }
;

/*
 * ndistinct
 *		Parses a brace-enclosed, comma-separated list of ndistinct items and
 *		flattens it into one MVNDistinct, which is also stored in
 *		mvndistinct_parse_result.
 *
 * Example:
 *		input : '{"1, 2": 1, "2, 3": 2, "3, 1": 2}'
 *		output: an MVNDistinct object holding three items
 */
ndistinct:
	'{' ndistinct_item_list '}'
		{
			int			nitems = list_length($2);
			int			pos = 0;
			ListCell   *lc;

			/* header plus one array slot per parsed item */
			$$ = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
						 nitems * sizeof(MVNDistinctItem));
			mvndistinct_parse_result = $$;

			$$->magic = STATS_NDISTINCT_MAGIC;
			$$->type = STATS_NDISTINCT_TYPE_BASIC;
			$$->nitems = nitems;

			/* copy every list element into the trailing items array */
			foreach(lc, $2)
			{
				memcpy(&$$->items[pos], lfirst(lc), sizeof(MVNDistinctItem));
				pos++;
			}
		}
	;

/* One or more ndistinct items separated by commas, collected into a List. */
ndistinct_item_list:
	ndistinct_item
		{
			$$ = list_make1($1);
		}
	| ndistinct_item_list ',' ndistinct_item
		{
			$$ = lappend($1, $3);
		}
	;

/*
 * A single ndistinct entry: a double-quoted attribute list, a colon, and the
 * distinct estimate.
 */
ndistinct_item:
	'"' attrs '"' ':' DOUBLE
		{
			int			natts = 0;
			MVNDistinctItem *item = (MVNDistinctItem *) palloc0(sizeof(MVNDistinctItem));

			/* turn the attribute set into an array and remember its length */
			item->attributes = build_attnums_array($2, 0, &natts);
			item->nattributes = natts;
			item->ndistinct = $5;

			$$ = item;
		}
	;

/*
 * Comma-separated attribute numbers, gathered into a Bitmapset.  The base
 * case needs two numbers, so a list always has at least two entries.
 */
attrs:
	UCONST ',' UCONST
		{
			Bitmapset  *set = bms_make_singleton($1);

			$$ = bms_add_member(set, $3);
		}
	| attrs ',' UCONST
		{
			$$ = bms_add_member($1, $3);
		}
	;

/*
 * dependencies
 *		Parses a brace-enclosed, comma-separated list of dependency items and
 *		collects them into one MVDependencies, which is also stored in
 *		mvdependencies_parse_result.
 *
 * Example:
 *		input : '{"1 => 2": 1.000000, "2 => 3": 2.000000}'
 *		output: an MVDependencies object holding two dependencies
 */
dependencies:
	'{' dependency_item_list '}'
		{
			int			ndeps = list_length($2);
			int			pos = 0;
			ListCell   *lc;

			/* header plus one pointer slot per parsed dependency */
			$$ = palloc0(MAXALIGN(offsetof(MVDependencies, deps)) +
						 ndeps * sizeof(MVDependency *));
			mvdependencies_parse_result = $$;

			$$->magic = STATS_DEPS_MAGIC;
			$$->type = STATS_DEPS_TYPE_BASIC;
			$$->ndeps = ndeps;

			/* the list elements themselves become the array entries */
			foreach(lc, $2)
			{
				$$->deps[pos] = lfirst(lc);
				pos++;
			}
		}
	;

/* One or more dependency items separated by commas, collected into a List. */
dependency_item_list:
	dependency_item
		{
			$$ = list_make1($1);
		}
	| dependency_item_list ',' dependency_item
		{
			$$ = lappend($1, $3);
		}
	;

/*
 * A single dependency: quoted "<determining attrs> => <implied attr>",
 * a colon, and the degree.  The implied attribute is always stored last in
 * the attributes array.  A lone determining attribute needs its own
 * alternative because "attrs" only matches lists of two or more numbers.
 */
dependency_item:
	'"' attrs ARROW UCONST '"' ':' DOUBLE
		{
			int			nfrom = 0;
			AttrNumber *from = build_attnums_array($2, 0, &nfrom);
			MVDependency *dep;
			int			k;

			/* room for the determining attributes plus the implied one */
			dep = (MVDependency *) palloc0(sizeof(MVDependency) +
										   sizeof(AttrNumber) * (nfrom + 1));
			dep->nattributes = nfrom + 1;
			dep->degree = $7;

			for (k = 0; k < nfrom; k++)
				dep->attributes[k] = from[k];
			dep->attributes[dep->nattributes - 1] = $4;

			$$ = dep;
		}
	| '"' UCONST ARROW UCONST '"' ':' DOUBLE
		{
			MVDependency *dep;

			dep = (MVDependency *) palloc0(sizeof(MVDependency) +
										   sizeof(AttrNumber) * 2);
			dep->nattributes = 2;
			dep->degree = $7;
			dep->attributes[0] = $2;
			dep->attributes[1] = $4;

			$$ = dep;
		}
	;
%%

#include "statistics_scanner.c"