Skip to content

Commit

Permalink
Refactor allSourceColumns query to use pg json parser; fix embedding …
Browse files Browse the repository at this point in the history
…through views with subqueries inside function calls
  • Loading branch information
wolfgangwalther committed Nov 18, 2020
1 parent 3f690ec commit 8e66b2a
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 31 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- #1162, Fix location header for POST request with select= without PK - @wolfgangwalther
- #1585, Fix error messages on connection failure for localized postgres on Windows - @wolfgangwalther
- #1636, Fix `application/octet-stream` appending `charset=utf-8` - @steve-chavez
- #1608, Fix embedding through views with subqueries in function calls - @wolfgangwalther

### Changed

Expand Down
119 changes: 88 additions & 31 deletions src/PostgREST/DbStructure.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import GHC.Exts (groupWith)
import Protolude hiding (toS)
import Protolude.Conv (toS)
import Protolude.Unsafe (unsafeHead)
import Text.InterpolatedString.Perl6 (q, qc)
import Text.InterpolatedString.Perl6 (q)

import PostgREST.Private.Common
import PostgREST.Types
Expand All @@ -49,7 +49,7 @@ getDbStructure schemas extraSearchPath pgVer = do
HT.sql "set local schema ''" -- This voids the search path. The following queries need this for getting the fully qualified name(schema.name) of every db object
tabs <- HT.statement () allTables
cols <- HT.statement schemas $ allColumns tabs
srcCols <- HT.statement (schemas, extraSearchPath) $ pfkSourceColumns cols pgVer
srcCols <- HT.statement (schemas, extraSearchPath) $ pfkSourceColumns cols
m2oRels <- HT.statement () $ allM2ORels tabs cols
keys <- HT.statement () $ allPrimaryKeys tabs
procs <- HT.statement schemas allProcs
Expand Down Expand Up @@ -674,18 +674,11 @@ pkFromRow tabs (s, t, n) = PrimaryKey <$> table <*> pure n
where table = find (\tbl -> tableSchema tbl == s && tableName tbl == t) tabs

-- returns all the primary and foreign key columns which are referenced in views
pfkSourceColumns :: [Column] -> PgVersion -> H.Statement ([Schema], [Schema]) [SourceColumn]
pfkSourceColumns cols pgVer =
pfkSourceColumns :: [Column] -> H.Statement ([Schema], [Schema]) [SourceColumn]
pfkSourceColumns cols =
H.Statement sql (contrazip2 (arrayParam HE.text) (arrayParam HE.text)) (decodeSourceColumns cols) True
-- query explanation at https://gist.github.com/steve-chavez/7ee0e6590cddafb532e5f00c46275569
where
subselectRegex :: Text
-- "result" appears when the subselect is used inside "case when", see `authors_have_book_in_decade` fixture
-- "resno" appears in every other case
-- when copying the query into pg make sure you omit one backslash from \\d+, it should be like `\d+` for the regex
subselectRegex | pgVer < pgVersion100 = ":subselect {.*?:constraintDeps <>} :location \\d+} :res(no|ult)"
| otherwise = ":subselect {.*?:stmt_len 0} :location \\d+} :res(no|ult)"
sql = [qc|
sql = [q|
with recursive
pks_fks as (
-- pk + fk referencing col
Expand Down Expand Up @@ -713,36 +706,100 @@ pfkSourceColumns cols pgVer =
join pg_rewrite r on r.ev_class = c.oid
where c.relkind in ('v', 'm') and n.nspname = ANY($1 || $2)
),
removed_subselects as(
transform_json as (
select
view_id, view_schema, view_name,
regexp_replace(view_definition, '{subselectRegex}', '', 'g') as x
-- the following formatting is without indentation on purpose
-- to allow simple diffs, with less whitespace noise
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
regexp_replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
replace(
view_definition::text,
-- This conversion to json is heavily optimized for performance.
-- The general idea is to use as few regexp_replace() calls as possible.
-- Simple replace() is a lot faster, so we jump through some hoops
-- to be able to use regexp_replace() only once.
-- This has been tested against a huge schema with 250+ different views.
-- The unit tests do NOT reflect all possible inputs. Be careful when changing this!
-- -----------------------------------------------
-- pattern | replacement | flags
-- -----------------------------------------------
-- `,` is not part of the pg_node_tree format, but used in the regex.
-- This removes all `,` that might be part of column names.
',' , ''
-- The same applies for `(`, `{` and `}`, although those are used a lot in pg_node_tree.
-- We remove the escaped ones, which might be part of column names again.
), '\(' , ''
), '\{' , ''
), '\}' , ''
-- The fields we need are formatted as json manually to protect them from the regex.
), ' :targetList ' , ',"targetList":'
), ' :resno ' , ',"resno":'
), ' :resorigtbl ' , ',"resorigtbl":'
), ' :resorigcol ' , ',"resorigcol":'
-- Make the regex also match the node type, e.g. `{QUERY ...`, to remove it in one pass.
), '{' , '{ :'
-- Protect node lists, which start with `({` or `((` from the greedy regex.
-- The extra `{` is removed again later.
), '((' , '{(('
), '({' , '{({'
-- This regex removes all unused fields to avoid the need to format all of them correctly.
-- This leads to a smaller json result as well.
-- Removal stops at `,` for used fields (see above) and `}` for the end of the current node.
-- Nesting can't be parsed correctly with a regex, so we stop at `{` as well and
-- add an empty key for the followig node.
), ' :[^{,}]+' , ',"":' , 'g'
-- For performance, the regex also added those empty keys when hitting a `,` or `}`.
-- Those are removed next.
), ',"":}' , '}'
), ',"":,' , ','
-- This reverses the "node list protection" from above.
), '{(' , '('
-- Every key above has been added with a `,` so far. The first key in an object doesn't need it.
), '{,' , '{'
-- pg_node_tree has `()` around lists, but JSON uses `[]`
), '(' , '['
), ')' , ']'
-- pg_node_tree has ` ` between list items, but JSON uses `,`
), '} {' , '},{'
), '] [' , '],['
-- `<>` in pg_node_tree is the same as `null` in JSON, but due to very poor performance of json_typeof
-- we need to make this an empty array here to prevent json_array_elements from throwing an error
-- when the targetList is null.
), '<>' , '[]'
)::json as view_definition
from views
),
target_lists as(
select
view_id, view_schema, view_name,
string_to_array(x, 'targetList') as x
from removed_subselects
),
last_target_list_wo_tail as(
select
view_id, view_schema, view_name,
(string_to_array(x[array_upper(x, 1)], ':onConflict'))[1] as x
from target_lists
),
target_entries as(
select
view_id, view_schema, view_name,
unnest(string_to_array(x, 'TARGETENTRY')) as entry
from last_target_list_wo_tail
json_array_elements(view_definition->0->'targetList') as entry
from transform_json
),
results as(
select
view_id, view_schema, view_name,
substring(entry from ':resno (\d+)')::int as view_column,
substring(entry from ':resorigtbl (\d+)')::oid as resorigtbl,
substring(entry from ':resorigcol (\d+)')::int as resorigcol
(entry->>'resno')::int as view_column,
(entry->>'resorigtbl')::oid as resorigtbl,
(entry->>'resorigcol')::int as resorigcol
from target_entries
),
recursion as(
Expand Down
6 changes: 6 additions & 0 deletions test/Feature/QuerySpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,12 @@ spec actualPgVersion = do
[json|[{"title":"1984","first_publisher":"Secker & Warburg","author":{"name":"George Orwell"}}]|]
{ matchHeaders = [matchContentTypeJson] }

it "works with views that have subselects in a function call" $
get "/authors_have_book_in_decade2?select=*,books(title)&id=eq.3"
`shouldRespondWith`
[json|[ {"id":3,"name":"Antoine de Saint-Exupéry","has_book_in_forties":true,"has_book_in_fifties":false,
"has_book_in_sixties":false,"books":[{"title":"The Little Prince"}]} ]|]

it "works with views that have CTE" $
get "/odd_years_publications?select=title,publication_year,first_publisher,author:authors(name)&id=in.(1,2,3)" `shouldRespondWith`
[json|[
Expand Down
1 change: 1 addition & 0 deletions test/fixtures/privileges.sql
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ GRANT ALL ON TABLE
, jsonb_test
, authors_books_number
, authors_have_book_in_decade
, authors_have_book_in_decade2
, forties_and_fifties_books
, odd_years_publications
, foos
Expand Down
18 changes: 18 additions & 0 deletions test/fixtures/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,24 @@ select
end as has_book_in_sixties
from private.authors x;

create view test.authors_have_book_in_decade2 as
select
id,
name,
coalesce(
(select true from test.forties_books where author_id=x.id limit 1),
false
) as has_book_in_forties,
coalesce(
(select true from test.fifties_books where author_id=x.id limit 1),
false
) as has_book_in_fifties,
coalesce(
(select true from test.sixties_books where author_id=x.id limit 1),
false
) as has_book_in_sixties
from private.authors x;

create view test.forties_and_fifties_books as
select x.id, x.title, x.publication_year, y.name as first_publisher, x.author_id
from (
Expand Down

0 comments on commit 8e66b2a

Please sign in to comment.