Skip to content

Commit

Permalink
Merge with Aug2018 branch.
Browse files Browse the repository at this point in the history
  • Loading branch information
sjoerdmullender committed Dec 10, 2018
2 parents bfff7f0 + c34dff7 commit 7838a35
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 86 deletions.
164 changes: 105 additions & 59 deletions gdk/gdk_join.c
Expand Up @@ -138,14 +138,16 @@ joininitresults(BAT **r1p, BAT **r2p, BUN lcnt, BUN rcnt, bool lkey, bool rkey,
} else {
maxsize = rcnt;
}
} else {
} else if (rcnt == 0) {
/* nil_on_miss must be true due to previous checks, so
* all values on left miss */
maxsize = lcnt;
} else if (BUN_MAX / lcnt >= rcnt) {
/* in the worst case we have a full cross product */
if (lcnt == 0 || rcnt == 0)
maxsize = nil_on_miss ? lcnt : 0;
else if (BUN_MAX / lcnt >= rcnt)
maxsize = lcnt * rcnt;
else
maxsize = BUN_MAX;
maxsize = lcnt * rcnt;
} else {
/* a BAT cannot grow larger than BUN_MAX */
maxsize = BUN_MAX;
}
size = estimate == BUN_NONE ? lcnt : estimate;
if (size > maxsize)
Expand Down Expand Up @@ -221,19 +223,20 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BUN lstart, BUN lend,
bool nil_on_miss, bool only_misses, const char *func, lng t0)
{
BUN cnt;
BAT *r1, *r2;
BAT *r1, *r2 = NULL;

if (lstart == lend || !(nil_on_miss | only_misses)) {
/* return empty BATs */
if ((*r1p = BATdense(0, 0, 0)) == NULL)
return GDK_FAIL;
if (r2p && (*r2p = BATdense(0, 0, 0)) == NULL) {
BBPreclaim(*r1p);
*r1p = NULL;
if ((r1 = BATdense(0, 0, 0)) == NULL)
return GDK_FAIL;
if (r2p) {
if ((r2 = BATdense(0, 0, 0)) == NULL) {
BBPreclaim(r1);
return GDK_FAIL;
}
*r2p = r2;
}
r1 = *r1p;
r2 = r2p ? *r2p : NULL;
*r1p = r1;
ALGODEBUG fprintf(stderr,
"#%s(l=%s,r=%s)=(" ALGOBATFMT "," ALGOOPTBATFMT ") " LLFMT "us -- nomatch\n",
func, BATgetId(l), BATgetId(r),
Expand All @@ -243,8 +246,6 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BUN lstart, BUN lend,
}

if (lcand) {
BAT *r1;

cnt = (BUN) (lcandend - lcand);
r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT);
if (r1 == NULL)
Expand All @@ -265,20 +266,19 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BUN lstart, BUN lend,
r1->tnil = false;
r1->tnonil = true;
BATsetcount(r1, cnt);
*r1p = r1;
} else {
cnt = lend - lstart;
if ((*r1p = BATdense(0, l->hseqbase + lstart, cnt)) == NULL)
if ((r1 = BATdense(0, l->hseqbase + lstart, cnt)) == NULL)
return GDK_FAIL;
}
if (r2p &&
(*r2p = BATconstant(0, TYPE_void, &oid_nil, cnt, TRANSIENT)) == NULL) {
BBPreclaim(*r1p);
*r1p = NULL;
return GDK_FAIL;
if (r2p) {
if ((r2 = BATconstant(0, TYPE_void, &oid_nil, cnt, TRANSIENT)) == NULL) {
BBPreclaim(r1);
return GDK_FAIL;
}
*r2p = r2;
}
r1 = *r1p;
r2 = r2p ? *r2p : NULL;
*r1p = r1;
ALGODEBUG fprintf(stderr,
"#%s(l=%s,r=%s)=(" ALGOBATFMT "," ALGOOPTBATFMT ") " LLFMT "us -- nomatch\n",
func, BATgetId(l), BATgetId(r),
Expand Down Expand Up @@ -421,6 +421,12 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
#define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering, last)
#endif

/* Implementation of join where the right-hand side is dense, and if
* there is a right candidate list, it too is dense. In case
* nil_on_miss is not set, we use a range select (BATselect) to find
* the matching values in the left column and then calculate the
* corresponding matches from the right. If nil_on_miss is set, we
* need to do some more work. */
static gdk_return
mergejoin_void(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
BUN lstart, BUN lend, BUN lcnt,
Expand Down Expand Up @@ -533,7 +539,7 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
BATsetcount(r2, BATcount(r1));
}
*r2p = r2;
return GDK_SUCCEED;
goto doreturn2;
}
/* nil_on_miss is set, this means we must have a second output */
assert(r2p);
Expand Down Expand Up @@ -571,7 +577,7 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
* respective OID values that we can store in
* r1 and r2; note that r1 will be dense since
* all values in l will match something (even
* if nil if nil_on_miss is set) */
* if nil since nil_on_miss is set) */
*r1p = r1 = BATdense(0, seq, lcnt);
if (r1 == NULL)
return GDK_FAIL;
Expand Down Expand Up @@ -803,13 +809,16 @@ mergejoin_void(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
virtualize(r1);
if (r2->tkey && r2->tsorted)
virtualize(r2);
doreturn2:
ALGODEBUG fprintf(stderr, "#mergejoin_void(l=%s,r=%s)=(" ALGOBATFMT "," ALGOOPTBATFMT ") " LLFMT "us\n",
BATgetId(l), BATgetId(r),
ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
GDKusec() - t0);
return GDK_SUCCEED;
}

/* Implementation of mergejoin (see below) for the special case that
* the values are of type int, and some more conditions are met. */
static gdk_return
mergejoin_int(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
bool nil_matches, BUN estimate, lng t0, bool swapped)
Expand Down Expand Up @@ -1104,6 +1113,8 @@ mergejoin_int(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
return GDK_FAIL;
}

/* Implementation of mergejoin (see below) for the special case that
* the values are of type lng, and some more conditions are met. */
static gdk_return
mergejoin_lng(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
bool nil_matches, BUN estimate, lng t0, bool swapped)
Expand Down Expand Up @@ -1413,9 +1424,6 @@ mergejoin_lng(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
* there is a match of l in r, no matter how many matches there are in
* r; otherwise all matches are returned.
*
* maxsize is the absolute maximum size the output BATs can become (if
* they were to become larger, we have a bug).
*
* t0 and swapped are only for debugging (ALGOMASK set in GDKdebug).
*/
static gdk_return
Expand Down Expand Up @@ -2484,6 +2492,8 @@ binsearchcand(const oid *cand, BUN lo, BUN hi, oid v)
} \
} while (false)

/* Implementation of join using a hash lookup of values in the right
* column. */
static gdk_return
hashjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
BUN lstart, BUN lend, BUN lcnt,
Expand Down Expand Up @@ -3573,26 +3583,44 @@ bandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,

/* small ordered right, dense left, oid's only, do fetches */
static gdk_return
fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, lng t0)
fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BUN lstart, BUN lend,
BUN rstart, BUN rend, lng t0)
{
oid lo = l->tseqbase, hi = lo + BATcount(l);
BUN b = SORTfndfirst(r, &lo), e = SORTfndlast(r, &hi), p;
oid lo = l->tseqbase + lstart, hi = lo + lend;
BUN b, e, p;
BAT *r1, *r2 = NULL;

ALGODEBUG fprintf(stderr, "#fetchjoin(l=" ALGOBATFMT ","
"r=" ALGOBATFMT ")\n",
ALGOBATPAR(l), ALGOBATPAR(r));

if (r2p &&
(*r2p = BATdense(0, e == b ? 0 : r->hseqbase + b, e - b)) == NULL) {
return GDK_FAIL;
if (r->tsorted) {
b = SORTfndfirst(r, &lo);
e = SORTfndfirst(r, &hi);
} else {
assert(r->trevsorted);
b = SORTfndlast(r, &hi);
e = SORTfndlast(r, &lo);
}
if (b < rstart)
b = rstart;
if (e > rend)
e = rend;
if (e == b) {
return nomatch(r1p, r2p, l, r, lstart, lend, NULL, NULL,
false, false, "fetchjoin", t0);
}
BAT *r2 = r2p ? *r2p : NULL;
BAT *r1 = *r1p = COLnew(0, TYPE_oid, e - b, TRANSIENT);
if (r1 == NULL) {
if (r2p)
BBPreclaim(*r2p);
r1 = COLnew(0, TYPE_oid, e - b, TRANSIENT);
if (r1 == NULL)
return GDK_FAIL;
if (r2p) {
if ((r2 = BATdense(0, r->hseqbase + b, e - b)) == NULL) {
BBPreclaim(r1);
return GDK_FAIL;
}
*r2p = r2;
}
*r1p = r1;
oid *op = (oid *) Tloc(r1, 0);
const oid *rp = (const oid *) Tloc(r, 0);
for (p = b; p < e; p++) {
Expand All @@ -3615,7 +3643,7 @@ fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, lng t0)
static gdk_return
leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
BUN estimate, const char *name, lng t0)
BUN estimate, const char *func, lng t0)
{
BUN lstart, lend, lcnt;
const oid *lcand, *lcandend;
Expand All @@ -3632,17 +3660,26 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
*r1p = NULL;
if (r2p)
*r2p = NULL;
if (joinparamcheck(l, r, NULL, sl, sr, name) != GDK_SUCCEED)
if (joinparamcheck(l, r, NULL, sl, sr, func) != GDK_SUCCEED)
return GDK_FAIL;

CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
CANDINIT(r, sr, rstart, rend, rcnt, rcand, rcandend);
lcnt = lcand ? (BUN) (lcandend - lcand) : lend - lstart;
rcnt = rcand ? (BUN) (rcandend - rcand) : rend - rstart;

if (lcnt == 0 || (!only_misses && !nil_on_miss && rcnt == 0))
if (lcnt == 0 || (!only_misses && !nil_on_miss && rcnt == 0)) {
ALGODEBUG fprintf(stderr, "#%s(l=" ALGOBATFMT ","
"r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
"sr=" ALGOOPTBATFMT ",nil_matches=%d,"
"nil_on_miss=%d,semi=%d,only_misses=%d)\n",
func,
ALGOBATPAR(l), ALGOBATPAR(r),
ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
nil_matches, nil_on_miss, semi, only_misses);
return nomatch(r1p, r2p, l, r, lstart, lend, lcand, lcandend,
nil_on_miss, only_misses, "leftjoin", t0);
nil_on_miss, only_misses, func, t0);
}

if (!nil_on_miss && !semi && !only_misses &&
(lcnt == 1 || (BATordered(l) && BATordered_rev(l)))) {
Expand All @@ -3656,35 +3693,37 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
return mergejoin_void(r1p, r2p, l, r, sl, sr,
lstart, lend, lcnt, lcand, lcandend,
nil_on_miss, only_misses, t0, false);
} else if (BATtdense(l)
&& lcand == NULL
&& rcand == NULL
&& !semi
&& !nil_matches
&& !only_misses
/* && (rcnt * 1024) < lcnt */
&& (BATordered(r) || BATordered_rev(r))) {
assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
return fetchjoin(r1p, r2p, l, r,
lstart, lend, rstart, rend, t0);
} else if ((BATordered(r) || BATordered_rev(r))
&& (BATordered(l)
|| BATordered_rev(l)
|| BATtdense(r)
|| lcnt < 1024
|| BATcount(r) * (Tsize(r) + (r->tvheap ? r->tvheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1)))
|| BATcount(r) * (Tsize(r) + (r->tvheap ? r->tvheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1))) {
return mergejoin(r1p, r2p, l, r, sl, sr,
lstart, lend, lcnt, lcand, lcandend,
rstart, rend, rcnt, rcand, rcandend,
nil_matches, nil_on_miss, semi, only_misses,
estimate, t0, false);
if (BATtdense(l)
&& ATOMtype(l->ttype) == TYPE_oid
&& sl == NULL
&& sr == NULL
&& !semi
&& !nil_matches
&& !only_misses
&& (rcnt * 1024) < lcnt
&& BATordered(r))
return fetchjoin(r1p, r2p, l, r, t0);
}
phash = sr == NULL &&
VIEWtparent(r) != 0 &&
BATcount(BBPquickdesc(VIEWtparent(r), false)) == BATcount(r);
return hashjoin(r1p, r2p, l, r, sl, sr,
lstart, lend, lcnt, lcand, lcandend,
rstart, rend, rcnt, rcand, rcandend,
nil_matches, nil_on_miss, semi, only_misses,
estimate, t0, false, phash, "leftjoin");
estimate, t0, false, phash, func);
}

/* Perform an equi-join over l and r. Returns two new, aligned, bats
Expand Down Expand Up @@ -3820,9 +3859,16 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches
if (joinparamcheck(l, r, NULL, sl, sr, "BATjoin") != GDK_SUCCEED)
return GDK_FAIL;

if (lcnt == 0 || rcnt == 0)
if (lcnt == 0 || rcnt == 0) {
ALGODEBUG fprintf(stderr, "#BATjoin(l=" ALGOBATFMT ","
"r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
"sr=" ALGOOPTBATFMT ",nil_matches=%d)\n",
ALGOBATPAR(l), ALGOBATPAR(r),
ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
nil_matches);
return nomatch(r1p, r2p, l, r, lstart, lend, lcand, lcandend,
false, false, "BATjoin", t0);
}

swap = false;

Expand Down
8 changes: 4 additions & 4 deletions gdk/gdk_unique.c
Expand Up @@ -368,10 +368,10 @@ BATunique(BAT *b, BAT *s)
}
bn = virtualize(bn);
ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT ","
"s=" ALGOBATFMT ")="
ALGOOPTBATFMT "\n",
ALGOBATPAR(b), ALGOBATPAR(s),
ALGOOPTBATPAR(bn));
"s=" ALGOOPTBATFMT ")="
ALGOBATFMT "\n",
ALGOBATPAR(b), ALGOOPTBATPAR(s),
ALGOBATPAR(bn));
return bn;

bunins_failed:
Expand Down

0 comments on commit 7838a35

Please sign in to comment.