Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request mariadb-corporation#2402 from mariadb-corporation/…
…std_dev_improvememnts

MCOL-5104: Improved algorithm for STD/VAR functions
  • Loading branch information
drrtuy committed Jun 6, 2022
2 parents 4e50fca + c7e67ae commit 94ba91c
Show file tree
Hide file tree
Showing 11 changed files with 16,191 additions and 70 deletions.
1 change: 0 additions & 1 deletion .drone.jsonnet
Expand Up @@ -156,7 +156,6 @@ local Pipeline(branch, platform, event, arch='amd64') = {
mtr:: {
name: 'mtr',
image: 'docker:git',
[if arch == 'arm64' then 'failure']: 'ignore',
volumes: [pipeline._volumes.docker],
commands: [
'docker run --volume /sys/fs/cgroup:/sys/fs/cgroup:ro --env MYSQL_TEST_DIR=' + mtr_path + ' --env DEBIAN_FRONTEND=noninteractive --env MCS_USE_S3_STORAGE=0 --name mtr$${DRONE_BUILD_NUMBER} --privileged --detach ' + img + ' ' + init + ' --unit=basic.target',
Expand Down
28 changes: 14 additions & 14 deletions dbcon/joblist/tupleaggregatestep.cpp
Expand Up @@ -1478,7 +1478,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&

functionVec[i]->fAuxColumnIndex = lastCol;

// sum(x)
// mean(x)
oidsAgg.push_back(oidsProj[j]);
keysAgg.push_back(keysProj[j]);
scaleAgg.push_back(0);
Expand All @@ -1488,7 +1488,7 @@ void TupleAggregateStep::prep1PhaseAggregate(JobInfo& jobInfo, vector<RowGroup>&
widthAgg.push_back(sizeof(long double));
++lastCol;

// sum(x**2)
// sum(x_i - mean)^2
oidsAgg.push_back(oidsProj[j]);
keysAgg.push_back(keysProj[j]);
scaleAgg.push_back(0);
Expand Down Expand Up @@ -1910,7 +1910,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
widthAgg.push_back(sizeof(double));
funct->fAuxColumnIndex = ++colAgg;

// sum(x)
// mean(x)
oidsAgg.push_back(oidsProj[colProj]);
keysAgg.push_back(aggKey);
scaleAgg.push_back(0);
Expand All @@ -1920,7 +1920,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
widthAgg.push_back(sizeof(long double));
++colAgg;

// sum(x**2)
// sum(x_i - mean)^2
oidsAgg.push_back(oidsProj[colProj]);
keysAgg.push_back(aggKey);
scaleAgg.push_back(0);
Expand Down Expand Up @@ -2581,7 +2581,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro

functionVec2[i]->fAuxColumnIndex = lastCol;

// sum(x)
// mean(x)
oidsAggDist.push_back(oidsAgg[j]);
keysAggDist.push_back(keysAgg[j]);
scaleAggDist.push_back(0);
Expand All @@ -2591,7 +2591,7 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(JobInfo& jobInfo, vector<Ro
widthAggDist.push_back(sizeof(long double));
++lastCol;

// sum(x**2)
// sum(x_i - mean)^2
oidsAggDist.push_back(oidsAgg[j]);
keysAggDist.push_back(keysAgg[j]);
scaleAggDist.push_back(0);
Expand Down Expand Up @@ -3243,7 +3243,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
widthAggPm.push_back(sizeof(double));
funct->fAuxColumnIndex = ++colAggPm;

// sum(x)
// mean(x)
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0);
Expand All @@ -3253,7 +3253,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
widthAggPm.push_back(sizeof(long double));
++colAggPm;

// sum(x**2)
// sum(x_i - mean)^2
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0);
Expand Down Expand Up @@ -3701,7 +3701,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>

functionVecUm[i]->fAuxColumnIndex = lastCol;

// sum(x)
// mean(x)
oidsAggUm.push_back(oidsAggPm[j]);
keysAggUm.push_back(keysAggPm[j]);
scaleAggUm.push_back(0);
Expand All @@ -3711,7 +3711,7 @@ void TupleAggregateStep::prep2PhasesAggregate(JobInfo& jobInfo, vector<RowGroup>
widthAggUm.push_back(sizeof(long double));
++lastCol;

// sum(x**2)
// sum(x_i - mean)^2
oidsAggUm.push_back(oidsAggPm[j]);
keysAggUm.push_back(keysAggPm[j]);
scaleAggUm.push_back(0);
Expand Down Expand Up @@ -4152,7 +4152,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
widthAggPm.push_back(sizeof(double));
funct->fAuxColumnIndex = ++colAggPm;

// sum(x)
// mean(x)
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0);
Expand All @@ -4162,7 +4162,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
widthAggPm.push_back(sizeof(long double));
++colAggPm;

// sum(x**2)
// sum(x_i - mean)^2
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
scaleAggPm.push_back(0);
Expand Down Expand Up @@ -4808,7 +4808,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R

functionVecUm[i]->fAuxColumnIndex = lastCol;

// sum(x)
// mean(x)
oidsAggDist.push_back(oidsAggPm[j]);
keysAggDist.push_back(keysAggPm[j]);
scaleAggDist.push_back(0);
Expand All @@ -4818,7 +4818,7 @@ void TupleAggregateStep::prep2PhasesDistinctAggregate(JobInfo& jobInfo, vector<R
widthAggDist.push_back(sizeof(long double));
++lastCol;

// sum(x**2)
// sum(x_i - mean)^2
oidsAggDist.push_back(oidsAggPm[j]);
keysAggDist.push_back(keysAggPm[j]);
scaleAggDist.push_back(0);
Expand Down
217 changes: 217 additions & 0 deletions mysql-test/columnstore/basic/r/std_aggregate_columnstore.result
@@ -0,0 +1,217 @@
DROP DATABASE IF EXISTS std_test_db;
CREATE DATABASE std_test_db;
USE std_test_db;
create table t1 (
col_signed tinyint,
col_unsigned tinyint unsigned
)engine=columnstore;
LOAD DATA LOCAL infile 'MTR_SUITE_DIR/../std_data/tinyint_range.tbl' INTO TABLE t1 FIELDS TERMINATED BY '|';;
ALTER TABLE t1 ADD COLUMN col_small_signed SMALLINT;
ALTER TABLE t1 ADD COLUMN col_small_unsigned SMALLINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_med_signed MEDIUMINT;
ALTER TABLE t1 ADD COLUMN col_med_unsigned MEDIUMINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_int_signed INT;
ALTER TABLE t1 ADD COLUMN col_int_unsigned INT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_big_signed BIGINT;
ALTER TABLE t1 ADD COLUMN col_big_unsigned BIGINT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_dec_signed DECIMAL(38,0);
ALTER TABLE t1 ADD COLUMN col_dec_unsigned DECIMAL(38,0) UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_float_signed FLOAT;
ALTER TABLE t1 ADD COLUMN col_float_unsigned FLOAT UNSIGNED;
ALTER TABLE t1 ADD COLUMN col_double_signed DOUBLE;
ALTER TABLE t1 ADD COLUMN col_double_unsigned DOUBLE UNSIGNED;
UPDATE t1 SET col_small_signed=col_signed + sign(col_signed) * 32000;
UPDATE t1 SET col_small_unsigned=col_unsigned + 65000;
UPDATE t1 SET col_med_signed=col_signed + sign(col_signed) * 8388000;
UPDATE t1 SET col_med_unsigned=col_unsigned + 16776000;
UPDATE t1 SET col_int_signed=col_signed + sign(col_signed) * 2147483000;
UPDATE t1 SET col_int_unsigned=col_unsigned + 4294000000;
UPDATE t1 SET col_big_signed=col_signed + sign(col_signed) * 9223372036854775000;
UPDATE t1 SET col_big_unsigned=col_unsigned + 18446744073709551000;
UPDATE t1 SET col_dec_signed=col_signed + sign(col_signed) * 800000000000000000000000000000000001;
UPDATE t1 SET col_dec_unsigned=col_unsigned + 800000000000000000000000000000000003;
UPDATE t1 SET col_float_signed=col_signed + 0.637 + sign(col_signed) * 8388000;
UPDATE t1 SET col_float_unsigned=col_unsigned + 0.637 + 16776000;
UPDATE t1 SET col_double_signed=col_signed + 0.637 + sign(col_signed) * 2147483000;
UPDATE t1 SET col_double_unsigned=col_unsigned + 0.637 + 4294000000;
SELECT 'q1', floor(STD(col_signed)) FROM t1;
q1 floor(STD(col_signed))
q1 73
SELECT 'q2', floor(STD(col_unsigned)) FROM t1;
q2 floor(STD(col_unsigned))
q2 73
SELECT 'q3', floor(STD(col_small_signed)) FROM t1;
q3 floor(STD(col_small_signed))
q3 32000
SELECT 'q4', floor(STD(col_small_unsigned)) FROM t1;
q4 floor(STD(col_small_unsigned))
q4 73
SELECT 'q5', floor(STD(col_med_signed)) FROM t1;
q5 floor(STD(col_med_signed))
q5 8371470
SELECT 'q6', floor(STD(col_med_unsigned)) FROM t1;
q6 floor(STD(col_med_unsigned))
q6 73
SELECT 'q7', floor(STD(col_int_signed)) FROM t1;
q7 floor(STD(col_int_signed))
q7 2143234889
SELECT 'q8', floor(STD(col_int_unsigned)) FROM t1;
q8 floor(STD(col_int_unsigned))
q8 73
SELECT 'q9', floor(STD(col_big_signed)) FROM t1;
q9 floor(STD(col_big_signed))
q9 9205126264421172000
SELECT 'q10', floor(STD(col_big_unsigned)) FROM t1;
q10 floor(STD(col_big_unsigned))
q10 73
SELECT 'q11', floor(STD(col_dec_signed)) FROM t1;
q11 floor(STD(col_dec_signed))
q11 798417431511104800000000000000000000
SELECT 'q13', floor(STD(col_float_signed)) FROM t1;
q13 floor(STD(col_float_signed))
q13 8371470
SELECT 'q14', floor(STD(col_float_unsigned)) FROM t1;
q14 floor(STD(col_float_unsigned))
q14 73
SELECT 'q15', floor(STD(col_double_signed)) FROM t1;
q15 floor(STD(col_double_signed))
q15 2143234889
SELECT 'q16', floor(STD(col_double_unsigned)) FROM t1;
q16 floor(STD(col_double_unsigned))
q16 73
SELECT 'q17', floor(STDDEV_SAMP(col_signed)) FROM t1;
q17 floor(STDDEV_SAMP(col_signed))
q17 73
SELECT 'q18', floor(STDDEV_SAMP(col_unsigned)) FROM t1;
q18 floor(STDDEV_SAMP(col_unsigned))
q18 73
SELECT 'q19', floor(STDDEV_SAMP(col_small_signed)) FROM t1;
q19 floor(STDDEV_SAMP(col_small_signed))
q19 32063
SELECT 'q20', floor(STDDEV_SAMP(col_small_unsigned)) FROM t1;
q20 floor(STDDEV_SAMP(col_small_unsigned))
q20 73
SELECT 'q21', floor(STDDEV_SAMP(col_med_signed)) FROM t1;
q21 floor(STDDEV_SAMP(col_med_signed))
q21 8387998
SELECT 'q22', floor(STDDEV_SAMP(col_med_unsigned)) FROM t1;
q22 floor(STDDEV_SAMP(col_med_unsigned))
q22 73
SELECT 'q23', floor(STDDEV_SAMP(col_int_signed)) FROM t1;
q23 floor(STDDEV_SAMP(col_int_signed))
q23 2147466354
SELECT 'q24', floor(STDDEV_SAMP(col_int_unsigned)) FROM t1;
q24 floor(STDDEV_SAMP(col_int_unsigned))
q24 73
SELECT 'q25', floor(STDDEV_SAMP(col_big_signed)) FROM t1;
q25 floor(STDDEV_SAMP(col_big_signed))
q25 9223300272764652000
SELECT 'q26', floor(STDDEV_SAMP(col_big_unsigned)) FROM t1;
q26 floor(STDDEV_SAMP(col_big_unsigned))
q26 73
SELECT 'q27', floor(STDDEV_SAMP(col_dec_signed)) FROM t1;
q27 floor(STDDEV_SAMP(col_dec_signed))
q27 799993775457406500000000000000000000
SELECT 'q29', floor(STDDEV_SAMP(col_float_signed)) FROM t1;
q29 floor(STDDEV_SAMP(col_float_signed))
q29 8387998
SELECT 'q30', floor(STDDEV_SAMP(col_float_unsigned)) FROM t1;
q30 floor(STDDEV_SAMP(col_float_unsigned))
q30 73
SELECT 'q31', floor(STDDEV_SAMP(col_double_signed)) FROM t1;
q31 floor(STDDEV_SAMP(col_double_signed))
q31 2147466354
SELECT 'q32', floor(STDDEV_SAMP(col_double_unsigned)) FROM t1;
q32 floor(STDDEV_SAMP(col_double_unsigned))
q32 73
SELECT 'q33', floor(VAR_POP(col_signed)) FROM t1;
q33 floor(VAR_POP(col_signed))
q33 5376
SELECT 'q34', floor(VAR_POP(col_unsigned)) FROM t1;
q34 floor(VAR_POP(col_unsigned))
q34 5376
SELECT 'q35', floor(VAR_POP(col_small_signed)) FROM t1;
q35 floor(VAR_POP(col_small_signed))
q35 1024021882
SELECT 'q36', floor(VAR_POP(col_small_unsigned)) FROM t1;
q36 floor(VAR_POP(col_small_unsigned))
q36 5376
SELECT 'q37', floor(VAR_POP(col_med_signed)) FROM t1;
q37 floor(VAR_POP(col_med_signed))
q37 70081516547007
SELECT 'q38', floor(VAR_POP(col_med_unsigned)) FROM t1;
q38 floor(VAR_POP(col_med_unsigned))
q38 5376
SELECT 'q39', floor(VAR_POP(col_int_signed)) FROM t1;
q39 floor(VAR_POP(col_int_signed))
q39 4593455793567983000
SELECT 'q40', floor(VAR_POP(col_int_unsigned)) FROM t1;
q40 floor(VAR_POP(col_int_unsigned))
q40 5376
SELECT 'q41', floor(VAR_POP(col_big_signed)) FROM t1;
q41 floor(VAR_POP(col_big_signed))
q41 84734349543936475000000000000000000000
SELECT 'q42', floor(VAR_POP(col_big_unsigned)) FROM t1;
q42 floor(VAR_POP(col_big_unsigned))
q42 5376
SELECT 'q43', floor(VAR_POP(col_dec_signed)) FROM t1;
q43 floor(VAR_POP(col_dec_signed))
q43 637470394940789800000000000000000000000000000000000000000000000000000000
SELECT 'q45', floor(VAR_POP(col_float_signed)) FROM t1;
q45 floor(VAR_POP(col_float_signed))
q45 70081516546971
SELECT 'q46', floor(VAR_POP(col_float_unsigned)) FROM t1;
q46 floor(VAR_POP(col_float_unsigned))
q46 5376
SELECT 'q47', floor(VAR_POP(col_double_signed)) FROM t1;
q47 floor(VAR_POP(col_double_signed))
q47 4593455793567983000
SELECT 'q48', floor(VAR_POP(col_double_unsigned)) FROM t1;
q48 floor(VAR_POP(col_double_unsigned))
q48 5376
SELECT 'q49', floor(VAR_SAMP(col_signed)) FROM t1;
q49 floor(VAR_SAMP(col_signed))
q49 5397
SELECT 'q50', floor(VAR_SAMP(col_unsigned)) FROM t1;
q50 floor(VAR_SAMP(col_unsigned))
q50 5397
SELECT 'q51', floor(VAR_SAMP(col_small_signed)) FROM t1;
q51 floor(VAR_SAMP(col_small_signed))
q51 1028069399
SELECT 'q52', floor(VAR_SAMP(col_small_unsigned)) FROM t1;
q52 floor(VAR_SAMP(col_small_unsigned))
q52 5397
SELECT 'q53', floor(VAR_SAMP(col_med_signed)) FROM t1;
q53 floor(VAR_SAMP(col_med_signed))
q53 70358518588695
SELECT 'q54', floor(VAR_SAMP(col_med_unsigned)) FROM t1;
q54 floor(VAR_SAMP(col_med_unsigned))
q54 5397
SELECT 'q55', floor(VAR_SAMP(col_int_signed)) FROM t1;
q55 floor(VAR_SAMP(col_int_signed))
q55 4611611745321216000
SELECT 'q56', floor(VAR_SAMP(col_int_unsigned)) FROM t1;
q56 floor(VAR_SAMP(col_int_unsigned))
q56 5397
SELECT 'q57', floor(VAR_SAMP(col_big_signed)) FROM t1;
q57 floor(VAR_SAMP(col_big_signed))
q57 85069267921580490000000000000000000000
SELECT 'q58', floor(VAR_SAMP(col_big_unsigned)) FROM t1;
q58 floor(VAR_SAMP(col_big_unsigned))
q58 5397
SELECT 'q59', floor(VAR_SAMP(col_dec_signed)) FROM t1;
q59 floor(VAR_SAMP(col_dec_signed))
q59 639990040770595400000000000000000000000000000000000000000000000000000000
SELECT 'q61', floor(VAR_SAMP(col_float_signed)) FROM t1;
q61 floor(VAR_SAMP(col_float_signed))
q61 70358518588659
SELECT 'q62', floor(VAR_SAMP(col_float_unsigned)) FROM t1;
q62 floor(VAR_SAMP(col_float_unsigned))
q62 5397
SELECT 'q63', floor(VAR_SAMP(col_double_signed)) FROM t1;
q63 floor(VAR_SAMP(col_double_signed))
q63 4611611745321216000
SELECT 'q64', floor(VAR_SAMP(col_double_unsigned)) FROM t1;
q64 floor(VAR_SAMP(col_double_unsigned))
q64 5397
DROP DATABASE std_test_db;

0 comments on commit 94ba91c

Please sign in to comment.