Skip to content
Permalink
Browse files
MDEV-23525 Wrong result of MIN(time_expr) and MAX(time_expr) with GRO…
…UP BY

Problem:

When calculatung MIN() and MAX() in a query with GROUP BY, like this:

  SELECT MIN(time_expr), MAX(time_expr) FROM t1 GROUP BY i;

the code in Item_sum_min_max::update_field() erroneosly used
string format comparison, therefore '100:20:30' was considered as
smaller than '10:20:30'.

Fix:

1. Implementing low level "native" related methods in class Time:
     Time::Time(const Native &native)           - convert native to Time
     Time::to_native(Native *to, uint decimals) - convert Time to native

   The "native" binary representation for TIME is equal to
   the binary data format of Field_timef, which is used to
   store TIME when mysql56_temporal_format is ON (default).

2. Implementing Type_handler_time_common "native" related methods:

  Type_handler_time_common::cmp_native()
  Type_handler_time_common::Item_val_native_with_conversion()
  Type_handler_time_common::Item_val_native_with_conversion_result()
  Type_handler_time_common::Item_param_val_native()

3. Implementing missing "native representation" related methods
   in Field_time and Field_timef:

  Field_time::store_native()
  Field_time::val_native()
  Field_timef::store_native()
  Field_timef::val_native()

4. Implementing missing "native" related methods in all Items
   that can have the TIME data type:

  Item_timefunc::val_native()
  Item_name_const::val_native()
  Item_time_literal::val_native()
  Item_cache_time::val_native()
  Item_handled_func::val_native()

5. Marking Type_handler_time_common as "native ready".
   So now Item_sum_min_max::update_field() calculates
   values using min_max_update_native_field(),
   which uses native binary representation rather than string representation.

   Before this change, only the TIMESTAMP data type used native
   representation to calculate MIN() and MAX().

Benchmarks (see more details in MDEV):

  This change not only fixes the wrong result, but also
  makes a "SELECT .. MAX.. GROUP BY .." query faster:

  # TIME(0)
  CREATE TABLE t1 (id INT, time_col TIME) ENGINE=HEAP;
  INSERT INTO t1 VALUES (1,'10:10:10'); -- repeat this 1m times
  SELECT id, MAX(time_col) FROM t1 GROUP BY id;

  MySQL80: 0.159 sec
  10.3:    0.108 sec
  10.4:    0.094 sec (fixed)

  # TIME(6):
  CREATE TABLE t1 (id INT, time_col TIME(6)) ENGINE=HEAP;
  INSERT INTO t1 VALUES (1,'10:10:10.999999'); -- repeat this 1m times
  SELECT id, MAX(time_col) FROM t1 GROUP BY id;

  My80: 0.154
  10.3: 0.135
  10.4: 0.093 (fixed)
  • Loading branch information
abarkov committed Aug 22, 2020
1 parent aa6cb7e commit ae33ebe
Show file tree
Hide file tree
Showing 10 changed files with 364 additions and 4 deletions.
@@ -2286,5 +2286,127 @@ SELECT '1972-11-06 16:58:58' < TIME'20:31:05';
'1972-11-06 16:58:58' < TIME'20:31:05'
1
#
# MDEV-23525 Wrong result of MIN(time_expr) and MAX(time_expr) with GROUP BY
#
SET timestamp=UNIX_TIMESTAMP('2020-08-21 18:19:20');
CREATE PROCEDURE p1()
BEGIN
SELECT MIN(t), MAX(t) FROM t1;
SELECT i, MIN(t), MAX(t) FROM t1 GROUP BY i;
SELECT i, MIN(COALESCE(t)), MAX(COALESCE(t)) FROM t1 GROUP BY i;
SELECT i, MIN(t+INTERVAL 1 SECOND), MAX(t+INTERVAL 1 SECOND) FROM t1 GROUP BY i;
SELECT i, MIN(TIME'10:20:30'+INTERVAL 1 SECOND) FROM t1 GROUP BY i;
SELECT i, MIN(CURRENT_TIME), MAX(CURRENT_TIME) FROM t1 GROUP BY i;
SELECT
i,
MIN((SELECT MAX(CURRENT_TIME) FROM t1)),
MAX((SELECT MAX(CURRENT_TIME) FROM t1))
FROM t1 GROUP BY i;
SELECT
i,
MIN(NAME_CONST('name',TIME'10:20:30')),
MAX(NAME_CONST('name',TIME'10:20:30'))
FROM t1 GROUP BY i;
EXECUTE IMMEDIATE "SELECT i, MIN(?),MAX(?) FROM t1 GROUP BY i"
USING TIME'10:20:30', TIME'10:20:30';
END;
$$
CREATE TABLE t1 (i INT, t TIME);
INSERT INTO t1 VALUES (1,'10:20:30');
INSERT INTO t1 VALUES (1,'100:20:20');
CALL p1;
MIN(t) MAX(t)
10:20:30 100:20:20
i MIN(t) MAX(t)
1 10:20:30 100:20:20
i MIN(COALESCE(t)) MAX(COALESCE(t))
1 10:20:30 100:20:20
i MIN(t+INTERVAL 1 SECOND) MAX(t+INTERVAL 1 SECOND)
1 10:20:31 100:20:21
i MIN(TIME'10:20:30'+INTERVAL 1 SECOND)
1 10:20:31
i MIN(CURRENT_TIME) MAX(CURRENT_TIME)
1 18:19:20 18:19:20
i MIN((SELECT MAX(CURRENT_TIME) FROM t1)) MAX((SELECT MAX(CURRENT_TIME) FROM t1))
1 18:19:20 18:19:20
i MIN(NAME_CONST('name',TIME'10:20:30')) MAX(NAME_CONST('name',TIME'10:20:30'))
1 10:20:30 10:20:30
i MIN(?) MAX(?)
1 10:20:30 10:20:30
DROP TABLE t1;
CREATE TABLE t1 (i INT, t TIME(3));
INSERT INTO t1 VALUES (1,'10:20:30.123');
INSERT INTO t1 VALUES (1,'100:20:20.123');
CALL p1;
MIN(t) MAX(t)
10:20:30.123 100:20:20.123
i MIN(t) MAX(t)
1 10:20:30.123 100:20:20.123
i MIN(COALESCE(t)) MAX(COALESCE(t))
1 10:20:30.123 100:20:20.123
i MIN(t+INTERVAL 1 SECOND) MAX(t+INTERVAL 1 SECOND)
1 10:20:31.123 100:20:21.123
i MIN(TIME'10:20:30'+INTERVAL 1 SECOND)
1 10:20:31
i MIN(CURRENT_TIME) MAX(CURRENT_TIME)
1 18:19:20 18:19:20
i MIN((SELECT MAX(CURRENT_TIME) FROM t1)) MAX((SELECT MAX(CURRENT_TIME) FROM t1))
1 18:19:20 18:19:20
i MIN(NAME_CONST('name',TIME'10:20:30')) MAX(NAME_CONST('name',TIME'10:20:30'))
1 10:20:30 10:20:30
i MIN(?) MAX(?)
1 10:20:30 10:20:30
DROP TABLE t1;
CREATE TABLE t1 (i INT, t TIME(6));
INSERT INTO t1 VALUES (1,'10:20:30.123456');
INSERT INTO t1 VALUES (1,'100:20:20.123456');
CALL p1;
MIN(t) MAX(t)
10:20:30.123456 100:20:20.123456
i MIN(t) MAX(t)
1 10:20:30.123456 100:20:20.123456
i MIN(COALESCE(t)) MAX(COALESCE(t))
1 10:20:30.123456 100:20:20.123456
i MIN(t+INTERVAL 1 SECOND) MAX(t+INTERVAL 1 SECOND)
1 10:20:31.123456 100:20:21.123456
i MIN(TIME'10:20:30'+INTERVAL 1 SECOND)
1 10:20:31
i MIN(CURRENT_TIME) MAX(CURRENT_TIME)
1 18:19:20 18:19:20
i MIN((SELECT MAX(CURRENT_TIME) FROM t1)) MAX((SELECT MAX(CURRENT_TIME) FROM t1))
1 18:19:20 18:19:20
i MIN(NAME_CONST('name',TIME'10:20:30')) MAX(NAME_CONST('name',TIME'10:20:30'))
1 10:20:30 10:20:30
i MIN(?) MAX(?)
1 10:20:30 10:20:30
DROP TABLE t1;
SET @@global.mysql56_temporal_format=false;
CREATE TABLE t1 (i INT, t TIME(6));
INSERT INTO t1 VALUES (1,'10:20:30.123456');
INSERT INTO t1 VALUES (1,'100:20:20.123456');
CALL p1;
MIN(t) MAX(t)
10:20:30.123456 100:20:20.123456
i MIN(t) MAX(t)
1 10:20:30.123456 100:20:20.123456
i MIN(COALESCE(t)) MAX(COALESCE(t))
1 10:20:30.123456 100:20:20.123456
i MIN(t+INTERVAL 1 SECOND) MAX(t+INTERVAL 1 SECOND)
1 10:20:31.123456 100:20:21.123456
i MIN(TIME'10:20:30'+INTERVAL 1 SECOND)
1 10:20:31
i MIN(CURRENT_TIME) MAX(CURRENT_TIME)
1 18:19:20 18:19:20
i MIN((SELECT MAX(CURRENT_TIME) FROM t1)) MAX((SELECT MAX(CURRENT_TIME) FROM t1))
1 18:19:20 18:19:20
i MIN(NAME_CONST('name',TIME'10:20:30')) MAX(NAME_CONST('name',TIME'10:20:30'))
1 10:20:30 10:20:30
i MIN(?) MAX(?)
1 10:20:30 10:20:30
DROP TABLE t1;
SET @@global.mysql56_temporal_format=default;
DROP PROCEDURE p1;
SET timestamp=DEFAULT;
#
# End of 10.4 tests
#
@@ -1490,6 +1490,66 @@ DROP TABLE t1;

SELECT '1972-11-06 16:58:58' < TIME'20:31:05';

--echo #
--echo # MDEV-23525 Wrong result of MIN(time_expr) and MAX(time_expr) with GROUP BY
--echo #

SET timestamp=UNIX_TIMESTAMP('2020-08-21 18:19:20');

DELIMITER $$;
CREATE PROCEDURE p1()
BEGIN
SELECT MIN(t), MAX(t) FROM t1;
SELECT i, MIN(t), MAX(t) FROM t1 GROUP BY i;
SELECT i, MIN(COALESCE(t)), MAX(COALESCE(t)) FROM t1 GROUP BY i;
SELECT i, MIN(t+INTERVAL 1 SECOND), MAX(t+INTERVAL 1 SECOND) FROM t1 GROUP BY i;
SELECT i, MIN(TIME'10:20:30'+INTERVAL 1 SECOND) FROM t1 GROUP BY i;
SELECT i, MIN(CURRENT_TIME), MAX(CURRENT_TIME) FROM t1 GROUP BY i;
SELECT
i,
MIN((SELECT MAX(CURRENT_TIME) FROM t1)),
MAX((SELECT MAX(CURRENT_TIME) FROM t1))
FROM t1 GROUP BY i;
SELECT
i,
MIN(NAME_CONST('name',TIME'10:20:30')),
MAX(NAME_CONST('name',TIME'10:20:30'))
FROM t1 GROUP BY i;
EXECUTE IMMEDIATE "SELECT i, MIN(?),MAX(?) FROM t1 GROUP BY i"
USING TIME'10:20:30', TIME'10:20:30';
END;
$$
DELIMITER ;$$

CREATE TABLE t1 (i INT, t TIME);
INSERT INTO t1 VALUES (1,'10:20:30');
INSERT INTO t1 VALUES (1,'100:20:20');
CALL p1;
DROP TABLE t1;

CREATE TABLE t1 (i INT, t TIME(3));
INSERT INTO t1 VALUES (1,'10:20:30.123');
INSERT INTO t1 VALUES (1,'100:20:20.123');
CALL p1;
DROP TABLE t1;

CREATE TABLE t1 (i INT, t TIME(6));
INSERT INTO t1 VALUES (1,'10:20:30.123456');
INSERT INTO t1 VALUES (1,'100:20:20.123456');
CALL p1;
DROP TABLE t1;

SET @@global.mysql56_temporal_format=false;
CREATE TABLE t1 (i INT, t TIME(6));
INSERT INTO t1 VALUES (1,'10:20:30.123456');
INSERT INTO t1 VALUES (1,'100:20:20.123456');
CALL p1;
DROP TABLE t1;
SET @@global.mysql56_temporal_format=default;

DROP PROCEDURE p1;
SET timestamp=DEFAULT;

--echo #
--echo # End of 10.4 tests
--echo #
@@ -6011,6 +6011,24 @@ bool Field_time::get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate)
}


int Field_time::store_native(const Native &value)
{
Time t(value);
DBUG_ASSERT(t.is_valid_time());
store_TIME(t);
return 0;
}


bool Field_time::val_native(Native *to)
{
MYSQL_TIME ltime;
get_date(&ltime, date_mode_t(0));
int warn;
return Time(&warn, &ltime, 0).to_native(to, decimals());
}


bool Field_time::send_binary(Protocol *protocol)
{
MYSQL_TIME ltime;
@@ -6254,6 +6272,22 @@ bool Field_timef::get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate)
return false;
}

int Field_timef::store_native(const Native &value)
{
DBUG_ASSERT(value.length() == my_time_binary_length(dec));
DBUG_ASSERT(Time(value).is_valid_time());
memcpy(ptr, value.ptr(), value.length());
return 0;
}


bool Field_timef::val_native(Native *to)
{
uint32 binlen= my_time_binary_length(dec);
return to->copy((const char*) ptr, binlen);
}


/****************************************************************************
** year type
** Save in a byte the year 0, 1901->2155
@@ -3213,6 +3213,8 @@ class Field_time :public Field_temporal {
decimals() == from->decimals();
}
sql_mode_t conversion_depends_on_sql_mode(THD *, Item *) const;
int store_native(const Native &value);
bool val_native(Native *to);
int store_time_dec(const MYSQL_TIME *ltime, uint dec);
int store(const char *to,size_t length,CHARSET_INFO *charset);
int store(double nr);
@@ -3334,6 +3336,8 @@ class Field_timef :public Field_time_with_dec {
}
int reset();
bool get_date(MYSQL_TIME *ltime, date_mode_t fuzzydate);
int store_native(const Native &value);
bool val_native(Native *to);
uint size_of() const { return sizeof(*this); }
};

@@ -2011,6 +2011,11 @@ bool Item_name_const::get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydat
return rc;
}

bool Item_name_const::val_native(THD *thd, Native *to)
{
return val_native_from_item(thd, value_item, to);
}

bool Item_name_const::is_null()
{
return value_item->is_null();
@@ -1311,16 +1311,13 @@ class Item: public Value_source,
{
/*
The default implementation for the Items that do not need native format:
- Item_basic_value
- Item_basic_value (default implementation)
- Item_copy
- Item_exists_subselect
- Item_sum_field
- Item_sum_or_func (default implementation)
- Item_proc
- Item_type_holder (as val_xxx() are never called for it);
- TODO: Item_name_const will need val_native() in the future,
when we add this syntax:
TIMESTAMP WITH LOCAL TIMEZONE'2001-01-01 00:00:00'

These hybrid Item types override val_native():
- Item_field
@@ -1331,6 +1328,8 @@ class Item: public Value_source,
- Item_direct_ref
- Item_direct_view_ref
- Item_ref_null_helper
- Item_name_const
- Item_time_literal
- Item_sum_or_func
Note, these hybrid type Item_sum_or_func descendants
override the default implementation:
@@ -3139,6 +3138,7 @@ class Item_name_const : public Item_fixed_hybrid
String *val_str(String *sp);
my_decimal *val_decimal(my_decimal *);
bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate);
bool val_native(THD *thd, Native *to);
bool is_null();
virtual void print(String *str, enum_query_type query_type);

@@ -4897,6 +4897,10 @@ class Item_time_literal: public Item_temporal_literal
String *val_str(String *to) { return Time(this).to_string(to, decimals); }
my_decimal *val_decimal(my_decimal *to) { return Time(this).to_decimal(to); }
bool get_date(THD *thd, MYSQL_TIME *res, date_mode_t fuzzydate);
bool val_native(THD *thd, Native *to)
{
return Time(thd, this).to_native(to, decimals);
}
Item *get_copy(THD *thd)
{ return get_item_copy<Item_time_literal>(thd, this); }
};
@@ -6872,6 +6876,10 @@ class Item_cache_time: public Item_cache_temporal
{
return has_value() ? Time(this).to_decimal(to) : NULL;
}
bool val_native(THD *thd, Native *to)
{
return has_value() ? Time(thd, this).to_native(to, decimals) : true;
}
};


@@ -497,6 +497,12 @@ class Item_handled_func: public Item_func
virtual longlong val_int(Item_handled_func *) const= 0;
virtual my_decimal *val_decimal(Item_handled_func *, my_decimal *) const= 0;
virtual bool get_date(THD *thd, Item_handled_func *, MYSQL_TIME *, date_mode_t fuzzydate) const= 0;
virtual bool val_native(THD *thd, Item_handled_func *, Native *to) const
{
DBUG_ASSERT(0);
to->length(0);
return true;
}
virtual const Type_handler *return_type_handler() const= 0;
virtual bool fix_length_and_dec(Item_handled_func *) const= 0;
};
@@ -600,6 +606,10 @@ class Item_handled_func: public Item_func
{
return Time(item).to_string(to, item->decimals);
}
bool val_native(THD *thd, Item_handled_func *item, Native *to) const
{
return Time(thd, item).to_native(to, item->decimals);
}
};


@@ -668,6 +678,10 @@ class Item_handled_func: public Item_func
{
return m_func_handler->get_date(thd, this, to, fuzzydate);
}
bool val_native(THD *thd, Native *to)
{
return m_func_handler->val_native(thd, this, to);
}
};


@@ -598,6 +598,10 @@ class Item_timefunc :public Item_func
double val_real() { return Time(this).to_double(); }
String *val_str(String *to) { return Time(this).to_string(to, decimals); }
my_decimal *val_decimal(my_decimal *to) { return Time(this).to_decimal(to); }
bool val_native(THD *thd, Native *to)
{
return Time(thd, this).to_native(to, decimals);
}
};


0 comments on commit ae33ebe

Please sign in to comment.