Commit 0479639

spetrunia authored and cvicentiu committed
MDEV-9736: Window functions: multiple cursors to read filesort result
Add support for multiple IO_CACHEs with type=READ_CACHE sharing the file they read from. Each IO_CACHE keeps its own in-memory buffer; when one of them reads from or seeks within the file, it notifies the other IO_CACHEs that the file position has changed. Make Rowid_seq_cursor use a cloned IO_CACHE when reading the filesort result.
1 parent 6e40157 commit 0479639
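
The message above describes the mechanism; the sketch below is not part of the patch, but shows how the new calls are meant to fit together for an ordinary file opened through mysys. The file name, the seek offset and the cache size are illustrative placeholders, and error-path cleanup is omitted.

/*
  Usage sketch (assumptions as noted above): two independent read cursors
  over one file via the new slave-IO_CACHE calls.
*/
#include <my_global.h>
#include <my_sys.h>

int read_with_two_cursors(const char *data_file)
{
  IO_CACHE master, slave;
  uchar row[16];
  File fd;

  if ((fd= my_open(data_file, O_RDONLY, MYF(MY_WME))) < 0)
    return 1;
  /* A plain, non-shared READ_CACHE: the precondition init_slave_io_cache() asserts */
  if (init_io_cache(&master, fd, IO_SIZE, READ_CACHE, 0, 0, MYF(MY_WME)))
    return 1;
  if (init_slave_io_cache(&master, &slave))  /* clone: own buffer, own position */
    return 1;

  my_b_read(&master, row, sizeof(row));      /* cursor 1 reads from offset 0   */
  seek_io_cache(&slave, 4096);               /* cursor 2 jumps elsewhere ...   */
  my_b_read(&slave, row, sizeof(row));       /* ... without moving cursor 1    */

  end_slave_io_cache(&slave);                /* frees only the slave's buffer  */
  end_io_cache(&master);
  my_close(fd, MYF(0));
  return 0;
}

The point the tests below exercise is the middle three calls: a cloned cursor can seek and read on its own, because it shares only the file descriptor, not the buffer or the position.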

5 files changed: +447, -57 lines

include/my_sys.h

Lines changed: 7 additions & 0 deletions
@@ -472,6 +472,8 @@ typedef struct st_io_cache /* Used when cacheing files */
   const char *dir;
   char prefix[3];
   File file; /* file descriptor */
+
+  struct st_io_cache *next_file_user;
   /*
     seek_not_done is set by my_b_seek() to inform the upcoming read/write
     operation that a seek needs to be preformed prior to the actual I/O
@@ -802,6 +804,11 @@ extern my_bool reinit_io_cache(IO_CACHE *info,enum cache_type type,
 extern void setup_io_cache(IO_CACHE* info);
 extern void init_io_cache_share(IO_CACHE *read_cache, IO_CACHE_SHARE *cshare,
                                 IO_CACHE *write_cache, uint num_threads);
+
+extern int init_slave_io_cache(IO_CACHE *master, IO_CACHE *slave);
+void end_slave_io_cache(IO_CACHE *cache);
+void seek_io_cache(IO_CACHE *cache, my_off_t needed_offset);
+
 extern void remove_io_thread(IO_CACHE *info);
 extern int _my_b_async_read(IO_CACHE *info,uchar *Buffer,size_t Count);
 extern int my_b_append(IO_CACHE *info,const uchar *Buffer,size_t Count);
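
The one new struct member, next_file_user, ties every IO_CACHE that reads the same file into a circular list: each cache points at the next user and the last one points back to the first. Below is a standalone illustration of that invariant with a simplified node type (not the real IO_CACHE); it mirrors the traversal loops this patch adds to mf_iocache.c, which walk the ring and mark every other user's position as stale.

/* Simplified stand-in for IO_CACHE, for illustration only. */
struct cache_node
{
  struct cache_node *next_file_user;  /* NULL when the cache shares with nobody */
};

/* Visit every other member of the ring, the way _my_b_cache_read() does
   when it marks the other users as needing a seek. */
static int count_other_users(struct cache_node *self)
{
  int n= 0;
  if (self->next_file_user)
  {
    struct cache_node *c;
    for (c= self->next_file_user; c != self; c= c->next_file_user)
      n++;                            /* the real loop does c->seek_not_done= 1 */
  }
  return n;
}

With one master and two slaves, count_other_users() returns 2 from any of the three caches; for an unshared cache it returns 0, matching the NULL initialisation added to init_io_cache() below.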

mysql-test/r/win_big.result

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t10 (a int, b int, c int);
+insert into t10
+select
+A.a + 1000*B.a,
+A.a + 1000*B.a,
+A.a + 1000*B.a
+from t1 A, t0 B
+order by A.a+1000*B.a;
+#################################################################
+## Try a basic example
+flush status;
+create table t21 as
+select
+sum(b) over (order by a rows between 2 preceding and 2 following) as SUM_B
+from
+t10;
+show status like 'Sort_merge_passes';
+Variable_name Value
+Sort_merge_passes 0
+set sort_buffer_size=1024;
+flush status;
+create table t22 as
+select
+sum(b) over (order by a rows between 2 preceding and 2 following) as SUM_B
+from
+t10;
+show status like 'Sort_merge_passes';
+Variable_name Value
+Sort_merge_passes 35
+include/diff_tables.inc [t21, t22]
+drop table t21, t22;
+#################################################################
+# Try many cursors
+set sort_buffer_size=default;
+flush status;
+create table t21 as
+select
+sum(b) over (order by a rows between 2 preceding and 2 following) as SUM_B1,
+sum(b) over (order by a rows between 5 preceding and 5 following) as SUM_B2,
+sum(b) over (order by a rows between 20 preceding and 20 following) as SUM_B3
+from
+t10;
+show status like 'Sort_merge_passes';
+Variable_name Value
+Sort_merge_passes 0
+set sort_buffer_size=1024;
+flush status;
+create table t22 as
+select
+sum(b) over (order by a rows between 2 preceding and 2 following) as SUM_B1,
+sum(b) over (order by a rows between 5 preceding and 5 following) as SUM_B2,
+sum(b) over (order by a rows between 20 preceding and 20 following) as SUM_B3
+from
+t10;
+show status like 'Sort_merge_passes';
+Variable_name Value
+Sort_merge_passes 35
+include/diff_tables.inc [t21, t22]
+drop table t21, t22;
+#################################################################
+# Try having cursors pointing at different IO_CACHE pages
+# in the IO_CACHE
+set sort_buffer_size=default;
+flush status;
+create table t21 as
+select
+a,
+sum(b) over (order by a range between 5000 preceding and 5000 following) as SUM_B1
+from
+t10;
+show status like 'Sort_merge_passes';
+Variable_name Value
+Sort_merge_passes 0
+set sort_buffer_size=1024;
+flush status;
+create table t22 as
+select
+a,
+sum(b) over (order by a range between 5000 preceding and 5000 following) as SUM_B1
+from
+t10;
+show status like 'Sort_merge_passes';
+Variable_name Value
+Sort_merge_passes 35
+include/diff_tables.inc [t21, t22]
+drop table t21, t22;
+#################################################################
+drop table t10;
+drop table t0,t1;

mysql-test/t/win_big.test

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
+#
+# Tests for window functions over big datasets.
+# "Big" here is "big enough so that filesort result doesn't fit in a
+# memory buffer".
+#
+#
+
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+create table t10 (a int, b int, c int);
+insert into t10
+select
+A.a + 1000*B.a,
+A.a + 1000*B.a,
+A.a + 1000*B.a
+from t1 A, t0 B
+order by A.a+1000*B.a;
+
+--echo #################################################################
+--echo ## Try a basic example
+flush status;
+create table t21 as
+select
+sum(b) over (order by a rows between 2 preceding and 2 following) as SUM_B
+from
+t10;
+show status like 'Sort_merge_passes';
+
+set sort_buffer_size=1024;
+flush status;
+create table t22 as
+select
+sum(b) over (order by a rows between 2 preceding and 2 following) as SUM_B
+from
+t10;
+show status like 'Sort_merge_passes';
+
+let $diff_tables= t21, t22;
+source include/diff_tables.inc;
+drop table t21, t22;
+
+--echo #################################################################
+--echo # Try many cursors
+set sort_buffer_size=default;
+flush status;
+create table t21 as
+select
+sum(b) over (order by a rows between 2 preceding and 2 following) as SUM_B1,
+sum(b) over (order by a rows between 5 preceding and 5 following) as SUM_B2,
+sum(b) over (order by a rows between 20 preceding and 20 following) as SUM_B3
+from
+t10;
+show status like 'Sort_merge_passes';
+
+set sort_buffer_size=1024;
+flush status;
+create table t22 as
+select
+sum(b) over (order by a rows between 2 preceding and 2 following) as SUM_B1,
+sum(b) over (order by a rows between 5 preceding and 5 following) as SUM_B2,
+sum(b) over (order by a rows between 20 preceding and 20 following) as SUM_B3
+from
+t10;
+show status like 'Sort_merge_passes';
+
+let $diff_tables= t21, t22;
+source include/diff_tables.inc;
+drop table t21, t22;
+
+--echo #################################################################
+--echo # Try having cursors pointing at different IO_CACHE pages
+--echo # in the IO_CACHE
+set sort_buffer_size=default;
+flush status;
+create table t21 as
+select
+a,
+sum(b) over (order by a range between 5000 preceding and 5000 following) as SUM_B1
+from
+t10;
+show status like 'Sort_merge_passes';
+
+set sort_buffer_size=1024;
+flush status;
+create table t22 as
+select
+a,
+sum(b) over (order by a range between 5000 preceding and 5000 following) as SUM_B1
+from
+t10;
+show status like 'Sort_merge_passes';
+
+let $diff_tables= t21, t22;
+source include/diff_tables.inc;
+drop table t21, t22;
+--echo #################################################################
+
+drop table t10;
+drop table t0,t1;
+

mysys/mf_iocache.c

Lines changed: 134 additions & 14 deletions
@@ -193,6 +193,7 @@ int init_io_cache(IO_CACHE *info, File file, size_t cachesize,
   info->alloced_buffer = 0;
   info->buffer=0;
   info->seek_not_done= 0;
+  info->next_file_user= NULL;
 
   if (file >= 0)
   {
@@ -328,6 +329,101 @@ int init_io_cache(IO_CACHE *info, File file, size_t cachesize,
   DBUG_RETURN(0);
 } /* init_io_cache */
 
+
+
+/*
+  Initialize the slave IO_CACHE to read the same file (and data)
+  as master does.
+
+  One can create multiple slaves from a single master. Every slave and master
+  will have independent file positions.
+
+  The master must be a non-shared READ_CACHE.
+  It is assumed that no more reads are done after a master and/or a slave
+  has been freed (this limitation can be easily lifted).
+*/
+
+int init_slave_io_cache(IO_CACHE *master, IO_CACHE *slave)
+{
+  uchar *slave_buf;
+  DBUG_ASSERT(master->type == READ_CACHE);
+  DBUG_ASSERT(!master->share);
+  DBUG_ASSERT(master->alloced_buffer);
+
+  if (!(slave_buf= (uchar*)my_malloc(master->buffer_length, MYF(0))))
+  {
+    return 1;
+  }
+  memcpy(slave, master, sizeof(IO_CACHE));
+  slave->buffer= slave_buf;
+
+  memcpy(slave->buffer, master->buffer, master->buffer_length);
+  slave->read_pos= slave->buffer + (master->read_pos - master->buffer);
+  slave->read_end= slave->buffer + (master->read_end - master->buffer);
+
+  DBUG_ASSERT(master->current_pos == &master->read_pos);
+  slave->current_pos= &slave->read_pos;
+  DBUG_ASSERT(master->current_end == &master->read_end);
+  slave->current_end= &slave->read_end;
+
+  if (master->next_file_user)
+  {
+    IO_CACHE *p;
+    for (p= master->next_file_user;
+         p->next_file_user !=master;
+         p= p->next_file_user)
+    {}
+
+    p->next_file_user= slave;
+    slave->next_file_user= master;
+  }
+  else
+  {
+    slave->next_file_user= master;
+    master->next_file_user= slave;
+  }
+  return 0;
+}
+
+
+void end_slave_io_cache(IO_CACHE *cache)
+{
+  my_free(cache->buffer);
+}
+
+/*
+  Seek a read io cache to a given offset
+*/
+void seek_io_cache(IO_CACHE *cache, my_off_t needed_offset)
+{
+  my_off_t cached_data_start= cache->pos_in_file;
+  my_off_t cached_data_end= cache->pos_in_file + (cache->read_pos -
+                                                  cache->buffer);
+  if (needed_offset >= cached_data_start &&
+      needed_offset < cached_data_end)
+  {
+    /*
+      The offset we're seeking to is in the buffer.
+      Move buffer's read position accordingly
+    */
+    cache->read_pos= cache->buffer + (needed_offset - cached_data_start);
+  }
+  else
+  {
+    if (needed_offset > cache->end_of_file)
+      needed_offset= cache->end_of_file;
+    /*
+      The offset we're seeking to is not in the buffer.
+      - Set the buffer to be exhausted.
+      - Make the next read to a mysql_file_seek() call to the required
+        offset (but still use aligned reads).
+    */
+    cache->read_pos= cache->read_end;
+    cache->seek_not_done= 1;
+    cache->pos_in_file= (needed_offset / IO_SIZE) * IO_SIZE;
+  }
+}
+
 /* Wait until current request is ready */
 
 #ifdef HAVE_AIOWAIT
@@ -583,6 +679,17 @@ int _my_b_cache_read(IO_CACHE *info, uchar *Buffer, size_t Count)
     {
       /* No error, reset seek_not_done flag. */
       info->seek_not_done= 0;
+
+      if (info->next_file_user)
+      {
+        IO_CACHE *c;
+        for (c= info->next_file_user;
+             c!= info;
+             c= c->next_file_user)
+        {
+          c->seek_not_done= 1;
+        }
+      }
     }
     else
     {
@@ -671,22 +778,35 @@ int _my_b_cache_read(IO_CACHE *info, uchar *Buffer, size_t Count)
       DBUG_RETURN(0); /* EOF */
     }
   }
-  else if ((length= mysql_file_read(info->file,info->buffer, max_length,
+  else
+  {
+    if (info->next_file_user)
+    {
+      IO_CACHE *c;
+      for (c= info->next_file_user;
+           c!= info;
+           c= c->next_file_user)
+      {
+        c->seek_not_done= 1;
+      }
+    }
+    if ((length= mysql_file_read(info->file,info->buffer, max_length,
                                  info->myflags)) < Count ||
         length == (size_t) -1)
-  {
-    /*
-      We got an read error, or less than requested (end of file).
-      If not a read error, copy, what we got.
-    */
-    if (length != (size_t) -1)
-      memcpy(Buffer, info->buffer, length);
-    info->pos_in_file= pos_in_file;
-    /* For a read error, return -1, otherwise, what we got in total. */
-    info->error= length == (size_t) -1 ? -1 : (int) (length+left_length);
-    info->read_pos=info->read_end=info->buffer;
-    info->seek_not_done=1;
-    DBUG_RETURN(1);
+    {
+      /*
+        We got an read error, or less than requested (end of file).
+        If not a read error, copy, what we got.
+      */
+      if (length != (size_t) -1)
+        memcpy(Buffer, info->buffer, length);
+      info->pos_in_file= pos_in_file;
+      /* For a read error, return -1, otherwise, what we got in total. */
+      info->error= length == (size_t) -1 ? -1 : (int) (length+left_length);
+      info->read_pos=info->read_end=info->buffer;
+      info->seek_not_done=1;
+      DBUG_RETURN(1);
+    }
   }
   /*
     Count is the remaining number of bytes requested.