Skip to content

Commit

Permalink
Improve .bvfs_lsdirs performance
Browse files Browse the repository at this point in the history
In a previous fix, the SQL query used by .bvfs_lsdirs was changed to
not show empty dirs from accurate jobs. This introduced performance
problems when .bvfs_lsdirs was used with larger amounts (hundreds) of
folders. This commit changes the SQL query to perform better when using
MySQL. Note that it is also necessary to create the following index when
using MySQL:
CREATE INDEX PathId_JobId_FileNameId_FileIndex ON File(PathId,JobId,FilenameId,FileIndex);
When using PostgreSQL create the following partial index:
CREATE INDEX file_jpfnidpart_idx ON File(PathId,JobId,FilenameId) WHERE FileIndex = 0;

Previously, the SQL Scripts for creating the tables added a single-key index
for JobId on the File table. Such an index is not necessary if another multi-key
index exists with JobId as the first key. As the number of indexes has a
performance impact on INSERT and UPDATE, it is a good idea to drop the single-key
JobId index when adding the above index.

Fixes #837: performance issues with .bvfs_lsdirs while having a large number of directories
  • Loading branch information
sduehr committed Oct 6, 2017
1 parent 6cccbd4 commit fbebd27
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 32 deletions.
65 changes: 36 additions & 29 deletions src/cats/bvfs.c
Expand Up @@ -676,38 +676,45 @@ bool Bvfs::ls_dirs()
"listfile1.JobId AS JobId, listfile1.LStat AS LStat, "
"listfile1.FileId AS FileId "
"FROM ( "
"SELECT DISTINCT PathHierarchy1.PathId AS PathId "
"FROM PathHierarchy AS PathHierarchy1 "
"JOIN Path AS Path2 "
"ON (PathHierarchy1.PathId = Path2.PathId) "
"JOIN PathVisibility AS PathVisibility1 "
"ON (PathHierarchy1.PathId = PathVisibility1.PathId) "
"WHERE PathHierarchy1.PPathId = %s "
"AND PathVisibility1.JobId IN (%s) "
"AND PathVisibility1.PathId NOT IN ( "
"SELECT PathId FROM File "
"WHERE FilenameId = %s "
"AND JobId = ( "
"SELECT MAX(JobId) FROM PathVisibility "
"WHERE PathId = PathVisibility1.PathId "
"AND JobId IN (%s)) "
"AND FileIndex = 0) "
"%s "
") AS listpath1 "
"JOIN Path AS Path1 ON (listpath1.PathId = Path1.PathId) "

"LEFT JOIN ( " /* get attributes if any */
"SELECT File1.PathId AS PathId, File1.JobId AS JobId, "
"File1.LStat AS LStat, File1.FileId AS FileId FROM File AS File1 "
"WHERE File1.FilenameId = %s "
"AND File1.JobId IN (%s)) AS listfile1 "
"ON (listpath1.PathId = listfile1.PathId) "
") AS A ORDER BY 2,3 DESC LIMIT %d OFFSET %d",
"SELECT listpath1.PathId AS PathId "
"FROM ( "
"SELECT DISTINCT PathHierarchy1.PathId AS PathId "
"FROM PathHierarchy AS PathHierarchy1 "
"INNER JOIN Path AS Path2 "
"ON (PathHierarchy1.PathId = Path2.PathId) "
"INNER JOIN PathVisibility AS PathVisibility1 "
"ON (PathHierarchy1.PathId = PathVisibility1.PathId) "
"WHERE PathHierarchy1.PPathId = %s "
"AND PathVisibility1.JobId IN (%s) "
"%s "
") AS listpath1 "
"LEFT JOIN "
"( "
"SELECT PVD1.PathId AS PathId "
"FROM ( "
"SELECT PV1.PathId AS PathId, MAX(JobId) AS MaxJobId "
"FROM PathVisibility AS PV1 WHERE JobId IN (%s) GROUP BY PathId "
") AS PVD1 "
"INNER JOIN File AS F2 "
"ON (F2.PathId = PVD1.PathId AND F2.JobId = PVD1.MaxJobId "
"AND F2.FilenameId = %s AND F2.FileIndex = 0) "
") AS listpath2 ON (listpath1.PathId = listpath2.PathId) "
"WHERE listpath2.PathId IS NULL "
") AS listpath3 "
"INNER JOIN Path AS Path1 "
"ON (listpath3.PathId = Path1.PathId) "
"LEFT JOIN ( "
"SELECT File1.PathId AS PathId, File1.JobId AS JobId, "
"File1.LStat AS LStat, File1.FileId AS FileId FROM File AS File1 "
"WHERE File1.FilenameId = %s "
"AND File1.JobId IN (%s)) AS listfile1 "
"ON (listpath3.PathId = listfile1.PathId) "
") AS A ORDER BY 2, 3 DESC LIMIT %d OFFSET %d",
edit_uint64(pwd_id, ed1),
jobids,
edit_uint64(dir_filenameid, ed2),
jobids,
filter.c_str(),
jobids,
edit_uint64(dir_filenameid, ed2),
edit_uint64(dir_filenameid, ed2),
jobids,
limit, offset);
Expand Down
4 changes: 2 additions & 2 deletions src/cats/ddl/creates/mysql.sql
Expand Up @@ -43,8 +43,8 @@ CREATE TABLE File (
LStat TINYBLOB NOT NULL,
MD5 TINYBLOB,
PRIMARY KEY(FileId),
INDEX (JobId),
INDEX (JobId, PathId, FilenameId)
INDEX (JobId, PathId, FilenameId),
INDEX (PathId, JobId, FilenameId, FileIndex)
);

--
Expand Down
2 changes: 1 addition & 1 deletion src/cats/ddl/creates/postgresql.sql
Expand Up @@ -47,7 +47,7 @@ CREATE TABLE File
);

CREATE INDEX file_jpfid_idx ON File (JobId, PathId, FilenameId);
CREATE INDEX file_jobid_idx ON File (JobId);
CREATE INDEX file_jpfnidpart_idx ON File (PathId, JobId, FilenameId) WHERE FileIndex = 0;

--
-- Add this if you have a good number of job
Expand Down

0 comments on commit fbebd27

Please sign in to comment.