Skip to content

Commit

Permalink
perf(sql): allows removing DISTINCT from some MySQL queries
Browse files Browse the repository at this point in the history
We add the "distinct" option to a few query functions to allow devs to
remove DISTINCT clauses from queries where they are unnecessary and worsen
performance. We also set the option in some core queries where it's unlikely
to cause trouble.

Fixes #4594
  • Loading branch information
Srokap authored and mrclay committed Nov 30, 2014
1 parent d9268ec commit 293317f
Show file tree
Hide file tree
Showing 14 changed files with 84 additions and 13 deletions.
11 changes: 11 additions & 0 deletions docs/guides/database.rst
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,17 @@ the given type - this itself can be an address set up by a page handler.

The default handler is to use the default export interface.

Entity loading performance
~~~~~~~~~~~~~~~~~~~~~~~~~~

``elgg_get_entities`` has a couple options that can sometimes be useful to improve performance.

* **preload_owners**: If the entities fetched will be displayed in a list with the owner information, you can set this option to ``true`` to efficiently load the owner users of the fetched entities.

* **distinct**: When Elgg fetches entities using an SQL query, Elgg must be sure that each entity row appears only once in the result set. By default it includes a ``DISTINCT`` modifier on the GUID column to enforce this, but some queries naturally return unique entities. Setting the ``distinct`` option to false will remove this modifier, and rely on the query to enforce its own uniqueness.

The internals of Elgg entity queries is a complex subject and it's recommended to seek help on the Elgg Community site before using the ``distinct`` option.

Pre-1.8 Notes
-------------

Expand Down
9 changes: 6 additions & 3 deletions engine/classes/Elgg/Database/AdminNotices.php
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ function delete($id) {
$result = true;
$notices = elgg_get_entities_from_metadata(array(
'metadata_name' => 'admin_notice_id',
'metadata_value' => $id
'metadata_value' => $id,
'distinct' => false,
));

if ($notices) {
Expand All @@ -97,7 +98,8 @@ function find($limit = 10) {
return elgg_get_entities_from_metadata(array(
'type' => 'object',
'subtype' => 'admin_notice',
'limit' => $limit
'limit' => $limit,
'distinct' => false,
));
}

Expand All @@ -114,7 +116,8 @@ function exists($id) {
$notice = elgg_get_entities_from_metadata(array(
'type' => 'object',
'subtype' => 'admin_notice',
'metadata_name_value_pair' => array('name' => 'admin_notice_id', 'value' => $id)
'metadata_name_value_pair' => array('name' => 'admin_notice_id', 'value' => $id),
'distinct' => false,
));
elgg_set_ignore_access($old_ia);

Expand Down
13 changes: 11 additions & 2 deletions engine/classes/Elgg/Database/EntityTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,11 @@ function enable($guid, $recursive = true) {
*
* callback => string A callback function to pass each row through
*
* distinct => bool (true) If set to false, Elgg will drop the DISTINCT clause from
* the MySQL query, which will improve performance in some situations.
* Avoid setting this option without a full understanding of the underlying
* SQL query Elgg creates.
*
* @return mixed If count, int. If not count, array. false on errors.
* @see elgg_get_entities_from_metadata()
* @see elgg_get_entities_from_relationship()
Expand Down Expand Up @@ -348,6 +353,7 @@ function getEntities(array $options = array()) {

'preload_owners' => false,
'callback' => 'entity_row_to_elggstar',
'distinct' => true,

// private API
'__ElggBatch' => null,
Expand Down Expand Up @@ -427,9 +433,12 @@ function getEntities(array $options = array()) {
}

if (!$options['count']) {
$query = "SELECT DISTINCT e.*{$selects} FROM {$this->CONFIG->dbprefix}entities e ";
$distinct = $options['distinct'] ? "DISTINCT" : "";
$query = "SELECT $distinct e.*{$selects} FROM {$this->CONFIG->dbprefix}entities e ";
} else {
$query = "SELECT count(DISTINCT e.guid) as total FROM {$this->CONFIG->dbprefix}entities e ";
// note: when DISTINCT unneeded, it's slightly faster to compute COUNT(*) than GUIDs
$count_expr = $options['distinct'] ? "DISTINCT e.guid" : "*";
$query = "SELECT COUNT($count_expr) as total FROM {$this->CONFIG->dbprefix}entities e ";
}

// add joins
Expand Down
4 changes: 3 additions & 1 deletion engine/classes/Elgg/Database/Plugins.php
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ function get($plugin_id) {
'selects' => array("oe.title", "oe.description"),
'wheres' => array("oe.title = '$plugin_id'"),
'limit' => 1,
'distinct' => false,
);

$plugins = elgg_get_entities($options);
Expand Down Expand Up @@ -355,7 +356,8 @@ function find($status = 'active', $site_guid = null) {
"JOIN {$db_prefix}objects_entity plugin_oe on plugin_oe.guid = e.guid"
),
'wheres' => array("ps.name = '$priority'"),
'order_by' => "CAST(ps.value as unsigned), e.guid"
'order_by' => "CAST(ps.value as unsigned), e.guid",
'distinct' => false,
);

switch ($status) {
Expand Down
1 change: 1 addition & 0 deletions engine/classes/ElggEntity.php
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,7 @@ public function enableAnnotations($name = '') {
private function getAnnotationCalculation($name, $calculation) {
$options = array(
'guid' => $this->getGUID(),
'distinct' => false,
'annotation_name' => $name,
'annotation_calculation' => $calculation
);
Expand Down
1 change: 1 addition & 0 deletions engine/classes/ElggVolatileMetadataCache.php
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ public function populateFromEntities($guids) {
'guids' => $guids,
'limit' => 0,
'callback' => false,
'distinct' => false,
'joins' => array(
"JOIN {$db_prefix}metastrings v ON n_table.value_id = v.id",
"JOIN {$db_prefix}metastrings n ON n_table.name_id = n.id",
Expand Down
5 changes: 5 additions & 0 deletions engine/lib/entities.php
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,11 @@ function elgg_enable_entity($guid, $recursive = true) {
*
* callback => string A callback function to pass each row through
*
* distinct => bool (true) If set to false, Elgg will drop the DISTINCT clause from
* the MySQL query, which will improve performance in some situations.
* Avoid setting this option without a full understanding of the underlying
* SQL query Elgg creates.
*
* @return mixed If count, int. If not count, array. false on errors.
* @since 1.7.0
* @see elgg_get_entities_from_metadata()
Expand Down
7 changes: 5 additions & 2 deletions engine/lib/metastrings.php
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ function _elgg_get_metastring_based_objects($options) {
'wheres' => array(),
'joins' => array(),

'distinct' => true,
'preload_owners' => false,
'callback' => $callback,
);
Expand Down Expand Up @@ -260,6 +261,8 @@ function _elgg_get_metastring_based_objects($options) {
$wheres[] = _elgg_get_access_where_sql(array('table_alias' => 'n_table'));
}

$distinct = $options['distinct'] ? "DISTINCT " : "";

if ($options['metastring_calculation'] === ELGG_ENTITIES_NO_VALUE && !$options['count']) {
$selects = array_unique($selects);
// evalutate selects
Expand All @@ -270,10 +273,10 @@ function _elgg_get_metastring_based_objects($options) {
}
}

$query = "SELECT DISTINCT n_table.*{$select_str} FROM {$db_prefix}$type n_table";
$query = "SELECT $distinct n_table.*{$select_str} FROM {$db_prefix}$type n_table";
} elseif ($options['count']) {
// count is over the entities
$query = "SELECT count(DISTINCT e.guid) as calculation FROM {$db_prefix}$type n_table";
$query = "SELECT count($distinct e.guid) as calculation FROM {$db_prefix}$type n_table";
} else {
$query = "SELECT {$options['metastring_calculation']}(v.string) as calculation FROM {$db_prefix}$type n_table";
}
Expand Down
16 changes: 14 additions & 2 deletions engine/lib/river.php
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,12 @@ function elgg_delete_river(array $options = array()) {
* order_by => STR Order by clause (rv.posted desc)
* group_by => STR Group by clause
*
* distinct => BOOL If set to false, Elgg will drop the DISTINCT
* clause from the MySQL query, which will improve
* performance in some situations. Avoid setting this
* option without a full understanding of the
* underlying SQL query Elgg creates. (true)
*
* @return array|int
* @since 1.8.0
*/
Expand Down Expand Up @@ -303,6 +309,7 @@ function elgg_get_river(array $options = array()) {
'limit' => 20,
'offset' => 0,
'count' => false,
'distinct' => true,

'order_by' => 'rv.posted desc',
'group_by' => ELGG_ENTITIES_ANY_VALUE,
Expand Down Expand Up @@ -372,9 +379,14 @@ function elgg_get_river(array $options = array()) {
$wheres = array_unique($wheres);

if (!$options['count']) {
$query = "SELECT DISTINCT rv.* FROM {$CONFIG->dbprefix}river rv ";
$distinct = $options['distinct'] ? "DISTINCT" : "";

$query = "SELECT $distinct rv.* FROM {$CONFIG->dbprefix}river rv ";
} else {
$query = "SELECT COUNT(DISTINCT rv.id) AS total FROM {$CONFIG->dbprefix}river rv ";
// note: when DISTINCT unneeded, it's slightly faster to compute COUNT(*) than IDs
$count_expr = $options['distinct'] ? "DISTINCT rv.id" : "*";

$query = "SELECT COUNT($count_expr) as total FROM {$CONFIG->dbprefix}river rv ";
}

// add joins
Expand Down
2 changes: 1 addition & 1 deletion engine/lib/statistics.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ function get_entity_statistics($owner_guid = 0) {
$entity_stats = array();
$owner_guid = (int)$owner_guid;

$query = "SELECT distinct e.type,s.subtype,e.subtype as subtype_id
$query = "SELECT e.type,s.subtype,e.subtype as subtype_id
from {$CONFIG->dbprefix}entities e left
join {$CONFIG->dbprefix}entity_subtypes s on e.subtype=s.id";

Expand Down
20 changes: 20 additions & 0 deletions engine/tests/ElggCoreGetEntitiesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -979,4 +979,24 @@ public function testEGEEmptySubtypePlurality() {
$entities = elgg_get_entities($options);
$this->assertTrue(is_array($entities));
}

public function testDistinctCanBeDisabled() {
$prefix = _elgg_services()->config->get('dbprefix');
$options = array(
'callback' => '',
'joins' => array(
"RIGHT JOIN {$prefix}metadata m ON (e.guid = m.entity_guid)"
),
'wheres' => array(
'm.entity_guid = ' . elgg_get_logged_in_user_guid(),
),
);

$users = elgg_get_entities($options);
$this->assertEqual(1, count($users));

$options['distinct'] = false;
$users = elgg_get_entities($options);
$this->assertTrue(count($users) > 1);
}
}
1 change: 1 addition & 0 deletions views/default/admin/appearance/default_widgets.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
$object = elgg_get_entities(array(
'type' => 'object',
'subtype' => 'moddefaultwidgets',
'distinct' => false,
'limit' => 1,
));

Expand Down
1 change: 1 addition & 0 deletions views/default/page/elements/comments.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
'full_view' => true,
'limit' => $limit,
'preload_owners' => true,
'distinct' => false,
));

echo $html;
Expand Down
6 changes: 4 additions & 2 deletions views/default/river/elements/responses.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
}

$item = $vars['item'];
/* @var ElggRiverItem $item */
$object = $item->getObjectEntity();
$target = $item->getTargetEntity();

Expand All @@ -30,9 +31,10 @@
'subtype' => 'comment',
'container_guid' => $object->getGUID(),
'limit' => 3,
'order_by' => 'e.time_created desc'
'order_by' => 'e.time_created desc',
'distinct' => false,
));

// why is this reversing it? because we're asking for the 3 latest
// comments by sorting desc and limiting by 3, but we want to display
// these comments with the latest at the bottom.
Expand Down

0 comments on commit 293317f

Please sign in to comment.