Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

children aggregation missing documents #9271

Closed
clintongormley opened this issue Jan 13, 2015 · 1 comment
Closed

children aggregation missing documents #9271

clintongormley opened this issue Jan 13, 2015 · 1 comment
Assignees

Comments

@clintongormley
Copy link

There appears to be a bug in the children aggregation, which misses one matching document.

There is a workaround for the bug: adding an outer aggregation at the parent level. However, this clutters the query. Worse, even with the workaround, the bug showed up again when I ran multiple children aggregations (involving one parent type and two distinct child types).

DELETE _all 

#  Product Catalogue Index

DELETE /prodcatalog

PUT /prodcatalog

#  Use parent-child mapping to capture a Master Product and its Variant SKUs.

PUT /prodcatalog/masterprod/_mapping
{  
   "masterprod":{  
      "properties":{  
         "brand":{  
            "type":"string",
            "index":"not_analyzed"
         },
         "name":{  
            "type":"string"
         }
      }
   }
}

PUT /prodcatalog/variantsku/_mapping
{  
   "variantsku":{  
      "_parent":{  
         "type":"masterprod"
      },
      "properties":{  
         "color":{  
            "type":"string",
            "index":"not_analyzed"
         },
         "size":{  
            "type":"string",
            "index":"not_analyzed"
         }
      }
   }
}

GET /prodcatalog/_mapping


#  Index 2 parents and several children.


PUT /prodcatalog/masterprod/1
{
  "brand": "Levis",
  "name": "Style 501",
  "material": "Denim"
}

PUT /prodcatalog/variantsku/10001?parent=1
{
    "color" : "blue",
    "size"  : "32"
}


PUT /prodcatalog/variantsku/10002?parent=1
{
    "color" : "blue",
    "size"  : "34"
}


PUT /prodcatalog/variantsku/10003?parent=1
{
    "color" : "blue",
    "size"  : "36"
}


PUT /prodcatalog/variantsku/10004?parent=1
{
    "color" : "black",
    "size"  : "38"
}


PUT /prodcatalog/variantsku/10005?parent=1
{
    "color" : "black",
    "size"  : "40"
}


PUT /prodcatalog/variantsku/10006?parent=1
{
    "color" : "gray",
    "size"  : "36"
}


PUT /prodcatalog/masterprod/2
{
    "brand" : "Wrangler",
    "name"  : "Regular Cut",
    "material" : "Leather"
}

PUT /prodcatalog/variantsku/20001?parent=2
{
    "color" : "blue",
    "size"  : "32"
}

PUT /prodcatalog/variantsku/20002?parent=2
{
    "color" : "blue",
    "size"  : "34"
}


PUT /prodcatalog/variantsku/20003?parent=2
{
    "color" : "black",
    "size"  : "36"
}


PUT /prodcatalog/variantsku/20004?parent=2
{
    "color" : "black",
    "size"  : "38"
}


PUT /prodcatalog/variantsku/20005?parent=2
{
    "color" : "black",
    "size"  : "40"
}


PUT /prodcatalog/variantsku/20006?parent=2
{
    "color" : "orange",
    "size"  : "36"
}


PUT /prodcatalog/variantsku/20007?parent=2
{
    "color" : "green",
    "size"  : "44"
}




#  The query below should match the 1 masterprod doc with an orange variantsku.
#  The children aggregation should cover all 7 variantsku children of that parent.
#  BUG: one bucket of the color terms aggregation is missing!

POST /prodcatalog/masterprod/_search
{  
   "query":{  
      "has_child":{  
         "type":"variantsku",
         "score_mode":"none",
         "query":{  
            "term":{  
               "color":"orange"
            }
         }
      }
   },
   "aggs":{  
      "my-refinements":{  
         "children":{  
            "type":"variantsku"
         },
         "aggs":{  
            "my-sizes":{  
               "terms":{  
                  "field":"variantsku.size"
               }
            },
            "my-colors":{  
               "terms":{  
                  "field":"variantsku.color"
               }
            }            
         }
      }
   }
}


#  Partial results:
#  Note that the green variantsku is missing:
#  the bucket doc counts sum to only 6, although 7 are expected.

         "my-colors": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
               {
                  "key": "black",
                  "doc_count": 3
               },
               {
                  "key": "blue",
                  "doc_count": 2
               },
               {
                  "key": "orange",
                  "doc_count": 1
               }
            ]
         }



#  Here is my Workaround.
#  By adding an aggregation at the parent level (brand),
#  we no longer lose 1 variantsku document.

POST /prodcatalog/masterprod/_search
{
   "query":{  
      "has_child":{  
         "type":"variantsku",
         "score_mode":"none",
         "query":{  
            "term":{  
               "color":"orange"
            }
         }
      }
   },    
  "aggs": {
    "my-brands": {
      "terms": {
        "field": "brand"
      },
      "aggs": {
        "my-refinements": {
          "children": {
            "type" : "variantsku" 
          },
          "aggs": {
            "my-colors": {
              "terms": {
                "field": "variantsku.color"
              }
            },
            "my-sizes": {
              "terms": {
                "field": "variantsku.size"
              }
            }
          }          
        }
      }
    }
  }
}

#  Partial Results:

                  "my-colors": {
                     "doc_count_error_upper_bound": 0,
                     "sum_other_doc_count": 0,
                     "buckets": [
                        {
                           "key": "black",
                           "doc_count": 3
                        },
                        {
                           "key": "blue",
                           "doc_count": 2
                        },
                        {
                           "key": "green",
                           "doc_count": 1
                        },
                        {
                           "key": "orange",
                           "doc_count": 1
                        }
                     ]


#  Since we are not interested in a breakdown by brand,
#  we can use something generic, like _type
#  which should only return 1 bucket at the parent level.
#  Again, we no longer lose 1 variantsku document.

POST /prodcatalog/masterprod/_search
{
  "query": {
    "has_child": {
      "type": "variantsku",
      "score_mode": "none",
      "query": {
        "term": {
          "color": "orange"
        }
      }
    }
  },
  "aggs": {
    "my-types": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "my-refinements": {
          "children": {
            "type": "variantsku"
          },
          "aggs": {
            "my-colors": {
              "terms": {
                "field": "variantsku.color"
              }
            },
            "my-sizes": {
              "terms": {
                "field": "variantsku.size"
              }
            }
          }
        }
      }
    }
  }
}
@martijnvg
Copy link
Member

This bug is caused by the children agg, but it only has an effect when the global_ordinals_low_cardinality execution hint is used, which is what happens in the first search request.

I'll open a PR to fix this. In the meantime, a workaround is to have every terms agg that is wrapped by a children agg use another execution hint (either global_ordinals or global_ordinals_hash).

martijnvg added a commit to martijnvg/elasticsearch that referenced this issue Jan 19, 2015
…he buckets on the parent level to the child level and because of that it needs to invoke the post collection of its nested aggs.

Closes elastic#9271
martijnvg added a commit that referenced this issue Jan 19, 2015
…he buckets on the parent level to the child level and because of that it needs to invoke the post collection of its nested aggs.

Closes #9271
mute pushed a commit to mute/elasticsearch that referenced this issue Jul 29, 2015
…he buckets on the parent level to the child level and because of that it needs to invoke the post collection of its nested aggs.

Closes elastic#9271
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants