Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

String sorting incorrect after reindex #3078

Closed
clintongormley opened this issue May 23, 2013 · 13 comments
Closed

String sorting incorrect after reindex #3078

clintongormley opened this issue May 23, 2013 · 13 comments

Comments

@clintongormley
Copy link

After reindexing a doc, it is not being returned in the correct sort order (when sorting on a string field)

First, index docs 1..100 with a string field user:

curl -XPOST 'http://127.0.0.1:9200/test/test/_bulk?pretty=1'  -d '
{"index" : {"_id" : "1"}}
{"user" : "1"}
{"index" : {"_id" : "2"}}
{"user" : "2"}
{"index" : {"_id" : "3"}}
{"user" : "3"}
{"index" : {"_id" : "4"}}
{"user" : "4"}
{"index" : {"_id" : "5"}}
{"user" : "5"}
{"index" : {"_id" : "6"}}
{"user" : "6"}
{"index" : {"_id" : "7"}}
{"user" : "7"}
{"index" : {"_id" : "8"}}
{"user" : "8"}
{"index" : {"_id" : "9"}}
{"user" : "9"}
{"index" : {"_id" : "10"}}
{"user" : "10"}
{"index" : {"_id" : "11"}}
{"user" : "11"}
{"index" : {"_id" : "12"}}
{"user" : "12"}
{"index" : {"_id" : "13"}}
{"user" : "13"}
{"index" : {"_id" : "14"}}
{"user" : "14"}
{"index" : {"_id" : "15"}}
{"user" : "15"}
{"index" : {"_id" : "16"}}
{"user" : "16"}
{"index" : {"_id" : "17"}}
{"user" : "17"}
{"index" : {"_id" : "18"}}
{"user" : "18"}
{"index" : {"_id" : "19"}}
{"user" : "19"}
{"index" : {"_id" : "20"}}
{"user" : "20"}
{"index" : {"_id" : "21"}}
{"user" : "21"}
{"index" : {"_id" : "22"}}
{"user" : "22"}
{"index" : {"_id" : "23"}}
{"user" : "23"}
{"index" : {"_id" : "24"}}
{"user" : "24"}
{"index" : {"_id" : "25"}}
{"user" : "25"}
{"index" : {"_id" : "26"}}
{"user" : "26"}
{"index" : {"_id" : "27"}}
{"user" : "27"}
{"index" : {"_id" : "28"}}
{"user" : "28"}
{"index" : {"_id" : "29"}}
{"user" : "29"}
{"index" : {"_id" : "30"}}
{"user" : "30"}
{"index" : {"_id" : "31"}}
{"user" : "31"}
{"index" : {"_id" : "32"}}
{"user" : "32"}
{"index" : {"_id" : "33"}}
{"user" : "33"}
{"index" : {"_id" : "34"}}
{"user" : "34"}
{"index" : {"_id" : "35"}}
{"user" : "35"}
{"index" : {"_id" : "36"}}
{"user" : "36"}
{"index" : {"_id" : "37"}}
{"user" : "37"}
{"index" : {"_id" : "38"}}
{"user" : "38"}
{"index" : {"_id" : "39"}}
{"user" : "39"}
{"index" : {"_id" : "40"}}
{"user" : "40"}
{"index" : {"_id" : "41"}}
{"user" : "41"}
{"index" : {"_id" : "42"}}
{"user" : "42"}
{"index" : {"_id" : "43"}}
{"user" : "43"}
{"index" : {"_id" : "44"}}
{"user" : "44"}
{"index" : {"_id" : "45"}}
{"user" : "45"}
{"index" : {"_id" : "46"}}
{"user" : "46"}
{"index" : {"_id" : "47"}}
{"user" : "47"}
{"index" : {"_id" : "48"}}
{"user" : "48"}
{"index" : {"_id" : "49"}}
{"user" : "49"}
{"index" : {"_id" : "50"}}
{"user" : "50"}
{"index" : {"_id" : "51"}}
{"user" : "51"}
{"index" : {"_id" : "52"}}
{"user" : "52"}
{"index" : {"_id" : "53"}}
{"user" : "53"}
{"index" : {"_id" : "54"}}
{"user" : "54"}
{"index" : {"_id" : "55"}}
{"user" : "55"}
{"index" : {"_id" : "56"}}
{"user" : "56"}
{"index" : {"_id" : "57"}}
{"user" : "57"}
{"index" : {"_id" : "58"}}
{"user" : "58"}
{"index" : {"_id" : "59"}}
{"user" : "59"}
{"index" : {"_id" : "60"}}
{"user" : "60"}
{"index" : {"_id" : "61"}}
{"user" : "61"}
{"index" : {"_id" : "62"}}
{"user" : "62"}
{"index" : {"_id" : "63"}}
{"user" : "63"}
{"index" : {"_id" : "64"}}
{"user" : "64"}
{"index" : {"_id" : "65"}}
{"user" : "65"}
{"index" : {"_id" : "66"}}
{"user" : "66"}
{"index" : {"_id" : "67"}}
{"user" : "67"}
{"index" : {"_id" : "68"}}
{"user" : "68"}
{"index" : {"_id" : "69"}}
{"user" : "69"}
{"index" : {"_id" : "70"}}
{"user" : "70"}
{"index" : {"_id" : "71"}}
{"user" : "71"}
{"index" : {"_id" : "72"}}
{"user" : "72"}
{"index" : {"_id" : "73"}}
{"user" : "73"}
{"index" : {"_id" : "74"}}
{"user" : "74"}
{"index" : {"_id" : "75"}}
{"user" : "75"}
{"index" : {"_id" : "76"}}
{"user" : "76"}
{"index" : {"_id" : "77"}}
{"user" : "77"}
{"index" : {"_id" : "78"}}
{"user" : "78"}
{"index" : {"_id" : "79"}}
{"user" : "79"}
{"index" : {"_id" : "80"}}
{"user" : "80"}
{"index" : {"_id" : "81"}}
{"user" : "81"}
{"index" : {"_id" : "82"}}
{"user" : "82"}
{"index" : {"_id" : "83"}}
{"user" : "83"}
{"index" : {"_id" : "84"}}
{"user" : "84"}
{"index" : {"_id" : "85"}}
{"user" : "85"}
{"index" : {"_id" : "86"}}
{"user" : "86"}
{"index" : {"_id" : "87"}}
{"user" : "87"}
{"index" : {"_id" : "88"}}
{"user" : "88"}
{"index" : {"_id" : "89"}}
{"user" : "89"}
{"index" : {"_id" : "90"}}
{"user" : "90"}
{"index" : {"_id" : "91"}}
{"user" : "91"}
{"index" : {"_id" : "92"}}
{"user" : "92"}
{"index" : {"_id" : "93"}}
{"user" : "93"}
{"index" : {"_id" : "94"}}
{"user" : "94"}
{"index" : {"_id" : "95"}}
{"user" : "95"}
{"index" : {"_id" : "96"}}
{"user" : "96"}
{"index" : {"_id" : "97"}}
{"user" : "97"}
{"index" : {"_id" : "98"}}
{"user" : "98"}
{"index" : {"_id" : "99"}}
{"user" : "99"}
{"index" : {"_id" : "100"}}
{"user" : "100"}
'

Search, sorting on user:

curl -XGET 'http://127.0.0.1:9200/test/_search?pretty=1'  -d '
{
   "sort" : {
      "user" : "asc"
   },
   "fields" : [],
   "size" : 10
}
'

Results show that user:1 is in first position:

# {
#    "hits" : {
#       "hits" : [
#          {
#             "sort" : [
#                "1"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "1",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "10"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "10",
#             "_type" : "test"
#          },
# ....

Now reindex the first doc, with the same values:

curl -XPUT 'http://127.0.0.1:9200/test/test/1?pretty=1'  -d '
{
   "user" : "1"
}
'

And search again:

curl -XGET 'http://127.0.0.1:9200/test/_search?pretty=1'  -d '
{
   "sort" : {
      "user" : "asc"
   },
   "fields" : [],
   "size" : 10
}
'

Doc with user:1 no longer appears in the correct position; in fact, it doesn't appear anywhere in the first 10 results:

# {
#    "hits" : {
#       "hits" : [
#          {
#             "sort" : [
#                "10"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "10",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "100"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "100",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "11"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "11",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "12"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "12",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "13"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "13",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "14"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "14",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "15"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "15",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "16"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "16",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "17"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "17",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "18"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "18",
#             "_type" : "test"
#          }
#       ],
#       "max_score" : null,
#       "total" : 100
#    },
#    "timed_out" : false,
#    "_shards" : {
#       "failed" : 0,
#       "successful" : 5,
#       "total" : 5
#    },
#    "took" : 3
# }

However, if you return all 100 docs, then it appears in the first position again (correctly):

curl -XGET 'http://127.0.0.1:9200/test/_search?pretty=1'  -d '
{
   "sort" : {
      "user" : "asc"
   },
   "fields" : [],
   "size" : 100
}
'

# {
#    "hits" : {
#       "hits" : [
#          {
#             "sort" : [
#                "1"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "1",
#             "_type" : "test"
#          },

which leads me to think that it is the shard level sorting which is incorrect.

@sarmiena
Copy link

Thanks for the ticket, @clintongormley . Sadly this bug is causing many people to yell at me :( They update a record and it's removed from their UI.

@ghost ghost assigned martijnvg May 23, 2013
@s1monw
Copy link
Contributor

s1monw commented May 23, 2013

what version does this reproduce on? does this still happen on master?

@s1monw
Copy link
Contributor

s1monw commented May 23, 2013

@martijnvg this seems to be fixed. It was caused by #2991 and is fixed in master and 0.90

@s1monw s1monw closed this as completed May 23, 2013
@sarmiena
Copy link

@s1monw this issue is definitely happening on 0.90.0 release. I also just pulled down the repo and ran it against v 1.0.0 beta1. The issue still exists there as well.

Please verify and reopen.

@clintongormley can you confirm?

@clintongormley
Copy link
Author

@sarmiena For me it is broken in 0.90.0, but fixed in master and in the 0.90.1 branch.

Unless you have a different test to show otherwise?

@sarmiena
Copy link

@clintongormley I'm building from master using:

mvn clean package -DskipTests

However, this is building elasticsearch-1.0.0.Beta1-SNAPSHOT.

I'm not sure how to build 0.90.1 since there is no tag or branch in the repo that I can see.

Let me know if you want me to show you (live) how to reproduce it using 1.0.0.Beta1

@sarmiena
Copy link

@clintongormley ok I just ran the same scenario on 0.90.1 branch and it's definitely still happening. Not sure why yours isn't showing the same issue.

@sarmiena
Copy link

@clintongormley Sorry to keep bothering :) However I have good news and bad news:

Good news: Your test case does work in 0.90.1
Bad news: An alternative test case produces same problem

You used bulk upload, while I simply added 1 record at a time (100 times).

https://gist.github.com/sarmiena/d945848fd683f39d212c

I used Ruby to iterate 100 POST requests in that gist, but you can use whatever you'd like.

The issue doesn't appear to be resolved. Can we reopen the ticket?

@clintongormley
Copy link
Author

@s1monw ?

@s1monw
Copy link
Contributor

s1monw commented May 23, 2013

I added a test case that mirrors your Ruby test in Java and it doesn't fail. I can't reproduce your problem, I am sorry. Are you sure you built 0.90.1?

@sarmiena
Copy link

@s1monw I'm sure I can reproduce this in 0.90.1. Perhaps the test isn't producing the same problems since the JSON api is being used and the test is using the interfaces directly?

I can do a TeamViewer session if you'd like. Otherwise you can just pop open irb and copy/paste the ruby code in there.

gchat me sarmiena@gmail.com if you want to get ahold of me. otherwise i'm on IRC in #elasticsearch as sarmiena_ (notice the underscore)

@sarmiena
Copy link

Ok looks like 0.90.1 does fix this issue. The formatting was a little off and I missed the record:

Please close.

# ruby code (irb)
(1).upto(100) do |i|
    `curl -XPUT 'http://localhost:9200/twitter/tweet/#{i}' -d '{ "user" : "#{i}"}'`
end

# command line
$ curl -X POST "http://localhost:9200/twitter/tweet/1" -d '
> {"user":"1"}
> '

$ curl -X GET 'http://localhost:9200/twitter/tweet/_search?pretty' -d '
{
  "sort": [
    {
      "user": "asc"
    }
  ],
  "size": 10,
  "from": 0
}
'
{
  "took" : 8,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 100,
    "max_score" : null,
    "hits" : [ {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "1",
      "_score" : null, "_source" :
{"user":"1"}
,
      "sort" : [ "1" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "10",
      "_score" : null, "_source" : { "user" : "10"},
      "sort" : [ "10" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "100",
      "_score" : null, "_source" : { "user" : "100"},
      "sort" : [ "100" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "11",
      "_score" : null, "_source" : { "user" : "11"},
      "sort" : [ "11" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "12",
      "_score" : null, "_source" : { "user" : "12"},
      "sort" : [ "12" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "13",
      "_score" : null, "_source" : { "user" : "13"},
      "sort" : [ "13" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "14",
      "_score" : null, "_source" : { "user" : "14"},
      "sort" : [ "14" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "15",
      "_score" : null, "_source" : { "user" : "15"},
      "sort" : [ "15" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "16",
      "_score" : null, "_source" : { "user" : "16"},
      "sort" : [ "16" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "17",
      "_score" : null, "_source" : { "user" : "17"},
      "sort" : [ "17" ]
    } ]
  }
}

@s1monw
Copy link
Contributor

s1monw commented May 24, 2013

thanks for bringing clarification! good to work with you last night!

@s1monw s1monw closed this as completed May 24, 2013
mute pushed a commit to mute/elasticsearch that referenced this issue Jul 29, 2015
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

4 participants