Skip to content
Permalink
Browse files

Improving search query

* Reduce the number of bucket results on each ChangeBucket event
* Warmup after reindexing, fixes #250
* Using multiple sort keys, fixes #218
* Index also unfree packages, fixes #226, fixes #237
* Add wildcard query, fixes #279
  • Loading branch information
garbas committed Feb 7, 2021
1 parent 72434f5 commit d9ab2fd23e022f1ab705ed7c61d72cfa56b8964a
@@ -2,6 +2,7 @@ name: "Hourly import channel to Elasticsearch"

on:

pull_request:
schedule:
- cron: '0 * * * *'

@@ -72,9 +73,9 @@ jobs:
path: ./eval-cache
key: eval-cache-${{ env.EVAL_ID }}

- name: Installing NixFlakes
- name: Installing nixFlakes (and jq)
run: |
nix-env -iA nixpkgs.nixFlakes
nix-env -iA nixpkgs.nixFlakes nixpkgs.jq
echo 'experimental-features = nix-command flakes' | sudo tee -a /etc/nix/nix.conf
nix --version
cat /etc/nix/nix.conf
@@ -89,3 +90,10 @@ jobs:
cp ./eval-cache/builds.json ./eval-${{ env.EVAL_ID }}.json
./result/bin/import-channel --es-url ${{ secrets.ELASTICSEARCH_URL }} --channel ${{ matrix.channel }} -vvv
if: github.repository == 'NixOS/nixos-search'

- name: Warmup ${{ matrix.channel }} channel
run: |
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
curl ${{ secrets.ELASTICSEARCH_URL }}/latest-$(cat VERSION)-${{ matrix.channel }}/_search | jq '.took'
if: github.repository == 'NixOS/nixos-search'
@@ -16,7 +16,6 @@ jobs:
- uses: cachix/cachix-action@v8
with:
name: nixos-search
signingKey: '${{ secrets.CACHIX_SIGNING_KEY }}'

- name: Install unstable channel
run: |
@@ -1 +1 @@
18
19
@@ -1,6 +1,9 @@
{
# Ensures no aliases are in the results.
allowAliases = false;
allowAliases = true;

# Also list unfree packages
allowUnfree = true;

# Enable recursion into attribute sets that nix-env normally doesn't look into
# so that we can get a more complete picture of the available packages for the
@@ -282,6 +282,8 @@ makeRequest options channel query from size _ sort =
"option_name"
[]
[]
[]
"option_name"
[ ( "option_name", 6.0 )
, ( "option_name_query", 3.0 )
, ( "option_description", 1.0 )
@@ -632,12 +632,14 @@ makeRequest options channel query from size maybeBuckets sort =
sort
"package"
"package_attr_name"
[ "package_pversion" ]
[ "package_attr_set"
, "package_license_set"
, "package_maintainers_set"
, "package_platforms"
]
filterByBuckets
"package_attr_name"
[ ( "package_attr_name", 9.0 )
, ( "package_pname", 6.0 )
, ( "package_attr_name_query", 4.0 )
@@ -486,25 +486,43 @@ toAggregations bucketsFields =
toSortQuery :
Sort
-> String
-> List String
-> ( String, Json.Encode.Value )
toSortQuery sort field =
toSortQuery sort field fields =
( "sort"
, case sort of
AlphabeticallyAsc ->
Json.Encode.list Json.Encode.object
[ [ ( field, Json.Encode.string "asc" )
]
[ List.append
[ ( field, Json.Encode.string "asc" )
]
(List.map
(\x -> ( x, Json.Encode.string "asc" ))
fields
)
]

AlphabeticallyDesc ->
Json.Encode.list Json.Encode.object
[ [ ( field, Json.Encode.string "desc" )
]
[ List.append
[ ( field, Json.Encode.string "desc" )
]
(List.map
(\x -> ( x, Json.Encode.string "desc" ))
fields
)
]

Relevance ->
Json.Encode.list Json.Encode.string
[ "_score"
Json.Encode.list Json.Encode.object
[ List.append
[ ( "_score", Json.Encode.string "desc" )
, ( field, Json.Encode.string "desc" )
]
(List.map
(\x -> ( x, Json.Encode.string "desc" ))
fields
)
]
)

@@ -970,9 +988,10 @@ filterByType type_ =

searchFields :
String
-> String
-> List ( String, Float )
-> List (List ( String, Json.Encode.Value ))
searchFields query fields =
searchFields query mainField fields =
let
queryVariations q =
case ( List.head q, List.tail q ) of
@@ -1002,22 +1021,39 @@ searchFields query fields =
|> List.map (\( field, score ) -> [ field ++ "^" ++ String.fromFloat score, field ++ ".edge^" ++ String.fromFloat score ])
|> List.concat
in
List.map
(\queryWords ->
[ ( "multi_match"
, Json.Encode.object
[ ( "type", Json.Encode.string "cross_fields" )
, ( "query", Json.Encode.string <| String.join " " queryWords )
, ( "analyzer", Json.Encode.string "whitespace" )
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
, ( "operator", Json.Encode.string "and" )
, ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords )
, ( "fields", Json.Encode.list Json.Encode.string allFields )
]
)
]
List.append
(List.map
(\queryWords ->
[ ( "multi_match"
, Json.Encode.object
[ ( "type", Json.Encode.string "cross_fields" )
, ( "query", Json.Encode.string <| String.join " " queryWords )
, ( "analyzer", Json.Encode.string "whitespace" )
, ( "auto_generate_synonyms_phrase_query", Json.Encode.bool False )
, ( "operator", Json.Encode.string "and" )
, ( "_name", Json.Encode.string <| "multi_match_" ++ String.join "_" queryWords )
, ( "fields", Json.Encode.list Json.Encode.string allFields )
]
)
]
)
(queryVariations (String.words (String.toLower query)))
)
(List.map
(\queryWord ->
[ ( "wildcard"
, Json.Encode.object
[ ( mainField
, Json.Encode.object
[ ( "value", Json.Encode.string ("*" ++ queryWord ++ "*") )
]
)
]
)
]
)
(String.words (String.toLower query))
)
(queryVariations (String.words (String.toLower query)))


makeRequestBody :
@@ -1028,10 +1064,12 @@ makeRequestBody :
-> String
-> String
-> List String
-> List String
-> List ( String, Json.Encode.Value )
-> String
-> List ( String, Float )
-> Http.Body
makeRequestBody query from sizeRaw sort type_ sortField bucketsFields filterByBuckets fields =
makeRequestBody query from sizeRaw sort type_ sortField otherSortFields bucketsFields filterByBuckets mainField fields =
let
-- you cannot request more than 10000 results, otherwise it will return 404
size =
@@ -1043,49 +1081,43 @@ makeRequestBody query from sizeRaw sort type_ sortField bucketsFields filterByBu
in
Http.jsonBody
(Json.Encode.object
(List.append
[ ( "from"
, Json.Encode.int from
)
, ( "size"
, Json.Encode.int size
)
, toSortQuery sort sortField
, toAggregations bucketsFields
, ( "query"
, Json.Encode.object
[ ( "bool"
, Json.Encode.object
[ ( "filter"
, Json.Encode.list Json.Encode.object
[ filterByType type_ ]
)
, ( "must"
, Json.Encode.list Json.Encode.object
[ [ ( "dis_max"
, Json.Encode.object
[ ( "tie_breaker", Json.Encode.float 0.7 )
, ( "queries"
, Json.Encode.list Json.Encode.object
(searchFields query fields)
)
]
)
]
]
)
]
)
]
)
]
(if List.isEmpty filterByBuckets then
[]

else
[ ( "post_filter", Json.Encode.object filterByBuckets ) ]
)
)
[ ( "from"
, Json.Encode.int from
)
, ( "size"
, Json.Encode.int size
)
, toSortQuery sort sortField otherSortFields
, toAggregations bucketsFields
, ( "query"
, Json.Encode.object
[ ( "bool"
, Json.Encode.object
[ ( "filter"
, Json.Encode.list Json.Encode.object
[ filterByType type_
, filterByBuckets
]
)
, ( "must"
, Json.Encode.list Json.Encode.object
[ [ ( "dis_max"
, Json.Encode.object
[ ( "tie_breaker", Json.Encode.float 0.7 )
, ( "queries"
, Json.Encode.list Json.Encode.object
(searchFields query mainField fields)
)
]
)
]
]
)
]
)
]
)
]
)


0 comments on commit d9ab2fd

Please sign in to comment.