Skip to content

Commit

Permalink
Hent ned indeks og gjør klar klientside søkemotor
Browse files Browse the repository at this point in the history
Co-authored-by: Magnar Sveen <magnar.sveen@mattilsynet.no>
  • Loading branch information
cjohansen and magnars committed Oct 13, 2023
1 parent c784c76 commit 2bb2947
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 17 deletions.
7 changes: 4 additions & 3 deletions dev/matvaretabellen/dev.clj
Expand Up @@ -36,11 +36,13 @@
[?food :food/id ?id]]
(d/db conn)))

(->> (d/entity (d/db conn) [:food/id "06.531"])
(into {}))

(index/index-foods {} (d/db conn) :en)

(seq (d/datoms (d/db (:datomic/conn integrant.repl.state/system))
:avet :page/uri))
:avet :page/uri))

;; config-admin
(ca/conceal-value (config/from-file "./config/local-config.edn")
Expand All @@ -49,8 +51,7 @@

(ca/conceal-value (config/from-file "./config/prod-config.edn")
:secret/prod
:foodcase/bearer-token)
)
:foodcase/bearer-token))

(comment

Expand Down
27 changes: 23 additions & 4 deletions src/matvaretabellen/pages.clj
@@ -1,5 +1,5 @@
(ns matvaretabellen.pages
(:require [clojure.data.json :as json]
(:require [datomic-type-extensions.api :as d]
[matvaretabellen.search-index :as index]
[mt-designsystem.components.search-input :refer [SearchInput]]
[mt-designsystem.components.site-header :refer [SiteHeader]]
Expand All @@ -17,13 +17,31 @@
:page/locale :nb}
{:page/uri "/index/en.json"
:page/kind :foods-index
:page/locale :en}
{:page/uri "/foods/nb.json"
:page/kind :foods-lookup
:page/locale :nb}
{:page/uri "/foods/en.json"
:page/kind :foods-lookup
:page/locale :en}])

(defn render-foods-index [db page]
{:content-type "application/json"
:body (json/write-str (index/build-index db (:page/locale page)))})
{:headers {"content-type" "application/json"}
:body (index/build-index db (:page/locale page))})

(defn render-foods-lookup
  "Builds the JSON response for a `:foods-lookup` page: a map from each
  `:food/id` found in `db` to that food's name in the page's locale."
  [db page]
  (let [locale (:page/locale page)
        food-eids (d/q '[:find [?food ...]
                         :where
                         [?food :food/id]]
                       db)]
    {:headers {"content-type" "application/json"}
     :body (->> food-eids
                (map #(d/entity db %))
                ;; One [id localized-name] pair per food entity
                (map (juxt :food/id #(get-in % [:food/name locale])))
                (into {}))}))

(defn render-frontpage [context db page]
(defn render-frontpage [context _db page]
(html/render-hiccup
context
page
Expand All @@ -39,6 +57,7 @@
(let [db (:foods/db context)]
(case (:page/kind page)
:foods-index (render-foods-index db page)
:foods-lookup (render-foods-lookup db page)
:frontpage (render-frontpage context db page)
))
)
10 changes: 2 additions & 8 deletions src/matvaretabellen/search.cljc
Expand Up @@ -124,13 +124,7 @@
#(short? 1 %)]}

:foodNameEdgegrams
{:f #(get-searchable-name locale %)
{:f #(get-in % [:food/name locale])
:tokenizers [tokenize-lower-case
remove-diacritics
tokenize-words
(partial tokenize-edge-ngrams 10)]}})

(create-schema :nb)
(filter-tokens nil nil)
(get-field-syms nil nil)
(tokenize "Yo mama")
(partial tokenize-edge-ngrams 3 10)]}})
126 changes: 126 additions & 0 deletions src/matvaretabellen/ui/foods_search.cljs
@@ -0,0 +1,126 @@
(ns matvaretabellen.ui.foods-search
(:require [clojure.set :as set]
[matvaretabellen.search :as search]))

(defn score-term
  "Looks up `term` in each of the `fields` of `index` and returns one result
  map per matching document id. `(get-in index [field term])` is expected to
  yield `[id weight]` pairs. Each result carries:
  - `:id`     the document id
  - `:score`  the sum of the weights across all matching fields
  - `:fields` a map of `{field weight}` for the fields that matched
  - `:term`   the term that was looked up"
  [index fields term]
  (let [hits (for [field fields
                   [id weight] (get-in index [field term])]
               {:id id
                :score weight
                :field field})
        hits-by-id (group-by :id hits)]
    (for [[id id-hits] hits-by-id]
      {:id id
       :score (reduce + 0 (map :score id-hits))
       :fields (into {} (map (juxt :field :score)) id-hits)
       :term term})))

(defn qualified-match?
  "Returns true when `res` (the per-term matches for a single document)
  contains enough entries relative to `terms`:
  - `:or` with `:min-accuracy`: at least `min-accuracy * (count terms)`
  - `:or` without `:min-accuracy`: at least one
  - any other operator (the `:and` default): at least `(count terms)`"
  [terms res {:keys [operator min-accuracy]}]
  (let [term-count (count terms)
        required (if (= :or operator)
                   (if min-accuracy
                     (* min-accuracy term-count)
                     1)
                   term-count)]
    (>= (count res) required)))

(defn match-query
  "Runs a single sub-query against `index`. The query string `:q` is tokenized
  with `:tokenizers` and filtered with `:token-filters`; every resulting term
  is scored against `:fields` (defaults to all keys of `index`). Documents
  that do not satisfy `qualified-match?` for this query are dropped. Scores,
  per-field scores and per-term scores are all multiplied by `:boost`
  (defaults to 1)."
  [index {:keys [q boost tokenizers token-filters fields] :as query}]
  (let [fields (or fields (keys index))
        boost (or boost 1)
        boosted #(* boost %)
        terms (search/filter-tokens token-filters (search/tokenize q tokenizers))
        hits-by-id (group-by :id (mapcat #(score-term index fields %) terms))]
    (for [[id hits] hits-by-id
          :when (qualified-match? terms hits query)]
      {:id id
       :score (boosted (reduce + 0 (map :score hits)))
       ;; Sum the contribution of each field across terms, then boost
       :fields (into {}
                     (map (fn [[field score]] [field (boosted score)]))
                     (apply merge-with + (map :fields hits)))
       :terms (into {}
                    (map (juxt :term (comp boosted :score)))
                    hits)})))

(defn query
  "Query the index created by `index-document` with `q`. `q` is a map with two
  keys:
  - `:queries` A seq of maps defining a query (see below)
  - `:operator` Either `:or` or `:and` (default)

  Each query in `:queries` is a map of the following keys:
  - `:q` The query string
  - `:tokenizers` How to tokenize the query string before matching against
                  indexes. Defaults to `default-tokenizers`.
  - `:token-filters` Filter tokens
  - `:fields` What field indexes to match against. Defaults to all fields.
  - `:boost` A score boost for this query.
  - `:operator` Either `:or` or `:and` (default). Determines whether a
                single token match is good enough (`:or`), or if all tokens
                must match (`:and`).
  - `:min-accuracy` When `:operator` is `:or`, this can be a number between
                    `0` and `1` determining the lowest acceptable success
                    rate. `0.5` means that at least half the tokens from `q`
                    must match tokens in the queried indexes

  Each query will possibly find some results. Results are scored based on the
  number of matching tokens. These scores are then boosted for each individual
  query. The final result will be either the intersection of all sub-results
  (`:and`), or the union (`:or`). The final score for each document id will be
  calculated by summing the individual query scores, and `query` returns a
  sorted seq of results, with the best scoring result first. When `:queries`
  is empty or missing, the result is empty regardless of `:operator`.

  Results are maps of:
  - `:id` The id of the document
  - `:score` The calculated total score
  - `:fields` A map of `{field score}` - e.g. what fields contributed to the
              result, and their individual scores.
  - `:terms` A map of `{term score}` - e.g. what terms contributed to the
             result, and their individual scores."
  [index q]
  (let [res (map #(match-query index %) (:queries q))
        ids (map #(set (map :id %)) res)
        res-ids (cond
                  ;; `set/intersection` has no zero-argument arity, so applying
                  ;; it to an empty list of id sets would throw. No queries
                  ;; means no matches.
                  (empty? ids) #{}
                  (= :or (:operator q)) (apply set/union ids)
                  :else (apply set/intersection ids))]
    (->> (apply concat res)
         (filter (comp res-ids :id))
         (group-by :id)
         (map (fn [[id xs]]
                {:id id
                 :score (reduce + 0 (map :score xs))
                 :fields (apply merge-with + (map :fields xs))
                 :terms (apply merge-with + (map :terms xs))}))
         ;; Highest score first
         (sort-by (comp - :score)))))

(defn search
  "Searches `engine` for the user input `q` by combining three sub-queries
  with `:or`: edge n-grams (boost 10), whole food names (boost 5) and n-grams
  with `:or` / `:min-accuracy 0.8`. Each match from `query` is returned with
  an added `:name`, looked up by `:id` in the engine's `:foods` map."
  [engine q]
  (let [{:keys [index schema foods]} engine
        ;; "Autocomplete" what the user is typing
        autocomplete (assoc (:foodNameEdgegrams schema)
                            :q q
                            :fields ["foodNameEdgegrams"]
                            :boost 10)
        ;; Boost exact matches
        exact (assoc (:foodName schema)
                     :q q
                     :fields ["foodName"]
                     :boost 5)
        ;; Add fuzziness
        fuzzy (merge (:foodNameNgrams schema)
                     {:q q
                      :fields ["foodNameNgrams"]
                      :operator :or
                      :min-accuracy 0.8})]
    (for [hit (query index {:queries [autocomplete exact fuzzy]
                            :operator :or})]
      (assoc hit :name (get foods (:id hit))))))
50 changes: 48 additions & 2 deletions src/matvaretabellen/ui/main.cljs
@@ -1,7 +1,53 @@
(ns ^:figwheel-hooks matvaretabellen.ui.main)
(ns ^:figwheel-hooks matvaretabellen.ui.main
(:require [matvaretabellen.ui.foods-search :as foods-search]
[matvaretabellen.search :as search]))

(defonce search-engine (atom {:index-status :pending
:foods-status :pending}))

(defn load-json
  "Fetches `url` and returns a promise of its JSON body converted to
  ClojureScript data (string keys). The promise rejects when the request
  fails, when the server answers with a non-2xx status, or when the body is
  not valid JSON."
  [url]
  (-> (js/fetch url)
      (.then (fn [response]
               ;; `fetch` only rejects on network errors; HTTP error statuses
               ;; resolve normally, so they must be turned into rejections
               ;; explicitly to avoid parsing an error page as data.
               (if (.-ok response)
                 (.text response)
                 (throw (js/Error. (str "Failed to load " url
                                        ": HTTP " (.-status response)))))))
      (.then #(js->clj (js/JSON.parse %)))))

(defn populate-search-engine
  "Prepares the `search-engine` atom for `locale`: creates the schema if it is
  missing, then loads the search index and the food lookup from
  `/index/<locale>.json` and `/foods/<locale>.json`. Each resource is only
  requested while its status is `:pending` or `:error`; the status moves to
  `:loading` and then to `:ready` (data stored) or `:error` (error logged to
  the console)."
  [locale]
  (let [needs-load? #{:pending :error}
        load-into! (fn [data-key status-key url]
                     (when (needs-load? (get @search-engine status-key))
                       (swap! search-engine assoc status-key :loading)
                       (-> (load-json url)
                           (.then (fn [data]
                                    (swap! search-engine assoc
                                           data-key data
                                           status-key :ready)))
                           (.catch (fn [e]
                                     (js/console.error e)
                                     (swap! search-engine assoc status-key :error))))))]
    (when-not (:schema @search-engine)
      (swap! search-engine assoc :schema (search/create-schema (keyword locale))))
    (load-into! :index :index-status (str "/index/" locale ".json"))
    (load-into! :foods :foods-status (str "/foods/" locale ".json"))))

(defn ^:after-load main []
(prn "The client is ready to do the server's bidding"))
(populate-search-engine js/document.documentElement.lang))

;; Exported entry point for the client: takes no arguments and simply
;; delegates to `main`. The `^:export` metadata keeps the name `boot`
;; reachable from plain JavaScript after compilation.
(defn ^:export boot []
(main))

;; REPL scratchpad: none of the forms below are evaluated when the namespace
;; loads. Evaluate them one at a time from an editor-connected REPL.
(comment

;; Reset the engine to its initial state so the next `main` reloads everything
(reset! search-engine {:index-status :pending
:foods-status :pending})

;; Kick off loading, then inspect how far the two downloads have come
(main)
(select-keys @search-engine [:foods-status :index-status])

;; Try a search against the loaded engine
(foods-search/search @search-engine "eple")

;; Look up a single entry in the raw index by field, term and food id
(get-in @search-engine [:index "foodNameEdgegrams" "eple" "11.076"])

)

0 comments on commit 2bb2947

Please sign in to comment.