### Model: Significant Terms

Here, we model recommendations using Elasticsearch's `significant_terms` aggregation.

In [1]:
# We will define some persistant variables that we will use everywhere over here. Always run this script first

# you may want to update the value below to something like 'http://localhost:9200/search_recommendations' for testing locally
url = 'https://vYBoRZTxv:cafabdca-c61c-4b70-9c19-f7f7a5e27258@es-cluster-dc-test-2-b5c555.searchbase.io/search_recommendations'

headers = {
    'Content-Type': 'application/json'
}
%store url
%store headers

Stored 'url' (str)
Stored 'headers' (dict)


In [2]:
# (optional) Remove the index addressed by `url`
import requests

response = requests.delete(url)
# Echo the raw response body as UTF-8 encoded bytes.
print(response.text.encode('utf8'))

b'{"acknowledged":true}'


In [3]:
# Create a fresh index with the mapping used by the rest of this notebook
import requests
import json

# `items` is mapped as a nested field so that every {"item": ...} entry of a
# document is indexed as its own nested object.
payload = {
    "mappings": {
        "properties": {
            "items": {
                "type": "nested"
            }
        }
    }
}
# `json=` lets requests serialise the body and set `Content-Type: application/json`
# itself, so this cell no longer depends on the shared `headers` variable (which the
# bulk-indexing cell below rebinds) and stays correct when re-run out of order.
response = requests.request("PUT", url, json=payload)
print(response.text.encode('utf8'))

b'{"acknowledged":true,"shards_acknowledged":true,"index":"search_recommendations"}'


In [4]:
# (only run once) Populate the index with some qualitative sample data
import requests
import json

# NDJSON body for the _bulk endpoint: each `{"index": {}}` action line is followed by
# the document to index. Every document holds one user and the items tied to that user.
data = '''
{"index": {}}
{ "user_id": "1", "items": [ {"item": "forrest gump"}, {"item": "terminator"}, {"item": "rambo"}, {"item": "rocky"}, {"item": "good will hunting"} ] }
{"index": {}}
{ "user_id": "2", "items": [ {"item": "forrest gump"}, {"item": "terminator"}, {"item": "rocky iv"}, {"item": "rocky"}, {"item": "rocky ii"}, {"item": "predator"} ] }
{"index": {}}
{ "user_id": "3", "items": [ {"item": "forrest gump"}, {"item": "the thin red line"}, {"item": "good will hunting"}, {"item": "rocky ii"}, {"item": "predator"}, {"item": "batman"} ] }
{"index": {}}
{ "user_id": "4", "items": [ {"item": "forrest gump"}, {"item": "something about mary"}, {"item": "sixteen candles"} ] }
'''

# Use a dedicated name for the bulk headers. Rebinding the shared `headers` variable here
# would leak the NDJSON content type into every cell that runs afterwards and make the
# in-kernel value differ from the one persisted with %store.
bulk_headers = {
    'Content-Type': 'application/x-ndjson'
}

# `refresh=wait_for` makes the call return only once the new documents are visible to
# search, so the query cell gives the same result when the notebook is run top to bottom.
response = requests.request(
    "POST",
    url+'/_bulk',
    headers=bulk_headers,
    params={'refresh': 'wait_for'},
    data=data,
)
print(json.dumps(response.json(), indent=2))

{
  "took": 64,
  "errors": false,
  "items": [
    {
      "index": {
        "_index": "search_recommendations",
        "_type": "_doc",
        "_id": "HahhOnIB7SgwU7Ka8Zgf",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 2,
          "failed": 0
        },
        "_seq_no": 0,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "search_recommendations",
        "_type": "_doc",
        "_id": "HqhhOnIB7SgwU7Ka8Zgf",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 2,
          "failed": 0
        },
        "_seq_no": 1,
        "_primary_term": 1,
        "status": 201
      }
    },
    {
      "index": {
        "_index": "search_recommendations",
        "_type": "_doc",
        "_id": "H6hhOnIB7SgwU7Ka8Zgf",
        "_version": 1,
        "result": "created",
        "_shards": {
  

In [5]:
# This is the search query: find items that are significant among the users
# who have "terminator" or "predator" in their items.
import requests
import json

payload = {
    # Only the aggregation result is needed, so no hits are returned.
    "size": 0,
    # Foreground set: documents with at least one nested item matching either seed title.
    "query": {
        "nested": {
            "path": "items",
            "query": {
                "bool": {
                    "should": [
                        {
                            "term": {
                                "items.item.keyword": "terminator"
                            }
                        },
                        {
                            "term": {
                                "items.item.keyword": "predator"
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs": {
        "urls": {
            # Step into the nested `items` objects of the matching documents ...
            "nested": {
                "path": "items"
            },
            "aggs": {
                # ... and score each item title against the rest of the index.
                # `min_doc_count: 1` keeps titles that occur only once in the foreground
                # set. Note that the seed titles themselves also show up as buckets.
                "items_recommendation": {
                    "significant_terms": {
                        "field": "items.item.keyword",
                        "min_doc_count": 1
                    }
                }
            }
        }
    }
}
# `json=` serialises the payload and sets `Content-Type: application/json`, so the request
# does not rely on the shared `headers` variable, which the bulk-indexing cell rebinds to
# the NDJSON content type.
response = requests.request("GET", url+"/_search", json=payload)
print(json.dumps(response.json(), indent=2))

{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "urls": {
      "doc_count": 17,
      "items_recommendation": {
        "doc_count": 17,
        "bg_count": 24,
        "buckets": [
          {
            "key": "rocky",
            "doc_count": 2,
            "score": 0.04844290657439447,
            "bg_count": 2
          },
          {
            "key": "rocky ii",
            "doc_count": 2,
            "score": 0.04844290657439447,
            "bg_count": 2
          },
          {
            "key": "predator",
            "doc_count": 2,
            "score": 0.04844290657439447,
            "bg_count": 2
          },
          {
            "key": "terminator",
            "doc_count": 2,
            "score": 0.04844290657439447,
            "bg_count": 2
 