### Model: Significant Terms

Here, we model the recommendations using significant terms aggregation.

In [2]:
!pip install elasticsearch

Collecting elasticsearch
[?25l  Downloading https://files.pythonhosted.org/packages/4a/33/d0ed32e077f7dc860153fa866fc52ac312886c9890962ff29379aa753dd1/elasticsearch-7.7.1-py2.py3-none-any.whl (99kB)
[K    100% |████████████████████████████████| 102kB 437kB/s a 0:00:011
Installing collected packages: elasticsearch
Successfully installed elasticsearch-7.7.1
[33mYou are using pip version 18.1, however version 20.2b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [16]:
from elasticsearch import Elasticsearch
es = Elasticsearch()

In [17]:
try:
    es.indices.delete(index='events')
    print('deleted')
except: 
    print('no index')

no index


In [18]:
# Here's the script to create a fresh index with the mapping
import json 

es.indices.create(index='events', body = {
    "mappings": {
        "properties": {
            "target": {
                "type": "keyword"
            },
            "agent": {
                "type": "keyword"
            },
            "actions": {
                "type": "keyword"
            },
            "time": {
                "type": "date"
            }
            
        }
    }
})

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'events'}

In [19]:
# (only run once) Here's the script to populate some qualitative data in the index
import random
def event_generator(num):
    ev_agents = ['neal','billg','sid','ethanh','donatello','leonardo','michaelangelo','raphael','mada','john','rolo','eminem']
    ev_actions = ['buy','referred_by:sid','view','referred_by:leonardo','referred_by:mada', 'returned']
    ev_targets = ['prod:1','prod:2','prod:3','prod:4','prod:5','prod:6','prod:9','prod:8','prod:7','prod:10','prod:11','prod:12']
    for n in range(1,num + 1 ):
        d1 = random.randint(0,5)
        d2 = random.randint(0,5)
        yield {"actions": [ random.choice(ev_actions), ev_actions[abs(d1-d2)] ],"target": random.choice(ev_targets), "agent": ev_agents[d1+d2]}
    
for ev in event_generator(1000):
    es.index(index='events', body=ev)


In [20]:
print(es.count(index='events'))
es.search(index='events')

{'count': 1000, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}


{'took': 31,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1000, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'events',
    '_type': '_doc',
    '_id': 'DVM5c3IBzzKGBWwBXLqd',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:mada', 'referred_by:sid'],
     'target': 'prod:7',
     'agent': 'leonardo'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'DlM5c3IBzzKGBWwBXLrY',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:leonardo', 'referred_by:mada'],
     'target': 'prod:2',
     'agent': 'michaelangelo'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'D1M5c3IBzzKGBWwBXLrm',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:mada', 'referred_by:leonardo'],
     'target': 'prod:2',
     'agent': 'raphael'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'EFM5c3IBzzKGBWwBXLr2',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:

In [21]:
# Most significant products by views
es.search(index='events', body={
    "query" : {
        "terms" : {"actions" : [ "view" ]}
    },
    "aggregations" : {
        "significant_products" : {
            "significant_terms" : { "field" : "target" }
        }
    }
} )

{'took': 91,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 331, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'events',
    '_type': '_doc',
    '_id': 'ElM5c3IBzzKGBWwBXboH',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:mada', 'view'],
     'target': 'prod:9',
     'agent': 'michaelangelo'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'E1M5c3IBzzKGBWwBXboO',
    '_score': 1.0,
    '_source': {'actions': ['buy', 'view'],
     'target': 'prod:4',
     'agent': 'sid'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'FFM5c3IBzzKGBWwBXboV',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:leonardo', 'view'],
     'target': 'prod:12',
     'agent': 'mada'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'F1M5c3IBzzKGBWwBXbop',
    '_score': 1.0,
    '_source': {'actions': ['view', 'buy'],
     'target': 'prod:10',
     'agent': 'mada'}},
   {'_i

In [22]:
# Show products referred by sid
es.search(index='events', body={
    "query" : {
        "terms" : {"actions" : [ "referred_by:sid" ]}
    },
} )

{'took': 7,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 409, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'events',
    '_type': '_doc',
    '_id': 'DVM5c3IBzzKGBWwBXLqd',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:mada', 'referred_by:sid'],
     'target': 'prod:7',
     'agent': 'leonardo'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'EVM5c3IBzzKGBWwBXboA',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:leonardo', 'referred_by:sid'],
     'target': 'prod:2',
     'agent': 'raphael'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'IlM5c3IBzzKGBWwBXbp0',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:leonardo', 'referred_by:sid'],
     'target': 'prod:7',
     'agent': 'john'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'JFM5c3IBzzKGBWwBXbqF',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:leonardo', 'r

In [23]:
# Most significant products referred by sid
es.search(index='events', body={
    "query" : {
        "terms" : {"actions" : [ "view" ]}
    },
    "aggregations" : {
        "significant_products" : {
            "significant_terms" : { "field" : "target",
        
        "background_filter": {
                "term" : { "actions" : "referred_by:sid"}
        }
        
    }
} } } )

{'took': 14,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 331, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'events',
    '_type': '_doc',
    '_id': 'ElM5c3IBzzKGBWwBXboH',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:mada', 'view'],
     'target': 'prod:9',
     'agent': 'michaelangelo'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'E1M5c3IBzzKGBWwBXboO',
    '_score': 1.0,
    '_source': {'actions': ['buy', 'view'],
     'target': 'prod:4',
     'agent': 'sid'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'FFM5c3IBzzKGBWwBXboV',
    '_score': 1.0,
    '_source': {'actions': ['referred_by:leonardo', 'view'],
     'target': 'prod:12',
     'agent': 'mada'}},
   {'_index': 'events',
    '_type': '_doc',
    '_id': 'F1M5c3IBzzKGBWwBXbop',
    '_score': 1.0,
    '_source': {'actions': ['view', 'buy'],
     'target': 'prod:10',
     'agent': 'mada'}},
   {'_i