In [48]:
import requests
import json

### Extract movies from tmbd.json

In [49]:
def extract():
    f = open('tmdb.json')
    if f:
        return json.loads(f.read());

### Indexing with Bulk API

In [50]:
def reindex(analysisSettings={}, mappingSettings={}, movieDict={}):
    settings = {
        "number_of_ shards": 1,
        "index": {
            "analysis": "analysisSettings",
        }
    }
    if mappingSettings:
        settings['mappings'] = mappingSettings
        
    resp = requests.delete('http://192.168.32.10:9200/tmdb')
    resp = requests.put('http://192.168.32.10:9200/tmdb', data=json.dumps(settings))
    bulkMovies = ""
    for id, movie in movieDict.iteritems():
        addCmd = {"index": {"_index": "tmdb",
                           "_type": "movie",
                           "_id": movie["id"]}}
        bulkMovies += json.dumps(addCmd) + "\n" + json.dumps(movie) + "\n"
    resp = requests.post("http://192.168.32.10:9200/_bulk", data=bulkMovies)

### Pulling data into Elasticsearch

In [51]:
movieDict = extract()
reindex(movieDict=movieDict)

### Search Function

In [52]:
def search(query):
    url = 'http://192.168.32.10:9200/tmdb/movie/_search'
    httpResp = requests.get(url, data=json.dumps(query))
    searchHits = json.loads(httpResp.text)['hits']
    print "Num\tRelevance Score \t\tMovie Title"
    for idx, hit in enumerate(searchHits['hits']):
        print "%s\t%s\t\t%s" % (idx + 1, hit['_score'], hit['_source']['title'])

### Search query

In [53]:
usersSearch = 'basketball with cartoon aliens'
query = {
    "size": "20",
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title^0.1", "overview"],
        }
    }
}

In [57]:
search(query)

Num	Relevance Score 		Movie Title
1	0.40751222		Meet Dave
2	0.27429563		Semi-Pro
3	0.26320052		Speed Racer
4	0.25425476		Aliens in the Attic
5	0.23608768		The Watch
6	0.23464145		Alien: Resurrection
7	0.23318566		Bedazzled
8	0.21495439		Space Jam
9	0.20845622		District 9
10	0.20734486		Teen Wolf
11	0.18881755		They Live
12	0.18289658		Grown Ups
13	0.17233244		Men in Black 3
14	0.12361832		White Men Can't Jump
15	0.12361832		Coach Carter
16	0.122831084		The Flintstones
17	0.11145923		Galaxy Quest
18	0.10765152		Pitch Black
19	0.098894656		Batteries Not Included
20	0.09270038		High School Musical


### Debugging Analysis

In [55]:
resp = requests.get('http://192.168.32.10:9200/tmdb/_analyze?analyzer=standard&format=yaml', data="Fire with Fire")

print resp.text



---
tokens:
- token: "fire"
  start_offset: 0
  end_offset: 4
  type: "<ALPHANUM>"
  position: 0
- token: "with"
  start_offset: 5
  end_offset: 9
  type: "<ALPHANUM>"
  position: 1
- token: "fire"
  start_offset: 10
  end_offset: 14
  type: "<ALPHANUM>"
  position: 2



### Reindexing with English analyser

In [56]:
mappingSettings = {
    "movie": {
        "properties": {
            "title": {
                "type": "string",
                "analyser": "english"
                },
            "overview": {
                "type": "string",
                "analyser": "english"
            }
        }
    }
}
movieDict = extract()
reindex(mappingSettings=mappingSettings, movieDict=movieDict)

In [31]:
resp = requests.get('http://192.168.32.10:9200/tmdb/_analyze?field=title&format=yaml', data="Fire with Fire")
print resp.text

---
tokens:
- token: "fire"
  start_offset: 0
  end_offset: 4
  type: "<ALPHANUM>"
  position: 0
- token: "with"
  start_offset: 5
  end_offset: 9
  type: "<ALPHANUM>"
  position: 1
- token: "fire"
  start_offset: 10
  end_offset: 14
  type: "<ALPHANUM>"
  position: 2



### Requesting a relavancy score explanation

In [47]:
query = {
    "explain": True,
    "query": {
        "multi_match":{
            "query": usersSearch,
            "fields": ["title^10", "overview"]
        }
    }
}
httpResp = requests.get('http://192.168.32.10:9200/tmdb/movie/_search', data=json.dumps(query))
jsonResp = json.loads(httpResp.text)
print "Explain for %s" % jsonResp['hits']['hits'][0]['_source']['title']
print json.dumps(jsonResp['hits']['hits'][0]['_explanation'], indent=True)

Explain for Aliens
{
 "description": "sum of:", 
 "value": 0.69430804, 
 "details": [
  {
   "description": "max of:", 
   "value": 0.69430804, 
   "details": [
    {
     "description": "product of:", 
     "value": 0.69430804, 
     "details": [
      {
       "description": "sum of:", 
       "value": 2.7772322, 
       "details": [
        {
         "description": "weight(title:aliens in 129) [PerFieldSimilarity], result of:", 
         "value": 2.7772322, 
         "details": [
          {
           "description": "score(doc=129,freq=1.0), product of:", 
           "value": 2.7772322, 
           "details": [
            {
             "description": "queryWeight, product of:", 
             "value": 0.4547279, 
             "details": [
              {
               "description": "boost", 
               "value": 10.0, 
               "details": []
              }, 
              {
               "description": "idf(docFreq=3, maxDocs=661)", 
               "value": 6.1074595