In [4]:
import requests
import json

### Extract movies from tmbd.json

In [5]:
def extract():
    f = open('tmdb.json')
    if f:
        return json.loads(f.read());

### Indexing with Bulk API

In [6]:
def reindex(analysisSettings={}, mappingSettings={}, movieDict={}):
    settings = {
        "number_of_ shards": 1,
        "index": {
            "analysis": "analysisSettings",
        }
    }
    if mappingSettings:
        settings['mappings'] = mappingSettings
        
    resp = requests.delete('http://192.168.32.10:9200/tmdb')
    resp = requests.put('http://192.168.32.10:9200/tmdb', data=json.dumps(settings))
    bulkMovies = ""
    for id, movie in movieDict.iteritems():
        addCmd = {"index": {"_index": "tmdb",
                           "_type": "movie",
                           "_id": movie["id"]}}
        bulkMovies += json.dumps(addCmd) + "\n" + json.dumps(movie) + "\n"
    resp = requests.post("http://192.168.32.10:9200/_bulk", data=bulkMovies)

### Pulling data into Elasticsearch

In [7]:
movieDict = extract()
reindex(movieDict=movieDict)

### Search Function

In [27]:
def search(query):
    url = 'http://192.168.32.10:9200/tmdb/movie/_search'
    httpResp = requests.get(url, data=json.dumps(query))
    searchHits = json.loads(httpResp.text)['hits']
    print "Num\tRelevance Score \t\tMovie Title"
    for idx, hit in enumerate(searchHits['hits']):
        print "%s\t%s\t\t%s" % (idx + 1, hit['_score'], hit['_source']['title'])

### Search query

In [28]:
usersSearch = 'basketball with cartoon aliens'
query = {
    "size": "50",
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title^10", "overview"],
        }
    }
}

In [10]:
search(query)

Num	Relevance Score 		Movie Title
1	0.694308		Aliens
2	0.4339425		Cowboys & Aliens
3	0.4185979		The Basketball Diaries
4	0.4185979		Monsters vs Aliens
5	0.4069433		Aliens in the Attic
6	0.36976868		Friends with Kids
7	0.347154		Aliens vs Predator: Requiem
8	0.33664408		Hobo with a Shotgun
9	0.29456356		Fun with Dick and Jane
10	0.28645036		From Paris with Love
11	0.28645036		Interview with the Vampire
12	0.28645036		To Rome with Love
13	0.28645036		Trouble with the Curve
14	0.26694793		Dances with Wolves
15	0.26694793		Sleeping with the Enemy
16	0.26694793		Just Go With It
17	0.26694793		Gone with the Wind
18	0.25248307		The Girl Who Played with Fire
19	0.23357943		Girl with a Pearl Earring
20	0.23042528		Friends with Benefits
21	0.23042528		My Week with Marilyn
22	0.21483776		The Girl with the Dragon Tattoo
23	0.21483776		The Girl with the Dragon Tattoo
24	0.20162213		Die Hard: With a Vengeance
25	0.20021094		Twin Peaks: Fire Walk with Me
26	0.17281896		Cloudy with a Chance of Meatbal

### Debugging Analysis

In [19]:
resp = requests.get('http://192.168.32.10:9200/tmdb/_analyze?analyzer=standard&format=yaml', data="Fire with Fire")

print resp.text



---
tokens:
- token: "fire"
  start_offset: 0
  end_offset: 4
  type: "<ALPHANUM>"
  position: 0
- token: "with"
  start_offset: 5
  end_offset: 9
  type: "<ALPHANUM>"
  position: 1
- token: "fire"
  start_offset: 10
  end_offset: 14
  type: "<ALPHANUM>"
  position: 2



### Reindexing with English analyser

In [30]:
mappingSettings = {
    "movie": {
        "properties": {
            "title": {
                "type": "string",
                "analyser": "english"
                },
            "overview": {
                "type": "string",
                "analyser": "english"
            }
        }
    }
}
movieDict = extract()
reindex(mappingSettings=mappingSettings, movieDict=movieDict)

In [31]:
resp = requests.get('http://192.168.32.10:9200/tmdb/_analyze?field=title&format=yaml', data="Fire with Fire")
print resp.text

---
tokens:
- token: "fire"
  start_offset: 0
  end_offset: 4
  type: "<ALPHANUM>"
  position: 0
- token: "with"
  start_offset: 5
  end_offset: 9
  type: "<ALPHANUM>"
  position: 1
- token: "fire"
  start_offset: 10
  end_offset: 14
  type: "<ALPHANUM>"
  position: 2



In [32]:
search(query)

Num	Relevance Score 		Movie Title
1	0.694308		Aliens
2	0.4339425		Cowboys & Aliens
3	0.4185979		The Basketball Diaries
4	0.4185979		Monsters vs Aliens
5	0.4069433		Aliens in the Attic
6	0.347154		Aliens vs Predator: Requiem
7	0.33664408		Hobo with a Shotgun
8	0.320123		Friends with Kids
9	0.320123		From Russia With Love
10	0.29456356		Fun with Dick and Jane
11	0.28645036		From Paris with Love
12	0.28645036		Interview with the Vampire
13	0.28645036		To Rome with Love
14	0.28645036		Trouble with the Curve
15	0.26694793		Dances with Wolves
16	0.26694793		Sleeping with the Enemy
17	0.26694793		Just Go With It
18	0.26694793		Gone with the Wind
19	0.25248307		The Girl Who Played with Fire
20	0.24009225		You Don't Mess With the Zohan
21	0.23357943		Girl with a Pearl Earring
22	0.23062827		Friends with Benefits
23	0.23062827		My Week with Marilyn
24	0.23062827		Fire with Fire
25	0.21483776		The Girl with the Dragon Tattoo
26	0.21483776		The Girl with the Dragon Tattoo
27	0.20179974		Die Hard: 