In [2]:
import requests
import json

# Base URL of the Elasticsearch instance; request URLs are built by appending a path to it.
elasticSearchUrl = "http://localhost:9200"

In [19]:
# multi_match - run one query text against several fields
# query  - the text to search for
# fields - the fields to search in

usersSearch = 'basketball with cartoon aliens'

search = json.dumps({
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title", "overview"],
        }
    },
    "size": 10,        # numeric value rather than the string '10'
    "explain": True,   # ask for the per-hit score explanation in the response
})

# Build the URL from the shared base URL and use the typeless search
# endpoint; the typed /tmdb/_doc/_search form is deprecated.
response = requests.get(elasticSearchUrl + "/tmdb/_search",
                        data=search,
                        headers={"Content-Type": "application/json"})
# Surface an HTTP error here instead of a KeyError on 'hits' below.
response.raise_for_status()
resp = response.json()

for idx, hit in enumerate(resp['hits']['hits']):
    print(idx, '\t', hit['_score'], '\t', hit['_source']['title'])

0 	 7.539825 	 Grown Ups
1 	 7.039643 	 Aliens
2 	 6.1616535 	 District 9
3 	 5.9619174 	 Cowboys & Aliens
4 	 4.939788 	 Pitch Black
5 	 4.6933355 	 Men in Black 3
6 	 3.8147893 	 Die Hard: With a Vengeance
7 	 3.452578 	 The Girl with the Dragon Tattoo
8 	 3.452578 	 Cloudy with a Chance of Meatballs
9 	 1.516017 	 Silver Linings Playbook


In [20]:
# Boost the weight of the title field by a factor of 10

usersSearch = 'basketball with cartoon aliens'

search = json.dumps({
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title^10", "overview"],   # ^10 multiplies the title score
        }
    },
    "size": 10,        # numeric value rather than the string '10'
    "explain": True,   # ask for the per-hit score explanation in the response
})

# Build the URL from the shared base URL and use the typeless search
# endpoint; the typed /tmdb/_doc/_search form is deprecated.
response = requests.get(elasticSearchUrl + "/tmdb/_search",
                        data=search,
                        headers={"Content-Type": "application/json"})
# Surface an HTTP error here instead of a KeyError on 'hits' below.
response.raise_for_status()
resp = response.json()

for idx, hit in enumerate(resp['hits']['hits']):
    print(idx, '\t', hit['_score'], '\t', hit['_source']['title'])

0 	 70.39642 	 Aliens
1 	 59.619175 	 Cowboys & Aliens
2 	 38.14789 	 Die Hard: With a Vengeance
3 	 34.52578 	 The Girl with the Dragon Tattoo
4 	 34.52578 	 Cloudy with a Chance of Meatballs
5 	 7.539825 	 Grown Ups
6 	 6.1616535 	 District 9
7 	 4.939788 	 Pitch Black
8 	 4.6933355 	 Men in Black 3
9 	 1.516017 	 Silver Linings Playbook


In [21]:
# Boost the weight of the overview field by a factor of 10

usersSearch = 'basketball with cartoon aliens'

search = json.dumps({
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title", "overview^10"],   # ^10 multiplies the overview score
        }
    },
    "size": 10,        # numeric value rather than the string '10'
    "explain": True,   # ask for the per-hit score explanation in the response
})

# Build the URL from the shared base URL and use the typeless search
# endpoint; the typed /tmdb/_doc/_search form is deprecated.
response = requests.get(elasticSearchUrl + "/tmdb/_search",
                        data=search,
                        headers={"Content-Type": "application/json"})
# Surface an HTTP error here instead of a KeyError on 'hits' below.
response.raise_for_status()
resp = response.json()

for idx, hit in enumerate(resp['hits']['hits']):
    print(idx, '\t', hit['_score'], '\t', hit['_source']['title'])

0 	 75.39824 	 Grown Ups
1 	 61.61653 	 District 9
2 	 49.397877 	 Pitch Black
3 	 46.93335 	 Men in Black 3
4 	 15.16017 	 Silver Linings Playbook
5 	 13.982159 	 Saving Private Ryan
6 	 13.698313 	 Toy Story 3
7 	 13.698313 	 Parker
8 	 13.697424 	 White House Down
9 	 13.296026 	 The Curious Case of Benjamin Button


Удалим индекс и создадим его заново, используя анализатор английского языка. После пересоздания индекс пуст, поэтому перед поиском документы нужно проиндексировать заново.

In [163]:
# Drop the tmdb index; the trailing expression displays the raw server reply.
r = requests.delete('{}/tmdb'.format(elasticSearchUrl))
r.text

'{"acknowledged":true}'

In [164]:
# Request the mapping of the tmdb index, rendered as YAML, and print the reply.
resp = requests.get(elasticSearchUrl + '/tmdb/_mappings',
                    params={'format': 'yaml'})
print(resp.text)

---
error:
  root_cause:
  - type: "index_not_found_exception"
    reason: "no such index [tmdb]"
    resource.type: "index_or_alias"
    resource.id: "tmdb"
    index_uuid: "_na_"
    index: "tmdb"
  type: "index_not_found_exception"
  reason: "no such index [tmdb]"
  resource.type: "index_or_alias"
  resource.id: "tmdb"
  index_uuid: "_na_"
  index: "tmdb"
status: 404



In [165]:
# Apply the English analyzer to the title and overview fields


def _text_with_keyword():
    """Return a fresh text mapping carrying a `keyword` sub-field (ignore_above 256)."""
    return {
        'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
        'type': 'text',
    }


def _english_text():
    """Return a fresh text mapping that uses the `english` analyzer."""
    return {'type': 'text', 'analyzer': 'english'}


def _typed(type_name):
    """Return a minimal mapping for a field of the given type."""
    return {'type': type_name}


# Body for the index-creation request: index settings plus field mappings.
settings = {
    'settings': {
        'number_of_shards': 1,
    },
    'mappings': {
        'properties': {
            'budget': _typed('long'),
            'genres': _text_with_keyword(),
            'id': _typed('long'),
            'overview': _english_text(),
            'production_companies': _text_with_keyword(),
            'release_date': _typed('date'),
            'revenue': _typed('long'),
            'runtime': _typed('long'),
            'tagline': _text_with_keyword(),
            'title': _english_text(),
            'vote_average': _typed('float'),
            'vote_count': _typed('long'),
        },
    },
}
    


In [166]:
from time import sleep

# Create the tmdb index, sending the `settings` dict as the JSON request body.
create_body = json.dumps(settings)
resp = requests.put(
    '{}/tmdb/'.format(elasticSearchUrl),
    headers={"Content-Type": "application/json"},
    data=create_body,
)
print(resp.text)

{"acknowledged":true,"shards_acknowledged":true,"index":"tmdb"}


In [167]:
# Refresh the index after changing the settings
refresh_url = '{}/tmdb/_refresh'.format(elasticSearchUrl)
resp = requests.get(refresh_url)
resp.text

'{"_shards":{"total":2,"successful":1,"failed":0}}'

In [168]:
# Repeat the search now that title and overview use the English analyzer

usersSearch = 'basketball with cartoon aliens'

search = json.dumps({
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title", "overview"],
        }
    },
    "size": 10,        # numeric value rather than the string '10'
    "explain": True,   # ask for the per-hit score explanation in the response
})

# Build the URL from the shared base URL and use the typeless search
# endpoint; the typed /tmdb/_doc/_search form is deprecated.
response = requests.get(elasticSearchUrl + "/tmdb/_search",
                        data=search,
                        headers={"Content-Type": "application/json"})
# Surface an HTTP error here instead of a KeyError on 'hits' below.
response.raise_for_status()
resp = response.json()

for idx, hit in enumerate(resp['hits']['hits']):
    print(idx, '\t', hit['_score'], '\t', hit['_source']['title'])

0 	 7.367055 	 Grown Ups
1 	 6.2199554 	 Alien
2 	 6.2199554 	 Aliens
3 	 5.078388 	 Cowboys & Aliens
4 	 4.54947 	 District 9
5 	 4.3978195 	 Edge of Tomorrow
6 	 4.3978195 	 Independence Day
7 	 4.288439 	 Avatar
8 	 3.70602 	 Battleship
9 	 3.6683912 	 The Host


In [169]:
# Same search against the re-created index, with the title field boosted x10

usersSearch = 'basketball with cartoon aliens'

search = json.dumps({
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title^10", "overview"],   # ^10 multiplies the title score
        }
    },
    "size": 10,        # numeric value rather than the string '10'
    "explain": True,   # ask for the per-hit score explanation in the response
})

# Build the URL from the shared base URL and use the typeless search
# endpoint; the typed /tmdb/_doc/_search form is deprecated.
response = requests.get(elasticSearchUrl + "/tmdb/_search",
                        data=search,
                        headers={"Content-Type": "application/json"})
# Surface an HTTP error here instead of a KeyError on 'hits' below.
response.raise_for_status()
resp = response.json()

for idx, hit in enumerate(resp['hits']['hits']):
    print(idx, '\t', hit['_score'], '\t', hit['_source']['title'])

0 	 62.19955 	 Alien
1 	 62.19955 	 Aliens
2 	 50.78388 	 Cowboys & Aliens
3 	 7.367055 	 Grown Ups
4 	 4.54947 	 District 9
5 	 4.3978195 	 Edge of Tomorrow
6 	 4.3978195 	 Independence Day
7 	 4.288439 	 Avatar
8 	 3.70602 	 Battleship
9 	 3.6683912 	 The Host


In [170]:
# Same search against the re-created index, with the overview field boosted x10

usersSearch = 'basketball with cartoon aliens'

search = json.dumps({
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title", "overview^10"],   # ^10 multiplies the overview score
        }
    },
    "size": 10,        # numeric value rather than the string '10'
    "explain": True,   # ask for the per-hit score explanation in the response
})

# Build the URL from the shared base URL and use the typeless search
# endpoint; the typed /tmdb/_doc/_search form is deprecated.
response = requests.get(elasticSearchUrl + "/tmdb/_search",
                        data=search,
                        headers={"Content-Type": "application/json"})
# Surface an HTTP error here instead of a KeyError on 'hits' below.
response.raise_for_status()
resp = response.json()

for idx, hit in enumerate(resp['hits']['hits']):
    print(idx, '\t', hit['_score'], '\t', hit['_source']['title'])

0 	 73.67055 	 Grown Ups
1 	 45.4947 	 District 9
2 	 43.978195 	 Edge of Tomorrow
3 	 43.978195 	 Independence Day
4 	 42.884388 	 Avatar
5 	 37.0602 	 Battleship
6 	 36.683914 	 The Host
7 	 33.012287 	 Aliens
8 	 33.012287 	 Pitch Black
9 	 32.36443 	 Spider-Man 3


In [172]:
# Reduce the weight of the title field

usersSearch = 'basketball with cartoon aliens'

search = json.dumps({
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["title^0.1", "overview"],   # ^0.1 scales the title score down
        }
    },
    "size": 100,       # numeric value rather than the string '100'
    "explain": True,   # ask for the per-hit score explanation in the response
})

# Build the URL from the shared base URL and use the typeless search
# endpoint; the typed /tmdb/_doc/_search form is deprecated.
response = requests.get(elasticSearchUrl + "/tmdb/_search",
                        data=search,
                        headers={"Content-Type": "application/json"})
# Surface an HTTP error here instead of a KeyError on 'hits' below.
response.raise_for_status()
resp = response.json()

for idx, hit in enumerate(resp['hits']['hits']):
    print(idx, '\t', hit['_score'], '\t', hit['_source']['title'])

0 	 7.367055 	 Grown Ups
1 	 4.54947 	 District 9
2 	 4.3978195 	 Edge of Tomorrow
3 	 4.3978195 	 Independence Day
4 	 4.288439 	 Avatar
5 	 3.70602 	 Battleship
6 	 3.6683912 	 The Host
7 	 3.3012288 	 Aliens
8 	 3.3012288 	 Pitch Black
9 	 3.2364428 	 Spider-Man 3
10 	 3.0564933 	 Alien
11 	 2.8455405 	 Men in Black
12 	 2.8455405 	 Riddick
13 	 2.5785875 	 Men in Black 3
14 	 2.5003967 	 Men in Black II
15 	 2.2299223 	 Wreck-It Ralph
16 	 0.50783885 	 Cowboys & Aliens


In [174]:
# Plain match query against the title field only.
search = json.dumps({
    "query": {
        "match": {
            "title": {
                "query": "Star Trek"
            }
        }
    }
})

# Build the URL from the shared base URL and use the typeless search
# endpoint; the typed /tmdb/_doc/_search form is deprecated.
response = requests.get(elasticSearchUrl + "/tmdb/_search",
                        data=search,
                        headers={"Content-Type": "application/json"})
# Raise on an HTTP error rather than displaying an error payload as if it were results.
response.raise_for_status()
response.json()

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': '13475',
    '_index': 'tmdb',
    '_score': 9.447634,
    '_source': {'budget': 150000000,
     'genres': ['Action', 'Adventure', 'Science Fiction'],
     'id': 13475,
     'overview': 'The fate of the galaxy rests in the hands of bitter rivals. One, James Kirk, is a delinquent, thrill-seeking Iowa farm boy. The other, Spock, a Vulcan, was raised in a logic-based society that rejects all emotion. As fiery instinct clashes with calm reason, their unlikely but powerful partnership is the only thing capable of leading their crew through unimaginable danger, boldly going where no one has gone before. The human adventure has begun again.',
     'production_companies': ['Paramount Pictures',
      'Bad Robot',
      'Spyglass Entertainment',
      'MavroCine Pictures GmbH & Co. KG'],
     'release_date': '2009-05-07',
     'revenue': 385680446,
     'runtime': 127,
     'tagline': 'The future be

In [153]:
# Fetch one document by id, building the URL from the shared base URL.
# `?pretty` is dropped: the reply is parsed, so server-side formatting is irrelevant.
requests.get(elasticSearchUrl + "/tmdb/_doc/22794").json()

{'_id': '22794',
 '_index': 'tmdb',
 '_primary_term': 1,
 '_seq_no': 396,
 '_source': {'budget': 100000000,
  'genres': ['Animation', 'Comedy', 'Family'],
  'id': 22794,
  'overview': 'Inventor Flint Lockwood creates a machine that makes clouds rain food, enabling the down-and-out citizens of Chewandswallow to feed themselves. But when the falling food reaches gargantuan proportions, Flint must scramble to avert disaster. Can he regain control of the machine and put an end to the wild weather before the town is destroyed?',
  'production_companies': ['Sony Pictures Animation', 'Columbia Pictures'],
  'release_date': '2009-09-16',
  'revenue': 242988466,
  'runtime': 90,
  'tagline': 'Prepare to get served.',
  'title': 'Cloudy with a Chance of Meatballs',
  'vote_average': 6.3,
  'vote_count': 601},
 '_type': '_doc',
 '_version': 1,
 'found': True}

In [114]:
# Ask for the number of documents in the tmdb index, building the URL from the shared base URL.
# `?pretty` is dropped: the reply is parsed, so server-side formatting is irrelevant.
requests.get(elasticSearchUrl + "/tmdb/_count").json()

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 1, 'total': 1},
 'count': 0}