## Importing and testing

In [2]:
from elasticsearch import Elasticsearch
# Create the client. No hosts are passed, so the library's default connection
# settings apply (presumably a local node -- confirm for your client version).
es = Elasticsearch()

In [3]:
# Connectivity check before doing any index work; the recorded result is True.
es.ping()

True

## Creating index

In [104]:
# Name of the index that every create / index / search call in this notebook targets.
employee_es_index = "test_analyzer_employee_data"

In [105]:
# Index definition (settings + mappings) for the analyzer experiment.
#
# "settings.analysis" declares the custom analyzers and token filters;
# "mappings.properties" assigns an index-time analyzer and a search-time
# analyzer to each text field.
test_employee_data_mapping = {
    "settings": {
        "analysis": {
            "analyzer": {
                # Keyword tokenizer with no filters.
                "exact": {
                    "tokenizer": "keyword"
                },
                # Keyword tokenizer followed by lowercasing.
                "case_insensitive_exact": {
                    "tokenizer": "keyword",
                    "filter": ["lowercase"]
                },
                # Keyword tokenizer, then stop-word removal, lowercasing and
                # conditional stemming. Used as the search analyzer for "about".
                "case_insensitive_stemmed_exact_no_stop": {
                    "tokenizer": "keyword",
                    "filter": ["case_insensitive_stop", "lowercase", "en_stem"]
                },
                # Standard tokenizer followed by lowercasing.
                "case_insensitive_tokens": {
                    "tokenizer": "standard",
                    "filter": ["lowercase"]
                },
                # Standard tokenizer, stop-word removal, lowercasing.
                # Deliberately no "type": "standard" here: that value selects the
                # built-in standard analyzer instead of this tokenizer/filter
                # chain, so the stop filter below would never be applied.
                "case_insensitive_tokens_no_stop": {
                    "tokenizer": "standard",
                    "filter": ["case_insensitive_stop", "lowercase"]
                },
                # Standard tokenizer, then shingles, stop-word removal,
                # lowercasing and conditional stemming. Used at index time for
                # "about" and at index and search time for "interests".
                "case_insensitive_stemmed_tokens_no_stop": {
                    "tokenizer": "standard",
                    "filter": ["shingle_filter", "case_insensitive_stop", "lowercase", "en_stem"]
                }
            },
            "filter": {
                # Stop filter configured to ignore case.
                "case_insensitive_stop": {
                    "type": "stop",
                    "ignore_case": "true"
                },
                # Apply the stemmer only to terms longer than 3 characters.
                "en_stem": {
                    "type": "condition",
                    "filter": ["stemmer"],
                    "script": {
                        "source": "token.getTerm().length() > 3"
                    }
                },
                # Shingles of size 2 to 4, with single tokens kept as well.
                "shingle_filter": {
                    "type": "shingle",
                    "max_shingle_size": 4,
                    "min_shingle_size": 2,
                    "output_unigrams": "true"
                }
            }
        }
    },
    "mappings": {
        # Only the properties listed here are mapped; "dynamic" is switched off.
        "dynamic": "false",
        "properties": {
            "first_name": {
                "type": "text",
                "analyzer": "case_insensitive_exact",
                "search_analyzer": "case_insensitive_exact"
            },
            "last_name": {
                "type": "text",
                "analyzer": "case_insensitive_exact",
                "search_analyzer": "case_insensitive_exact"
            },
            "age": {
                "type": "integer"
            },
            # Indexed with the shingled token analyzer but searched with the
            # keyword-tokenized one, so index and query analysis differ.
            "about": {
                "type": "text",
                "analyzer": "case_insensitive_stemmed_tokens_no_stop",
                "search_analyzer": "case_insensitive_stemmed_exact_no_stop"
            },
            "interests": {
                "type": "text",
                "analyzer": "case_insensitive_stemmed_tokens_no_stop",
                "search_analyzer": "case_insensitive_stemmed_tokens_no_stop"
            }
        }
    }
}

In [106]:
# Create the index with the analysis settings and mappings defined above.
# Arguments are passed by keyword: newer elasticsearch-py clients accept
# keyword arguments only, and keywords also work on the 7.x client.
# Note: this call fails if the index already exists; delete it first to re-run.
es.indices.create(index=employee_es_index, body=test_employee_data_mapping)

{'acknowledged': True,
 'shards_acknowledged': True,
 'index': 'test_analyzer_employee_data'}

In [107]:
# es.indices.delete(index=employee_es_index)  # uncomment to delete the index, e.g. before re-creating it with a changed mapping

## Indexing data

In [108]:
# First sample employee. id_ is passed as the document id when indexing.
id_ = 1

data = {
    "first_name": "John",
    "last_name": "Smith",
    "age": 25,
    "about": "I love to go rock climbing",
    "interests": ["sports", "music"],
}

In [109]:
# Index the document under an explicit id.
# refresh="wait_for" makes the call return only after the document has become
# visible to search, so the search cells give the same results when the
# notebook is executed top to bottom without pauses.
es.index(index=employee_es_index, body=data, id=id_, refresh="wait_for")

{'_index': 'test_analyzer_employee_data',
 '_type': '_doc',
 '_id': '1',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 0,
 '_primary_term': 1}

In [110]:
# Second sample employee. id_ is passed as the document id when indexing.
id_ = 2

data = {
    "first_name": "Araya",
    "last_name": "Smith",
    "age": 22,
    "about": "I love to read books",
    "interests": ["books", "music"],
}

In [111]:
# Index the document under an explicit id.
# refresh="wait_for" makes the call return only after the document has become
# visible to search, so the search cells give the same results when the
# notebook is executed top to bottom without pauses.
es.index(index=employee_es_index, body=data, id=id_, refresh="wait_for")

{'_index': 'test_analyzer_employee_data',
 '_type': '_doc',
 '_id': '2',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 1,
 '_primary_term': 1}

## Checking data

In [113]:
# Sanity check: fetch up to 10 documents with a match_all query.
es.search(
    index=employee_es_index,
    body={"query": {"match_all": {}}},
    size=10,
)

{'took': 3,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'test_analyzer_employee_data',
    '_type': '_doc',
    '_id': '1',
    '_score': 1.0,
    '_source': {'first_name': 'John',
     'last_name': 'Smith',
     'age': 25,
     'about': 'I love to go rock climbing',
     'interests': ['sports', 'music']}},
   {'_index': 'test_analyzer_employee_data',
    '_type': '_doc',
    '_id': '2',
    '_score': 1.0,
    '_source': {'first_name': 'Araya',
     'last_name': 'Smith',
     'age': 22,
     'about': 'I love to read books',
     'interests': ['books', 'music']}}]}}

## Searching

Last Name

In [92]:
# Match query on last_name with a lowercase search term.
body = {
    "query": {
        "match": {
            "last_name": {"query": "smith"},
        },
    },
}

In [93]:
# Run the last_name query; both sample documents are returned in the recorded output.
es.search(index=employee_es_index, body=body)

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.18232156,
  'hits': [{'_index': 'test_analyzer_employee_data',
    '_type': '_doc',
    '_id': '1',
    '_score': 0.18232156,
    '_source': {'first_name': 'John',
     'last_name': 'Smith',
     'age': 25,
     'about': 'I love to go rock climbing',
     'interests': ['sports', 'music']}},
   {'_index': 'test_analyzer_employee_data',
    '_type': '_doc',
    '_id': '2',
    '_score': 0.18232156,
    '_source': {'first_name': 'Araya',
     'last_name': 'Smith',
     'age': 22,
     'about': 'I love to read books',
     'interests': ['books', 'music']}}]}}

Interests

In [94]:
# Match query on the interests field.
body = {
    "query": {
        "match": {
            "interests": {"query": "books"},
        },
    },
}

In [95]:
# Run the interests query; only document 2 is returned in the recorded output.
es.search(index=employee_es_index, body=body)

{'took': 2,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 0.6931471,
  'hits': [{'_index': 'test_analyzer_employee_data',
    '_type': '_doc',
    '_id': '2',
    '_score': 0.6931471,
    '_source': {'first_name': 'Araya',
     'last_name': 'Smith',
     'age': 22,
     'about': 'I love to read books',
     'interests': ['books', 'music']}}]}}

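About

The `about` field is indexed as lowercased, stemmed word shingles but searched with a keyword-tokenized analyzer, so the whole query string is analyzed as a single token. "love books" therefore finds nothing (the two words are not adjacent in any document), while "rocks" is stemmed to "rock" and matches.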

In [96]:
# Two-word match query on the about field.
body = {
    "query": {
        "match": {
            "about": {"query": "love books"},
        },
    },
}

In [97]:
# Run the two-word about query; the recorded output has zero hits.
es.search(index=employee_es_index, body=body)

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [98]:
# Single-word match query on about, using the plural form "rocks".
body = {
    "query": {
        "match": {
            "about": {"query": "rocks"},
        },
    },
}

In [99]:
# Run the "rocks" query; the recorded output returns document 1 ("... rock climbing").
es.search(index=employee_es_index, body=body)

{'took': 0,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 0.9186288,
  'hits': [{'_index': 'test_analyzer_employee_data',
    '_type': '_doc',
    '_id': '1',
    '_score': 0.9186288,
    '_source': {'first_name': 'John',
     'last_name': 'Smith',
     'age': 25,
     'about': 'I love to go rock climbing',
     'interests': ['sports', 'music']}}]}}

### Highlights

In [100]:
# Base query for the highlighting example: match "rocks" against about.
body = {
    "query": {
        "match": {
            "about": {"query": "rocks"},
        },
    },
}

In [101]:
# Fields to request highlighting for; used as the keys of the highlight "fields" map.
sources = ["about"]

In [102]:
# Attach a highlight section to the query: matches are wrapped in the <em> tags
# below, and every field listed in `sources` is requested with
# number_of_fragments set to 0.
body["highlight"] = dict(
    pre_tags=["<em class='highlight'>"],
    post_tags=["</em>"],
    fields={field_name: {"number_of_fragments": 0} for field_name in sources},
)

In [103]:
# Run the query with highlighting; the recorded hit carries a 'highlight' entry
# for about with the matched term wrapped in the configured tags.
es.search(index=employee_es_index, body=body)

{'took': 26,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 0.9186288,
  'hits': [{'_index': 'test_analyzer_employee_data',
    '_type': '_doc',
    '_id': '1',
    '_score': 0.9186288,
    '_source': {'first_name': 'John',
     'last_name': 'Smith',
     'age': 25,
     'about': 'I love to go rock climbing',
     'interests': ['sports', 'music']},
    'highlight': {'about': ["I love to go <em class='highlight'>rock</em> climbing"]}}]}}