# Hello world! 🚀

In [1]:
import requests as r


class Elastic:
    """
    A deliberately simple wrapper around the Elasticsearch REST API.

    Each public method issues one HTTP request through ``requests``
    (imported in this notebook as ``r``), prints the response body and
    returns ``None``.

    ``?pretty`` is appended directly after the path, so ``path`` itself
    must not carry a query string.
    """
    # Default headers, sent with every request unless ``headers=`` is given.
    headers = {
        'Content-Type': 'application/json'
    }
    # URL template; ``{path}`` is filled with the normalized request path.
    url = 'http://elsatic:9200/{path}?pretty'
    # Default timeout in seconds, used unless ``timeout=`` is given.
    # ``requests`` applies no timeout of its own, so an unreachable host
    # could otherwise leave the cell hanging.
    timeout = 10

    def _request(self, method: str, path: str, **kwargs) -> None:
        """
        Send one request and print the response body.

        :param method: name of the ``requests`` function to call
            (``'get'``, ``'post'``, ``'put'`` or ``'delete'``).
        :param path: request path, with or without leading slashes.
        :param kwargs: forwarded to ``requests``; ``headers`` and
            ``timeout`` fall back to the class defaults when absent.
        """
        kwargs.setdefault('headers', self.headers)
        kwargs.setdefault('timeout', self.timeout)
        # The template already supplies the slash after the host; dropping
        # leading slashes keeps the default path '/' from becoming '//'.
        target = self.url.format(path=path.lstrip('/'))
        res = getattr(r, method)(target, **kwargs)
        print(res.text)

    def get(self, path: str = '/', **kwargs) -> None:
        """Send a GET request to ``path`` and print the response body."""
        self._request('get', path, **kwargs)

    def post(self, path: str = '/', **kwargs) -> None:
        """Send a POST request to ``path`` (e.g. with ``json=...``) and print the response body."""
        self._request('post', path, **kwargs)

    def put(self, path: str = '/', **kwargs) -> None:
        """Send a PUT request to ``path`` (e.g. with ``json=...``) and print the response body."""
        self._request('put', path, **kwargs)

    def delete(self, path: str = '/', **kwargs) -> None:
        """Send a DELETE request to ``path`` and print the response body."""
        self._request('delete', path, **kwargs)


# One shared client, used by every cell below.
elastic = Elastic()

# GET / (the default path) -> node name, cluster name and version info
elastic.get()

{
  "name" : "5TSIKPT",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "_IO8aZNEQh6BjWPJVRvOvQ",
  "version" : {
    "number" : "5.6.4",
    "build_hash" : "8bbedf5",
    "build_date" : "2017-10-31T18:55:38.105Z",
    "build_snapshot" : false,
    "lucene_version" : "6.6.1"
  },
  "tagline" : "You Know, for Search"
}



---

### Elasticsearch info
[\[docs\]](https://www.elastic.co/guide/en/elasticsearch/reference/current/cat.html)

In [5]:
# GET /_cat/health -> one-line cluster status
elastic.get(path='_cat/health')

1532955411 12:56:51 elasticsearch yellow 1 1 13 13 0 0 10 0 - 56.5%



In [6]:
# GET /_cat/allocation -> how the data is allocated across nodes
elastic.get(path='_cat/allocation')

13 513.6mb 133.5gb 332.1gb 465.7gb 28 127.0.0.1 127.0.0.1 5TSIKPT
10                                                        UNASSIGNED



---

### Organization of data

**PostgreSQL**

- database
- table
- row

**Elasticsearch**

- index
- type
- document


---

### Schema

In [7]:
# GET /index_name/ -> definition of the index (aliases, mappings, ...)
elastic.get(path='library/')

{
  "library" : {
    "aliases" : { },
    "mappings" : {
      "books" : {
        "properties" : {
          "author" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "city" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "date_of_birth" : {
            "type" : "date"
          },
          "pages" : {
            "type" : "long"
          },
          "tag" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "text" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
    

In [8]:
# GET /index_name/_settings -> only the settings of the index
elastic.get(path='library/_settings')

{
  "library" : {
    "settings" : {
      "index" : {
        "creation_date" : "1532865120904",
        "number_of_shards" : "5",
        "number_of_replicas" : "1",
        "uuid" : "Wg7vFQINSMWZKCRJ__n3bQ",
        "version" : {
          "created" : "5060499"
        },
        "provided_name" : "library"
      }
    }
  }
}



---

### Create index

In [9]:
# Create an index with explicit shard and replica counts.
# Running this against an index that already exists returns
# index_already_exists_exception (status 400), as in the output below.

TEST_INDEX = 'test_index'

data = {
    "settings": {
        "index": {"number_of_shards": 2, "number_of_replicas": 0},
    },
}

# PUT /index_name
elastic.put(TEST_INDEX, json=data)

{
  "error" : {
    "root_cause" : [
      {
        "type" : "index_already_exists_exception",
        "reason" : "index [test_index/O_3WLFY4RZyULc3bs8CkLQ] already exists",
        "index_uuid" : "O_3WLFY4RZyULc3bs8CkLQ",
        "index" : "test_index"
      }
    ],
    "type" : "index_already_exists_exception",
    "reason" : "index [test_index/O_3WLFY4RZyULc3bs8CkLQ] already exists",
    "index_uuid" : "O_3WLFY4RZyULc3bs8CkLQ",
    "index" : "test_index"
  },
  "status" : 400
}



In [10]:
# GET /index_name/ -> inspect the definition of the test index
elastic.get(path=f'{TEST_INDEX}/')

{
  "test_index" : {
    "aliases" : { },
    "mappings" : {
      "test" : {
        "properties" : {
          "is_active" : {
            "type" : "boolean"
          },
          "new_field" : {
            "type" : "text",
            "analyzer" : "standard"
          },
          "number" : {
            "type" : "long"
          },
          "title" : {
            "type" : "text"
          }
        }
      },
      "auto_type" : {
        "properties" : {
          "body" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "is_active" : {
            "type" : "boolean"
          },
          "number" : {
            "type" : "long"
          },
          "title" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
 

 ---

### Delete index

In [11]:
# DELETE /index_name -> remove the whole test index
elastic.delete(TEST_INDEX)

{
  "acknowledged" : true
}



### Create type

In [12]:
# POST /index_name/type_name/
# Index a document into "auto_type"; the mapping shown in the next cell
# is generated from these field values.
data = {"body": "I am auto generated field.", "number": 3}

elastic.post(path=f'{TEST_INDEX}/auto_type/', json=data)

{
  "_index" : "test_index",
  "_type" : "auto_type",
  "_id" : "AWTrTpZ_ixB8v_q4bkha",
  "_version" : 1,
  "result" : "created",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "created" : true
}



In [13]:
# GET /index_name/_mapping/type_name -> mapping of "auto_type"
elastic.get(path=f'{TEST_INDEX}/_mapping/auto_type')

{
  "test_index" : {
    "mappings" : {
      "auto_type" : {
        "properties" : {
          "body" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "number" : {
            "type" : "long"
          }
        }
      }
    }
  }
}



In [14]:
# POST /index_name/type_name/
# Same type, but the values no longer match the generated mapping
# (body: text, number: long): a boolean and a non-integer number.
# The output below shows the document is still created.
data = {"body": False, "number": 3.5}

elastic.post(path=f'{TEST_INDEX}/auto_type/', json=data)

{
  "_index" : "test_index",
  "_type" : "auto_type",
  "_id" : "AWTrT17dixB8v_q4bkhb",
  "_version" : 1,
  "result" : "created",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "created" : true
}



___

### Create type
Define the type's mapping explicitly before indexing any documents into it.

In [15]:
# PUT /index_name/type_name/_mapping
# Declare the field types of the new type up front.
DOC_TYPE = 'doc_type'
data = {
    "properties": {
        "title": {"type": "text"},
        "number": {"type": "long"},
        "is_active": {"type": "boolean"},
    },
}

# Elastic.put prints the response and returns None, so the result is not
# bound to a name.
elastic.put(f'{TEST_INDEX}/{DOC_TYPE}/_mapping', json=data)

{
  "acknowledged" : true
}



In [16]:
# GET /index_name/_mapping/type_name -> mapping declared for DOC_TYPE
elastic.get(path=f'{TEST_INDEX}/_mapping/{DOC_TYPE}')

{
  "test_index" : {
    "mappings" : {
      "doc_type" : {
        "properties" : {
          "is_active" : {
            "type" : "boolean"
          },
          "number" : {
            "type" : "long"
          },
          "title" : {
            "type" : "text"
          }
        }
      }
    }
  }
}



### Field datatypes

[\[docs\]](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html)


**String**

- text
- keyword

**Number**

- integer
- float
- ...

**Date**

- date

**Boolean**

- boolean

**Binary**
- binary

**Range**
- integer_range
- ...

**Geo-point**

**Object**

**Array**

...

---

### String

```json
{
  "mappings": {
    "my_index": {
      "properties": {
        "full_name": {
          "type":  "text",
          "analyzer": "standard"
        }
      }
    }
  }
}
```

---
"**type**":  "text" / "keyword"

(analyze, filter, prefix)


---
"**analyzer**": "standard"

---
"**index**": false

___
"**similarity**": "BM25" | "classic" (TF/IDF) | "boolean"


---
**fields:**

```json
"city": {
  "type": "text",
  "fields": {
    "keyword": { 
      "type":  "keyword"
    }
  }
}
```

(analyzers, languages, types)



[\[Example\]](https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-fields.html#_multi_fields_with_multiple_analyzers)

### Analyzer

[\[docs\]](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-tokenizers.html)

Analyzer

    - character filters
    - tokenizers
    - filters

### Character filters
___

**raw string:**
```html
<a href="/">Link to sources!</a>
```

**prepared string:**

```
 Link to sources!
```

### Tokenizers

___

**prepared string:**

```
 Link to sources!
```

**tokens**
```
 [Link, to, sources]
```


### Filters

___

**tokens**
```
 [Link, to, sources]
```


**final result:**
```
 [link, sources]
```



built-in, text, custom, query, changed

___

In [21]:
# POST /_analyze -> run the "ngram" tokenizer on a sample text
data = {
    "tokenizer": "ngram",
    "text": "The most interesting query in the world!",
}

# Elastic.post prints the response and returns None, so the result is not
# bound to a name; the path has no placeholders, so no f-string is needed.
elastic.post('_analyze', json=data)

{
  "tokens" : [
    {
      "token" : "T",
      "start_offset" : 0,
      "end_offset" : 1,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "Th",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "word",
      "position" : 1
    },
    {
      "token" : "h",
      "start_offset" : 1,
      "end_offset" : 2,
      "type" : "word",
      "position" : 2
    },
    {
      "token" : "he",
      "start_offset" : 1,
      "end_offset" : 3,
      "type" : "word",
      "position" : 3
    },
    {
      "token" : "e",
      "start_offset" : 2,
      "end_offset" : 3,
      "type" : "word",
      "position" : 4
    },
    {
      "token" : "e ",
      "start_offset" : 2,
      "end_offset" : 4,
      "type" : "word",
      "position" : 5
    },
    {
      "token" : " ",
      "start_offset" : 3,
      "end_offset" : 4,
      "type" : "word",
      "position" : 6
    },
    {
      "token" : " m",
      "start_offset" : 3,
      "end_offset" : 5,
      

In [18]:
# Register a custom analyzer on the test index: "html_strip" char filter,
# a "letter" tokenizer, then the "lowercase" and a custom stop-word filter.
data = {
    "settings": {
        "analysis": {
            "analyzer": {
                "my_analyzer": {
                    "tokenizer": "new_tokenizer",
                    "char_filter": ["html_strip"],
                    "filter": ["lowercase", "stop_filter"],
                },
            },
            "tokenizer": {
                "new_tokenizer": {"type": "letter"},
            },
            "filter": {
                "stop_filter": {
                    "type": "stop",
                    "stopwords": ["the", "is", "or", "and", "online"],
                },
            },
        },
    },
}

# The index is closed while the analysis settings are written and
# reopened afterwards; the order of these calls matters.
elastic.post(path=f'{TEST_INDEX}/_close')
elastic.put(path=f'{TEST_INDEX}/_settings', json=data)
elastic.post(path=f'{TEST_INDEX}/_open')

# Read the settings back to confirm the analyzer was stored.
elastic.get(path=f'{TEST_INDEX}/_settings')


{
  "acknowledged" : true
}

{
  "acknowledged" : true
}

{
  "acknowledged" : true
}

{
  "test_index" : {
    "settings" : {
      "index" : {
        "number_of_shards" : "5",
        "provided_name" : "test_index",
        "creation_date" : "1532956152990",
        "analysis" : {
          "filter" : {
            "stop_filter" : {
              "type" : "stop",
              "stopwords" : [
                "the",
                "is",
                "or",
                "and",
                "online"
              ]
            }
          },
          "analyzer" : {
            "my_analyzer" : {
              "filter" : [
                "lowercase",
                "stop_filter"
              ],
              "char_filter" : [
                "html_strip"
              ],
              "tokenizer" : "new_tokenizer"
            }
          },
          "tokenizer" : {
            "new_tokenizer" : {
              "type" : "letter"
            }
          }
        },
        "

In [20]:
# POST /index_name/_analyze -> run the custom "my_analyzer" on a sample text
data = {
    "analyzer": "my_analyzer",
    "text": "<a>some</a> The most interesting query in the world!",
}

elastic.post(path=f'{TEST_INDEX}/_analyze', json=data)

{
  "tokens" : [
    {
      "token" : "some",
      "start_offset" : 3,
      "end_offset" : 11,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "most",
      "start_offset" : 16,
      "end_offset" : 20,
      "type" : "word",
      "position" : 2
    },
    {
      "token" : "interesting",
      "start_offset" : 21,
      "end_offset" : 32,
      "type" : "word",
      "position" : 3
    },
    {
      "token" : "query",
      "start_offset" : 33,
      "end_offset" : 38,
      "type" : "word",
      "position" : 4
    },
    {
      "token" : "in",
      "start_offset" : 39,
      "end_offset" : 41,
      "type" : "word",
      "position" : 5
    },
    {
      "token" : "world",
      "start_offset" : 46,
      "end_offset" : 51,
      "type" : "word",
      "position" : 7
    }
  ]
}

