In [21]:
def _keyword_subfield():
    """Return a fresh ``fields`` dict holding an exact-match ``keyword`` sub-field."""
    return {
        "keyword": {
            "type": "keyword",
            "ignore_above": 256,
        }
    }


def _plain_text_field():
    """Return a fresh mapping for a ``text`` field with a ``keyword`` sub-field."""
    return {
        "type": "text",
        "fields": _keyword_subfield(),
    }


def _autocomplete_field():
    """Return a fresh mapping for a ``text`` field using the autocomplete analyzers."""
    return {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search",
        "fields": _keyword_subfield(),
    }


def index_map(type_name):
    """Build the index-creation body (settings + mappings) for the article index.

    Settings declare two analyzers:

    * ``autocomplete`` -- used at index time; an ``edge_ngram`` tokenizer
      (4 to 10 characters, letters only) followed by a ``lowercase`` filter.
    * ``autocomplete_search`` -- used at search time; the ``lowercase``
      tokenizer only.

    Mappings are declared under ``type_name``: ``content``, ``tags`` and
    ``title`` use the autocomplete analyzers, ``board`` and ``title_url`` are
    plain text, and ``title_id`` is a ``long``. Every text field also carries a
    ``keyword`` sub-field.

    Each field gets its own dict instance, so callers may tweak one field's
    mapping in place without affecting the others.

    Args:
        type_name: Name of the mapping type placed under ``"mappings"``.

    Returns:
        dict: A body suitable for passing to ``es.indices.create(body=...)``.
    """
    # NOTE(review): query terms longer than max_gram (10) are not truncated by
    # the search analyzer, so they may fail to match the indexed n-grams --
    # confirm whether that is acceptable for this index.
    analysis_settings = {
        "analyzer": {
            "autocomplete": {
                "tokenizer": "autocomplete",
                "filter": [
                    "lowercase"
                ]
            },
            "autocomplete_search": {
                "tokenizer": "lowercase"
            }
        },
        "tokenizer": {
            "autocomplete": {
                "type": "edge_ngram",
                "min_gram": 4,
                "max_gram": 10,
                "token_chars": [
                    "letter"
                ]
            }
        }
    }

    field_properties = {
        "board": _plain_text_field(),
        "content": _autocomplete_field(),
        "tags": _autocomplete_field(),
        "title": _autocomplete_field(),
        "title_id": {
            "type": "long"
        },
        "title_url": _plain_text_field(),
    }

    return {
        "settings": {
            "analysis": analysis_settings
        },
        "mappings": {
            type_name: {
                "properties": field_properties
            }
        }
    }

In [2]:
import traceback
from pymongo import MongoClient
from elasticsearch import Elasticsearch

In [3]:
# Handle to the `forum` database on the MongoDB server at 127.0.0.1:27017.
db = MongoClient('mongodb://127.0.0.1:27017').get_database('forum')

In [24]:
# Source collection holding the article documents to be indexed.
collection = db['es_article']

In [5]:
# Elasticsearch client constructed with no arguments, so the library's default
# connection settings apply (presumably a local node -- confirm).
es = Elasticsearch()

In [25]:
# Create the `articles` index with the autocomplete settings/mappings only if
# it is missing, so re-running this cell is harmless. Plain truthiness is used
# rather than `is not True`: an identity check against the `True` singleton
# treats any truthy non-bool response as "index missing".
if not es.indices.exists(index='articles'):
    es.indices.create(index='articles', body=index_map('article'))

In [26]:
# Query every document in the collection (empty filter), leaving Mongo's
# `_id` field out of the returned documents.
cursor2 = collection.find(
    {},
    projection={'_id': False},
)

In [27]:
# Drain the cursor into a list so the documents can be counted and sliced.
docs = list(cursor2)
len(docs)

45677

In [28]:
# Index the first 1000 documents, one request per document. A failure on one
# document is reported and skipped so the rest of the batch still goes through.
processed = 0
for _doc in docs[:1000]:
    try:
        es.index(index='articles', doc_type='article', body=_doc)
    except Exception:
        # `Exception` (not a bare `except:`) so that KeyboardInterrupt and
        # SystemExit still propagate and the kernel can interrupt the loop.
        traceback.print_exc()
    else:
        # Count only documents whose index request did not raise.
        processed += 1
print('Processed: ' + str(processed))

Processed: 1000
