In [2]:
import traceback
from pymongo import MongoClient
from elasticsearch import Elasticsearch

In [3]:
# Open a client against the MongoDB server on 127.0.0.1:27017, then select the
# 'Dogforum' database that the rest of the notebook reads from.
mongo_client = MongoClient('mongodb://127.0.0.1:27017')
db = mongo_client['Dogforum']

In [None]:
# Elasticsearch client constructed with no arguments, i.e. whatever connection
# defaults the installed elasticsearch-py version applies.
es = Elasticsearch()

In [None]:
# Name of the MongoDB collection to export. The same value is passed to
# index_map() as the mapping type and to es.index() as the doc_type.
type_name = 'Breeds'

In [5]:
# List the collections available in the forum database.
# The loop variable is deliberately NOT named `type_name`: at notebook top level
# a loop variable is a global, so reusing that name would silently replace the
# collection selected earlier with the last collection name printed here.
# NOTE: `collection_names()` is deprecated in newer PyMongo releases in favour
# of `list_collection_names()`; it is kept so the cell still runs on the driver
# version this notebook was written against.
for collection_name in db.collection_names():
    print(collection_name)

Grooming
New_Additions
Train_Behavior
Introductions
DogDetail
General_Discussion
Breeds
Perform_sports
Dog_health
Gear_supplies
Food
Memorials


In [None]:
# Field mapping reused for ordinary string fields: indexed as "text", with an
# additional "keyword" sub-field whose ignore_above is set to 256.
text_mapping = dict(
    type="text",
    fields=dict(
        keyword=dict(type="keyword", ignore_above=256),
    ),
)

In [None]:
# Field mapping for fields that use the custom analyzers: the "autocomplete"
# analyzer is applied at index time and "autocomplete_search" at query time.
# Like the plain text mapping, it also carries a "keyword" sub-field.
n_gram = dict(
    type="text",
    analyzer="autocomplete",
    search_analyzer="autocomplete_search",
    fields=dict(
        keyword=dict(type="keyword", ignore_above=256),
    ),
)

In [None]:
def index_map(type_name):
    """Build the index-creation body (settings plus mappings) for one document type.

    Args:
        type_name: Key under which the field properties are placed inside
            "mappings" (the mapping type name).

    Returns:
        A dict with two top-level keys:
          * "settings": analysis configuration declaring the "autocomplete"
            analyzer (edge_ngram tokenizer, 3-10 characters, letters only,
            followed by a lowercase filter) and the "autocomplete_search"
            analyzer (lowercase tokenizer).
          * "mappings": ``{type_name: {"properties": ...}}`` describing the
            forum-post fields.

    The module-level ``text_mapping`` and ``n_gram`` dicts are looked up when
    the function is called and are referenced (not copied) in the result.
    """
    analysis = {
        "analyzer": {
            "autocomplete": {
                "tokenizer": "autocomplete",
                "filter": ["lowercase"],
            },
            "autocomplete_search": {
                "tokenizer": "lowercase",
            },
        },
        "tokenizer": {
            "autocomplete": {
                "type": "edge_ngram",
                "min_gram": 3,
                "max_gram": 10,
                "token_chars": ["letter"],
            },
        },
    }

    # Every author_info sub-field uses the same plain text mapping.
    author_info_fields = (
        "join_date",
        "location",
        "member_type",
        "mentioned",
        "posts",
        "tagged",
    )
    author_info = {
        "properties": {field: text_mapping for field in author_info_fields},
    }

    # post_date is an object holding separate date and time parts plus an am/pm marker.
    post_date = {
        "properties": {
            "ampm": text_mapping,
            "date": {
                "type": "date",
                "format": "MM-dd-yyyy||strict_year_month_day",
            },
            "time": {
                "type": "date",
                "format": "HH:mm||strict_hour_minute",
            },
        },
    }

    properties = {
        "author_info": author_info,
        "author_name": text_mapping,
        "author_url": text_mapping,
        "content": n_gram,
        "floor": {"type": "integer"},
        "id": text_mapping,
        "page": text_mapping,
        "post_date": post_date,
        "p_date": {"type": "integer"},
        "title": n_gram,
        "quotation": n_gram,
        "url": text_mapping,
    }

    return {
        "settings": {"analysis": analysis},
        "mappings": {type_name: {"properties": properties}},
    }

In [None]:
# Create the 'dogforum' index, with the custom analysis settings and the field
# mapping for `type_name`, only when it does not exist yet.
# A plain truthiness test is used instead of `is not True`: an identity check
# against the True singleton would treat any truthy non-bool response as
# "missing" and attempt to create an index that already exists.
if not es.indices.exists(index='dogforum'):
    es.indices.create(index='dogforum', body=index_map(type_name))

In [None]:
# Query every document in the selected collection, excluding MongoDB's `_id`
# field from the results (presumably because ObjectId values cannot be
# serialised when the document is sent to Elasticsearch — confirm).
cursor = db[type_name].find({}, projection={'_id':False})

In [None]:
# Drain the cursor into a list so the documents can be sliced and re-used below.
_docs = list(cursor)

In [None]:
# Preview the Elasticsearch document id derived from the second document:
# its `id` value with every '/' removed.
_docs[1].get('id').replace('/','')

In [None]:
# Show the slash-stripped ids of the first ten documents as a sanity check.
for sample_doc in _docs[:10]:
    raw_id = sample_doc.get('id')
    print(raw_id.replace('/', ''))

In [None]:
# Index the first 100 documents into 'dogforum', one request per document.
# Each document's Elasticsearch id is its `id` field with '/' characters removed.
# Failures are reported with a traceback and skipped so one bad document does
# not abort the run; only successful calls are counted.
processed = 0
for _doc in _docs[:100]:
    try:
        doc_id = _doc.get('id').replace('/', '')
        es.index(index='dogforum', doc_type=type_name, id=doc_id, body=_doc)
    except Exception:
        # `except Exception` rather than a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate and the kernel's
        # "interrupt" button can actually stop this loop.
        traceback.print_exc()
    else:
        processed += 1
print('Processed: ' + str(processed))