In [29]:
import traceback
from pymongo import MongoClient
from elasticsearch import Elasticsearch

In [30]:
db = MongoClient('mongodb://127.0.0.1:27017')['Dogforum']

In [31]:
es = Elasticsearch()

In [None]:
len(db.collection_names())

In [40]:
def article_setting():
    text_mapping = {
      "type": "text",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    }
    
    n_gram = {
      "type": "text",
      "analyzer": "autocomplete",
      "search_analyzer": "autocomplete_search",
      "fields": {
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    }
    
    article_params = {
      "properties": {
        "author_info": {
          "properties": {
            "join_date": text_mapping,
            "location": text_mapping,
            "member_type": text_mapping,
            "mentioned": text_mapping,
            "posts": text_mapping,
            "tagged": text_mapping
          }
        },
        "author_name": text_mapping,
        "author_url": text_mapping,
        "content": n_gram,
        "floor": {
          "type": "integer"
        },
        "id": text_mapping,
        "page": text_mapping,
        "post_date": {
          "properties": {
            "ampm": text_mapping,
            "date": {
              "type": "date",
              "format": "MM-dd-yyyy||strict_year_month_day"
            },
            "time": {
              "type": "date",
              "format": "HH:mm||strict_hour_minute"
            }
          }
        },
        "p_date": {
        "type": "integer"
        },
        "title": n_gram,
        "quotation": n_gram,
        "url":text_mapping
      }
    }
    return article_params

In [43]:
_index_mappings = {
  "settings": {
    "analysis": {
      "analyzer": {
        "autocomplete": {
          "tokenizer": "autocomplete",
          "filter": [
            "lowercase"
          ]
        },
        "autocomplete_search": {
          "tokenizer": "lowercase"
        }
      },
      "tokenizer": {
        "autocomplete": {
          "type": "edge_ngram",
          "min_gram": 3,
          "max_gram": 10,
          "token_chars": [
            "letter"
          ]
        }
      }
    }
  },
  "mappings": {
     "Grooming": article_setting(),
     "New_Additions": article_setting()
  }
}

In [None]:
# if es.indices.exists(index='dogforum') is not True:
#    es.indices.create(index='dogforum', body=index_map(type_name))

In [41]:
for type_name in db.collection_names():
    print(type_name)

Grooming
New_Additions
Train_Behavior
Introductions
DogDetail
General_Discussion
Breeds
Perform_sports
Dog_health
Gear_supplies
Food
Memorials


In [44]:
es.indices.create(index='dogforum', body=_index_mappings)

PUT http://localhost:9200/dogforum [status:400 request:0.005s]


RequestError: TransportError(400, 'illegal_argument_exception', 'Rejecting mapping update to [dogforum] as the final mapping would have more than 1 type: [New_Additions, Grooming]')

# For single insert

In [None]:
cursor = db[type_name].find({}, projection={'_id':False})

In [None]:
_docs = [x for x in cursor]

In [None]:
_docs[1].get('id').replace('/','')

In [None]:
for _doc in _docs[:10]:
    print(_doc.get('id').replace('/',''))

In [None]:
processed = 0
for _doc in _docs[:100]:
    try:
        es.index(index='dogforum', doc_type=type_name, id=_doc.get('id').replace('/',''), body=_doc)
        processed += 1
    except:
        traceback.print_exc()
print('Processed: ' + str(processed))