# Introduction to Elasticsearch

First, let's initialize a connection to Elasticsearch. We'll call it `es`. 

In [42]:
from elasticsearch import Elasticsearch
import pandas as pd
from datetime import datetime
# Called with no arguments, the client connects to localhost:9200 (the default).
es = Elasticsearch()

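Next, let's create an index called `speeches`. We first delete any existing index with that name so that we start from a clean slate, then create it again. We can use this index for storing some sample documents, which we will later visualize in Kibana.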

In [43]:
# Drop any existing 'speeches' index so the notebook starts from a clean slate.
# ignore=404 stops the client from raising NotFoundError when the index does
# not exist yet (fresh cluster, or a previous run that failed before creating
# it), so this cell also works under Restart Kernel -> Run All.
es.indices.delete(index='speeches', ignore=404)

{'acknowledged': True}

In [44]:
# Create the 'speeches' index; no settings or mappings are passed here.
es.indices.create(index='speeches')

{'acknowledged': True, 'index': 'speeches', 'shards_acknowledged': True}

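Now that we have an index, we can put some documents into it. We load the speeches from a CSV file, convert each date string into a `datetime`, and index one document per row.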

In [45]:
# Load the speeches into a DataFrame; the path is relative to the notebook's working directory.
speeches = pd.read_csv('data/SecDef_Speeches.csv')

In [46]:
# Lookup table from abbreviated month name to full month name.
# Both "Sep" and "Sept" are accepted as abbreviations of September.
months = {
    "Jan": "January",
    "Feb": "February",
    "Mar": "March",
    "Apr": "April",
    "May": "May",
    "Jun": "June",
    "Jul": "July",
    "Aug": "August",
    "Sep": "September",
    "Sept": "September",
    "Oct": "October",
    "Nov": "November",
    "Dec": "December",
}

In [47]:
def format_date(date_str, months):
    """Parse a speech date string into a ``datetime``.

    A date containing a period is treated as using an abbreviated month
    (for example "Sept. 5, 2016"): the text before the first period is
    looked up in ``months`` and replaced by the full month name before
    parsing. A date without a period is parsed unchanged.

    Args:
        date_str: Date text such as "March 17, 2016" or "Jan. 5, 2016".
        months: Mapping from month abbreviation to full month name.

    Returns:
        The ``datetime`` obtained by parsing with the format ``'%B %d, %Y'``.

    Raises:
        KeyError: If the text before the first period is not a key of ``months``.
        ValueError: If the resulting text does not match ``'%B %d, %Y'``.
    """
    if '.' not in date_str:
        return datetime.strptime(date_str, '%B %d, %Y')

    pieces = date_str.split('.')
    # Only the segment directly after the first period is kept; anything
    # following a later period is discarded.
    expanded = months[pieces[0]] + pieces[1]
    return datetime.strptime(expanded, '%B %d, %Y')

In [48]:
# Index every row of `speeches` as one document, using a running counter
# (starting at 1) as the document id. `count` and `vv` are deliberately left
# in the namespace: later cells read both.
count = 1
for kk, vv in speeches.iterrows():
    doc = dict(
        title=vv.title,
        document=vv.speech,
        timestamp=format_date(vv.date, months),
    )
    es.index(index="speeches", doc_type="secdef", id=count, body=doc)
    count = count + 1

In [56]:
# `vv` still holds the last row yielded by the indexing loop; display it.
vv

date                                         March 17, 2016
speech    I. PURPOSE OF THIS TESTIMONY Chairman McCain, ...
title     Submitted Statement -- Senate Armed Services C...
url       https://www.defense.gov/News/Speeches/Speech-V...
Name: 203, dtype: object

In [58]:
    # Same fields as the documents indexed by the loop, plus one extra field
    # (`my_random_field`) that none of those documents carried.
    doc = dict(
        title=vv.title,
        document=vv.speech,
        my_random_field="shouldn't this puke!!?!",
        timestamp=format_date(vv.date, months),
    )

In [59]:
# `count` was incremented after the last document in the loop, so it is one
# past the last id used there and this call adds a new document.
es.index(index="speeches", doc_type="secdef", id=count, body=doc)

{'_id': '205',
 '_index': 'speeches',
 '_primary_term': 1,
 '_seq_no': 47,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_type': 'secdef',
 '_version': 1,
 'result': 'created'}

In [60]:
# Search body: a `match` query on `my_random_field` for the text "puke".
query = {
    "query": {
        "match": {
            "my_random_field": "puke",
        },
    },
}

To actually run the search, try:

In [61]:
# Run the query against the 'speeches' index and keep only the list of hits
# from the response.
response = es.search(body=query, index='speeches', doc_type='secdef')
results = response['hits']['hits']

In [62]:
# Print the title stored in the source of each hit.
for hit in results:
    print(hit['_source']['title'])

Submitted Statement -- Senate Armed Services Committee (FY 2017 Budget Request)
