In [None]:
%%capture
# Environment setup. The %%capture cell magic silences the output of this cell.

# Install the Elasticsearch Python client, pinned to version 7.14.0.
!pip install elasticsearch==7.14.0
# Install the distribution's default JDK package; apt's stdout is discarded.
!apt install default-jdk > /dev/null

In [None]:
try:
  import os
  import elasticsearch
  from elasticsearch import Elasticsearch
  import numpy as np
  import pandas as pd
  import sys
  import json
  from ast import literal_eval
  from tqdm import tqdm
  import datetime
  from elasticsearch import helpers

except Exception as e:
  # Show which dependency failed to import, then stop here: continuing with
  # missing names would only surface later as an unrelated NameError.
  print(f"error: {e}")
  raise

In [None]:
# Download & extract Elasticsearch 7.0.0

# Fetch the Linux x86_64 tarball into the current directory; -q keeps wget quiet.
!wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.0.0-linux-x86_64.tar.gz -q
# Unpack the archive (the following commands refer to the elasticsearch-7.0.0 directory).
!tar -xzf elasticsearch-7.0.0-linux-x86_64.tar.gz
# Give the whole tree to the daemon user, the account the server is started as later on.
!chown -R daemon:daemon elasticsearch-7.0.0

In [None]:
# Start Elasticsearch as a background process owned by the daemon user
import os
from subprocess import Popen, PIPE, STDOUT, DEVNULL

# The server's console output (stdout, with stderr merged into it) is discarded
# rather than sent to a pipe: the visible notebook never reads es_server.stdout,
# and an unread pipe eventually fills its OS buffer and blocks the child process
# on its next write.
es_server = Popen(['elasticsearch-7.0.0/bin/elasticsearch'],
                  stdout=DEVNULL, stderr=STDOUT,
                  preexec_fn=lambda: os.setuid(1)  # switch to uid 1 (daemon) before exec
                 )

In [None]:
# This part is important: the instance needs some time to load. Instead of
# sleeping for a fixed interval (too short on a slow machine, wasted time on a
# fast one), poll the HTTP port until the node answers or a deadline passes.
import time
import urllib.request

startup_deadline = time.monotonic() + 120  # give up after two minutes
while True:
    try:
        with urllib.request.urlopen("http://localhost:9200/", timeout=2):
            break  # the node answered, so it is up
    except OSError:
        # Connection refused / timed out / HTTP error: the node is not ready yet.
        if time.monotonic() >= startup_deadline:
            # Keep the original best-effort behaviour: warn, but do not raise.
            print("warning: Elasticsearch did not answer on localhost:9200 within 120 seconds")
            break
        time.sleep(1)

In [None]:
%%bash
# List the running processes whose command line mentions elasticsearch.
# If you see 1 root process and 2 daemon processes, the Elasticsearch instance has started successfully.
ps -ef | grep elasticsearch

daemon       256      65 54 08:10 ?        00:00:32 /content/elasticsearch-7.0.0/jdk/bin/java -Xms1g -Xmx1g -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -Des.networkaddress.cache.ttl=60 -Des.networkaddress.cache.negative.ttl=10 -XX:+AlwaysPreTouch -Xss1m -Djava.awt.headless=true -Dfile.encoding=UTF-8 -Djna.nosys=true -XX:-OmitStackTraceInFastThrow -Dio.netty.noUnsafe=true -Dio.netty.noKeySetOptimization=true -Dio.netty.recycler.maxCapacityPerThread=0 -Dlog4j.shutdownHookEnabled=false -Dlog4j2.disable.jmx=true -Djava.io.tmpdir=/tmp/elasticsearch-2826779375912509329 -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=data -XX:ErrorFile=logs/hs_err_pid%p.log -Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m -Djava.locale.providers=COMPAT -Dio.netty.allocator.type=unpooled -Des.path.home=/content/elasticsearch-7.0.0 -Des.path.conf=/content/elasticsearch-7.0.0/config -Des.distribution.flavor=default 

In [None]:
# Check if Elasticsearch is running: a started node answers a GET on port 9200
# with a JSON document describing itself (node name, cluster name, version, ...).
!curl -sX GET "localhost:9200/"

{
  "name" : "ec05a31c68b8",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "o6m6xlVASIWfHoYp1mClgw",
  "version" : {
    "number" : "7.0.0",
    "build_flavor" : "default",
    "build_type" : "tar",
    "build_hash" : "b7e28a7",
    "build_date" : "2019-04-05T22:55:32.697037Z",
    "build_snapshot" : false,
    "lucene_version" : "8.0.0",
    "minimum_wire_compatibility_version" : "6.7.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}


In [None]:
# Create the Python client, pointed at the local node on port 9200.
es = Elasticsearch(hosts = [{"host":"localhost", "port":9200}])
# Check if Python is connected to Elasticsearch (the recorded output of this cell is True).
es.ping()

True

In [None]:
# Importing test dataset
!git clone https://github.com/HamidRezaAttar/Elasticsearch-Jupyter-Colab
dataset = pd.read_csv("/content/Elasticsearch-Jupyter-Colab/data/test.csv.gz", compression="gzip")
dataset.drop("id", axis=1, inplace=True)
print(f"shape of dataset: {dataset.shape}")
dataset.head()

shape of dataset: (11490, 2)


Unnamed: 0,article,highlights
0,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."


In [None]:
# Index definition for Elasticsearch: one shard, zero replicas, and the two
# dataset columns ("article", "highlights") both mapped with type "text".
Settings = dict(
    settings=dict(number_of_shards=1, number_of_replicas=0),
    mappings=dict(
        properties={
            column: {"type": "text"} for column in ("article", "highlights")
        },
    ),
)

In [None]:
def json_formatter(dataset, index_name, index_type='_doc'):
    """
    Build the list of bulk actions that index every row of a DataFrame.

    Each row becomes one dictionary of the form
    ``{'_index': index_name, '_type': index_type, '_source': {column: value, ...}}``,
    i.e. the action format passed to ``elasticsearch.helpers.bulk`` in this notebook.

    Args:
      dataset: pandas DataFrame whose rows should be indexed.
      index_name: Name of the index in Elasticsearch.
      index_type: Type of the index in Elasticsearch. It is suggested to keep
        the default '_doc', since index types are deprecated. Pass None to
        leave the '_type' key out of the generated actions altogether.

    Returns:
      A list with one action dictionary per row, in row order. An empty
      DataFrame yields an empty list.

    Raises:
      Whatever pandas raises while converting the rows (for example
      AttributeError when ``dataset`` is not a DataFrame). Errors are no longer
      printed and replaced by a ``None`` return value, which only postponed the
      failure to the bulk call.

    Note: every column of ``dataset`` ends up in '_source'. To index only some
    columns, select them first, e.g. ``json_formatter(dataset[['article']], ...)``.
    """
    # Keys shared by every action; '_type' is optional.
    action_header = {'_index': index_name}
    if index_type is not None:
        action_header['_type'] = index_type

    # to_dict(orient='records') converts the frame column by column, so each
    # value keeps the type of its own column. Row-wise iteration with
    # iterrows() coerces a whole row to one common dtype instead (integers turn
    # into floats when they sit next to a float column).
    return [
        {**action_header, '_source': record}
        for record in dataset.to_dict(orient='records')
    ]

In [None]:
# Create the "news_index" index using the Settings dictionary as the request body.
# ignore=[400,404] is passed to the client for those two HTTP status codes.
# NOTE(review): presumably this keeps a re-run from failing when the index already
# exists; in that case MY_INDEX would hold the error response instead of the
# acknowledgement shown in the recorded output - confirm against the client docs.
MY_INDEX = es.indices.create(index="news_index", ignore=[400,404], body=Settings)
MY_INDEX

{'acknowledged': True, 'index': 'news_index', 'shards_acknowledged': True}

In [None]:
# Convert every row of the dataset into an action dictionary targeting "news_index",
# then display the first one as a sanity check.
json_Formatted_dataset = json_formatter(dataset=dataset, index_name='news_index', index_type='_doc')
json_Formatted_dataset[0]

{'_index': 'news_index',
 '_source': {'article': "Ever noticed how plane seats appear to be getting smaller and smaller? With increasing numbers of people taking to the skies, some experts are questioning if having such packed out planes is putting passengers at risk. They say that the shrinking space on aeroplanes is not only uncomfortable - it's putting our health and safety in danger. More than squabbling over the arm rest, shrinking space on planes putting our health and safety in danger? This week, a U.S consumer advisory group set up by the Department of Transportation said at a public hearing that while the government is happy to set standards for animals flying on planes, it doesn't stipulate a minimum amount of space for humans. 'In a world where animals have more rights to space and food than humans,' said Charlie Leocha, consumer representative on the committee.\xa0'It is time that the DOT and FAA take a stand for humane treatment of passengers.' But could crowding on planes

In [None]:
# For importing data to Elasticsearch we use the bulk API from elasticsearch.helpers
try:
    res = helpers.bulk(es, json_Formatted_dataset)
    print("successfully imported to elasticsearch.")
except Exception as e:
    print(f"error: {e}")
    # Do not carry on after a failed import: the query cells below would
    # otherwise run against an empty or partially filled index.
    raise



successfully imported to elasticsearch.


In [27]:
# Get a sample of 10 documents: the query is a match_all with no conditions,
# and "size" is set to 10.
query = es.search(
    index="news_index",
    body={
      "size":10,
      "query": {
        "match_all":{}
      }
    }
)

# Flatten the list of hits into a DataFrame; nested keys become dotted column
# names such as _source.article and _source.highlights.
output = pd.json_normalize((query['hits']['hits']))
output

Unnamed: 0,_index,_type,_id,_score,_source.article,_source.highlights
0,news_index,_doc,hNmlq38B3VIFmSUk-6g7,1.0,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,news_index,_doc,hdmlq38B3VIFmSUk-6g7,1.0,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,news_index,_doc,htmlq38B3VIFmSUk-6g7,1.0,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,news_index,_doc,h9mlq38B3VIFmSUk-6g7,1.0,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,news_index,_doc,iNmlq38B3VIFmSUk-6g7,1.0,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."
5,news_index,_doc,idmlq38B3VIFmSUk-6g7,1.0,This is the moment that a crew of firefighters...,Giant pig fell into the swimming pool at his h...
6,news_index,_doc,itmlq38B3VIFmSUk-6g7,1.0,The amount of time people spend listening to B...,Figures show that while millions still tune in...
7,news_index,_doc,i9mlq38B3VIFmSUk-6g7,1.0,"(CNN)So, you'd like a ""Full House"" reunion and...","Show will return with a one-hour special, foll..."
8,news_index,_doc,jNmlq38B3VIFmSUk-6g7,1.0,"At 11:20pm, former world champion Ken Doherty ...",Reanne Evans faced Ken Doherty in World Champi...
9,news_index,_doc,jdmlq38B3VIFmSUk-6g7,1.0,A gang of six men have been jailed for a total...,Gang have been jailed for a total of 31 years ...


In [28]:
# Complicated query: a bool query made of two clauses
#   must   - a match on "teenage boy" in the article field
#   should - a match on "drunk" in the highlights field
# NOTE(review): in Elasticsearch's bool query a "should" clause next to a "must"
# is expected to be optional and to only raise the score of documents matching
# it - confirm against the Query DSL documentation.
query = es.search(
    index="news_index",
    body={
        "size":20,
        "query":{
            "bool":{
                "must":[
                        {"match":{"article":"teenage boy"}}
                ],
                "should":[
                        {"match":{"highlights":"drunk"}}
                ]
            }
        }
    }
)

# Flatten the hits into a DataFrame for display (one row per hit).
output = pd.json_normalize((query['hits']['hits']))
output

Unnamed: 0,_index,_type,_id,_score,_source.article,_source.highlights
0,news_index,_doc,hdmlq38B3VIFmSUk-6g7,15.533424,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
1,news_index,_doc,edmmq38B3VIFmSUkDbmb,11.526793,Teenage girls do better in single sex schools ...,'Boy free' environment stops girls from being ...
2,news_index,_doc,ftmmq38B3VIFmSUkIM9l,11.196183,He looks barely teenage. But this child has am...,Child has amassed thousands of Twitter followe...
3,news_index,_doc,t9mmq38B3VIFmSUkAKog,10.722628,A teenage boy who shot and killed a teacher an...,"Boy, 13, arrested after deadly attack at secon..."
4,news_index,_doc,LNmmq38B3VIFmSUkE8D0,10.383848,A female school worker has been arrested on su...,"Diane Blankenship, 45, was arrested at her hom..."
5,news_index,_doc,ntmmq38B3VIFmSUkIdDM,9.660924,An alcohol-fuelled 14-year-old boy killed a fa...,Kyle Major killed Paul Walker with a single pu...
6,news_index,_doc,NNmmq38B3VIFmSUkB7Ij,9.587729,Teenagers across America got a very realistic ...,The Glasford school in Illinois hosted one of ...
7,news_index,_doc,vtmmq38B3VIFmSUkCrSN,9.572909,The savages of Islamic State have stepped up t...,Teenage suicide bomber Abu Hafs al Badri is re...
8,news_index,_doc,XNmmq38B3VIFmSUkHsyW,9.094333,A teenager who allegedly played naked Twister ...,"Rachel Lehnardt, 35, 'allowed her 16-year-old ..."
9,news_index,_doc,b9mmq38B3VIFmSUkG8lL,9.034744,"Forget about the 'theory of everything', an ev...",The legendary physicist was speaking at the Sy...


In [29]:
# More complicated query: a bool query whose single "must" clause is a
# multi_match, i.e. the same query text ("The Hunger Games") is given together
# with a list of fields, here both article and highlights.
query = es.search( 
    index="news_index",
    body={
        "size":20,
        "query":{
            "bool":{
                "must":[
                        {"multi_match":{
                            "query":"The Hunger Games",
                            "fields":["article","highlights"]
                        }}
                ]
            }
        }
    }
)

# Flatten the hits into a DataFrame for display (one row per hit).
output = pd.json_normalize((query['hits']['hits']))
output

Unnamed: 0,_index,_type,_id,_score,_source.article,_source.highlights
0,news_index,_doc,xtmmq38B3VIFmSUkELzp,14.259413,If you were spellbound by the death-defying ad...,"Motiongate Dubai will open in October, 2016 wi..."
1,news_index,_doc,qdmmq38B3VIFmSUkDLYa,13.301999,"In The Hunger Games, Katniss Everdeen is pulle...",Young adult novels regularly feature teen hero...
2,news_index,_doc,4dmmq38B3VIFmSUkIM5l,11.486975,"When complete strangers Niamh Geaney, 26, and ...","Hunger Games star Josh Hutcherson, 22, surpris..."
3,news_index,_doc,bNmmq38B3VIFmSUkEL3p,11.437349,"(CNN)Universal's ""Furious 7"" continues to buil...","The final film featuring the late Paul Walker,..."
4,news_index,_doc,_Nmmq38B3VIFmSUkIM5l,11.259434,Sky launched its Buy and Keep scheme for custo...,Films can be bought at Skystore.com or through...
5,news_index,_doc,yNmmq38B3VIFmSUkA60A,10.941918,"(CNN)Universal's ""Furious 7"" is about to make ...",The film is expected to gross $115 million or ...
6,news_index,_doc,xNmmq38B3VIFmSUkCbIH,10.669691,Mourinho with his wife Matilde Faria at the 20...,Jose Mourinho said he is proud to work with un...
7,news_index,_doc,U9mmq38B3VIFmSUkELzp,10.634808,It's the ultimate treat for Benedict Cumberbat...,A 6ft chocolate sculpture of Benedict Cumberba...
8,news_index,_doc,Gtmmq38B3VIFmSUkE8D0,10.627807,You would be forgiven for thinking these colou...,Wellington held its inaugural CubaDupa festiva...
9,news_index,_doc,C9mmq38B3VIFmSUkE8D0,10.489234,One-eyed police murderer Dale Cregan (above) i...,31-year-old was transferred to solitary confin...
