# Elasticsearch_API_Exploration

In [371]:
import csv
import pandas as pd
from elasticsearch import Elasticsearch, helpers
!pip install elasticsearch==7.16.3

#### Start Elasticsearch

In [372]:
es = Elasticsearch("http://localhost:9200")

In [373]:
print(es.info())

{'name': 'LAPTOP-ND6VT9QJ', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'Ok320e_TRaO2lgcnSCf5MQ', 'version': {'number': '7.16.3', 'build_flavor': 'default', 'build_type': 'zip', 'build_hash': '4e6e4eab2297e949ec994e688dad46290d018022', 'build_date': '2022-01-06T23:43:02.825887787Z', 'build_snapshot': False, 'lucene_version': '8.10.1', 'minimum_wire_compatibility_version': '6.8.0', 'minimum_index_compatibility_version': '6.0.0-beta1'}, 'tagline': 'You Know, for Search'}


#### Import dataset and creation of samples

In [374]:
wiki_movies =  pd.read_csv("wiki_movie_plots_deduped.csv")

In [375]:
sample_movies = wiki_movies[:1000]

In [376]:
sample_movies.to_csv("sample_movies.csv")

#### Drop missing values

In [380]:
sample_updated = sample_movies.dropna()

In [382]:
sample_updated.isna

<bound method DataFrame.isna of      Release Year                            Title Origin/Ethnicity  \
5            1903              Alice in Wonderland         American   
13           1907                     Daniel Boone         American   
14           1907  How Brown Saw the Baseball Game         American   
15           1907                     Laughing Gas         American   
16           1908         The Adventures of Dollie         American   
..            ...                              ...              ...   
995          1930                   Playing Around         American   
996          1930                          Raffles         American   
997          1930            Reaching for the Moon         American   
998          1930                  Recaptured Love         American   
999          1930                      River's End         American   

                                    Director  \
5                             Cecil Hepworth   
13   Wallace McCutc

#### Transform the dataset into JSON

In [384]:
import json

json_str = sample_updated.to_json(orient='records')

json_wiki_movies = json.loads(json_str)
json_wiki_movies[0]

{'Release Year': 1903,
 'Title': 'Alice in Wonderland',
 'Origin/Ethnicity': 'American',
 'Director': 'Cecil Hepworth',
 'Cast': 'May Clark',
 'Genre': 'unknown',
 'Wiki Page': 'https://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film)',
 'Plot': 'Alice follows a large white rabbit down a "Rabbit-hole". She finds a tiny door. When she finds a bottle labeled "Drink me", she does, and shrinks, but not enough to pass through the door. She then eats something labeled "Eat me" and grows larger. She finds a fan when enables her to shrink enough to get into the "Garden" and try to get a "Dog" to play with her. She enters the "White Rabbit\'s tiny House," but suddenly resumes her normal size. In order to get out, she has to use the "magic fan."\r\nShe enters a kitchen, in which there is a cook and a woman holding a baby. She persuades the woman to give her the child and takes the infant outside after the cook starts throwing things around. The baby then turns into a pig and squirms out of 

---

#### Body

In [429]:
body= {
    "settings":{
        "analysis":{
            "analyzer":{
                "my_new_analyzer":{
                    "type": "custom",
                    "tokenizer":"my_tokenizer",
                    "char_filter": ["my_char_filter"],
                    "filter":["lowercase","english_stop","truncate","word_delimiter_graph"],
                }

            },
            "char_filter":{
                "my_char_filter":{
                    "type": "html_strip"
                }
            },"filter": {
                "english_stop": { 
                    "type": "stop",
                    "stopwords": "_english_"
                }
            },
            "tokenizer":{
                "my_tokenizer":{
                    "type": "edge_ngram",
                    "min_gram":2,
                    "max_gram":10,
                    "token_chars":["letter","digit"]
                }
            }
        }
    },"mappings":{
                "_doc":{
                    "properties":{
                        "Plot":{
                            "type":"text",
                            "similarity": "BM25",
                            "analyzer":"my_new_analyzer"
                        },
                        "Title":{
                            "type":"text",
                            "similarity": "BM25",
                            "analyzer":"my_new_analyzer"
                        },
                        "Wiki Page":{
                            "type":"text",
                            "similarity": "BM25",
                            "analyzer":"my_new_analyzer"
                        },
                        "Director":{
                            "type":"text",
                            "similarity": "BM25",
                            "analyzer":"my_new_analyzer"
                        },
                        "Cast":{
                            "type":"text",
                            "analyzer": "my_new_analyzer",
                            "similarity": "BM25",
                        }
                    }
                }
    }
}

#### Create Index of the Body

In [425]:
es.indices.create(index = "wikipedia_movies", body=body, include_type_name=True)

  es.indices.create(index = "salir3", body=body, include_type_name=True)


{'acknowledged': True, 'shards_acknowledged': True, 'index': 'salir3'}

#### Import the dataset and assign the body 

In [426]:
helpers.bulk(es,json_wiki_movies, index="wikipedia_movies")

(980, [])

In [427]:
es.indices.get_mapping(index = "wikipedia_movies")

{'salir3': {'mappings': {'properties': {'Cast': {'type': 'text',
     'analyzer': 'my_new_analyzer',
     'similarity': 'BM25'},
    'Directo': {'type': 'text',
     'analyzer': 'my_new_analyzer',
     'similarity': 'BM25'},
    'Director': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'Genre': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'Origin/Ethnicity': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'Plot': {'type': 'text',
     'analyzer': 'my_new_analyzer',
     'similarity': 'BM25'},
    'Release Year': {'type': 'long'},
    'Title': {'type': 'text',
     'analyzer': 'my_new_analyzer',
     'similarity': 'BM25'},
    'Wiki Page': {'type': 'text',
     'analyzer': 'my_new_analyzer',
     'similarity': 'BM25'}}}}}

In [428]:
es.search(index='wikipedia_movies')

{'took': 6,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 980, 'relation': 'eq'},
  'max_score': 1.0,
  'hits': [{'_index': 'salir3',
    '_type': '_doc',
    '_id': 'Oo7rUX8B08L66tnuIjBZ',
    '_score': 1.0,
    '_source': {'Release Year': 1903,
     'Title': 'Alice in Wonderland',
     'Origin/Ethnicity': 'American',
     'Director': 'Cecil Hepworth',
     'Cast': 'May Clark',
     'Genre': 'unknown',
     'Wiki Page': 'https://en.wikipedia.org/wiki/Alice_in_Wonderland_(1903_film)',
     'Plot': 'Alice follows a large white rabbit down a "Rabbit-hole". She finds a tiny door. When she finds a bottle labeled "Drink me", she does, and shrinks, but not enough to pass through the door. She then eats something labeled "Eat me" and grows larger. She finds a fan when enables her to shrink enough to get into the "Garden" and try to get a "Dog" to play with her. She enters the "White Rabbit\'s tiny House," but suddenly r

---

#### Queries

If you want to apply the query please insert anything that you are looking in the dataset by replacing the query

In [431]:
query1 = {
  "query": {
    "bool": {
      "must": {
        "multi_match":{
           "query": "ROCK",
            "fields": ["Plot"]
        }
      },
      "filter": {
        "range": {
          "Release Year": {
            "to": 1950
          }
        }
      }
    }
  }, "sort": [
    {
      "Genre.keyword": {
        "order": "asc"
      }
    }
  ]
}

In [119]:
query2 ={
  "query": {
    "bool": {
      "must": {
        "multi_match": {
          "query": "whi",
          "fields": [
            "*Plot",
            "Title^2",
            "Cast^3"
          ]
        }
      }
    }
  }
}

---

#### Analysis of the queries

In [432]:
es.search(body=query1,index='wikipedia_movies')

  es.search(body=query1,index='salir3')


{'took': 2,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 430, 'relation': 'eq'},
  'max_score': None,
  'hits': [{'_index': 'salir3',
    '_type': '_doc',
    '_id': '247rUX8B08L66tnuIjBZ',
    '_score': None,
    '_source': {'Release Year': 1917,
     'Title': 'A Modern Musketeer',
     'Origin/Ethnicity': 'American',
     'Director': 'Allan Dwan',
     'Cast': 'Douglas Fairbanks, Marjorie Daw',
     'Genre': 'adventure',
     'Wiki Page': 'https://en.wikipedia.org/wiki/A_Modern_Musketeer',
     'Plot': 'The film opens with a sequence in which D\'Artagnan (Douglas Fairbanks) rides up to a tavern on horseback and ends up brawling with sword and fist with the patrons inside in his haste to approach a fair young stranger. After triumphing, he morphs into modern day Ned Thacker (also played by Fairbanks).\r\nNed is born and raised in Kansas by a mother who passes along to him her love of D\'Artagnan and The Three M

In [433]:
es.search(body=query2, index="wikipedia_movies")

  es.search(body=query2, index="salir3")


{'took': 4,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 3, 'relation': 'eq'},
  'max_score': 22.865698,
  'hits': [{'_index': 'salir3',
    '_type': '_doc',
    '_id': 'So7rUX8B08L66tnuJTMD',
    '_score': 22.865698,
    '_source': {'Release Year': 1929,
     'Title': 'Happy Days',
     'Origin/Ethnicity': 'American',
     'Director': 'Benjamin Stoloff',
     'Cast': 'Marjorie White, Stuart Erwin, Janet Gaynor',
     'Genre': 'musical',
     'Wiki Page': 'https://en.wikipedia.org/wiki/Happy_Days_(1929_film)',
     'Plot': "Originally titled New Orleans Frolic, the story centers around Margie (played by Marjorie White), a singer on a showboat who, when she hears that the showboat is in financial trouble, travels to New York City in an effort to persuade all the boat's former stars to perform in a show to rescue it. She is successful and the stars all fly to New Orleans to surprise the showboat's owner, Colonel B

## __References__

<i class="fas fa-book"></i> Elasticsearch guide [8.0](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html)

<i class="fas fa-book"></i>  W. Bruce Croft; Donald Metzler; Trevor Strohman. 2010. Search engines: information retrieval in practice. [Pearson Education.](http://ciir.cs.umass.edu/downloads/SEIRiP.pdf) 