## Installation & configuration

In [None]:
# Start a local Elasticsearch 8.15.0 container in the background (-d), named "elasticsearch".
#   -p 127.0.0.1:9200:9200                   publish port 9200 on the loopback interface only
#   discovery.type=single-node               run as a single-node cluster
#   xpack.security.enabled=false             security is switched off (plain HTTP, no credentials)
#   xpack.license.self_generated.type=trial  use a self-generated trial license
#   -v elasticsearch-data:...                keep the data directory in the named volume "elasticsearch-data"
# NOTE(review): because of the fixed --name, re-running this cell presumably fails while a
# container called "elasticsearch" still exists (`docker start elasticsearch` or
# `docker rm -f elasticsearch` first) — confirm.
!docker run -p 127.0.0.1:9200:9200 -d --name elasticsearch \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  -e "xpack.license.self_generated.type=trial" \
  -v "elasticsearch-data:/usr/share/elasticsearch/data" \
  docker.elastic.co/elasticsearch/elasticsearch:8.15.0


In [None]:
# requests       → to interact with the Elasticsearch REST API
# elasticsearch  → official Elasticsearch Python client
# pandas         → for handling and analyzing tabular data (e.g., dataset exploration)
# matplotlib     → for optional data visualization (e.g., query stats or aggregations)

!pip install requests elasticsearch==8.15.0 pandas matplotlib

In [None]:
import time
from pprint import pprint

from elasticsearch import Elasticsearch, helpers

# Client for the local node started by the Docker cell (plain HTTP, no credentials).
es = Elasticsearch('http://localhost:9200')

# A freshly started container needs a while before it answers HTTP requests.
# Poll until the node responds so that "Restart & Run All" does not fail on es.info().
MAX_WAIT_SECONDS = 60
for attempt in range(MAX_WAIT_SECONDS):
    if es.ping():  # ping() returns False instead of raising while the node is unreachable
        break
    time.sleep(1)
else:
    raise RuntimeError(
        f"Elasticsearch did not answer on http://localhost:9200 after {MAX_WAIT_SECONDS} attempts"
    )

info = es.info()

print('Connected to ElasticSearch !')
pprint(info.body)


## Importing data with the Bulk API

In [None]:
import json

# Read the dataset. The encoding is given explicitly because the platform default
# is not UTF-8 everywhere (assumes apod.json is UTF-8 encoded, the standard JSON encoding).
with open("apod.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Prepare one bulk action per document.
actions = [
    {
        "_index": "apod",
        # The title is used as the document _id. It must be unique: documents
        # sharing an _id overwrite each other instead of being stored separately.
        "_id": doc["title"],
        "_source": doc
    }
    for doc in data
]

# Import the data in bulk.
try:
    helpers.bulk(es, actions)
    # Newly indexed documents only become searchable after a refresh; force one so
    # that the search cells below see the full dataset when run right after this cell.
    es.indices.refresh(index="apod")
    print("Bulk import terminé !")
except helpers.BulkIndexError as e:
    # str(e) only gives the number of failed documents; show a few per-document reasons too.
    print(e)
    for error in e.errors[:5]:
        print(error)
except Exception as e:
    print(e)

## Match

In [None]:
# `match` query: search the `title` field for "moon".
query = {"query": {"match": {"title": "moon"}}}

response = es.search(index="apod", body=query)

# Print the relevance score followed by the stored document for every returned hit.
hits = response["hits"]["hits"]
for hit in hits:
    print(hit["_score"], hit["_source"])

### Other types of match

In [None]:
# `match_phrase` query: search the `title` field for the phrase "dark nebula".
query = {"query": {"match_phrase": {"title": "dark nebula"}}}

response = es.search(index="apod", body=query)

# Print the relevance score followed by the stored document for every returned hit.
hits = response["hits"]["hits"]
for hit in hits:
    print(hit["_score"], hit["_source"])

In [None]:
# Same phrase search as before, but in the expanded form that accepts options:
# here `slop` is set to 1 alongside the phrase itself.
phrase_options = {"query": "dark nebula", "slop": 1}
query = {"query": {"match_phrase": {"title": phrase_options}}}

response = es.search(index="apod", body=query)

# Print the relevance score followed by the stored document for every returned hit.
hits = response["hits"]["hits"]
for hit in hits:
    print(hit["_score"], hit["_source"])

In [None]:
# `match_phrase_prefix` query on `title`: the search text ends with the incomplete word "neb".
query = {"query": {"match_phrase_prefix": {"title": "dark neb"}}}

response = es.search(index="apod", body=query)

# Print the relevance score followed by the stored document for every returned hit.
hits = response["hits"]["hits"]
for hit in hits:
    print(hit["_score"], hit["_source"])

In [None]:
# `multi_match` query: look for "Pyrenees" in both the `title` and `explanation` fields.
searched_fields = ["title", "explanation"]
query = {"query": {"multi_match": {"query": "Pyrenees", "fields": searched_fields}}}

response = es.search(index="apod", body=query)

# Print the relevance score followed by the stored document for every returned hit.
hits = response["hits"]["hits"]
for hit in hits:
    print(hit["_score"], hit["_source"])

## Fuzzy

In [None]:
# Typo-tolerant search: `fuzzy` query on `title` for the misspelled value "Galaxi",
# with the fuzziness level left to "AUTO".
# NOTE(review): `fuzzy` is a term-level query, so the value is presumably not analyzed
# (i.e. not lowercased) — confirm whether the capital "G" affects the results.
fuzzy_options = {"value": "Galaxi", "fuzziness": "AUTO"}
query = {"query": {"fuzzy": {"title": fuzzy_options}}}

response = es.search(index="apod", body=query)

# Only the stored documents are printed here (no score).
hits = response["hits"]["hits"]
for hit in hits:
    print(hit["_source"])