## Dependencies

In [None]:
!pip install elasticsearch pandas

from elasticsearch import Elasticsearch, helpers
from elasticsearch.helpers import BulkIndexError
import pandas as pd

## Elasticsearch Client

In [None]:
# Initialize the Elasticsearch client.
# The three strings below are placeholders: replace them with the real
# cluster address and credentials before running this cell.
# NOTE(review): 8.x clients expect a full URL including the scheme
# (e.g. 'https://HOST:PORT') -- confirm against the installed client version.
es_hosts = ['HOST:PORT']
es_credentials = ('USERNAME', 'PASSWORD')

es = Elasticsearch(
    es_hosts,
    basic_auth=es_credentials,
    # WARNING: TLS certificate verification is switched off here. That is
    # only acceptable for local/throwaway clusters; enable it (and supply
    # the CA certificate) for anything that carries real data.
    verify_certs=False,
)

## Parsing dataset

In [None]:
# Name of the index that will hold the recipe documents
index_name = 'recipes'

# Define the mapping.
# NOTE(review): "rating", "n_rater" and "n_reviewer" are indexed as text;
# they look numeric by name -- confirm whether a numeric type is wanted.
mapping = {
  "mappings": {
    "properties": {
      "group": { "type": "text" },
      "name": { "type": "text" },
      "rating": { "type": "text" },
      "n_rater": { "type": "text" },
      "n_reviewer": { "type": "text" },
      # The summary is the only field analyzed with the English analyzer.
      "summary": {
        "type": "text",
        "analyzer": "english"
      },
      "process": { "type": "text" },
      "ingredient": {
        "type": "text"
      },
      # Presumably populated by the ingest pipeline referenced at bulk-index
      # time rather than by the CSV itself -- TODO confirm.
      "ml.tokens": {
        "type": "rank_features"
      }
    }
  }
}

# Create index.
# The mapping is passed through the explicit `mappings` parameter; the
# catch-all `body` parameter is deprecated in recent Python clients.
es.indices.create(index=index_name, mappings=mapping["mappings"])

# Load the recipe dataset into a DataFrame. The file is opened explicitly so
# that bytes which are not valid UTF-8 are dropped instead of aborting the read.
csv_path = 'recipe_dataset.csv'
with open(csv_path, mode='r', encoding='utf-8', errors='ignore') as csv_file:
    df = pd.read_csv(csv_file)

# One dictionary per row, keyed by column name, ready to be used as documents.
# NOTE(review): empty CSV cells come through as NaN here -- confirm the
# dataset has none, or that the cluster accepts them, before indexing.
recipes = df.to_dict(orient='records')
document_count = len(recipes)
print(f"Number of documents: {document_count}")

## Bulk Index

In [None]:
# Generate the sequence of bulk actions: one per recipe, each targeting the
# index created earlier and routed through the "elser-v1-recipes" ingest
# pipeline.
bulk_index_body = [
    {
        "_index": index_name,
        "pipeline": "elser-v1-recipes",
        "_source": recipe,
    }
    for recipe in recipes
]

# Bulk index the data in chunks of 500 documents, allowing up to three
# minutes per request, and report per-document failures.
try:
    response = helpers.bulk(es, bulk_index_body, chunk_size=500, request_timeout=60*3)
except BulkIndexError as e:
    for error in e.errors:
        # Each entry is keyed by its operation type ("index" for these
        # actions). Read the fields defensively so that an entry without an
        # "_id" or with a non-dict "error" cannot crash the report itself.
        details = error.get('index', {})
        cause = details.get('error')
        print(f"Document ID: {details.get('_id')}")
        if isinstance(cause, dict):
            print(f"Error Type: {cause.get('type')}")
            print(f"Error Reason: {cause.get('reason')}")
        else:
            print(f"Error Reason: {cause}")
else:
    print("\nRESPONSE:", response)