In [None]:
# Install dependencies
!pip install elasticsearch pandas

In [None]:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import pandas as pd
import json

# Client for the Elasticsearch node at http://localhost:9200
es = Elasticsearch(["http://localhost:9200"])

# Ping the node; on success also report the version returned by info()
if not es.ping():
    print("✗ Connection failed")
else:
    print("✓ Connected to Elasticsearch!")
    es_version = es.info()['version']['number']
    print(f"  Version: {es_version}")

## 1. Create Index with Mappings

In [None]:
INDEX_NAME = "demo_products"

# Delete if exists, so this cell always starts from an empty index and can
# be re-run safely.
if es.indices.exists(index=INDEX_NAME):
    es.indices.delete(index=INDEX_NAME)

# Index definition. `text` fields are analyzed for full-text search,
# `keyword` fields are matched exactly (used by the term query and the
# terms aggregations later in the notebook).
mappings = {
    "mappings": {
        "properties": {
            "name": {"type": "text"},
            "price": {"type": "float"},
            "category": {"type": "keyword"},
            "description": {"type": "text"},
            "in_stock": {"type": "boolean"},
            "rating": {"type": "float"},
            "created_at": {"type": "date"}
        }
    }
}

# Create index. The mappings are passed through the `mappings=` keyword,
# matching the keyword-style calls (query=, doc=, aggs=) used by the other
# cells, instead of the legacy catch-all `body=` argument.
es.indices.create(index=INDEX_NAME, mappings=mappings["mappings"])
print(f"✓ Index '{INDEX_NAME}' created")

## 2. POST - Insert Documents

In [None]:
# Sample products data.
# Each row lists the values in the order given by _PRODUCT_FIELDS; the rows
# are zipped with the field names to build one dict per product.
_PRODUCT_FIELDS = ("name", "price", "category", "description", "in_stock", "rating", "created_at")
_product_rows = [
    ("iPhone 15 Pro", 999.99, "smartphones", "Latest Apple smartphone", True, 4.8, "2024-01-15"),
    ("Samsung Galaxy S24", 899.99, "smartphones", "Samsung flagship phone", True, 4.7, "2024-01-20"),
    ("MacBook Pro 14", 1999.99, "laptops", "Apple laptop with M3 chip", True, 4.9, "2024-02-01"),
    ("Dell XPS 15", 1499.99, "laptops", "Windows laptop", False, 4.5, "2024-02-10"),
    ("Sony WH-1000XM5", 349.99, "audio", "Noise cancelling headphones", True, 4.8, "2024-02-15"),
    ("AirPods Pro 2", 249.99, "audio", "Apple wireless earbuds", True, 4.7, "2024-02-20"),
    ("iPad Pro 12.9", 1099.99, "tablets", "Apple tablet with M2", True, 4.8, "2024-03-01"),
    ("Samsung Tab S9", 849.99, "tablets", "Android tablet", False, 4.5, "2024-03-05"),
]
products = [dict(zip(_PRODUCT_FIELDS, row)) for row in _product_rows]

# Bulk insert: one action per product; the document ID is the product's
# 1-based position in `products`, stored as a string ("1" .. "8").
actions = [
    {"_index": INDEX_NAME, "_id": str(doc_number), "_source": product}
    for doc_number, product in enumerate(products, start=1)
]

# Only the first element of bulk()'s return value (the success count) is used.
success, _ = bulk(es, actions)

# Refresh so the new documents are visible to the search cells that follow.
es.indices.refresh(index=INDEX_NAME)
print(f"✓ Inserted {success} documents")

## 3. GET - Retrieve Documents

In [None]:
# Get by ID: fetch document "1" and pretty-print its stored fields
doc = es.get(index=INDEX_NAME, id="1")
print("Document ID 1:")
source_fields = doc['_source']
print(json.dumps(source_fields, indent=2))

In [None]:
# Search all: match_all selects every document; at most `size` hits are returned
match_all_query = {"match_all": {}}
results = es.search(index=INDEX_NAME, query=match_all_query, size=10)

# Convert to DataFrame: one row per hit, columns taken from the document body
hits = [entry['_source'] for entry in results['hits']['hits']]
df = pd.DataFrame(hits)
df

## 4. POST - Update Document (partial update via the `_update` API)

In [None]:
# Update price: a partial update that sends only the fields given in `doc`
es.update(index=INDEX_NAME, id="1", doc={"price": 949.99})

# Refresh so the new price is visible to the search and aggregation cells
# further down, even when the notebook is executed with "Run All" (the
# get-by-ID below does not need this, but searches do).
es.indices.refresh(index=INDEX_NAME)

# Verify
updated = es.get(index=INDEX_NAME, id="1")
print(f"Updated price: ${updated['_source']['price']}")

## 5. DELETE - Remove Document

In [None]:
# Delete document
es.delete(index=INDEX_NAME, id="8")
print("✓ Document 8 deleted")

# A delete only becomes visible to search-based APIs (count, search,
# aggregations) after a refresh. Without this, the count below can still
# include the deleted document when the cell runs right after the delete.
es.indices.refresh(index=INDEX_NAME)

# Verify count
count = es.count(index=INDEX_NAME)['count']
print(f"Remaining documents: {count}")

## 6. Queries

In [None]:
# Match Query: full-text match against the `description` text field
match_query = {"match": {"description": "Apple"}}
results = es.search(index=INDEX_NAME, query=match_query)

print("Match Query - 'Apple' in description:")
for hit in results['hits']['hits']:
    product = hit['_source']
    print(f"  - {product['name']}")

In [None]:
# Term Query: exact-value match on the `category` keyword field
term_query = {"term": {"category": "smartphones"}}
results = es.search(index=INDEX_NAME, query=term_query)

print("Term Query - category='smartphones':")
for hit in results['hits']['hits']:
    product = hit['_source']
    print(f"  - {product['name']} (${product['price']})")

In [None]:
# Range Query: both bounds are inclusive (gte / lte)
price_bounds = {"gte": 500, "lte": 1000}
results = es.search(index=INDEX_NAME, query={"range": {"price": price_bounds}})

print("Range Query - price between $500 and $1000:")
for hit in results['hits']['hits']:
    product = hit['_source']
    print(f"  - {product['name']} (${product['price']})")

In [None]:
# Bool Query: combine a `must` clause (category is laptops) with a
# `filter` clause (in_stock is true)
laptops_in_stock_query = {
    "bool": {
        "must": [{"match": {"category": "laptops"}}],
        "filter": [{"term": {"in_stock": True}}],
    }
}
results = es.search(index=INDEX_NAME, query=laptops_in_stock_query)

print("Bool Query - laptops in stock:")
for hit in results['hits']['hits']:
    product = hit['_source']
    print(f"  - {product['name']}")

## 7. Filters and Aggregations

In [None]:
# Terms Aggregation: one bucket per distinct `category` value.
# size=0 skips the hits themselves; only the aggregation result is read.
category_aggs = {"by_category": {"terms": {"field": "category"}}}
results = es.search(index=INDEX_NAME, size=0, aggs=category_aggs)

print("Products by Category:")
category_buckets = results['aggregations']['by_category']['buckets']
for bucket in category_buckets:
    print(f"  {bucket['key']}: {bucket['doc_count']} products")

In [None]:
# Stats Aggregation: count / min / max / avg / sum of `price` in one request.
# size=0 skips the hits themselves; only the aggregation result is read.
price_stats_aggs = {"price_stats": {"stats": {"field": "price"}}}
results = es.search(index=INDEX_NAME, size=0, aggs=price_stats_aggs)

stats = results['aggregations']['price_stats']

print("Price Statistics:")
print(f"  Count: {stats['count']}")
print(f"  Min: ${stats['min']}")
print(f"  Max: ${stats['max']}")
# avg and sum are rounded to two decimals for display
print(f"  Avg: ${stats['avg']:.2f}")
print(f"  Sum: ${stats['sum']:.2f}")

In [None]:
# Nested Aggregation - Avg price per category.
# Each `category` bucket carries two sub-aggregations: average price and
# average rating of the documents in that bucket.
per_category_averages = {
    "avg_price": {"avg": {"field": "price"}},
    "avg_rating": {"avg": {"field": "rating"}},
}
results = es.search(index=INDEX_NAME, size=0, aggs={
    "by_category": {
        "terms": {"field": "category"},
        "aggs": per_category_averages,
    }
})

print("Average Price & Rating by Category:")
category_buckets = results['aggregations']['by_category']['buckets']
for bucket in category_buckets:
    print(f"  {bucket['key']}: ${bucket['avg_price']['value']:.2f} (Rating: {bucket['avg_rating']['value']:.1f})")

## 8. Cleanup

In [None]:
# Delete index (uncomment to run).
# Left commented out on purpose so "Run All" keeps the demo index in place;
# uncomment the two lines below to remove the index and all its documents.
# es.indices.delete(index=INDEX_NAME)
# print(f"✓ Index '{INDEX_NAME}' deleted")