In [30]:
from opensearchpy import OpenSearch
import pandas as pd
from datetime import datetime

## Connect to OpenSearch
Set the API endpoint URL, the search query and any required headers or parameters. Set the authentication credentials

In [31]:
client = OpenSearch(
    hosts = [{"host": "localhost", "port": 9200}],
    http_auth = ("admin", "admin"),
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False,
)
client.info()

{'name': 'opensearch-node1',
 'cluster_name': 'opensearch-cluster',
 'cluster_uuid': 'qPUqDsgnSoKoMiCuxecfyw',
 'version': {'distribution': 'opensearch',
  'number': '2.5.0',
  'build_type': 'tar',
  'build_hash': 'b8a8b6c4d7fc7a7e32eb2cb68ecad8057a4636ad',
  'build_date': '2023-01-18T23:48:48.981786100Z',
  'build_snapshot': False,
  'lucene_version': '9.4.2',
  'minimum_wire_compatibility_version': '7.10.0',
  'minimum_index_compatibility_version': '7.0.0'},
 'tagline': 'The OpenSearch Project: https://opensearch.org/'}

## Create an Index

In [32]:
index_name = 'search_index'
body = {
  "mappings": {
    "properties": {
      "doc_name": {
        "type": "text",
        "analyzer": "edge_ngram_analyzer"
      },
      "doc_type": {
        "type": "text",
        "analyzer": "standard"
      },
      "link": {
        "type": "text",
        "analyzer": "standard"
      },
      "source": {
        "type": "text",
        "analyzer": "standard"
      },
      "created_date": {
        "type" : "date",
        "format" : "basic_date_time_no_millis"
      },
      "modified_date": {
        "type" : "date",
        "format" : "basic_date_time_no_millis"
      },
      "summary": {
        "type" : "text",
        "analyzer": "standard"
      },
      "file_size": {
        "type" : "text",
        "analyzer": "standard"
      }
    }
  },
  "settings": {
    "analysis": {
      "analyzer": {
        "edge_ngram_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "edge_ngram_filter"]
        }
      },
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 10
        }
      }
    }
  }
}
response = client.indices.create(index=index_name, body=body)

## Add data to index

First check the number of items in the index.

In [33]:
response = client.count(index=index_name)
id = int(response['count'])
print(id)

0


In [34]:
item1 = {
    "doc_name": "doc1",
    "doc_type": "txt",
    "link": "https://www.google.com/",
    "source": "Gmail",
    "date": None
}
item2 = {
    "doc_name": "doc2",
    "doc_type": "txt",
    "link": "https://www.youtube.com/",
    "source": "Gmail",
    "date": None
}
to_be_inserted = []
to_be_inserted.append(item1)
to_be_inserted.append(item2)

In [35]:
for body in to_be_inserted:
    client.index(index=index_name, id=str(id),body=body)
    id += 1

In [36]:
response = client.count(index=index_name)
id = int(response['count'])
print(id)

2


## Search Data

In [38]:

response = client.search(
    index=index_name,
    body={
        "query": {
            "match": {
                "doc_name": "doc1"
            }
        }          
    }
)
print(response)
print(type(response))

{'took': 22, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 2, 'relation': 'eq'}, 'max_score': 0.33425623, 'hits': [{'_index': 'search_index', '_id': '0', '_score': 0.33425623, '_source': {'doc_name': 'doc1', 'doc_type': 'txt', 'link': 'https://www.google.com/', 'source': 'Gmail', 'date': None}}, {'_index': 'search_index', '_id': '1', '_score': 0.3085442, '_source': {'doc_name': 'doc2', 'doc_type': 'txt', 'link': 'https://www.youtube.com/', 'source': 'Gmail', 'date': None}}]}}
<class 'dict'>


## Delete document from index

In [26]:
client.delete(index = index_name, id = "1")

{'_index': 'search_index',
 '_id': '1',
 '_version': 2,
 'result': 'deleted',
 '_shards': {'total': 2, 'successful': 2, 'failed': 0},
 '_seq_no': 4,
 '_primary_term': 1}

## Delete an index

In [28]:
client.indices.delete(index=index_name)

{'acknowledged': True}