# Connect to Elasticsearch

In [None]:
import elasticsearch
from elasticsearch import Elasticsearch
from decouple import config

In [None]:
import urllib3

# Silence only the InsecureRequestWarning that urllib3 emits for unverified
# HTTPS requests (the client in this notebook may run with certificate
# verification disabled). Calling disable_warnings() with no argument would
# hide every urllib3 HTTPWarning, including unrelated ones worth seeing.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
# Display the version of the installed elasticsearch client package.
elasticsearch.__version__

In [None]:
# Build the Elasticsearch client from settings read through python-decouple:
#   ES_URL          -- cluster endpoint (required)
#   ES_PASS         -- password for the user below (required)
#   ES_USER         -- user name; defaults to "elastic"
#   ES_VERIFY_CERTS -- boolean; defaults to False, i.e. TLS certificates are
#                      NOT verified unless this is explicitly enabled
# NOTE(review): with verification off the connection is exposed to
# man-in-the-middle attacks; enable ES_VERIFY_CERTS outside local/dev setups.
client = Elasticsearch(
    config("ES_URL"),
    basic_auth=(config("ES_USER", default="elastic"), config("ES_PASS")),
    verify_certs=config("ES_VERIFY_CERTS", default=False, cast=bool),
)

In [None]:
# Request basic cluster information; a successful response shows that the
# URL and credentials configured above are usable.
client.info()

# Create indices

## Mapping for the articles index

In [None]:
# Create the "articles" index unless it already exists, so that re-running
# this cell (or the whole notebook) does not fail on an existing index.
# An existing index is left untouched: it keeps whatever settings and
# mappings it was created with, so delete it first to apply changes.
if client.indices.exists(index="articles"):
    print('Index "articles" already exists; skipping creation.')
else:
    print(
        client.indices.create(
            index="articles",
            settings={
                "number_of_shards": 2,
                "number_of_replicas": 1,
            },
            mappings={
                # "strict": documents containing fields not listed below
                # are rejected instead of extending the mapping.
                "dynamic": "strict",
                "properties": {
                    # Identifiers are exact-match keywords, not analyzed text.
                    "article_id": {"type": "keyword"},
                    "doi": {"type": "keyword"},
                    "pmc_id": {"type": "keyword"},
                    "pubmed_id": {"type": "keyword"},
                    "arxiv_id": {"type": "keyword"},
                    # Full-text searchable fields.
                    "title": {"type": "text"},
                    "authors": {"type": "text"},
                    "abstract": {"type": "text"},
                    "journal": {"type": "keyword"},
                    # NOTE(review): only full yyyy-MM-dd dates match this
                    # format -- confirm the source data has no partial dates.
                    "publish_time": {"type": "date", "format": "yyyy-MM-dd"},
                    "license": {"type": "keyword"},
                    "is_english": {"type": "boolean"},
                },
            },
        )
    )

## Mapping for the paragraphs index

In [None]:
# Create the "paragraphs" index unless it already exists, so that re-running
# this cell (or the whole notebook) does not fail on an existing index.
# An existing index is left untouched: it keeps whatever settings and
# mappings it was created with, so delete it first to apply changes.
if client.indices.exists(index="paragraphs"):
    print('Index "paragraphs" already exists; skipping creation.')
else:
    print(
        client.indices.create(
            index="paragraphs",
            settings={
                "number_of_shards": 2,
                "number_of_replicas": 1,
            },
            mappings={
                # "strict": documents containing fields not listed below
                # are rejected instead of extending the mapping.
                "dynamic": "strict",
                "properties": {
                    "article_id": {"type": "keyword"},
                    "section_name": {"type": "keyword"},
                    "paragraph_id": {"type": "short"},
                    "text": {"type": "text"},
                    "is_bad": {"type": "boolean"},
                    # Indexed vector field with 384 dimensions.
                    # NOTE(review): per the Elasticsearch dense_vector docs,
                    # "dot_product" similarity expects unit-length vectors --
                    # TODO confirm embeddings are normalized before indexing.
                    "embedding": {
                        "type": "dense_vector",
                        "dims": 384,
                        "index": True,
                        "similarity": "dot_product",
                    },
                },
            },
        )
    )

## Check that the indices exist

In [None]:
# Fetch the alias listing and take its keys (one per index name); the final
# expression shows those names in alphabetical order as the cell output.
alias_listing = client.indices.get_alias()
indices = alias_listing.keys()
sorted(indices)