In [11]:
from dotenv import load_dotenv
import os
from elasticsearch import helpers  # For bulk Data Uploading
from elasticsearch import Elasticsearch  # Base function for interacting with Elasticsearch
from elasticsearch import RequestError
from pprint import pprint
from load_ini import *

## Connect to Elasticsearch
This is done using your own API key, generated in Kibana, which should be stored locally on your machine. We stored our API key in a .env file in the same folder as this script, and use the Python module dotenv to load it as an environment variable.

In [12]:
# Load the variables defined in the local .env file (e.g. API_KEY) into the environment.
load_dotenv()

# Fail early with a clear message instead of passing api_key=None to the client,
# which would otherwise only show up later as an opaque authentication error.
api_key = os.getenv("API_KEY")
if not api_key:
    raise RuntimeError(
        "API_KEY is not set: add it to the .env file next to this notebook "
        "or export it as an environment variable."
    )

# NOTE(review): verify_certs=False disables TLS certificate verification. This is
# presumably meant for a local node with a self-signed certificate; do not reuse it
# against a remote cluster.
client = Elasticsearch("https://localhost:9200/", api_key=api_key, verify_certs=False)

# test client
print(client.info())

ConnectionError: Connection error caused by: ConnectionError(Connection error caused by: NewConnectionError(<elastic_transport._node._urllib3_chain_certs.HTTPSConnection object at 0x10bd822a0>: Failed to establish a new connection: [Errno 61] Connection refused))

## Create the dictionaries containing all the mappings

We read the list of mappings from the metadata.csv file, which is generated externally from the shared Excel file. The list is then transformed into a dictionary of mappings with the proper formatting.

In [10]:
# --- Settings a user may want to change ---------------------------------
# name of the Elasticsearch index
INDEX_NAME = "wind"

# set to True to update documents that already exist in the index
UPDATE = True

# --- Field mappings -----------------------------------------------------
# metadata.csv is expected in the current working directory
# (i.e. the same directory as this script)
DIR_NAME = os.getcwd()
FILE_NAME = "metadata.csv"
FILE_PATH = os.path.join(DIR_NAME, FILE_NAME)

# read the csv content, then build the dictionary of mappings from it
data, header = read_csv(FILE_PATH)
mappings = create_mapping(data, header)
pprint(mappings)

{'Model name': {'type': 'keyword'},
 'Publication/reference': {'type': 'keyword'},
 'Tfloor': {'meta': {'file': '.in'}, 'type': 'float'},
 'alpha_rad': {'meta': {'file': '.in'}, 'type': 'float'},
 'binary2_a': {'meta': {'file': '.setup'}, 'type': 'float'},
 'binary2_e': {'meta': {'file': '.setup'}, 'type': 'float'},
 'code hash': {'meta': {'file': 'header.txt'}, 'type': 'keyword'},
 'eccentricity': {'meta': {'file': '.setup'}, 'type': 'float'},
 'excitation_HI': {'meta': {'file': '.in'}, 'type': 'integer'},
 'f_acc': {'meta': {'file': '.in'}, 'type': 'float'},
 'icompanion_star': {'meta': {'file': '.setup'}, 'type': 'integer'},
 'icool_method': {'meta': {'file': '.in'}, 'type': 'integer'},
 'icooling': {'meta': {'file': '.in'}, 'type': 'integer'},
 'idust_opacity': {'meta': {'file': '.in'}, 'type': 'integer'},
 'ieos': {'meta': {'file': '.in'}, 'type': 'integer'},
 'iget_tdust': {'meta': {'file': '.in'}, 'type': 'integer'},
 'inclination': {'meta': {'file': '.setup'}, 'type': 'float'},

Add other settings to the index parameters

In [4]:
# "settings" is technically not needed if we are working on a simple local host,
# but can be changed to optimise search performance on a database that is hosted
# on a cluster and searched by multiple users.
# "mappings" is required if you wish to explicitly map fields to specific values.
index_definition = dict(
    settings={"number_of_shards": 1},
    mappings={"properties": mappings},
)

Create the index using the parameters set above.

In [None]:
# Create the index if it does not exist.
#
# The name that is checked and the name that is created must be the same
# (INDEX_NAME); previously the index was created under a hard-coded name, so
# the index used by the rest of the notebook was never created.
if client.indices.exists(index=INDEX_NAME):
    print("Index already exists, delete it if you want to recreate it")
else:
    # settings/mappings are passed as keyword arguments; the catch-all `body`
    # parameter is deprecated in the 8.x Python client.
    client.indices.create(
        index=INDEX_NAME,
        settings=index_definition["settings"],
        mappings=index_definition["mappings"],
    )


## Kibana

Once the index is created in Elasticsearch, you can view it in Kibana: go to **Management**, scroll down the left menu bar to the **Kibana** subsection, and click **Data Views**. From there, click **Create data view** at the top right to integrate the new index into the Kibana interface. This will allow you to see how Kibana interprets the index you have created.

## Loading data
Now that the index is created and visible in Kibana, we can start mapping models to the index.

In [2]:
# Location of the simulation data and the common prefix of the run files.
# Careful: DIR is an absolute path on the author's machine - change it to your own.
DIR = "/Users/camille/Documents/runs/phantom/database/wind"
PREFIX = "wind"

# The list of models is kept in an external text file
# (also a local absolute path - adapt it to your own setup).
list_dir = "/Users/camille/Documents/PhantomDatabase/"
list_name = "model_list.txt"
model_list_path = os.path.join(list_dir, list_name)
MODELS = read_model_list(model_list_path)


['a_3.7_e_0.0_eos_16_icooling_0_f_acc_0.8_vw_9.0_Rinj_1.3_mlr_1e-06_Tw_3000.0_iwr_0_wss_1.0', 'icompstar_2_subst_11_m1_1.6_aIn_5.0_eIn_0.0_m2_0.4_racc2_0.03_a_30.0_e_0.3_gamma_1.2_m3_1.5_racc3_0.03_eos_2_mu_2.381_icooling_1_icoolmeth_0_HIexcit_1_Tfloor_0.0_f_acc_0.8_vw_8.0_Rinj_1.2_mlr_1.1e-06_Tw_1500.0_iwr_5_wss_1.3_bound_1000.0', 'icompstar_2_subst_11_m1_1.6_aIn_5.0_eIn_0.0_m2_0.4_racc2_0.04_a_30.0_e_0.3_gamma_1.2_m3_1.5_racc3_0.04_eos_2_mu_2.381_icooling_1_icoolmeth_0_HIexcit_1_Tfloor_0.0_f_acc_0.8_vw_8.0_Rinj_1.2_mlr_1.1e-06_Tw_1500.0_iwr_4_wss_0.8_bound_1000.0', 'icompstar_2_subst_11_m1_1.6_aIn_5.0_eIn_0.0_m2_0.4_racc2_0.04_a_35.0_e_0.0_gamma_1.2_m3_0.1_racc3_0.04_eos_2_mu_1.26_icooling_1_icoolmeth_0_HIexcit_1_Tfloor_0.0_f_acc_0.8_vw_15.0_Rinj_1.2_mlr_1.1e-06_Tw_3000.0_iwr_4_wss_1.0_bound_1500.0', 'icompstar_2_subst_11_m1_1.6_aIn_5.0_eIn_0.0_m2_0.4_racc2_0.04_a_35.0_e_0.0_gamma_1.2_m3_0.1_racc3_0.04_eos_2_mu_1.26_icooling_1_icoolmeth_0_HIexcit_1_Tfloor_0.0_f_acc_0.8_vw_8.0_Rinj_1.

### Load information from .setup and .in file
We can upload documents of interest by indexing them using the parameters in their data files, for instance the .setup and .in files. It is preferable to load multiple models at a time.



In [3]:
# Add multiple models in a single bulk request.
operations = []
for model in MODELS[0:2]:
    # Check if a document already exists for this model. The value is used as the
    # Elasticsearch document id below (presumably query_document returns the
    # document _id, or a falsy value when there is no match - verify in load_ini).
    # Named doc_id so the builtin `id` is not shadowed.
    doc_id = query_document(client, INDEX_NAME, model)

    if doc_id and not UPDATE:
        print("Document already exists, skipping.")
        continue

    action = {"_index": INDEX_NAME, "_op_type": "index"}
    if doc_id:
        # Reuse the existing _id: an "index" operation with an explicit _id replaces
        # the stored document in place. The old document is therefore only
        # overwritten once the new data has been loaded and uploaded, instead of
        # being deleted up front and lost if loading or the bulk request fails.
        print("Document already exists, updating.")
        action["_id"] = doc_id

    # load data
    action["_source"] = LoadDoc(DIR, model, PREFIX, index_definition)
    operations.append(action)

pprint(operations)
# refresh=True makes the uploaded documents searchable as soon as the call returns
helpers.bulk(client, operations, refresh=True)


NameError: name 'INDEX_NAME' is not defined

### Now look into Dashboards: Analytics - Dashboards to visualise data
Documentation: https://www.elastic.co/guide/en/kibana/current/create-a-dashboard-of-panels-with-web-server-data.html

