In [1]:
import os
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk, scan, streaming_bulk

In [None]:
type_map = {
    "int":"integer",
    "float":"float",
    "double":"double",
    "str": "text",
    "bool": "boolean",
    "datetime": "date",
    "list[int]":"integer",
    "list[str]":"text",
    "list[float]": "float",
    "list[double]": "double",
    "torch.tensor": "dense_vector",
    "numpy.ndarray": "dense_vector"
}

In [244]:
def traverse_map (map_dict):
    original_map_dict = dict(map_dict)
    for k, v in map_dict.items():
        if isinstance(v, dict):
            traverse_map(v)
        else:
            try:
                map_dict[k] = {"type":TYPE_MAP[v.lower()]}
            except Exception as e:
                print(f'{e.__class__.__name__}: Key {v} not found in TYPE_MAP. Mapping not updated')
                return original_map_dict
    return map_dict


In [7]:
TYPE_MAP =  {
    "int":"integer",
    "float":"float",
    "double":"double",
    "str": "text",
    "bool": "boolean",
    "datetime": "date",
    "list[int]":"integer",
    "list[str]":"text",
    "list[float]": "float",
    "list[double]": "double",
    "torch.tensor": "dense_vector",
    "numpy.ndarray": "dense_vector"
}

In [34]:
user_map = {
    "name":"str",
    "age":"int",
    "education":{
        "primary":{
            "school":"str"
        },
        "secondary":"str",
        "tertiary":"str"
    }
}

In [247]:
final_map = {"mappings":{"properties":{}}}

In [248]:
updated_map = traverse_map(user_map)

KeyError: Key meow not found in TYPE_MAP. Mapping not updated


In [249]:
updated_map

{'name': {'type': 'text'},
 'age': {'type': 'integer'},
 'education': {'primary': {'school': 'meow'},
  'secondary': {'type': 'text'},
  'tertiary': {'type': 'text'}}}

In [8]:
TYPE_MAP =  {
    "int":"integer",
    "float":"float",
    "double":"double",
    "str": "text",
    "bool": "boolean",
    "datetime": "date",
    "list[int]":"integer",
    "list[str]":"text",
    "list[float]": "float",
    "list[double]": "double",
    "torch.tensor": "dense_vector",
    "numpy.ndarray": "dense_vector"
}

In [24]:
user_map = {
    "name":"str",
    "age":"int",
    "education":{
        "primary":{
            "school":"str"
        },
        "secondary":"str",
        "tertiary":"str"
    }
}

In [103]:

class DocMgr():
    def __init__(self):
        self.url = f"https://{os.environ.get('ELASTICSEARCH_HOST')}:{os.environ.get('ELASTICSEARCH_C_PORT')}"
        self.username = os.environ.get('ELASTIC_USERNAME')
        self.password = os.environ.get('ELASTIC_PASSWORD')
        self.client = Elasticsearch(self.url, 
                                    verify_certs=False, 
                                    basic_auth=(self.username, self.password))

        
    def _check_valid_values(self, map_dict:dict) -> int:
        """
        Traverse mapping dictionary to ensure that all types are valid types within TYPE_MAP

        Args:
            map_dict (dict): Mapping to be checked

        Returns:
            int: 0 if there is invalid types, 1 otherwise

        """
        ret_val = 1
        for k, v in map_dict.items():
            if isinstance(v, dict):
                ret_val = self._check_valid_values(v)
            else:
                if not v in TYPE_MAP:
                    print(f"'{v}' type for '{k}' NOT FOUND")
                    return 0

        return ret_val * 1
    
    def _traverse_map (self, map_dict:dict) -> int:
        """
        Traverse mapping dictionary to convert data type into framework specific type

        Args:
            map_dict (dict): Mapping to be used to create ES index

        Returns:
            dict: updated mapping dictionary

        """
        dictionary ={"properties":dict()}
        for k, v in map_dict.items():
            if isinstance(v, dict):
                dictionary['properties'][k]= self._traverse_map(v)
            else:
                dictionary['properties'][k]={"type":TYPE_MAP[v]}       
        return dictionary
    
    def create_collection(self, collection_name: str, schema: dict) -> dict:
        """
        Create the index on ElasticSearch

        Args:
            collection_name (str): Index name of ES
            schema (dict): Mapping to be used to create ES index

        Returns:
            dict: response of error, or 200 if no errors caught
            
        """
        try:
            assert type(schema)==dict
        except Exception as e:
            return {"response":f"{e.__class__.__name__}: Type of 'schema' is not dict"}
        try:
            assert type(collection_name)==str
        except Exception as e:
            return {"response":f"{e.__class__.__name__}: Type of 'collection_name' is not str"}

        mapping_validity = self._check_valid_values(schema)
        if not mapping_validity:
            return {"response": "KeyError: data type not found in TYPE_MAP"}
        updated_mapping = self._traverse_map(schema)
        try:
            self.client.indices.create(index=collection_name, mappings=updated_mapping)
        except Exception as e:
            return {"response":f"{e}"}
        return {"response":"200"}
    
    def delete_collection(self, collection_name: str) -> dict:
        """
        Create the index on ElasticSearch

        Args:
            collection_name (str): Index name of ES
            schema (dict): Mapping to be used to create ES index

        Returns:
            dict: response of error, or 200 if no errors caught

        """
        try:
            self.client.indices.delete(index=collection_name)
        except Exception as e:
            return {"response": f"{e}"}
        return {"response":"200"}
            

In [104]:
es_mgr = DocMgr()

  _transport = transport_class(


In [105]:
res = es_mgr.create_collection(collection_name = "meow", schema = user_map)



In [79]:
res

{'response': "BadRequestError(400, 'resource_already_exists_exception', 'index [meow/biznYgwtSJior605EMG0_g] already exists')"}

In [31]:
url = f"https://{os.environ.get('ELASTICSEARCH_HOST')}:{os.environ.get('ELASTICSEARCH_C_PORT')}"
client = Elasticsearch(url, verify_certs=False, basic_auth=('elastic', 'changeme'))

In [48]:
try:
    client.indices.create(index="meow", body=new_map)
except Exception as e:
    print(e)

BadRequestError(400, 'resource_already_exists_exception', 'index [meow/VhVyQpGkSiaZNhdPLnGM-A] already exists')


  client.indices.create(index="meow", body=new_map)


In [53]:
new_map

{'response': 'Mapping Error'}

In [32]:
user_map

{'name': 'str',
 'age': 'int',
 'education': {'primary': {'school': 'str'},
  'secondary': 'str',
  'tertiary': 'str'}}

In [41]:
gold_map = {
    "mappings": {
      "properties": {
        "age": {
          "type": "integer"
        },
        "education": {
          "properties": {
            "primary": {
              "properties": {
                "school": {
                  "type": "text",
                }
              }
            },
            "secondary": {
              "type": "text"
            },
            "tertiary": {
              "type": "text"
            }
          }
        },
        "name": {
          "type": "text"
        }
      }
    }
  }

In [40]:
user = {
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "age": {
        "type": "integer"
      },
      "education": {
        "properties": {
          "primary": {
            "properties": {
              "school": {
                "type": "text"
              }
            }
          },
          "secondary": {
            "type": "text"
          },
          "tertiary": {
            "type": "text"
          }
        }
      }
    }
  }
}

In [21]:
new_map = {}

def traverse(dict_map):
    dictionary ={"properties":dict()}
    for k, v in dict_map.items():
        if isinstance(v, dict):
            dictionary['properties'][k]= traverse(v)
        else:
            dictionary['properties'][k]=v
    return dictionary
        

In [22]:
aa = traverse(user_map)

In [23]:
aa

{'properties': {'name': 'str',
  'age': 'int',
  'education': {'properties': {'primary': {'properties': {'school': 'str'}},
    'secondary': 'str',
    'tertiary': 'str'}}}}

In [13]:
def _check_valid_values(dict_map):
    ret_val = 1
    for k, v in dict_map.items():
        if isinstance(v, dict):
            ret_val = _check_valid_values(v)
        else:
            if not v in TYPE_MAP:
                print(f"'{v}' type for '{k}' NOT FOUND")
                return 0
                
    return ret_val * 1

In [14]:
user_map = {
    "name":"str",
    "age":"int",
    "education":{
        "primary":{
            "school":"woof"
        },
        "secondary":"meow",
        "tertiary":"str"
    }
}

In [15]:
_check_valid_values(user_map)

'woof' type for 'school' NOT FOUND
'meow' type for 'secondary' NOT FOUND


0

In [44]:
print(gold_map)

{'mappings': {'properties': {'name': {'type': 'text'}, 'age': {'type': 'integer'}, 'education': {'properties': {'primary': {'properties': {'school': {'type': 'text'}}}, 'secondary': {'type': 'text'}, 'tertiary': {'type': 'text'}}}}}}


In [46]:
new_map

{'mappings': {'properties': {'name': {'type': 'text'},
   'age': {'type': 'integer'},
   'education': {'properties': {'primary': {'properties': {'school': {'type': 'text'}}},
     'secondary': {'type': 'text'},
     'tertiary': {'type': 'text'}}}}}}

In [96]:
def create_collection(collection_name: str, schema: dict) -> dict:
    assert type(collection_name) == str
    assert type(schema) == dict, "Meow"
    print(collection_name)

    print(schema)

In [97]:
create_collection("meow", ["meow"])

AssertionError: Meow