In [7]:
import json
from pathlib import Path
from elasticsearch import Elasticsearch
from process_framework import Reference
from process_framework.steps.elasticsearch import Document, DataFrameToDocuments, IndexDocuments
from pandas import DataFrame, Series

import os
# Look the variable up with .get() so that a missing WORKSPACE_ROOT reaches the
# assertion message below; indexing os.environ[...] would raise a bare KeyError
# before the assert (and its explanation) could ever fire.
assert (root := os.environ.get("WORKSPACE_ROOT")), 'expected to find `WORKSPACE_ROOT` env var with path to workspace root'

In [8]:
# Index that the rest of this notebook deletes, writes to and searches.
INDEX = 'test-process-model-index-step'

# Client keyword arguments are loaded from <root>/secrets/elasticsearch.json.
elasticsearch = Elasticsearch(
    **json.loads((Path(root) / 'secrets' / 'elasticsearch.json').read_text())
)
elasticsearch

<Elasticsearch(['https://746546c864f349c8b41303b0a122ca9b.uksouth.azure.elastic-cloud.com:443'])>

In [3]:
from elasticsearch import NotFoundError

# Start from a clean slate: drop the index left behind by a previous run.
# Only "index does not exist" is expected and ignored here; any other failure
# (bad credentials, network problems, a typo) should surface rather than be
# silently swallowed.
try:
    elasticsearch.indices.delete(index=INDEX)
except NotFoundError:
    pass

In [4]:
# Two sample car registrations; `colour` holds a list per row.
records = [
    {'name': 'Simon', 'car': 'Volvo', 'reg': 'EB22 YBV', 'colour': ['grey', 'black']},
    {'name': 'Dave', 'car': 'Porche', 'reg': "F4ST", 'colour': ['red', 'red', 'red']},
]

# Build the frame with the registration plate as the row index.
df = DataFrame.from_records(data=records, index='reg')

df

Unnamed: 0_level_0,name,car,colour
reg,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
EB22 YBV,Simon,Volvo,"[grey, black]"
F4ST,Dave,Porche,"[red, red, red]"


In [5]:
# Wrap the sample frame in a DataFrame-typed Reference; this is the object
# later handed to DataFrameToDocuments as its first argument.
# NOTE(review): Reference semantics live in process_framework — presumably a
# typed value holder shared between steps; confirm against the library.
df_ref = Reference(DataFrame, df)
df_ref

Reference[DataFrame](           name     car           colour
reg                                     
EB22 YBV  Simon   Volvo    [grey, black]
F4ST       Dave  Porche  [red, red, red])

In [6]:
from typing import Any

class CarRegistration(Document):
    """Document model for a single car-registration record.

    The fields mirror the keys of the sample ``records`` dicts defined
    earlier in this notebook.
    """

    name: str
    car: str
    reg: str
    colour: list[str]

    def get_id(self) -> Any:
        """Return the registration plate (``reg``) as this document's id.

        NOTE(review): presumably consumed by the indexing step as the
        Elasticsearch document id — confirm against ``Document``.
        """
        return self.reg

# Sanity check: build a document from the first raw record and display it.
# NOTE(review): the `type: ignore` presumably silences the type checker's
# complaint about unpacking an untyped dict into typed fields — confirm.
CarRegistration(**records[0]) # type: ignore

CarRegistration(name='Simon', car='Volvo', reg='EB22 YBV', colour=['grey', 'black'])

In [7]:
# Series-typed reference created without a value; it is handed to the step
# below and displayed afterwards.
docs_ref = Reference(Series)

# Step that takes the DataFrame reference and the (still empty) documents
# reference, building CarRegistration documents.
to_documents = DataFrameToDocuments(
    df_ref,
    docs_ref,
    document_type=CarRegistration,
)
to_documents.do()

docs_ref

Reference[Series](reg
EB22 YBV    name='Simon' car='Volvo' reg='EB22 YBV' colour...
F4ST        name='Dave' car='Porche' reg='F4ST' colour=['r...
dtype: object)

In [8]:
from time import sleep

In [9]:
# Index the documents held in `docs_ref` into INDEX. The index was deleted
# earlier in the notebook, hence assert_index_exists=False.
index_docs = IndexDocuments(
    subject=docs_ref,
    elasticsearch=elasticsearch,
    index=INDEX,
    assert_index_exists=False
)

index_docs.do()

# Make the freshly indexed documents searchable before the next cell queries
# them. An explicit refresh is deterministic, whereas a fixed sleep merely
# hopes the periodic refresh has already happened. The trailing semicolon
# suppresses the response repr in the cell output.
elasticsearch.indices.refresh(index=INDEX);

(2, [])


In [10]:
# Fetch whatever is in the index and show the `hits` section of the response.
# The result is bound to a name and displayed as the cell's last top-level
# expression: a bare expression inside `try:` is evaluated and discarded, so
# the cell would otherwise show nothing on success.
hits = None
try:
    hits = elasticsearch.search(
        index=INDEX
    ).body['hits']
except Exception as e:
    # Best-effort demo cell: report the failure instead of aborting the run.
    print(e)

hits