In [1]:
from pydantic import computed_field, Field
from process_model.steps.elasticsearch.document import Document

In [3]:
# A minimal document: declaring an `id` field is enough to supply the
# document's `_id` (the recorded output below shows `id` reported separately
# from the document body).
class Car(Document):
    id: str
    wheels: int = 4  # optional; defaults to 4 when not supplied

# Only `id` is supplied, so `wheels` falls back to its default of 4.
car = Car(id='alpha')
print(car)
# Show the index action generated for this document.
print(car.get_index_action())

id='alpha' wheels=4
{'id': 'alpha', 'document': {'wheels': 4}}


In [4]:
# A document whose `_id` comes from overriding `get_id` rather than from
# declaring an `id` field.
class Fruit(Document):
    # Numbers passed as `fruit_id` are coerced to str.
    fruit_id: str = Field(coerce_numbers_to_str=True)
    type: str
    color: str
    weight: float

    def get_id(self) -> str:
        """Return `fruit_id` as this document's id."""
        return self.fruit_id


# `fruit_id` is deliberately passed as an int: the field coerces numbers to
# str, so the static type checker is silenced on that line.
fruit = Fruit(
    fruit_id=1, # type: ignore
    type='apple',
    color='red',
    weight=0.1
) 

# Show the model repr, then the index action generated for it.
print(fruit)
print(fruit.get_index_action())

fruit_id='1' type='apple' color='red' weight=0.1
{'id': '1', 'document': {'fruit_id': '1', 'type': 'apple', 'color': 'red', 'weight': 0.1}}


In [5]:
# Raw keyword arguments for a Fruit. `fruit_id2` is an extra key that is not
# declared on Fruit.
values = {
    'fruit_id': 1,
    'fruit_id2': 2,
    'type': 'apple',
    'color': 'g',
    'weight': 0.1,
}
values

{'fruit_id': 1, 'fruit_id2': 2, 'type': 'apple', 'color': 'g', 'weight': 0.1}

In [6]:
# Construct from a dict by unpacking it as keyword arguments. The extra
# `fruit_id2` key has no matching Fruit field and does not appear on the
# resulting model.
Fruit(**values)

Fruit(fruit_id='1', type='apple', color='g', weight=0.1)

In [7]:
# A pandas Series (think 'row') is dict-like rather than a dict: `to_dict()`
# converts it into a plain dict that can be unpacked into Fruit.
from pandas import Series
s = Series(values)
Fruit(**s.to_dict())

Fruit(fruit_id='1', type='apple', color='g', weight=0.1)

In [8]:
from pandas import DataFrame, MultiIndex
# Three sample rows indexed by `fruit_id`. `fruit_id2` is not a Fruit field
# and is missing (None) in the last row.
df = DataFrame.from_records([
    dict(fruit_id=1, fruit_id2=2, type='apple', color='green', weight=0.1),
    dict(fruit_id=2, fruit_id2=2, type='agave', color='indigo', weight=0.2),
    dict(fruit_id=3, fruit_id2=None, type='persimmon', color='yellowish', weight=0.8)
]).set_index('fruit_id')



display(df)
# NOTE: building the documents with `df.reset_index().apply(...)` and
# assigning the result back to `df` would not line up: `reset_index()`
# renumbers the rows from 0 while `df` is indexed by `fruit_id`. The
# `from_dataframe` helper defined in a later cell keeps the index instead.

Unnamed: 0_level_0,fruit_id2,type,color,weight
fruit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2.0,apple,green,0.1
2,2.0,agave,indigo,0.2
3,,persimmon,yellowish,0.8


In [9]:
def from_dataframe(df:DataFrame, cls:type) -> Series:
    """Build one `cls` instance per row of `df`.

    Each row is passed to `cls` as keyword arguments keyed by column name.
    When every index level has a name, the index levels are passed as keyword
    arguments too (an index level overrides a column of the same name).

    Args:
        df: source rows; the frame is copied and never modified.
        cls: callable accepting a row's columns as keyword arguments.

    Returns:
        An object-dtype Series of `cls` instances sharing `df`'s index.
    """
    frame = df.copy()

    # Unnamed index levels cannot become keyword arguments, so the index is
    # only folded into the columns when every level has a (truthy) name.
    if all(frame.index.names):
        for level, name in enumerate(frame.index.names):
            frame[name] = frame.index.get_level_values(level)

    # Build the records column-wise instead of with `apply(axis=1)`: a row
    # Series has a single dtype, so an all-numeric frame would turn ints into
    # floats (1 -> 1.0) before they reach `cls`. Constructing the Series
    # explicitly also guarantees an empty Series for an empty frame.
    records = frame.to_dict(orient='records')
    return Series(
        [cls(**record) for record in records],
        index=frame.index,
        dtype=object,
    )

# Build one Fruit per row of `df`; the result keeps `df`'s `fruit_id` index.
from_dataframe(df, Fruit)

fruit_id
1    fruit_id='1' type='apple' color='green' weight...
2    fruit_id='2' type='agave' color='indigo' weigh...
3    fruit_id='3' type='persimmon' color='yellowish...
dtype: object

In [10]:
# Attach the documents as a new `document` column. Note that this mutates
# `df` in place, so the frame displayed in the earlier cell is now stale.
df['document'] = from_dataframe(df, Fruit)
df

Unnamed: 0_level_0,fruit_id2,type,color,weight,document
fruit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2.0,apple,green,0.1,fruit_id='1' type='apple' color='green' weight...
2,2.0,agave,indigo,0.2,fruit_id='2' type='agave' color='indigo' weigh...
3,,persimmon,yellowish,0.8,fruit_id='3' type='persimmon' color='yellowish...
