In [1]:
import json
from objectbox import (
    Entity,
    Id,
    String,
    Store,
    Box,
    Float32Vector,
    HnswIndex,
    VectorDistanceType,
)
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
rules_chunks_pth = Path("")

In [None]:
class Metadata(BaseModel):
    section: str
    subsection: str


class RulePydantic(BaseModel):
    id: str
    content: str
    metadata: Metadata

In [None]:
@Entity()
class Rule:
    id = Id
    entity_id = String
    content = String
    section = String
    subsection = String
    game = String
    vector = Float32Vector(
        index=HnswIndex(dimensions=768, distance_type=VectorDistanceType.EUCLIDEAN)
    )

In [5]:
rules_lines = [RulePydantic(**obj) for obj in rules]

In [6]:
store = Store()
rules_box = Box(store=store, entity=Rule)

---

In [7]:
model = SentenceTransformer("google/embeddinggemma-300m").to("cpu")

In [None]:
box_objects = [
    Rule(
        entity_id=obj.id,
        content=obj.content,
        section=obj.metadata.section,
        subsection=obj.metadata.subsection,
        game="Подземелье и пёсики",
        vector=model.encode(obj.content).tolist(),
    )
    for obj in tqdm(rules_lines)
]

100%|██████████| 50/50 [00:13<00:00,  3.63it/s]


In [9]:
rules_box.put(box_objects)

In [10]:
rules_box.get(1)

<__main__.Rule at 0x7f23c140a750>

In [11]:
store.close()