# Load

In [7]:
import panel as pn
from dotenv import load_dotenv
import param

import sys
sys.path.append('..')

load_dotenv()

pn.extension()

# Lance Playground

In [2]:
import lancedb
import pandas as pd
import pyarrow as pa

uri = "data/sample-lancedb"
db = lancedb.connect(uri)

# LanceDb offers both a synchronous and an asynchronous client.  There are still a
# few operations that are only supported by the synchronous client (e.g. embedding
# functions, full text search) but both APIs should soon be equivalent

# In this guide we will give examples of both clients.  In other guides we will
# typically only provide examples with one client or the other.
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)

In [13]:
data = [
    {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
    {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]

# Synchronous client
tbl = db.create_table("my_table", data=data)
# Asynchronous client
async_tbl = await async_db.create_table("my_table2", data=data)

In [5]:
async_db.op

AttributeError: 'AsyncConnection' object has no attribute 'url'

# Main

In [45]:
class Collection(param.Parameterized):
    url = param.String(default="", doc="""
        The url of the data folder""")
    collection_name = param.String(default="default", doc="""
        The name of the collection""")
    db = param.Parameter(default=None)
    collection = param.Parameter(default=None)

    def load_db(self, url: str):
        """Loads a database from a url or creates a new one"""
        pass

    def load_collection(self, collection_name: str):
        """Loads a collection from the database"""
        pass

    def add_items(self, items: list[dict]):
        pass

from pydantic import create_model



class LanceDBCollection(Collection):
    url = param.String(default="data/lancedb", doc="""
        The url of the database""")
    schema = param.Parameter(
        default=create_model('Model', text=(str, ...), vector=(int, ...)),
        doc="""The pydantic schema of the collection""")

    def __init__(self, **params):
        super().__init__(**params)
        self.load_collection(self.collection_name)
    
    def load_collection(self, collection_name: str):
        """Loads a collection from the database"""
        self.db = lancedb.connect(self.url)
        self.collection = self.db.create_table(
            self.collection_name,
            schema=self.schema,
            exist_ok=True)
        
    def add_item(self, item: dict):
        """Adds an item to the collection"""
        self.collection.add(item)

    def add_items(self, items: list[dict]):
        """Adds items to the collection"""
        for item in items:
            self.add_item(item)

    

    

In [25]:
from lancedb.pydantic import LanceModel, Vector

model = create_model('test',__base__=LanceModel, vector=(Vector(768), ...))

db.create_table('text_v', schema=model)


LanceTable(connection=LanceDBConnection(/workspaces/pyllments/dev_nbs/data/sample-lancedb), name="text_v")

In [33]:
import pyarrow as pa

schema = pa.schema([
    pa.field('text', pa.string()),
    pa.field('vector', pa.list_(pa.float32(), 768))
])

tbl = db.create_table('text_v', schema=schema, mode='overwrite')

In [40]:
import numpy as np

# Create a 768-dimensional array filled with zeros
array_768 = np.ones(758)


In [44]:
tbl.search(np.ones(768)).to_list()

[{'text': 'some test text',
  'vector': [1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
  

In [41]:
tbl.add([{'text':'some test text', 'vector': array_768}])

ArrowTypeError: Size of FixedSizeList is not the same. input list: fixed_size_list<item: float>[758] output list: fixed_size_list<item: float>[768]

In [39]:
tbl.head()

pyarrow.Table
text: string
vector: fixed_size_list<item: float>[768]
  child 0, item: float
----
text: [["some test text"]]
vector: [[[1,1,1,1,1,...,1,1,1,1,1]]]

In [26]:
import lancedb

db = lancedb.connect("./.lancedb")

data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
        {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}]

db.create_table("my_table", data)

db["my_table"].head()

pyarrow.Table
vector: fixed_size_list<item: float>[2]
  child 0, item: float
lat: double
long: double
----
vector: [[[1.1,1.2],[0.2,1.8]]]
lat: [[45.5,40.1]]
long: [[-122.7,-74.1]]

In [29]:
db["my_table"].to_pandas()

Unnamed: 0,vector,lat,long
0,"[1.1, 1.2]",45.5,-122.7
1,"[0.2, 1.8]",40.1,-74.1


In [10]:
from lancedb.pydantic import LanceModel, Vector
from pydantic import create_model

from pyllments.base.model_base import Model


class RetrieverModel(Model):
    collection = param.ClassSelector(class_=Collection, doc="""
        The collection to retrieve from""")
    collection_name = param.String(default="", doc="""
        The name of the collection""")
    url = param.String(default="", doc="""
        The url of the database""")
    embedding_dims = param.Integer(default=768, doc="""
        The dimension of the embedding""")

    def __init__(self, **params):
        super().__init__(**params)
        if not self.collection_name:
            self.collection_name = self.name
        schema = pa.schema([
            pa.field('text', pa.string()),
            pa.field('embedding', pa.list_(pa.float32(), self.embedding_dims))
        ])
        self.collection = LanceDBCollection(
            collection_name=self.collection_name,
            schema=schema
        )
    
    
    


from typing import Union
from pyllments.base.element_base import Element
from pyllments.payloads.chunk import ChunkPayload


class RetrieverElement(Element):


    def __init__(self, **params):
        super().__init__(**params)
        if not self.collection_name:
            self.collection_name = self.name
        self.model = RetrieverModel()
        
    def _chunk_load_input_setup(self):
        """For the collection populating process"""
        def unpack(payload: Union[ChunkPayload, list[ChunkPayload]]):
            pass
    def _chunk_query_input_setup(self):
        """The input query used for retrieval"""
        pass

    def _chunk_result_output_setup(self):
        """The output of the retrieval process"""
        pass


In [11]:
retriever = RetrieverElement()


In [12]:
retriever.name

'RetrieverElement00117'