# Cell similarity example 

In [1]:
import numpy as np
import random
import tiledb
import tiledbsoma

from tiledb.vector_search.object_api import object_index
from tiledb.vector_search.object_readers import SomaRNAXRowReader
from tiledb.vector_search.embeddings import SomaGenePTwEmbedding

# Locations used throughout the notebook. Every path hangs off `base_uri`, so
# the demo can be pointed elsewhere by editing `dataset` / `base_uri` only.
dataset = "soma"
base_uri = f"/tmp/{dataset}_demo"
soma_uri = f"{base_uri}/tabula-sapiens-immune"
# Derived from base_uri like its siblings; it used to be a separately
# hard-coded "/tmp/soma_demo/..." literal that would go stale if `dataset`
# were changed. The resulting value is unchanged.
gene_embeddings_uri = f"{base_uri}/GPT_3_5_gene_embeddings"
index_uri = f"{base_uri}/index"
# Configuration dict handed to the TileDB, SOMA and vector-search calls below;
# left empty here.
config = {}

# Create vector search index

In [2]:
# Start from an empty index directory: remove whatever already exists at
# index_uri, then recreate the directory.
vfs = tiledb.VFS(config=config)
if vfs.is_dir(index_uri):
    vfs.remove_dir(index_uri)
vfs.create_dir(index_uri)

# Object reader over the SOMA experiment at soma_uri.
soma_reader = SomaRNAXRowReader(
    uri=soma_uri,
    cells_per_partition=10000,
    config=config,
)

# Embedding fed by the gene embeddings stored at gene_embeddings_uri
# (presumably the GenePT-w scheme, going by the class name; confirm).
soma_embedder = SomaGenePTwEmbedding(
    gene_embeddings_uri=gene_embeddings_uri,
    soma_uri=soma_uri,
    config=config,
)

# Create the IVF_FLAT object index that ties the reader and embedding together.
index = object_index.create(
    uri=index_uri,
    index_type="IVF_FLAT",
    object_reader=soma_reader,
    embedding=soma_embedder,
    config=config,
)

# Populate the index, using 5 workers and at most 5 tasks per stage.
index.update_index(max_tasks_per_stage=5, workers=5)

# Query

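Pick a random cell from the dataset to use as the query, and build a `tissue` filter from its metadata for the restricted search further down.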

In [3]:
import pandas as pd
def tiledb_to_pandas(obs, columns=None):
    """Build a DataFrame from the metadata returned by ``index.query``.

    Args:
        obs: Mapping from column name to an indexable whose element 0 holds
            that column's values. (Presumably there is one entry per query
            vector, so element 0 is the single query used in this notebook;
            confirm against the vector-search API.)
        columns: Names of the columns to keep, in output order. When omitted,
            the notebook-level ``column_names`` list is used, which is what
            the original one-argument form always relied on.

    Returns:
        A ``pandas.DataFrame`` with one column per requested name.

    Raises:
        NameError: If ``columns`` is omitted and ``column_names`` has not been
            defined in the notebook yet.
    """
    if columns is None:
        # Fall back to the notebook global so existing `tiledb_to_pandas(obs)`
        # calls keep producing the same frame.
        columns = column_names
    return pd.DataFrame(data={name: obs[name][0] for name in columns})

# Pick the query cell. random.randint includes both endpoints, so rid lies in
# [0, 3600]. The upper bound is hard-coded; presumably it was chosen to stay
# inside the dataset's soma_joinid range (confirm if the dataset changes).
rid = random.randint(0, 3600)
query_cell = soma_reader.read_objects_by_external_ids(np.array([rid]))

# Look up the query cell's metadata row. The experiment is opened in a `with`
# block so the handle is closed once the row has been materialised as a
# DataFrame; previously it was opened and never closed.
column_names = ["soma_joinid", "cell_type", "tissue"]
context = tiledbsoma.SOMATileDBContext(tiledb_ctx=tiledb.Ctx(config))
with tiledbsoma.Experiment.open(soma_uri, "r", context=context) as exp:
    query_metadata = (
        exp.obs.read(coords=[rid], column_names=column_names)
        .concat()
        .to_pandas()
    )

# Condition string reused by the restricted query: it matches the tissue of
# the query cell.
query_cell_tissue = query_metadata["tissue"].values[0]
metadata_array_cond = f"tissue == '{query_cell_tissue}'"
query_metadata

Unnamed: 0,soma_joinid,cell_type,tissue
0,613,monocyte,liver


## Similarity query

In [4]:
# Query the index with the cell picked above (k=10, nprobe=2), then tabulate
# the returned metadata.
distances, cells, obs = index.query(
    query_cell,
    k=10,
    nprobe=2,
    return_objects=False,
)
tiledb_to_pandas(obs)


Unnamed: 0,soma_joinid,cell_type,tissue
0,613,monocyte,liver
1,1488,monocyte,liver
2,1161,macrophage,liver
3,2159,monocyte,liver
4,244668,macrophage,rectus abdominis muscle
5,117618,macrophage,bladder organ
6,186324,macrophage,bladder organ
7,186033,macrophage,bladder organ
8,195183,macrophage,bladder organ
9,970,macrophage,liver


## Similarity query with structured restrict

Find similar cells from the same tissue as the query cell by passing a condition on the `tissue` metadata column to the query.

In [5]:
# Same query as before, but with the metadata condition built from the query
# cell's tissue passed as `metadata_array_cond`.
distances, cells, obs = index.query(
    query_cell,
    metadata_array_cond=metadata_array_cond,
    k=10,
    nprobe=2,
    return_objects=False,
)
tiledb_to_pandas(obs)

Unnamed: 0,soma_joinid,cell_type,tissue
0,613,monocyte,liver
1,1488,monocyte,liver
2,1161,macrophage,liver
3,2159,monocyte,liver
4,970,macrophage,liver
5,804,macrophage,liver
6,119,monocyte,liver
7,406,monocyte,liver
8,865,monocyte,liver
9,210,macrophage,liver
