# Understand Text Similarity Spaces

`TextSimilaritySpace` embeds text data so that you can run semantic search on top of it.

In [1]:
# Install a pinned Superlinked release so the notebook runs against a known version of the library.
%pip install superlinked==3.13.0

In [2]:
import pandas as pd

from superlinked.framework.common.schema.id_schema_object import IdField
from superlinked.framework.common.schema.schema import schema
from superlinked.framework.common.schema.schema_object import String
from superlinked.framework.dsl.index.index import Index
from superlinked.framework.dsl.space.text_similarity_space import TextSimilaritySpace
from superlinked.framework.dsl.query.param import Param

from superlinked.framework.dsl.executor.in_memory.in_memory_executor import (
    InMemoryExecutor,
)
from superlinked.framework.dsl.source.in_memory_source import InMemorySource
from superlinked.framework.dsl.query.query import Query

# Show up to 100 characters per column when DataFrames are displayed - wide enough
# for the paragraph bodies used in this notebook.
pd.set_option("display.max_colwidth", 100)

In [3]:
# Schema of the objects used throughout this notebook: a paragraph has an
# identifier and a text body.
@schema
class Paragraph:
    # Identifier of the paragraph.
    id: IdField
    # Text of the paragraph; this is the field handed to the text similarity space.
    body: String


# Schema instance; its fields (e.g. `paragraph.body`) are what the space, the
# source and the query refer to.
paragraph = Paragraph()

## Choosing your text embedding model

Currently, we support [Sentence Transformers](https://www.sbert.net/docs/pretrained_models.html#model-overview) models for embedding textual data, as they are tailor-made for information retrieval on sentence-length text.

Supply the [Hugging Face identifier](https://huggingface.co/sentence-transformers) of the model of your choosing, and set the relevant schema field containing your text inputs.

In [4]:
# Hugging Face identifier of the Sentence Transformers model used for embedding.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# Space that embeds the `body` field of each paragraph with the model above.
body_space = TextSimilaritySpace(text=paragraph.body, model=EMBEDDING_MODEL_NAME)

# Index built from that single space; it is what the query below searches in.
paragraph_index = Index(body_space)

In [5]:
# In-memory source through which `Paragraph` records are ingested.
source: InMemorySource = InMemorySource(paragraph)
# The in-memory executor is given the source and the index it should serve.
executor = InMemoryExecutor(sources=[source], indices=[paragraph_index])
# Start the executor; the returned app is the object queries are run against.
app = executor.run()

In [6]:
# Two paragraphs on unrelated topics (wildlife vs. AI), so that a topical query
# has one clearly closer match.
paragraphs = [
    {"id": "paragraph-1", "body": "Glorious animals live in the wilderness."},
    {"id": "paragraph-2", "body": "Growing computation power enables advancements in AI."},
]

# Ingest the records through the in-memory source.
source.put(paragraphs)

In [7]:
# Query definition, one step per line:
#   - search in `paragraph_index`,
#   - return `paragraph` objects,
#   - compare them to a text in the body space; the text itself is left open as
#     the `query_text` parameter and supplied when the query is run.
query = (
    Query(paragraph_index)
    .find(paragraph)
    .similar(body_space.text, Param("query_text"))
)

In [8]:
# Run the query. The keyword argument name matches the `Param("query_text")`
# declared in the query definition, which is how the search text is supplied.
result = app.query(
    query,
    query_text="What makes the AI industry go forward?",
)

In [9]:
# Gather the stored object of every result entry, in the order the entries were
# returned, and display them as a table.
stored_objects = [entry.stored_object for entry in result.entries]
pd.DataFrame(stored_objects)

Unnamed: 0,body,id
0,Growing computation power enables advancements in AI.,paragraph-2
1,Glorious animals live in the wilderness.,paragraph-1
