# Pathway Retriever

In [None]:
from llama_index.retrievers import PathwayRetriever, PathwayVectorServer
import pathway as pw

AttributeError: type object 'pathway.engine.ConnectorMode' has no attribute 'STREAMING'

## Define data sources for Pathway

In [None]:
# Each entry is a `pathway` connector. This one tracks all the files in the
# sample_documents directory, reading them as binary together with their
# metadata.
data_sources = [
    pw.io.fs.read(
        "sample_documents/",
        format="binary",
        mode="streaming",
        with_metadata=True,
    ),
]

## Define the transformation pipeline

In [None]:
from llama_index.embeddings import OpenAIEmbedding
from llama_index.node_parser import TokenTextSplitter

# Embedding step; `embed_batch_size=10` sets how many texts go into one
# embedding request.
embed_model = OpenAIEmbedding(embed_batch_size=10)

# Ordered list of transformations handed to the Pathway server: first split
# the text on spaces into chunks of size 80 with an overlap of 40, then embed
# the resulting chunks.
transformations_example = [
    TokenTextSplitter(chunk_size=80, chunk_overlap=40, separator=" "),
    embed_model,
]

## Run the Server

In [None]:
# Host and port the Pathway vector server listens on. The retriever created
# further down connects to this same address.
PATHWAY_HOST = "127.0.0.1"
PATHWAY_PORT = 8754

# The server gets its own name instead of sharing `pr` with the retriever:
# with a shared name, re-running this cell out of order would leave `pr`
# pointing at the server, and the later `pr.retrieve(...)` call would be made
# on the wrong object.
vector_server = PathwayVectorServer(
    *data_sources,
    transformations=transformations_example,
)

# NOTE(review): `threaded=True` presumably starts the server in a background
# thread so the notebook can continue to the next cell -- confirm against the
# PathwayVectorServer documentation.
vector_server.run_server(
    host=PATHWAY_HOST, port=PATHWAY_PORT, with_cache=False, threaded=True
)

## Create the retriever for LlamaIndex

In [None]:
# Retriever client pointed at the host and port the Pathway server was
# started on.
pr = PathwayRetriever(
    host=PATHWAY_HOST,
    port=PATHWAY_PORT,
)

In [None]:
# Send a sample query through the retriever; "something" is just placeholder
# query text. Left as a bare expression so the notebook displays the result.
pr.retrieve(str_or_query_bundle="something")