# VectorDB Python SDK Tutorial

**Prerequisites:**
Make sure your VectorDB server is running locally on port 8000; the client in this tutorial connects to `http://localhost:8000`.

In [None]:
# Install the SDK in editable mode from the sibling ../sdk folder
# (assuming you are in the examples folder, so the relative path resolves).
# %pip, unlike !pip, installs into the environment of the running kernel.
%pip install -e ../sdk

# Install helper Python libraries for this tutorial:
# sentence-transformers is used to embed text, pandas to tabulate search results.
%pip install sentence-transformers pandas

In [None]:
from sentence_transformers import SentenceTransformer
from vectordb.client import Client

# Embedding model that turns text into vectors
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# SDK client pointed at the locally running VectorDB server
SERVER_URL = "http://localhost:8000"
client = Client(SERVER_URL)

In [7]:
import uuid

# Small demo corpus for semantic search: (text, category) pairs covering
# two unrelated topics, so that topical queries separate cleanly.
labelled_texts = [
    # Animals
    ("The dog barked loudly at the mailman.", "animal"),
    ("My cat loves to sleep on the windowsill.", "animal"),
    ("The golden retriever played fetch in the park.", "animal"),
    ("Lions are the kings of the jungle.", "animal"),
    # Finance
    ("The stock market saw a significant rally today.", "finance"),
    ("Investment strategies for 2026 look promising.", "finance"),
    ("Inflation rates are affecting consumer spending.", "finance"),
    ("Cryptocurrency volatility remains high.", "finance"),
]

# One record per pair, keyed by "text" and "category"
dataset = [{"text": text, "category": category} for text, category in labelled_texts]

# Raw texts, embedded in a later cell
documents = [record["text"] for record in dataset]

# The full records double as per-vector metadata (same list object, not a copy)
metadatas = dataset

# One random UUID string per record, used as the vector id
ids = [str(uuid.uuid4()) for _ in range(len(dataset))]

In [8]:
COLLECTION_NAME = "demo_notebook"
# Vector size the collection is created with; presumably this has to match the
# length of the embeddings produced by the model (TODO confirm if the model changes).
DIMENSION = 384

# Best-effort cleanup so this cell can be re-run: drop any collection left over
# from a previous run before creating a fresh one.
try:
    client.delete_collection(COLLECTION_NAME)
except Exception:
    # Deliberately ignored: presumably raised when the collection does not exist
    # yet (verify against the SDK's error types). Catching Exception instead of
    # using a bare `except:` lets KeyboardInterrupt and SystemExit propagate, so
    # the cell can still be interrupted.
    pass

# Create the collection using the cosine metric
collection = client.create_collection(
    name=COLLECTION_NAME,
    dimension=DIMENSION,
    metric="cosine",
)

print(f"Collection '{COLLECTION_NAME}' created.")

Collection 'demo_notebook' created.


In [9]:
# Embed all documents in a single call
vectors = model.encode(documents)

# Write ids, embeddings and metadata to the collection (batch_size=2);
# the return value is kept in `success`
success = collection.upsert(ids=ids, vectors=vectors, metadatas=metadatas, batch_size=2)

In [10]:
# Embed a free-text query and convert the embedding to a plain Python list
query_text = "financial news"
query_embedding = model.encode(query_text)
query_vector = query_embedding.tolist()

# Ask the collection for k=3 results for the query vector
results = collection.search(query=query_vector, k=3)

In [11]:
import pandas as pd

# Flatten each search result into one row: vector id, score, and the
# "text" / "category" fields stored in its metadata
data = [
    {
        "id": hit.vector.id,
        "score": hit.score,
        "text": hit.vector.metadata["text"],
        "category": hit.vector.metadata["category"],
    }
    for hit in results
]

# Render the rows as a table
df = pd.DataFrame(data)
display(df)

Unnamed: 0,id,score,text,category
0,49025188-4bc5-4e2f-8313-2ebf1ac99d70,0.3469,The stock market saw a significant rally today.,finance
1,4cceb42b-8001-4dd8-8947-3785ca382373,0.3419,Investment strategies for 2026 look promising.,finance
2,0e6abbf0-77d2-43b5-be6e-867abddebced,0.2478,Cryptocurrency volatility remains high.,finance
