In [4]:
!python -m pip install -q "qdrant-client[fastembed]>=1.14.2"

In [5]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [6]:
from qdrant_client import QdrantClient, models

In [7]:
client = QdrantClient("http://localhost:6333") #connecting to local Qdrant instance

In [8]:
from fastembed import TextEmbedding

In [9]:
import requests

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

In [10]:
TextEmbedding.list_supported_models()

[{'model': 'BAAI/bge-base-en',
  'sources': {'hf': 'Qdrant/fast-bge-base-en',
   'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz',
   '_deprecated_tar_struct': True},
  'model_file': 'model_optimized.onnx',
  'description': 'Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2023 year.',
  'license': 'mit',
  'size_in_GB': 0.42,
  'additional_files': [],
  'dim': 768,
  'tasks': {}},
 {'model': 'BAAI/bge-base-en-v1.5',
  'sources': {'hf': 'qdrant/bge-base-en-v1.5-onnx-q',
   'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz',
   '_deprecated_tar_struct': True},
  'model_file': 'model_optimized.onnx',
  'description': 'Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: not so necessary, 2023 year.',
  'license': 'mit',
  'size_in_GB': 0.21,
  'additional_files': [],
  'dim': 768,
  'tasks': {}},
 {'model':

In [11]:
import json

EMBEDDING_DIMENSIONALITY = 512

for model in TextEmbedding.list_supported_models():
    if model["dim"] == EMBEDDING_DIMENSIONALITY:
        print(json.dumps(model, indent=2))

{
  "model": "BAAI/bge-small-zh-v1.5",
  "sources": {
    "hf": "Qdrant/bge-small-zh-v1.5",
    "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz",
    "_deprecated_tar_struct": true
  },
  "model_file": "model_optimized.onnx",
  "description": "Text embeddings, Unimodal (text), Chinese, 512 input tokens truncation, Prefixes for queries/documents: not so necessary, 2023 year.",
  "license": "mit",
  "size_in_GB": 0.09,
  "additional_files": [],
  "dim": 512,
  "tasks": {}
}
{
  "model": "Qdrant/clip-ViT-B-32-text",
  "sources": {
    "hf": "Qdrant/clip-ViT-B-32-text",
    "url": null,
    "_deprecated_tar_struct": false
  },
  "model_file": "model.onnx",
  "description": "Text embeddings, Multimodal (text&image), English, 77 input tokens truncation, Prefixes for queries/documents: not necessary, 2021 year",
  "license": "mit",
  "size_in_GB": 0.25,
  "additional_files": [],
  "dim": 512,
  "tasks": {}
}
{
  "model": "jinaai/jina-embeddings-v2-small-e

In [12]:
model_handle = "jinaai/jina-embeddings-v2-small-en"

In [13]:
import numpy as np

In [14]:
embedder = TextEmbedding(model_name=model_handle)
query = ["I just discovered the course. Can I join now?"]
embedding = list(embedder.embed(query))[0]


In [15]:
embedding_array = np.array(embedding)
print("Shape:", embedding_array.shape)               # Should be (512,)
print("Min value:", np.min(embedding_array)) 

Shape: (512,)
Min value: -0.11726373885183883


In [16]:
doc = ["Can I still join the course after the start date?"]
doc_embedding = list(embedder.embed(doc))[0]
doc_array = np.array(doc_embedding)  # this is 'd'

# Step 2: Compute cosine similarity (dot product)
cos_sim = embedding_array.dot(doc_array)

# Step 3: Print similarity
print("Cosine similarity:", cos_sim)

Cosine similarity: 0.9008528895674548


In [17]:
documents = [
    {'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.", 'question': 'Course - Can I still join the course after the start date?'},
    {'text': "Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.\nYou can also continue looking at the homeworks and continue preparing for the next cohort. I guess you can also start working on your final capstone project.", 'question': 'Course - Can I follow the course after it finishes?'},
    {'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.", 'question': 'Course - When will the course start?'},
    {'text': "You can start by installing and setting up all the dependencies and requirements:\nGoogle cloud account\nGoogle Cloud SDK\nPython 3 (installed with Anaconda)\nTerraform\nGit\nLook over the prerequisites and syllabus to see if you are comfortable with these subjects.", 'question': 'Course - What can I do before the course starts?'},
    {'text': "Star the repo! Share it with friends if you find it useful ❣️\nCreate a PR if you see you can improve the text or the structure of the repository.", 'question': 'How can we contribute to the course?'}
]

# Step 4: Get all document text embeddings
doc_texts = [doc['text'] for doc in documents]
doc_embeddings = list(embedder.embed(doc_texts))  # list of vectors

# Step 5: Convert to matrix
V = np.array(doc_embeddings)  # shape: (5, 512)

# Step 6: Compute cosine similarity with query
similarities = V.dot(embedding_array)  # shape: (5,)

# Step 7: Find the index with the highest similarity
best_index = np.argmax(similarities)

# Print result
print("Similarities:", similarities)
print("Best document index:", best_index)


Similarities: [0.76296847 0.81823782 0.80853974 0.7133079  0.73044992]
Best document index: 1


In [18]:
full_texts = [doc['question'] + " " + doc['text'] for doc in documents]

# 4. Embed the full texts
doc_embeddings = list(embedder.embed(full_texts))
V = np.array(doc_embeddings)

# 5. Compute similarity
similarities = V.dot(embedding_array)

# 6. Find the highest score
best_index = int(np.argmax(similarities))
print("Similarities:", similarities)
print("Highest score:",best_index)

Similarities: [0.85145432 0.84365942 0.8408287  0.7755158  0.80860078]
Highest score: 0


In [19]:
#  Is it different from Q3?
# No — it's the same document as in Q3.

# But in some cases, yes — concatenating the question + text gives better semantic meaning and can change rankings, especially if the question contains more keywords than the text.

# In this case, since the text already aligns closely with the query, the ranking didn’t change — but adding question reinforces the match.

In [20]:
model_handle = "BAAI/bge-small-en"
embedder = TextEmbedding(model_name=model_handle)

# Example embedding
texts = ["Can I still join the course after the start date?"]
embeddings = list(embedder.embed(texts))

embedding_vector = np.array(embeddings[0])
print("Embedding shape:", embedding_vector.shape)

Embedding shape: (384,)


In [2]:
import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct
from fastembed import TextEmbedding  # Make sure this import matches your setup

# Step 1: Load and filter documents
print("Step 1: Loading documents...")
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()
print(f"Loaded {len(documents_raw)} courses")

documents = []
for course in documents_raw:
    if course['course'] != 'machine-learning-zoomcamp':
        continue
    for doc in course['documents']:
        doc['course'] = course['course']
        documents.append(doc)
print(f"Filtered {len(documents)} documents for machine-learning-zoomcamp")

# Step 2: Prepare texts and initialize embedder
print("Step 2: Preparing texts and initializing embedder...")
texts = [doc['question'] + ' ' + doc['text'] for doc in documents]
model_handle = "BAAI/bge-small-en"
embedder = TextEmbedding(model_name=model_handle)
print("Embedder initialized")

# Step 3: Embed documents in batches
print("Step 3: Embedding documents...")
batch_size = 10
embeddings = []
for i in range(0, len(texts), batch_size):
    batch = texts[i:i+batch_size]
    batch_embeddings = list(embedder.embed(batch))
    embeddings.extend(batch_embeddings)
    print(f"Embedded documents {i} to {i + len(batch) - 1}")
print(f"Done embedding {len(embeddings)} documents")

# Step 4: Initialize Qdrant client and create/recreate collection
print("Step 4: Initializing Qdrant client and recreating collection...")
client = QdrantClient()
collection_name = "ml-zoomcamp-faq"
client.recreate_collection(
    collection_name=collection_name,
    vectors_config={"size": len(embeddings[0]), "distance": "Cosine"},
)
print("Collection created/recreated")

# Step 5: Prepare points and upsert to Qdrant
print("Step 5: Uploading vectors to Qdrant...")
points = []
for i, (doc, vector) in enumerate(zip(documents, embeddings)):
    points.append(
        PointStruct(
            id=i,
            vector=vector.tolist(),
            payload={"question": doc['question'], "text": doc['text'], "course": doc['course']}
        )
    )
client.upsert(collection_name=collection_name, points=points)
print(f"Uploaded {len(points)} points to collection")

# Step 6: Embed query and search
print("Step 6: Embedding query and searching...")
query_text = "How do I execute a command in a running docker container?"
query_embedding = np.array(list(embedder.embed([query_text]))[0])

search_results = client.search(
    collection_name=collection_name,
    query_vector=query_embedding.tolist(),
    limit=5
)

# Step 7: Print highest score and top result
top_result = search_results[0]
print("Highest score:", top_result.score)
print("Top question:", top_result.payload['question'])
print("Top answer:", top_result.payload['text'])


Step 1: Loading documents...
Loaded 3 courses
Filtered 375 documents for machine-learning-zoomcamp
Step 2: Preparing texts and initializing embedder...
Embedder initialized
Step 3: Embedding documents...
Embedded documents 0 to 9
Embedded documents 10 to 19
Embedded documents 20 to 29
Embedded documents 30 to 39
Embedded documents 40 to 49
Embedded documents 50 to 59
Embedded documents 60 to 69
Embedded documents 70 to 79
Embedded documents 80 to 89
Embedded documents 90 to 99
Embedded documents 100 to 109
Embedded documents 110 to 119
Embedded documents 120 to 129
Embedded documents 130 to 139
Embedded documents 140 to 149
Embedded documents 150 to 159
Embedded documents 160 to 169
Embedded documents 170 to 179
Embedded documents 180 to 189
Embedded documents 190 to 199
Embedded documents 200 to 209
Embedded documents 210 to 219
Embedded documents 220 to 229
Embedded documents 230 to 239
Embedded documents 240 to 249
Embedded documents 250 to 259
Embedded documents 260 to 269
Embedded

  client.recreate_collection(


Collection created/recreated
Step 5: Uploading vectors to Qdrant...
Uploaded 375 points to collection
Step 6: Embedding query and searching...
Highest score: 0.88452435
Top question: How do I debug a docker container?
Top answer: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a command in the specific container:
docker ps (find the container-id)
docker exec -it <container-id> bash
(Marcos MJD)


  search_results = client.search(
