In [1]:
import chromadb
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# 1. Setup (No server management needed!)
# chromadb.Client() gives an in-process client, so there is no server to start.
client = chromadb.Client()
# get_or_create_collection keeps this cell re-runnable: create_collection raises
# when "my_nlp_docs" already exists in the running kernel.
collection = client.get_or_create_collection("my_nlp_docs")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Display the client's repr to confirm it was created.
client

<chromadb.api.client.Client at 0x1fa2bf6bcb0>

In [3]:
# Display the collection's repr to confirm its name.
collection

Collection(name=my_nlp_docs)

In [23]:

# 2. Embed the documents. The text is converted to vectors explicitly here with
#    the sentence-transformers model; the vectors are added to the collection
#    in a later cell.
embedder = SentenceTransformer('all-MiniLM-L6-v2') # Small, fast embedder
docs = ["Elasticsearch is a search engine.", "Chroma is a vector database.", "Apples are fruit."]
# .tolist() turns the encode() result into plain nested Python lists of floats.
embeddings = embedder.encode(docs).tolist()
embeddings


[[0.020012231543660164,
  0.003088189521804452,
  -0.04201562702655792,
  0.03954428806900978,
  0.006659922190010548,
  -0.005695543717592955,
  -0.08921244740486145,
  0.018166659399867058,
  0.01614072546362877,
  0.0057895067147910595,
  -0.005844543222337961,
  0.015268984250724316,
  0.057440564036369324,
  -0.019523579627275467,
  -0.05264371261000633,
  -0.0551622100174427,
  -0.05248982086777687,
  0.018313370645046234,
  0.011423355899751186,
  -0.044344231486320496,
  0.0730939507484436,
  0.09609091281890869,
  -0.012810342013835907,
  -0.07412263005971909,
  -0.04260840639472008,
  0.003836888587102294,
  -0.05858964845538139,
  -0.01254334021359682,
  0.018559852614998817,
  -0.025612181052565575,
  -0.015573336742818356,
  -0.04276331141591072,
  0.06686984747648239,
  0.12795457243919373,
  -0.06598132103681564,
  0.014950267970561981,
  -0.06436608731746674,
  0.0033240076154470444,
  -0.05110226944088936,
  0.021754074841737747,
  -0.0330863781273365,
  -0.02796830795

In [7]:
# Number of embedding vectors: one per entry in `docs`.
len(embeddings)

3

In [9]:
# Print the length of every embedding vector, one per line.
for vector_size in map(len, embeddings):
    print(vector_size)

384
384
384


In [11]:
# Store each document together with its precomputed embedding.
# IDs are derived from the number of documents so they always line up with
# `docs` ("id1", "id2", ...), and upsert (rather than add) means re-running the
# cell after editing `docs` overwrites the stored entries for those IDs.
doc_ids = [f"id{position}" for position in range(1, len(docs) + 1)]
collection.upsert(
    ids=doc_ids,
    documents=docs,
    embeddings=embeddings,
)
collection


Collection(name=my_nlp_docs)

In [14]:
# Inspect the collection's underlying model, including its index configuration.
collection.get_model()

Collection(id=UUID('55055e35-c26c-4ba8-b436-10c1bd0e1b54'), name='my_nlp_docs', configuration_json={'hnsw': {'space': 'l2', 'ef_construction': 100, 'ef_search': 100, 'max_neighbors': 16, 'resize_factor': 1.2, 'sync_threshold': 1000}, 'spann': None, 'embedding_function': {'type': 'known', 'name': 'default', 'config': {}}}, serialized_schema={'defaults': {'string': {'fts_index': {'enabled': False, 'config': {}}, 'string_inverted_index': {'enabled': True, 'config': {}}}, 'float_list': {'vector_index': {'enabled': False, 'config': {'space': 'l2', 'embedding_function': {'type': 'known', 'name': 'default', 'config': {}}, 'hnsw': {'ef_construction': 100, 'max_neighbors': 16, 'ef_search': 100, 'num_threads': 12, 'batch_size': 100, 'sync_threshold': 1000, 'resize_factor': 1.2}}}}, 'sparse_vector': {'sparse_vector_index': {'enabled': False, 'config': {'embedding_function': {'type': 'unknown'}, 'bm25': False}}}, 'int': {'int_inverted_index': {'enabled': True, 'config': {}}}, 'float': {'float_inve

In [None]:
# Encode a query that is unrelated to the stored documents. The query is wrapped
# in a one-element list, so encode() returns one row per input sentence.
query = "A monkey likes bananas"
query_emb = embedder.encode([query])
query_emb


array([[-6.38535693e-02, -6.33724853e-02,  2.92766131e-02,
         2.01455578e-02,  6.09527575e-03,  5.12121692e-02,
         5.66704310e-02,  4.75118272e-02, -5.42855880e-04,
         1.06325418e-01, -2.02920730e-03, -9.04936865e-02,
         1.45451380e-02, -3.11289486e-02,  1.09799057e-01,
         4.18650657e-02,  6.39450410e-03, -1.43641047e-02,
        -1.80189812e-03,  1.43238418e-02,  2.27460694e-02,
         2.00567562e-02, -2.27587186e-02, -3.38906683e-02,
        -2.37261150e-02,  2.62643341e-02,  1.84682030e-02,
        -1.53923593e-02,  6.97867051e-02, -2.66345777e-02,
        -1.01094162e-02, -6.75010756e-02,  9.93481651e-03,
         4.85309139e-02, -3.19994241e-02, -2.92467345e-02,
         7.19126314e-03, -1.49647236e-01,  3.26819792e-02,
         8.04173108e-03,  2.85648610e-02,  1.06457651e-01,
         3.86937819e-02, -9.47392210e-02, -4.12381068e-02,
         2.05221903e-02,  6.06106482e-02, -1.80805139e-02,
         8.96517858e-02, -9.65584517e-02,  3.00590135e-0

In [16]:
# Check the dimensions of the encoded query (rows, embedding size).
query_emb.shape

(1, 384)

In [18]:
# Re-encode from `query` instead of converting `query_emb` in place: calling
# .tolist() on the previous value fails on a second run of this cell, because
# the name is then already bound to a plain list.
query_emb = embedder.encode([query]).tolist()
query_emb

[[-0.06385356932878494,
  -0.06337248533964157,
  0.029276613146066666,
  0.020145557820796967,
  0.0060952757485210896,
  0.05121216922998428,
  0.056670431047677994,
  0.04751182720065117,
  -0.0005428558797575533,
  0.1063254177570343,
  -0.0020292073022574186,
  -0.09049368649721146,
  0.014545137993991375,
  -0.03112894855439663,
  0.10979905724525452,
  0.041865065693855286,
  0.006394504103809595,
  -0.014364104717969894,
  -0.0018018981209024787,
  0.014323841780424118,
  0.022746069356799126,
  0.02005675621330738,
  -0.022758718580007553,
  -0.03389066830277443,
  -0.023726115003228188,
  0.026264334097504616,
  0.018468203023076057,
  -0.015392359346151352,
  0.06978670507669449,
  -0.0266345776617527,
  -0.010109416209161282,
  -0.06750107556581497,
  0.009934816509485245,
  0.04853091388940811,
  -0.03199942409992218,
  -0.029246734455227852,
  0.007191263139247894,
  -0.14964723587036133,
  0.03268197923898697,
  0.00804173108190298,
  0.02856486104428768,
  0.10645765066

In [21]:
# Ask for up to 5 matches. Only three documents were added to the collection,
# so the result lists three hits.
results = collection.query(query_embeddings=query_emb, n_results=5)
results

{'ids': [['id3', 'id2', 'id1']],
 'embeddings': None,
 'documents': [['Apples are fruit.',
   'Chroma is a vector database.',
   'Elasticsearch is a search engine.']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None, None]],
 'distances': [[1.3465582132339478, 1.804137945175171, 2.0027194023132324]]}

In [None]:
# 3. Search (Semantic Search)
# Embed the query with the same model used for the documents, then ask the
# collection for a single result.
query = "What is good for AI search?"
query_emb = embedder.encode([query]).tolist()

results = collection.query(query_embeddings=query_emb, n_results=1)
print(results['documents']) 
# Output will likely be: [['Chroma is a vector database.']] because it's semantically closest.

In [22]:
# --- 2. SETUP THE READER (Replacement for FARMReader) ---
# `pipeline` is imported with the other dependencies in the notebook's first
# cell, so every import is visible in one place.
# We use the EXACT same model checkpoint from the book
model_ckpt = "deepset/minilm-uncased-squad2"

# In 2026, we use the standard HF pipeline instead of FARMReader.
# The tokenizer is loaded from the same checkpoint as the model.
reader = pipeline("question-answering", model=model_ckpt, tokenizer=model_ckpt)

Some weights of the model checkpoint at deepset/minilm-uncased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


In [29]:
# --- 3. RUN THE PIPELINE ---

question = "what is a Chroma?"

# A. RETRIEVE: embed the question and ask ChromaDB for its two best matches
query_emb = embedder.encode([question]).tolist()
results = collection.query(n_results=2, query_embeddings=query_emb)

# Only one question was sent, so its hits are the first inner list of
# 'documents'; the first entry of that list becomes the reader's context.
ranked_docs = results['documents'][0]
retrieved_context = ranked_docs[0]
print(f"Retrieved Context: {retrieved_context}")


Retrieved Context: Chroma is a vector database.


In [30]:

# B. READ: have the question-answering model extract the answer from the context
# (stands in for the book's 'reader.predict' call)
answer = reader(context=retrieved_context, question=question)

# Report the extracted span and the model's score, one labelled line each.
for label, field in (("Answer", "answer"), ("Confidence", "score")):
    print(f"{label}: {answer[field]}")

Answer: vector database
Confidence: 0.5826009511947632


In [31]:
# Show the full result dict returned by the reader (score, start, end, answer).
answer

{'score': 0.5826009511947632,
 'start': 12,
 'end': 27,
 'answer': 'vector database'}