# Index

In [4]:
from src.data.index_and_search import index_df, get_db_object
from loguru import logger

# run in batches to avoid memory issues - index every 100 rows in the df
def index(df_to_index, batch_size=100):
    n = len(df_to_index)
    for i in range(0, n, batch_size):
        batch_df = df_to_index.iloc[i:i+batch_size]
        print(f"--- {i} / {n} ---")
        try:
            index_df(
                df=batch_df,
                index_by_col="question_description",
                need_to_embed_col=True,
                id_col="question_description",
                collection_name="math_questions",
            )
        except Exception as e:
            logger.error(f"Error indexing batch starting at row {i}: {e}")


# Preprocess + Indexing - take first 20,000 rows from each math full dataset

In [2]:
db = get_db_object()
db.print_example("math_questions", 2)

Example from collection 'math_questions': 
([Record(id='000e2a02-ca5c-5d79-b01c-b093ecae91fb', payload={'question': "b'What is the second derivative of i**5 + 54*i**3*t + 2*i*t + 103*i + 2 wrt i?\\n'", 'answer': "b'20*i**3 + 324*i*t\\n'", 'module': 'calculus__differentiate', 'question_description': "Topic: calculus__differentiate\nQuestion: b'What is the second derivative of i**5 + 54*i**3*t + 2*i*t + 103*i + 2 wrt i?\\n'"}, vector=None, shard_key=None, order_value=None), Record(id='000e5735-9c20-5e1e-bf11-aaf35f819c0e', payload={'question': "b'Solve 27*i - 277 - 133 = -32 for i.\\n'", 'answer': "b'14\\n'", 'module': 'algebra__linear_1d', 'question_description': "Topic: algebra__linear_1d\nQuestion: b'Solve 27*i - 277 - 133 = -32 for i.\\n'"}, vector=None, shard_key=None, order_value=None)], '000fe6b6-eb2c-57f9-9a5b-b4df0a6809f3')


In [5]:
from src.utils.folders_utils import get_repo_folder
import pandas as pd
from pathlib import Path

repo_folder = get_repo_folder()
print(f"Repository folder: {repo_folder}")


directory = Path(repo_folder / "src/data/DB_questions/Math/math_dataset_csvs")

dfs = []

# Loop through all CSV files
for file_path in directory.glob("*.csv"):
    if "algebra__linear_1d_full.csv" in file_path.name or "mini" in file_path.name \
        or "algebra__polynomial_roots_full" in file_path.name:
        continue
    print(f"Processing {file_path.name}...")

    df = pd.read_csv(file_path)[22000:23000]

    df["question_description"] = df.apply(lambda row: f"Topic: {row['module']}\nQuestion: {row['question']}", axis=1)

    index(df)

Repository folder: /Users/kereng/Projects/PrivateTeacherAgent
Processing numbers__gcd_full.csv...
--- 0 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.51it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 27699.80it/s]


Collection 'math_questions' size: count=25505
--- 100 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.61it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 7856.12it/s]


Collection 'math_questions' size: count=25605
--- 200 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.88it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10845.00it/s]


Collection 'math_questions' size: count=25705
--- 300 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.72it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 12181.41it/s]


Collection 'math_questions' size: count=25805
--- 400 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.80it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 12305.79it/s]


Collection 'math_questions' size: count=25905
--- 500 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.58it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10871.99it/s]


Collection 'math_questions' size: count=26005
--- 600 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.58it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 12682.73it/s]


Collection 'math_questions' size: count=26105
--- 700 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.73it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 13768.07it/s]


Collection 'math_questions' size: count=26205
--- 800 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.12it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9022.73it/s]


Collection 'math_questions' size: count=26305
--- 900 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.99it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 12219.03it/s]


Collection 'math_questions' size: count=26405
Processing polynomials__expand_full.csv...
--- 0 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.55it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 7325.40it/s]


Collection 'math_questions' size: count=26505
--- 100 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.64it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 13574.24it/s]


Collection 'math_questions' size: count=26605
--- 200 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.78it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11347.00it/s]


Collection 'math_questions' size: count=26705
--- 300 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.90it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 6988.99it/s]


Collection 'math_questions' size: count=26805
--- 400 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.66it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10185.54it/s]


Collection 'math_questions' size: count=26905
--- 500 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.76it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 8778.92it/s]


Collection 'math_questions' size: count=27005
--- 600 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.99it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10330.54it/s]


Collection 'math_questions' size: count=27105
--- 700 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.74it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11126.07it/s]


Collection 'math_questions' size: count=27205
--- 800 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.50it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9811.70it/s]


Collection 'math_questions' size: count=27305
--- 900 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.80it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 31626.48it/s]


Collection 'math_questions' size: count=27405
Processing arithmetic__mul_full.csv...
--- 0 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.06it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 6692.15it/s]


Collection 'math_questions' size: count=27505
--- 100 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.68it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 8909.45it/s]


Collection 'math_questions' size: count=27605
--- 200 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.11it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10940.90it/s]


Collection 'math_questions' size: count=27705
--- 300 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.06it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11513.01it/s]


Collection 'math_questions' size: count=27805
--- 400 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.81it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 29689.98it/s]


Collection 'math_questions' size: count=27905
--- 500 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.86it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11499.75it/s]


Collection 'math_questions' size: count=28005
--- 600 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.44it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9254.86it/s]


Collection 'math_questions' size: count=28105
--- 700 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.00it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10277.64it/s]


Collection 'math_questions' size: count=28205
--- 800 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.96it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10742.51it/s]


Collection 'math_questions' size: count=28305
--- 900 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.05it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9741.06it/s]


Collection 'math_questions' size: count=28405
Processing calculus__differentiate_full.csv...
--- 0 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.85it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9748.53it/s]


Collection 'math_questions' size: count=28505
--- 100 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.96it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9952.32it/s]


Collection 'math_questions' size: count=28605
--- 200 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.01it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 15936.41it/s]


Collection 'math_questions' size: count=28705
--- 300 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.98it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9053.11it/s]


Collection 'math_questions' size: count=28805
--- 400 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.06it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10304.91it/s]


Collection 'math_questions' size: count=28905
--- 500 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.72it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9362.49it/s]


Collection 'math_questions' size: count=29005
--- 600 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.84it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 8724.68it/s]


Collection 'math_questions' size: count=29105
--- 700 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.96it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10441.64it/s]


Collection 'math_questions' size: count=29205
--- 800 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.03it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11827.27it/s]


Collection 'math_questions' size: count=29305
--- 900 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.92it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10614.46it/s]


Collection 'math_questions' size: count=29405
Processing arithmetic__add_or_sub_full.csv...
--- 0 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.54it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11702.86it/s]


Collection 'math_questions' size: count=29505
--- 100 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.75it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 15825.77it/s]


Collection 'math_questions' size: count=29605
--- 200 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.71it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 28718.27it/s]


Collection 'math_questions' size: count=29705
--- 300 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.91it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10361.68it/s]


Collection 'math_questions' size: count=29805
--- 400 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.86it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 12294.24it/s]


Collection 'math_questions' size: count=29905
--- 500 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.84it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9624.82it/s]


Collection 'math_questions' size: count=30005
--- 600 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.76it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11358.37it/s]


Collection 'math_questions' size: count=30105
--- 700 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.99it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 13732.01it/s]


Collection 'math_questions' size: count=30205
--- 800 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.81it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 6818.45it/s]


Collection 'math_questions' size: count=30305
--- 900 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:34<00:00,  2.88it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 5574.05it/s]


Collection 'math_questions' size: count=30405
Processing probability__swr_p_sequence_full.csv...
--- 0 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.68it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10568.72it/s]


Collection 'math_questions' size: count=30505
--- 100 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.04it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 7408.99it/s]


Collection 'math_questions' size: count=30605
--- 200 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.42it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 8294.55it/s]


Collection 'math_questions' size: count=30705
--- 300 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.71it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10094.59it/s]


Collection 'math_questions' size: count=30805
--- 400 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.13it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9310.95it/s]


Collection 'math_questions' size: count=30905
--- 500 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.50it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10592.21it/s]


Collection 'math_questions' size: count=31005
--- 600 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.05it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10705.49it/s]
[32m2025-08-14 11:04:07.716[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mindex[0m:[36m19[0m - [31m[1mError indexing batch starting at row 600: The write operation timed out[0m


--- 700 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.63it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11004.92it/s]


Collection 'math_questions' size: count=31105
--- 800 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.62it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 32093.53it/s]


Collection 'math_questions' size: count=31205
--- 900 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.57it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9541.83it/s]


Collection 'math_questions' size: count=31305
Processing numbers__is_prime_full.csv...
--- 0 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.89it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9881.51it/s]


Collection 'math_questions' size: count=31405
--- 100 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.05it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11001.16it/s]


Collection 'math_questions' size: count=31505
--- 200 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.96it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 8651.26it/s]


Collection 'math_questions' size: count=31605
--- 300 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.06it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9921.71it/s]


Collection 'math_questions' size: count=31705
--- 400 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.99it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 17440.66it/s]


Collection 'math_questions' size: count=31805
--- 500 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.86it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9540.10it/s]


Collection 'math_questions' size: count=31905
--- 600 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.16it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 23859.74it/s]


Collection 'math_questions' size: count=32005
--- 700 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.77it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9761.91it/s]


Collection 'math_questions' size: count=32105
--- 800 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.92it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10432.04it/s]


Collection 'math_questions' size: count=32205
--- 900 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:42<00:00,  2.33it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11124.59it/s]


Collection 'math_questions' size: count=32305
Processing comparison__sort_full.csv...
--- 0 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.98it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 13827.53it/s]


Collection 'math_questions' size: count=32405
--- 100 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:43<00:00,  2.29it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 24237.53it/s]


Collection 'math_questions' size: count=32505
--- 200 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:39<00:00,  2.50it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9509.38it/s]


Collection 'math_questions' size: count=32605
--- 300 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:23<00:00,  4.29it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 8735.40it/s]


Collection 'math_questions' size: count=32705
--- 400 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.43it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11862.73it/s]


Collection 'math_questions' size: count=32805
--- 500 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.52it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 8631.32it/s]


Collection 'math_questions' size: count=32905
--- 600 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.73it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 10508.88it/s]


Collection 'math_questions' size: count=33005
--- 700 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.11it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11786.72it/s]


Collection 'math_questions' size: count=33105
--- 800 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.12it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 35499.82it/s]


Collection 'math_questions' size: count=33205
--- 900 / 1000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.68it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9571.01it/s]


Collection 'math_questions' size: count=33305


In [None]:
dfs[0]
