# Index

In [2]:
from src.data.index_and_search import index_df, get_db_object
from loguru import logger

# run in batches to avoid memory issues - index every 100 rows in the df
def index(df_to_index, batch_size=100):
    n = len(df_to_index)
    for i in range(0, n, batch_size):
        batch_df = df_to_index.iloc[i:i+batch_size]
        print(f"--- {i} / {n} ---")
        try:
            index_df(
                df=batch_df,
                index_by_col="question_description",
                need_to_embed_col=True,
                id_col="question_description",
                collection_name="math_questions",
            )
        except Exception as e:
            logger.error(f"Error indexing batch starting at row {i}: {e}")


# Preprocess + Indexing - take first 20,000 rows from each math full dataset

In [2]:
db = get_db_object()
db.print_example("math_questions", 2)

Example from collection 'math_questions': 
([Record(id='000e2a02-ca5c-5d79-b01c-b093ecae91fb', payload={'question': "b'What is the second derivative of i**5 + 54*i**3*t + 2*i*t + 103*i + 2 wrt i?\\n'", 'answer': "b'20*i**3 + 324*i*t\\n'", 'module': 'calculus__differentiate', 'question_description': "Topic: calculus__differentiate\nQuestion: b'What is the second derivative of i**5 + 54*i**3*t + 2*i*t + 103*i + 2 wrt i?\\n'"}, vector=None, shard_key=None, order_value=None), Record(id='000e5735-9c20-5e1e-bf11-aaf35f819c0e', payload={'question': "b'Solve 27*i - 277 - 133 = -32 for i.\\n'", 'answer': "b'14\\n'", 'module': 'algebra__linear_1d', 'question_description': "Topic: algebra__linear_1d\nQuestion: b'Solve 27*i - 277 - 133 = -32 for i.\\n'"}, vector=None, shard_key=None, order_value=None)], '000fe6b6-eb2c-57f9-9a5b-b4df0a6809f3')


In [None]:
from src.utils.folders_utils import get_repo_folder
import pandas as pd
from pathlib import Path

repo_folder = get_repo_folder()
print(f"Repository folder: {repo_folder}")


directory = Path(repo_folder / "src/data/DB_questions/Math/math_dataset_csvs")

dfs = []

# Loop through all CSV files
for file_path in directory.glob("*.csv"):
    if "algebra__linear_1d_full.csv" in file_path.name or "mini" in file_path.name \
        or "algebra__polynomial_roots_full" in file_path.name:
        continue
    print(f"Processing {file_path.name}...")

    df = pd.read_csv(file_path)[23000:25000]

    df["question_description"] = df.apply(lambda row: f"Topic: {row['module']}\nQuestion: {row['question']}", axis=1)

    index(df)

Repository folder: /Users/kereng/Projects/PrivateTeacherAgent
Processing numbers__gcd_full.csv...
--- 0 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.81it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 44407.67it/s]


Collection 'math_questions' size: count=33405
--- 100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:23<00:00,  4.24it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 34781.52it/s]


Collection 'math_questions' size: count=33505
--- 200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.80it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 30506.25it/s]


Collection 'math_questions' size: count=33605
--- 300 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.69it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 22988.79it/s]


Collection 'math_questions' size: count=33705
--- 400 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.00it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 28546.27it/s]


Collection 'math_questions' size: count=33805
--- 500 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.01it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 27654.14it/s]


Collection 'math_questions' size: count=33905
--- 600 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.83it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 37526.21it/s]


Collection 'math_questions' size: count=34005
--- 700 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.69it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 46294.75it/s]


Collection 'math_questions' size: count=34105
--- 800 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.84it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 48686.06it/s]


Collection 'math_questions' size: count=34205
--- 900 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.79it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11479.92it/s]


Collection 'math_questions' size: count=34305
--- 1000 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.74it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 34876.97it/s]


Collection 'math_questions' size: count=34405
--- 1100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.79it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 26983.43it/s]


Collection 'math_questions' size: count=34505
--- 1200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.95it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 43859.71it/s]


Collection 'math_questions' size: count=34605
--- 1300 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.78it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 47211.89it/s]


Collection 'math_questions' size: count=34705
--- 1400 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.75it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 38832.55it/s]


Collection 'math_questions' size: count=34805
--- 1500 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:23<00:00,  4.33it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11011.85it/s]


Collection 'math_questions' size: count=34905
--- 1600 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.92it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11675.49it/s]


Collection 'math_questions' size: count=35005
--- 1700 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.91it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 14621.94it/s]


Collection 'math_questions' size: count=35105
--- 1800 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.77it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 13906.38it/s]


Collection 'math_questions' size: count=35205
--- 1900 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.38it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 41630.81it/s]


Collection 'math_questions' size: count=35305
Processing polynomials__expand_full.csv...
--- 0 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.90it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 8392.80it/s]


Collection 'math_questions' size: count=35405
--- 100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:26<00:00,  3.81it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 34649.35it/s]


Collection 'math_questions' size: count=35505
--- 200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.38it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 46448.55it/s]


Collection 'math_questions' size: count=35605
--- 300 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.65it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 15913.43it/s]


Collection 'math_questions' size: count=35705
--- 400 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.56it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 16485.10it/s]


Collection 'math_questions' size: count=35805
--- 500 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.79it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 16375.04it/s]


Collection 'math_questions' size: count=35905
--- 600 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.95it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 30497.38it/s]


Collection 'math_questions' size: count=36005
--- 700 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.69it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 16929.58it/s]


Collection 'math_questions' size: count=36105
--- 800 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.07it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 16526.67it/s]


Collection 'math_questions' size: count=36205
--- 900 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.52it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 24765.61it/s]


Collection 'math_questions' size: count=36305
--- 1000 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.90it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 33600.13it/s]


Collection 'math_questions' size: count=36405
--- 1100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.57it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 35638.58it/s]


Collection 'math_questions' size: count=36505
--- 1200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.08it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 26519.37it/s]


Collection 'math_questions' size: count=36605
--- 1300 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.09it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 35677.99it/s]


Collection 'math_questions' size: count=36705
--- 1400 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.97it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 14285.29it/s]


Collection 'math_questions' size: count=36805
--- 1500 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.95it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 42917.26it/s]


Collection 'math_questions' size: count=36905
--- 1600 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:24<00:00,  4.15it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 20370.59it/s]


Collection 'math_questions' size: count=37005
--- 1700 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.77it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 39587.58it/s]


Collection 'math_questions' size: count=37105
--- 1800 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.76it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 32164.91it/s]


Collection 'math_questions' size: count=37205
--- 1900 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.75it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 43482.31it/s]


Collection 'math_questions' size: count=37305
Processing arithmetic__mul_full.csv...
--- 0 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:23<00:00,  4.18it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 18825.42it/s]


Collection 'math_questions' size: count=37405
--- 100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.87it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11672.57it/s]


Collection 'math_questions' size: count=37505
--- 200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.72it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 25051.09it/s]


Collection 'math_questions' size: count=37605
--- 300 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.93it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 30561.82it/s]


Collection 'math_questions' size: count=37705
--- 400 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.04it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 26184.94it/s]


Collection 'math_questions' size: count=37805
--- 500 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.03it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 35487.81it/s]


Collection 'math_questions' size: count=37905
--- 600 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:23<00:00,  4.20it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 41152.90it/s]


Collection 'math_questions' size: count=38005
--- 700 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.62it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 13583.03it/s]


Collection 'math_questions' size: count=38105
--- 800 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.82it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 47662.55it/s]


Collection 'math_questions' size: count=38205
--- 900 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.53it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 39498.11it/s]


Collection 'math_questions' size: count=38305
--- 1000 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:24<00:00,  4.02it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 35066.50it/s]


Collection 'math_questions' size: count=38405
--- 1100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:24<00:00,  4.13it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 32468.68it/s]


Collection 'math_questions' size: count=38505
--- 1200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.75it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 38462.21it/s]


Collection 'math_questions' size: count=38605
--- 1300 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.76it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9232.66it/s]


Collection 'math_questions' size: count=38705
--- 1400 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.90it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 12878.21it/s]


Collection 'math_questions' size: count=38805
--- 1500 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.45it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 9241.40it/s]


Collection 'math_questions' size: count=38905
--- 1600 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.91it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 24401.09it/s]


Collection 'math_questions' size: count=39005
--- 1700 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.06it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 23397.88it/s]


Collection 'math_questions' size: count=39105
--- 1800 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.68it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 37022.72it/s]


Collection 'math_questions' size: count=39205
--- 1900 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.06it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 15576.57it/s]


Collection 'math_questions' size: count=39305
Processing calculus__differentiate_full.csv...
--- 0 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.70it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 29585.27it/s]


Collection 'math_questions' size: count=39405
--- 100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:24<00:00,  4.15it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 41892.77it/s]


Collection 'math_questions' size: count=39505
--- 200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.64it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 16171.12it/s]


Collection 'math_questions' size: count=39605
--- 300 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.37it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 12384.27it/s]


Collection 'math_questions' size: count=39705
--- 400 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.77it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 7817.32it/s]


Collection 'math_questions' size: count=39805
--- 500 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.61it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 7794.22it/s]


Collection 'math_questions' size: count=39905
--- 600 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.13it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 30185.71it/s]


Collection 'math_questions' size: count=40005
--- 700 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.93it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 30488.51it/s]


Collection 'math_questions' size: count=40105
--- 800 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.78it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 13128.53it/s]


Collection 'math_questions' size: count=40205
--- 900 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.79it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 4275.36it/s]


Collection 'math_questions' size: count=40305
--- 1000 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:24<00:00,  4.13it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 32446.07it/s]


Collection 'math_questions' size: count=40405
--- 1100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:22<00:00,  4.38it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 42794.65it/s]


Collection 'math_questions' size: count=40505
--- 1200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.12it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 37299.28it/s]


Collection 'math_questions' size: count=40605
--- 1300 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:19<00:00,  5.08it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 34932.16it/s]


Collection 'math_questions' size: count=40705
--- 1400 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.79it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11877.84it/s]


Collection 'math_questions' size: count=40805
--- 1500 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.96it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 35599.25it/s]


Collection 'math_questions' size: count=40905
--- 1600 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.82it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 37499.37it/s]


Collection 'math_questions' size: count=41005
--- 1700 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.93it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 32321.06it/s]


Collection 'math_questions' size: count=41105
--- 1800 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.58it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 17605.37it/s]


Collection 'math_questions' size: count=41205
--- 1900 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:20<00:00,  4.98it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 17421.82it/s]


Collection 'math_questions' size: count=41305
Processing arithmetic__add_or_sub_full.csv...
--- 0 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:21<00:00,  4.64it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 11507.64it/s]


Collection 'math_questions' size: count=41405
--- 100 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:23<00:00,  4.27it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 35738.79it/s]


Collection 'math_questions' size: count=41505
--- 200 / 2000 ---


Generating embeddings: 100%|██████████| 100/100 [00:23<00:00,  4.33it/s]
Inserting data into math_questions: 100%|██████████| 100/100 [00:00<00:00, 48953.13it/s]


In [None]:
dfs[0]
