In [1]:
import os
import papermill as pm
from logger import logger

## Generate Embeddings

In [2]:
# Notebook to drive with papermill; `notebook` is read again by the following
# code cell when it logs which notebook is being run.
notebook = "embeddings.ipynb"
# Show the parameters the notebook exposes (name, inferred type, default, help)
# so the overrides passed to `pm.execute_notebook` can be checked against them.
pm.inspect_notebook(notebook)

{'CONTRIBUTOR': {'name': 'CONTRIBUTOR',
  'inferred_type_name': 'str',
  'default': '"Health Promotion Board"',
  'help': ''},
 'CATEGORY': {'name': 'CATEGORY',
  'inferred_type_name': 'str',
  'default': '"live-healthy"',
  'help': ''},
 'MODEL_NAME': {'name': 'MODEL_NAME',
  'inferred_type_name': 'str',
  'default': '"all-MiniLM-L6-v2"',
  'help': ''},
 'POOLING_STRATEGY': {'name': 'POOLING_STRATEGY',
  'inferred_type_name': 'str',
  'default': '"max"',
  'help': ''}}

In [3]:
# Model whose embeddings are generated. A later cell reads `model_name` as well,
# so this assignment is the single place to switch models.
# Alternatives: "all-MiniLM-L6-v2", "all-mpnet-base-v2"
model_name = "msmarco-bert-base-dot-v5"

# Executed copies of the notebook are written here. papermill does not create a
# missing local output folder, so make sure it exists before the first run.
output_dir = os.path.join("..", "artifacts", "notebooks")
os.makedirs(output_dir, exist_ok=True)

# Run the notebook once per pooling strategy, overriding its MODEL_NAME and
# POOLING_STRATEGY parameters.
for strategy in ["mean", "max"]:
    logger.info(
        f"Running {notebook} for {model_name} model with {strategy} pooling strategy"
    )
    pm.execute_notebook(
        # Use the same `notebook` value that is logged above so the message and
        # the notebook actually executed cannot drift apart.
        input_path=notebook,
        output_path=os.path.join(output_dir, f"{model_name}_{strategy}_emb.ipynb"),
        parameters={"MODEL_NAME": model_name, "POOLING_STRATEGY": strategy},
    )

2024-06-11 15:45:24,036 - Running embeddings.ipynb for msmarco-bert-base-dot-v5 model with mean pooling strategy


Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
2024-06-11 15:51:41,602 - Running embeddings.ipynb for msmarco-bert-base-dot-v5 model with max pooling strategy


Executing:   0%|          | 0/18 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


## Evaluate Similarities

In [4]:
# Rebind `notebook` to the similarity notebook; the following code cell reads
# it when it logs which notebook is being run.
notebook = "similarity.ipynb"
# Show the parameters the notebook exposes (name, inferred type, default, help)
# so the overrides passed to `pm.execute_notebook` can be checked against them.
pm.inspect_notebook(notebook)

{'MODEL_NAME': {'name': 'MODEL_NAME',
  'inferred_type_name': 'str',
  'default': '"all-MiniLM-L6-v2"',
  'help': ''},
 'POOLING_STRATEGY': {'name': 'POOLING_STRATEGY',
  'inferred_type_name': 'str',
  'default': '"max"',
  'help': ''}}

In [5]:
# NOTE: `model_name` is intentionally not reassigned here. It is the value bound
# in the embeddings cell above, so that cell must have been run first and both
# stages use the same model.

# Executed copies of the notebook are written here. papermill does not create a
# missing local output folder, so make sure it exists before the first run.
output_dir = os.path.join("..", "artifacts", "notebooks")
os.makedirs(output_dir, exist_ok=True)

# Run the notebook once per pooling strategy, overriding its MODEL_NAME and
# POOLING_STRATEGY parameters.
for strategy in ["mean", "max"]:
    logger.info(
        f"Running {notebook} for {model_name} model with {strategy} pooling strategy"
    )
    pm.execute_notebook(
        # Use the same `notebook` value that is logged above so the message and
        # the notebook actually executed cannot drift apart.
        input_path=notebook,
        output_path=os.path.join(output_dir, f"{model_name}_{strategy}_sim.ipynb"),
        parameters={"MODEL_NAME": model_name, "POOLING_STRATEGY": strategy},
    )

2024-06-11 16:03:26,930 - Running similarity.ipynb for msmarco-bert-base-dot-v5 model with mean pooling strategy


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
2024-06-11 16:03:44,020 - Running similarity.ipynb for msmarco-bert-base-dot-v5 model with max pooling strategy


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
