In [1]:
import torch

from ml_training import settings
from ml_training.models.baseline.bow_dssm import BoWDSSM
from ml_training.models.encoders.hub_averaging_encoder import HubEncoder
from ml_training.models.encoders.tf_idf_encoder import TextEncoder
from ml_training.models.predictors.ranking_nn import RankingModel

# Resolve the models directory once instead of re-instantiating Settings for every path.
models_dir = settings.Settings().models_dir

# Rebuild the ranking head and restore its trained weights.
# NOTE(review): input_dim=10000 presumably equals text features (5000) + hub vector
# dim (5000), as reported by the export/inference logs further down - confirm.
predictor = RankingModel(input_dim=10000)
predictor.load_state_dict(
    torch.load(
        models_dir / "bow_dssm_1.pt",
        # Allows a checkpoint that was saved on a GPU to load on a CPU-only machine.
        map_location="cpu",
        # A state dict holds only tensors, so restrict unpickling to safe types.
        weights_only=True,
    )
)
# Inference only: put the predictor in eval mode, as is done for the nano predictor
# later in this notebook.
predictor.eval()

# Assemble the full model from the two fitted encoders and the predictor.
bow_dssm = BoWDSSM(
    TextEncoder.load(models_dir / "text_encoder.pt"),
    HubEncoder.load(models_dir / "hub_encoder.pt"),
    predictor,
)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [2]:
# Sanity check: score one sample query against a handful of hub names using the
# freshly assembled in-memory model.
sample_hubs = ["yandex", "natural_language_processing", "cpp", "1C"]
sample_query = "Яндекс ML, NLP, Поиск"
for hub in sample_hubs:
    proba = bow_dssm.predict_proba(sample_query, hub)
    print(f"{hub}: {proba}")

yandex: 0.9414007067680359
natural_language_processing: 0.5178830623626709
cpp: 0.28635796904563904
1C: 0.5039178729057312




In [3]:
# Persist the assembled model so it can be reloaded as a single file.
baseline_path = settings.Settings().models_dir / "baseline_bow_nn.pt"
bow_dssm.save(baseline_path)

In [4]:
from ml_training import settings
from ml_training.models.baseline.bow_dssm import BoWDSSM

# Round-trip check: reload the model from the file written by the save cell.
# NOTE(review): if BoWDSSM.load unpickles the file, only load artifacts from a
# trusted source - confirm the persistence format.
baseline_checkpoint = settings.Settings().models_dir / "baseline_bow_nn.pt"
loaded_bow_dssm = BoWDSSM.load(baseline_checkpoint)

In [5]:
# Repeat the same query against the reloaded model; the printed scores should
# match the ones produced by the in-memory model before saving.
reload_check_query = "Яндекс ML, NLP, Поиск"
for hub in ("yandex", "natural_language_processing", "cpp", "1C"):
    proba = loaded_bow_dssm.predict_proba(reload_check_query, hub)
    print(f"{hub}: {proba}")

yandex: 0.9414007067680359
natural_language_processing: 0.5178830623626709
cpp: 0.28635796904563904
1C: 0.5039178729057312


In [6]:
from ml_training.converters.export import export_model

# Export the reloaded model to the ".bowdssm" file.
# NOTE(review): the captured log below shows the ONNX exporter being asked for
# opset 15, switching to 18, and failing to convert back to 15 - consider
# requesting opset >= 18 inside export_model to avoid the failed conversion.
compressed_path = settings.Settings().models_dir / "compressed.bowdssm"
export_model(loaded_bow_dssm, compressed_path)

Hub encoder: 2392 hubs
Text encoder: 5000 features


  torch.onnx.export(
W1225 22:41:49.001000 647460 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 15 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features
W1225 22:41:49.517000 647460 torch/onnx/_internal/exporter/_registration.py:107] torchvision is not installed. Skipping torchvision::nms


[torch.onnx] Obtain model graph for `RankingModel([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `RankingModel([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...


The model version conversion is not supported by the onnxscript version converter and fallback is enabled. The model will be converted using the onnx C API (target version: 15).
Failed to convert the model to the target version 15 using the ONNX C API. The model was not modified
Traceback (most recent call last):
  File "/home/sergiyar/.cache/pypoetry/virtualenvs/habr-article-analyzer-fSU603ju-py3.12/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/sergiyar/.cache/pypoetry/virtualenvs/habr-article-analyzer-fSU603ju-py3.12/lib/python3.12/site-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/home/sergiyar/.cache/pypoetry/virtualenvs/habr-article-analyzer-fSU603ju-py3.12/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line

[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 3 of general pattern rewrite rules.
Predictor: input_dim=10000
Metadata saved
Model exported to /home/sergiyar/habr-article-analyzer/models/compressed.bowdssm
File size: 61.27 MB


In [7]:
from ml_training.converters.inference import BowDSSMInference

# Load the exported artifact through the inference wrapper.
compressed_model_path = settings.Settings().models_dir / "compressed.bowdssm"
model = BowDSSMInference(compressed_model_path)

Hub encoder: 2392 hubs, dim=5000
Text encoder loaded
Predictor loaded
Metadata: version=1.0
Model loaded from /home/sergiyar/habr-article-analyzer/models/compressed.bowdssm


In [8]:
# Score the same query with the exported model so the results can be compared
# with the scores printed for the original model above.
exported_check_hubs = ["yandex", "natural_language_processing", "cpp", "1C"]
for hub in exported_check_hubs:
    proba = model.predict_proba("Яндекс ML, NLP, Поиск", hub)
    print(f"{hub}: {proba}")

yandex: 0.9414006471633911
natural_language_processing: 0.5178832411766052
cpp: 0.28635770082473755
1C: 0.5039184093475342


In [19]:
import copy

import torch

from ml_training.models.predictors.ranking_nn import RankingModel

# Build a deliberately tiny model (one hub, three 2-unit hidden layers) to produce
# a small export artifact. The predictor is untrained, so its scores carry no
# meaning; seed the RNG so the random initialisation, and therefore the printed
# values, are reproducible across runs.
torch.manual_seed(0)
nano_predictor = RankingModel(input_dim=10000, hidden_dims=[2, 2, 2])
nano_predictor.eval()

# Work on a deep copy so `loaded_bow_dssm` keeps its full hub table and trained
# predictor; mutating it in place would silently change the results of earlier
# cells if they are re-run after this one.
nano_bow_dssm = copy.deepcopy(loaded_bow_dssm)
nano_bow_dssm.hub_encoder.hub_to_vec = {
    "1C": nano_bow_dssm.hub_encoder.hub_to_vec["1C"]
}
nano_bow_dssm.predictor = nano_predictor

# Export the nano model and load it back through the inference wrapper.
nano_model_path = settings.Settings().models_dir / "compressed_nano.bowdssm"
export_model(nano_bow_dssm, nano_model_path)
model_nano = BowDSSMInference(nano_model_path)

# Only "1C" remains in the hub table; the other hub names are unknown to this model.
for hub in ["yandex", "natural_language_processing", "cpp", "1C"]:
    proba = model_nano.predict_proba("Яндекс ML, NLP, Поиск", hub)
    print(f"{hub}: {proba}")

Hub encoder: 1 hubs


  torch.onnx.export(
W1225 22:51:25.410000 647460 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 15 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


Text encoder: 5000 features


W1225 22:51:25.785000 647460 torch/onnx/_internal/exporter/_registration.py:107] torchvision is not installed. Skipping torchvision::nms


[torch.onnx] Obtain model graph for `RankingModel([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `RankingModel([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...


The model version conversion is not supported by the onnxscript version converter and fallback is enabled. The model will be converted using the onnx C API (target version: 15).
Failed to convert the model to the target version 15 using the ONNX C API. The model was not modified
Traceback (most recent call last):
  File "/home/sergiyar/.cache/pypoetry/virtualenvs/habr-article-analyzer-fSU603ju-py3.12/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/sergiyar/.cache/pypoetry/virtualenvs/habr-article-analyzer-fSU603ju-py3.12/lib/python3.12/site-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/home/sergiyar/.cache/pypoetry/virtualenvs/habr-article-analyzer-fSU603ju-py3.12/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line

[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 3 of general pattern rewrite rules.
Predictor: input_dim=10000
Metadata saved
Model exported to /home/sergiyar/habr-article-analyzer/models/compressed_nano.bowdssm
File size: 0.16 MB
Hub encoder: 1 hubs, dim=5000
Text encoder loaded
Predictor loaded
Metadata: version=1.0
Model loaded from /home/sergiyar/habr-article-analyzer/models/compressed_nano.bowdssm
yandex: 0.5437959432601929
natural_language_processing: 0.5437959432601929
cpp: 0.5437959432601929
1C: 0.5437959432601929
