
Commit

Merge pull request #89 from KennethEnevoldsen/stuff_runs_tests
Added integration test for four model types
x-tabdeveloping committed Jan 26, 2024
2 parents aeb32ce + ccdd886 commit f427a6d
Showing 5 changed files with 94 additions and 10 deletions.
2 changes: 1 addition & 1 deletion makefile
@@ -1,6 +1,6 @@
 install:
 	@echo "--- 🚀 Installing project ---"
-	pip install -e ".[dev, docs, openai, cohere, tests, mistral]"
+	pip install -e ".[dev, docs, openai, cohere, tests, mistral, fasttext]"

 static-type-check:
 	@echo "--- 🔍 Running static type check ---"
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -53,7 +53,7 @@ sonar = [
"fairseq2",
"sonar-space>=0.1.0",
] # fairseq2 only works for linux at the moment
fasttext = ["fasttext>=0.9.0"]
fasttext = ["pybind11>=2.11.0", "fasttext-wheel>=0.9.0"]


[project.readme]
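Note on the dependency swap above: fasttext-wheel is a prebuilt-wheel distribution of the same library and still installs under the module name fasttext, so the lazy imports in FastTextModel below keep working unchanged; the pybind11 pin is presumably there so a source build of fasttext has it available. A minimal sanity check, assuming the extra has been installed (for example via the makefile target above):

    # Hedged sanity check: fasttext-wheel should still expose the usual `fasttext` module.
    import fasttext
    import fasttext.util

    print(fasttext.__name__)  # expected: "fasttext"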
5 changes: 3 additions & 2 deletions src/seb/registered_models/fasttext.py
@@ -11,13 +11,14 @@

 class FastTextModel(seb.Encoder):
     def __init__(self, model_name: str, lang: str) -> None:
+        self.model_name = model_name
+        self.lang = lang
+
         import fasttext  # type: ignore
         import fasttext.util  # type: ignore

         fasttext.util.download_model(self.lang, if_exists="ignore")
         self.model = fasttext.load_model(self.model_name)
-        self.model_name = model_name
-        self.lang = lang

     def get_embedding_dim(self) -> int:
         v = self.encode(["get emb dim"])
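The reorder in __init__ above matters because fasttext.util.download_model is called with self.lang and fasttext.load_model with self.model_name; in the old ordering both attributes were read before they were assigned, so constructing the model raised an AttributeError. A minimal standalone sketch of the corrected flow (the seb.Encoder plumbing is omitted, and the usage line is illustrative only):

    import fasttext  # type: ignore
    import fasttext.util  # type: ignore


    class FastTextSketch:
        def __init__(self, model_name: str, lang: str) -> None:
            # Assign first: the two calls below read these attributes.
            self.model_name = model_name
            self.lang = lang

            # Fetches cc.<lang>.300.bin into the working directory if missing.
            fasttext.util.download_model(self.lang, if_exists="ignore")
            self.model = fasttext.load_model(self.model_name)


    # Illustrative usage (triggers a large download on first run):
    # model = FastTextSketch("cc.da.300.bin", "da")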
30 changes: 24 additions & 6 deletions tests/cli/test_cli.py
@@ -4,6 +4,7 @@

 import numpy as np
 import pytest
+
 import seb
 from seb.cli import cli, run_benchmark_cli

@@ -30,11 +31,24 @@ def to_command(self, output_path: Path) -> list[str]:
 cli_command_parametrize = pytest.mark.parametrize(
     "inputs",
     [
-        BenchmarkCliTestInput("sentence-transformers/all-MiniLM-L6-v2", 0.550, tasks=["DKHate"]),
-        BenchmarkCliTestInput("sentence-transformers/all-MiniLM-L6-v2", 0.525, tasks=["DKHate", "ScaLA"]),
-        BenchmarkCliTestInput("sentence-transformers/all-MiniLM-L6-v2", 0.50, tasks=["DKHate", "ScaLA"], languages=["sv", "nn", "nb"]),
         BenchmarkCliTestInput(
-            "test_model", np.nan, code_path=(test_dir / "benchmark_cli_code_inject.py"), tasks=["test-encode-task"], ignore_cache=True
+            "sentence-transformers/all-MiniLM-L6-v2", 0.550, tasks=["DKHate"]
         ),
+        BenchmarkCliTestInput(
+            "sentence-transformers/all-MiniLM-L6-v2", 0.525, tasks=["DKHate", "ScaLA"]
+        ),
+        BenchmarkCliTestInput(
+            "sentence-transformers/all-MiniLM-L6-v2",
+            0.50,
+            tasks=["DKHate", "ScaLA"],
+            languages=["sv", "nn", "nb"],
+        ),
+        BenchmarkCliTestInput(
+            "test_model",
+            np.nan,
+            code_path=(test_dir / "benchmark_cli_code_inject.py"),
+            tasks=["test-encode-task"],
+            ignore_cache=True,
+        ),
     ],
 )
@@ -64,7 +78,9 @@ def test_run_benchmark_cli(inputs: BenchmarkCliTestInput, tmp_path: Path):
     res = load_results(tmp_path)
     assert len(res) == 1
     bench_res = res[0]
-    bench_res.task_results = [tr for tr in bench_res.task_results if tr.task_name != "test-encode-task"]
+    bench_res.task_results = [
+        tr for tr in bench_res.task_results if tr.task_name != "test-encode-task"
+    ]
     assert is_approximately_equal(bench_res.get_main_score(), inputs.score)


@@ -75,5 +91,7 @@ def test_run_cli(inputs: BenchmarkCliTestInput, tmp_path: Path):

     assert len(res) == 1
     bench_res = res[0]
-    bench_res.task_results = [tr for tr in bench_res.task_results if tr.task_name != "test-encode-task"]
+    bench_res.task_results = [
+        tr for tr in bench_res.task_results if tr.task_name != "test-encode-task"
+    ]
     assert is_approximately_equal(bench_res.get_main_score(), inputs.score)
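Both CLI tests above take the same inputs, presumably by assigning the pytest.mark.parametrize marker to the module-level name cli_command_parametrize and applying it to each test (the decorators sit in the collapsed context). A standalone sketch of that reusable-parametrize pattern, using hypothetical names (SampleInput is not seb's BenchmarkCliTestInput):

    from dataclasses import dataclass

    import pytest


    @dataclass
    class SampleInput:
        model_name: str
        expected_score: float


    shared_cases = pytest.mark.parametrize(
        "inputs",
        [
            SampleInput("model-a", 0.5),
            SampleInput("model-b", 0.25),
        ],
    )


    @shared_cases
    def test_variant_one(inputs: SampleInput):
        assert 0.0 <= inputs.expected_score <= 1.0


    @shared_cases
    def test_variant_two(inputs: SampleInput):
        assert inputs.model_name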
65 changes: 65 additions & 0 deletions tests/test_integration.py
@@ -0,0 +1,65 @@
from datetime import datetime

import pytest

import seb
from seb.cli import run_benchmark_cli
from seb.registries import tasks


@tasks.register("test-encode-task")
def create_test_encode_task() -> seb.Task:
    class DummyTask(seb.Task):
        name = "test-encode-task"
        main_score = "a_metric"
        description = "NA"
        reference = "NA"
        version = "NA"
        languages = []  # noqa: RUF012
        domain = []  # noqa: RUF012
        task_type = "Classification"

        def evaluate(self, model: seb.Encoder) -> seb.TaskResult:
            model.encode(["a test sentence"], task=self)

            return seb.TaskResult(
                task_name="test-encode-task",
                task_description="NA",
                task_version="NA",
                time_of_run=datetime.now(),
                scores={"en": {"a_metric": 1.0}},
                main_score="a_metric",
            )

        def get_descriptive_stats(self) -> dict:
            return {}

    return DummyTask()


@pytest.mark.parametrize(
    "model",
    [
        "fasttext-cc-da-300",
        "intfloat/e5-small",
        "translate-e5-small",
    ],
)
def test_integration_dummy(model: str):
    """Runs all sorts of models on a dummy task to see if they can run without breaking.
    Cache is ignored so that the models are actually run.
    """
    tasks = ["test-encode-task"]
    run_benchmark_cli(models=[model], tasks=tasks, ignore_cache=True)


@pytest.mark.parametrize(
    "model",
    [
        "sentence-transformers/all-MiniLM-L6-v2",
    ],
)
def test_integration_lcc(model: str):
    """Runs model(s) on LCC to see if everything works in order with tasks included."""
    tasks = ["LCC"]
    run_benchmark_cli(models=[model], tasks=tasks, ignore_cache=True)

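The dummy task above only calls model.encode and returns a fixed score, so each parametrized model is loaded and exercised end to end without evaluating a real dataset; together with the LCC test, that covers the four models named in the commit message (fasttext-cc-da-300, intfloat/e5-small, translate-e5-small, and all-MiniLM-L6-v2). A hedged example of running just these tests from Python; invoking pytest on the command line works equally well:

    # Runs only the new integration tests. Assumes the models above can be
    # downloaded in the current environment; ignore_cache=True forces real runs.
    import sys

    import pytest

    sys.exit(pytest.main(["tests/test_integration.py", "-v"]))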