Skip to content

Commit

Permalink
WIP Unify type of params to Dict[str, Any] or DefaultDict[str, Dict]
Browse files (browse the repository at this point in the history)
  • Loading branch information
juhoinkinen committed May 25, 2023
1 parent 961dd09 commit a193ade
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 49 deletions.
16 changes: 8 additions & 8 deletions annif/backend/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os.path
from datetime import datetime, timezone
from glob import glob
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from annif import logger
from annif.suggestion import SuggestionBatch
Expand All @@ -24,7 +24,7 @@ class AnnifBackend(metaclass=abc.ABCMeta):
DEFAULT_PARAMETERS = {"limit": 100}

def __init__(
self, backend_id: str, config_params: Any, project: AnnifProject
self, backend_id: str, config_params: Dict[str, Any], project: AnnifProject
) -> None:
"""Initialize backend with specific parameters. The
parameters are a dict. Keys and values depend on the specific
Expand All @@ -34,7 +34,7 @@ def __init__(
self.project = project
self.datadir = project.datadir

def default_params(self) -> Dict[str, Union[str, bool, int]]:
def default_params(self) -> Dict[str, Any]:
return self.DEFAULT_PARAMETERS

@property
Expand All @@ -61,7 +61,7 @@ def modification_time(self) -> Optional[datetime.datetime]:

def _get_backend_params(
self,
params: Optional[Union[Dict[str, str], Dict[str, int], Dict[str, float]]],
params: Optional[Dict[str, Any]],
) -> Dict[str, Any]:
backend_params = dict(self.params)
if params is not None:
Expand All @@ -71,7 +71,7 @@ def _get_backend_params(
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
"""This method can be overridden by backends. It implements
Expand All @@ -81,7 +81,7 @@ def _train(
def train(
self,
corpus: DocumentCorpus,
params: Optional[Union[Dict[str, float], Dict[str, int]]] = None,
params: Optional[Dict[str, Any]] = None,
jobs: int = 0,
) -> None:
"""Train the model on the given document or subject corpus."""
Expand Down Expand Up @@ -116,7 +116,7 @@ def _suggest_batch(
def suggest(
self,
texts: List[str],
params: Optional[Union[Dict[str, str], Dict[str, int]]] = None,
params: Optional[Dict[str, Any]] = None,
) -> SuggestionBatch:
"""Suggest subjects for the input documents and return a list of subject sets
represented as a list of SubjectSuggestion objects."""
Expand Down Expand Up @@ -149,7 +149,7 @@ def _learn(self, corpus, params):
def learn(
self,
corpus: DocumentCorpus,
params: Optional[Dict[str, int]] = None,
params: Optional[Dict[str, Any]] = None,
) -> None:
"""Further train the model on the given document or subject corpus."""
beparams = self._get_backend_params(params)
Expand Down
8 changes: 3 additions & 5 deletions annif/backend/dummy.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Dummy backend for testing basic interaction of projects and backends"""
from __future__ import annotations

from typing import TYPE_CHECKING, Dict, List, Union
from typing import TYPE_CHECKING, Any, Dict, List

from annif.suggestion import SubjectSuggestion

Expand All @@ -24,9 +24,7 @@ def default_params(self) -> Dict[str, int]:
def initialize(self, parallel: bool = False) -> None:
self.initialized = True

def _suggest(
self, text: str, params: Dict[str, Union[int, str]]
) -> List[SubjectSuggestion]:
def _suggest(self, text: str, params: Dict[str, Any]) -> List[SubjectSuggestion]:
score = float(params.get("score", 1.0))

# Ensure tests fail if "text" with wrong type ends up here
Expand All @@ -47,7 +45,7 @@ def _suggest(
def _learn(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[int, str]],
params: Dict[str, Any],
) -> None:
# in this dummy backend we "learn" by picking up the subject ID
# of the first subject of the first document in the learning set
Expand Down
10 changes: 4 additions & 6 deletions annif/backend/ensemble.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Ensemble backend that combines results from multiple projects"""
from __future__ import annotations

from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

import annif.eval
import annif.parallel
Expand Down Expand Up @@ -49,7 +49,7 @@ def _merge_source_batches(
self,
batch_by_source: Dict[str, SuggestionBatch],
sources: List[Tuple[str, float]],
params: Dict[str, Union[int, str]],
params: Dict[str, Any],
) -> SuggestionBatch:
"""Merge the given SuggestionBatches from each source into a single
SuggestionBatch. The default implementation computes a weighted
Expand All @@ -63,7 +63,7 @@ def _merge_source_batches(
)

def _suggest_batch(
self, texts: List[str], params: Dict[str, Union[float, str]]
self, texts: List[str], params: Dict[str, Any]
) -> SuggestionBatch:
sources = annif.util.parse_sources(params["sources"])
batch_by_source = self._suggest_with_sources(texts, sources)
Expand Down Expand Up @@ -159,7 +159,5 @@ def get_hp_optimizer(
) -> EnsembleOptimizer:
return EnsembleOptimizer(self, corpus, metric)

def _train(
self, corpus: DocumentCorpus, params: Dict[str, Union[int, str]], jobs: int = 0
):
def _train(self, corpus: DocumentCorpus, params: Dict[str, Any], jobs: int = 0):
raise NotSupportedException("Training ensemble backend is not possible.")
8 changes: 4 additions & 4 deletions annif/backend/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import collections
import os.path
from typing import TYPE_CHECKING, Dict, List, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union

import fasttext

Expand Down Expand Up @@ -119,7 +119,7 @@ def _create_train_file(
corpus, self.datadir, self.TRAIN_FILE, method=self._write_train_file
)

def _create_model(self, params: Dict[str, Union[float, str]], jobs: int) -> None:
def _create_model(self, params: Dict[str, Any], jobs: int) -> None:
self.info("creating fastText model")
trainpath = os.path.join(self.datadir, self.TRAIN_FILE)
modelpath = os.path.join(self.datadir, self.MODEL_FILE)
Expand All @@ -137,7 +137,7 @@ def _create_model(self, params: Dict[str, Union[float, str]], jobs: int) -> None
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[float, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
if corpus != "cached":
Expand All @@ -163,7 +163,7 @@ def _predict_chunks(
)

def _suggest_chunks(
self, chunktexts: List[str], params: Dict[str, Union[float, str]]
self, chunktexts: List[str], params: Dict[str, Any]
) -> List[SubjectSuggestion]:
limit = int(params["limit"])
chunklabels, chunkscores = self._predict_chunks(chunktexts, limit)
Expand Down
10 changes: 4 additions & 6 deletions annif/backend/mllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import annotations

import os.path
from typing import TYPE_CHECKING, Dict, Iterator, List, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Tuple, Union

import joblib
import numpy as np
Expand Down Expand Up @@ -124,7 +124,7 @@ def initialize(self, parallel: bool = False) -> None:
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[float, bool, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
self.info("starting train")
Expand Down Expand Up @@ -158,16 +158,14 @@ def _generate_candidates(self, text: str) -> List[Candidate]:
def _prediction_to_result(
self,
prediction: List[Tuple[np.float64, int]],
params: Dict[str, Union[float, bool, str]],
params: Dict[str, Any],
) -> Iterator:
vector = np.zeros(len(self.project.subjects), dtype=np.float32)
for score, subject_id in prediction:
vector[subject_id] = score
return vector_to_suggestions(vector, int(params["limit"]))

def _suggest(
self, text: str, params: Dict[str, Union[float, bool, str]]
) -> Iterator:
def _suggest(self, text: str, params: Dict[str, Any]) -> Iterator:
candidates = self._generate_candidates(text)
prediction = self._model.predict(candidates)
return self._prediction_to_result(prediction, params)
8 changes: 4 additions & 4 deletions annif/backend/nn_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os.path
import shutil
from io import BytesIO
from typing import TYPE_CHECKING, Dict, List, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union

import joblib
import lmdb
Expand Down Expand Up @@ -140,7 +140,7 @@ def _merge_source_batches(
self,
batch_by_source: Dict[str, SuggestionBatch],
sources: List[Tuple[str, float]],
params: Dict[str, Union[float, str]],
params: Dict[str, Any],
) -> SuggestionBatch:
src_weight = dict(sources)
score_vectors = np.array(
Expand Down Expand Up @@ -199,7 +199,7 @@ def _create_model(self, sources: List[Tuple[str, float]]) -> None:
def _train(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[float, str]],
params: Dict[str, Any],
jobs: int = 0,
) -> None:
sources = annif.util.parse_sources(self.params["sources"])
Expand Down Expand Up @@ -286,7 +286,7 @@ def _fit_model(
def _learn(
self,
corpus: DocumentCorpus,
params: Dict[str, Union[float, str]],
params: Dict[str, Any],
) -> None:
self.initialize()
self._fit_model(
Expand Down
18 changes: 7 additions & 11 deletions annif/lexical/mllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import math
from enum import IntEnum
from statistics import mean
from typing import TYPE_CHECKING, DefaultDict, Dict, List, Tuple, Union
from typing import TYPE_CHECKING, Any, DefaultDict, Dict, List, Tuple, Union

import joblib
import numpy as np
Expand Down Expand Up @@ -169,9 +169,7 @@ def _candidates_to_features(self, candidates: List[Candidate]) -> np.ndarray:
return candidates_to_features(candidates, self._model_data)

@staticmethod
def _get_label_props(
params: Dict[str, Union[float, bool, str]]
) -> Tuple[List[URIRef], List[URIRef]]:
def _get_label_props(params: Dict[str, Any]) -> Tuple[List[URIRef], List[URIRef]]:
pref_label_props = [SKOS.prefLabel]

if annif.util.boolean(params["use_hidden_labels"]):
Expand All @@ -185,7 +183,7 @@ def _prepare_terms(
self,
graph: Graph,
vocab: AnnifVocabulary,
params: Dict[str, Union[float, bool, str]],
params: Dict[str, Any],
) -> Tuple[List[Term], List[int]]:
pref_label_props, nonpref_label_props = self._get_label_props(params)

Expand Down Expand Up @@ -216,7 +214,7 @@ def _prepare_train_index(
self,
vocab: AnnifVocabulary,
analyzer: Analyzer,
params: Dict[str, Union[float, bool, str]],
params: Dict[str, Any],
) -> List[int]:
graph = vocab.as_graph()
terms, subject_ids = self._prepare_terms(graph, vocab, params)
Expand Down Expand Up @@ -301,7 +299,7 @@ def prepare_train(
corpus: DocumentCorpus,
vocab: AnnifVocabulary,
analyzer: Analyzer,
params: Dict[str, Union[float, bool, str]],
params: Dict[str, Any],
n_jobs: int,
) -> Tuple[np.ndarray, np.ndarray]:
# create an index from the vocabulary terms
Expand All @@ -318,9 +316,7 @@ def prepare_train(

return (np.vstack(features), np.array(train_y))

def _create_classifier(
self, params: Dict[str, Union[float, bool, str]]
) -> BaggingClassifier:
def _create_classifier(self, params: Dict[str, Any]) -> BaggingClassifier:
return BaggingClassifier(
DecisionTreeClassifier(
min_samples_leaf=int(params["min_samples_leaf"]),
Expand All @@ -333,7 +329,7 @@ def train(
self,
train_x: Union[np.ndarray, List[Tuple[int, int]]],
train_y: Union[List[bool], np.ndarray],
params: Dict[str, Union[float, bool, str]],
params: Dict[str, Any],
) -> None:
# fit the model on the training corpus
self._classifier = self._create_classifier(params)
Expand Down
10 changes: 5 additions & 5 deletions annif/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def initialize(self, parallel: bool = False) -> None:
def _suggest_with_backend(
self,
texts: List[str],
backend_params: Optional[DefaultDict[str, Dict[str, str]]],
backend_params: Optional[DefaultDict[str, Dict]],
) -> annif.suggestion.SuggestionBatch:
if backend_params is None:
backend_params = {}
Expand Down Expand Up @@ -226,7 +226,7 @@ def modification_time(self) -> Optional[datetime]:
def suggest_corpus(
self,
corpus: DocumentCorpus,
backend_params: Optional[DefaultDict[str, Dict[str, str]]] = None,
backend_params: Optional[DefaultDict[str, Dict]] = None,
) -> annif.suggestion.SuggestionResults:
"""Suggest subjects for the given documents corpus in batches of documents."""
suggestions = (
Expand All @@ -239,7 +239,7 @@ def suggest_corpus(
def suggest(
self,
texts: List[str],
backend_params: Optional[DefaultDict[str, Dict[str, str]]] = None,
backend_params: Optional[DefaultDict[str, Dict]] = None,
) -> annif.suggestion.SuggestionBatch:
"""Suggest subjects for the given documents batch."""
if not self.is_trained:
Expand All @@ -253,7 +253,7 @@ def suggest(
def train(
self,
corpus: DocumentCorpus,
backend_params: Optional[DefaultDict[str, Dict[str, str]]] = None,
backend_params: Optional[DefaultDict[str, Dict]] = None,
jobs: int = 0,
) -> None:
"""train the project using documents from a metadata source"""
Expand All @@ -267,7 +267,7 @@ def train(
def learn(
self,
corpus: DocumentCorpus,
backend_params: Optional[DefaultDict[str, Dict[str, str]]] = None,
backend_params: Optional[DefaultDict[str, Dict]] = None,
) -> None:
"""further train the project using documents from a metadata source"""
if backend_params is None:
Expand Down

0 comments on commit a193ade

Please sign in to comment.