Simplify the type annotations in benchmarks (optuna#4703)
* refactor: simplified typing annotations under benchmarks/

* fix: fixed syntax error on Python 3.7 and 3.8
caprest committed Jun 13, 2023
1 parent acee2bd commit cabfb43
Showing 8 changed files with 84 additions and 80 deletions.
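
Every change in the files below follows the same pattern: once a module starts with "from __future__ import annotations" (PEP 563), its annotations are stored as strings and never evaluated at runtime, so the PEP 585/604 spellings (list[int | str], dict[str, Any], X | None) can be used in annotations in place of typing.List, typing.Dict, typing.Union, and typing.Optional, even on Python 3.7 and 3.8. A minimal before/after sketch of that style, not code from this commit (the function name is hypothetical):

from __future__ import annotations  # PEP 563: annotations are stored as strings, not evaluated

# Old spelling, needed when annotations are evaluated on Python 3.7/3.8:
#     from typing import List, Optional, Union
#     def parse(arg: str) -> Optional[List[Union[int, str]]]: ...

# New spelling: valid under the future import because the annotation
# expression "list[int | str] | None" is never evaluated at runtime.
def parse(arg: str) -> list[int | str] | None:
    ...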
8 changes: 4 additions & 4 deletions benchmarks/asv/optimize.py
@@ -1,6 +1,6 @@
+from __future__ import annotations
+
 from typing import cast
-from typing import List
-from typing import Union
 
 import optuna
 from optuna.samplers import BaseSampler
@@ -10,8 +10,8 @@
 from optuna.testing.storages import StorageSupplier
 
 
-def parse_args(args: str) -> List[Union[int, str]]:
-    ret: List[Union[int, str]] = []
+def parse_args(args: str) -> list[int | str]:
+    ret: list[int | str] = []
     for arg in map(lambda s: s.strip(), args.split(",")):
         try:
             ret.append(int(arg))
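
The hunk above cuts off inside parse_args. A self-contained sketch of how the helper plausibly behaves with its new signature; the except branch and the return are assumptions, since they are not visible in the truncated diff:

from __future__ import annotations


def parse_args(args: str) -> list[int | str]:
    # Split a comma-separated string, keeping numeric tokens as ints.
    ret: list[int | str] = []
    for arg in map(lambda s: s.strip(), args.split(",")):
        try:
            ret.append(int(arg))
        except ValueError:  # assumed fallback: keep the token as a stripped string
            ret.append(arg)
    return ret


print(parse_args("100, tpe, 5"))  # [100, 'tpe', 5]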
15 changes: 8 additions & 7 deletions benchmarks/bayesmark/optuna_optimizer.py
@@ -1,6 +1,7 @@
+from __future__ import annotations
+
 from typing import Any
 from typing import Dict
-from typing import List
 from typing import Union
 
 import numpy as np
@@ -50,14 +51,14 @@ def __init__(self, api_config: ApiConfig, **kwargs: Any) -> None:
 
         try:
            sampler = _SAMPLERS[kwargs["sampler"]]
-            sampler_kwargs: Dict[str, Any] = kwargs["sampler_kwargs"]
+            sampler_kwargs: dict[str, Any] = kwargs["sampler_kwargs"]
 
         except KeyError:
             raise ValueError("Unknown sampler passed to Optuna optimizer.")
 
         try:
             pruner = _PRUNERS[kwargs["pruner"]]
-            pruner_kwargs: Dict[str, Any] = kwargs["pruner_kwargs"]
+            pruner_kwargs: dict[str, Any] = kwargs["pruner_kwargs"]
 
         except KeyError:
             raise ValueError("Unknown pruner passed to Optuna optimizer.")
@@ -69,7 +70,7 @@ def __init__(self, api_config: ApiConfig, **kwargs: Any) -> None:
             sampler=sampler(**sampler_kwargs),
             pruner=pruner(**pruner_kwargs),
         )
-        self.current_trials: Dict[int, int] = dict()
+        self.current_trials: dict[int, int] = dict()
 
     def _suggest(self, trial: optuna.trial.Trial) -> Suggestion:
         suggestions: Suggestion = dict()
@@ -96,8 +97,8 @@ def _suggest(self, trial: optuna.trial.Trial) -> Suggestion:
 
         return suggestions
 
-    def suggest(self, n_suggestions: int) -> List[Suggestion]:
-        suggestions: List[Suggestion] = list()
+    def suggest(self, n_suggestions: int) -> list[Suggestion]:
+        suggestions: list[Suggestion] = list()
     for _ in range(n_suggestions):
            trial = self.study.ask()
             params = self._suggest(trial)
@@ -107,7 +108,7 @@ def suggest(self, n_suggestions: int) -> List[Suggestion]:
 
         return suggestions
 
-    def observe(self, X: List[Suggestion], y: List[float]) -> None:
+    def observe(self, X: list[Suggestion], y: list[float]) -> None:
         for params, objective_value in zip(X, y):
             sid = hash(frozenset(params.items()))
             trial = self.current_trials.pop(sid)
60 changes: 30 additions & 30 deletions benchmarks/bayesmark/report_bayesmark.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import abc
 from collections import defaultdict
 from dataclasses import dataclass
@@ -6,8 +8,6 @@
 from typing import Dict
 from typing import Generator
 from typing import List
-from typing import Optional
-from typing import Set
 from typing import Tuple
 
 from jinja2 import Environment
@@ -38,7 +38,7 @@ def precision(self) -> int:
         raise NotImplementedError
 
     @abc.abstractmethod
-    def calculate(self, data: pd.DataFrame) -> List[float]:
+    def calculate(self, data: pd.DataFrame) -> list[float]:
         """Calculates metric for each study in data frame."""
 
         raise NotImplementedError
@@ -48,16 +48,16 @@ class BestValueMetric(BaseMetric):
     name = "Best value"
     precision = 6
 
-    def calculate(self, data: pd.DataFrame) -> List[float]:
+    def calculate(self, data: pd.DataFrame) -> list[float]:
         return data.groupby("uuid").generalization.min().values
 
 
 class AUCMetric(BaseMetric):
     name = "AUC"
     precision = 3
 
-    def calculate(self, data: pd.DataFrame) -> List[float]:
-        aucs: List[float] = list()
+    def calculate(self, data: pd.DataFrame) -> list[float]:
+        aucs: list[float] = list()
         for _, grp in data.groupby("uuid"):
             auc = np.sum(grp.generalization.cummin())
             aucs.append(auc / grp.shape[0])
@@ -68,7 +68,7 @@ class ElapsedMetric(BaseMetric):
     name = "Elapsed"
     precision = 3
 
-    def calculate(self, data: pd.DataFrame) -> List[float]:
+    def calculate(self, data: pd.DataFrame) -> list[float]:
         # Total time does not include evaluation of bayesmark
         # objective function (no Optuna APIs are called there).
         time_cols = ["suggest", "observe"]
@@ -80,7 +80,7 @@ def __init__(self, data: pd.DataFrame) -> None:
         self._data = data
 
     @property
-    def optimizers(self) -> List[str]:
+    def optimizers(self) -> list[str]:
         return list(self._data.opt.unique())
 
     @classmethod
@@ -97,24 +97,24 @@ def summarize_solver(self, solver: str, metric: BaseMetric) -> Moments:
         return np.mean(run_metrics).item(), np.var(run_metrics).item()
 
     def sample_performance(self, metric: BaseMetric) -> Samples:
-        performance: Dict[str, List[float]] = {}
+        performance: dict[str, list[float]] = {}
         for solver, data in self._data.groupby("opt"):
             run_metrics = metric.calculate(data)
             performance[solver] = run_metrics
         return performance
 
 
 class DewanckerRanker:
-    def __init__(self, metrics: List[BaseMetric]) -> None:
+    def __init__(self, metrics: list[BaseMetric]) -> None:
         self._metrics = metrics
-        self._ranking: Optional[List[str]] = None
-        self._borda: Optional[np.ndarray] = None
+        self._ranking: list[str] | None = None
+        self._borda: np.ndarray | None = None
 
-    def __iter__(self) -> Generator[Tuple[str, int], None, None]:
+    def __iter__(self) -> Generator[tuple[str, int], None, None]:
         yield from zip(self.solvers, self.borda)
 
     @property
-    def solvers(self) -> List[str]:
+    def solvers(self) -> list[str]:
         if self._ranking is None:
             raise ValueError("Call rank first.")
         return self._ranking
@@ -136,11 +136,11 @@ def pick_alpha(report: PartialReport) -> float:
                 return cand
         return candidates[-1]
 
-    def _set_ranking(self, wins: Dict[str, int]) -> None:
+    def _set_ranking(self, wins: dict[str, int]) -> None:
         sorted_wins = [k for k, _ in sorted(wins.items(), key=lambda x: x[1])]
         self._ranking = sorted_wins[::-1]
 
-    def _set_borda(self, wins: Dict[str, int]) -> None:
+    def _set_borda(self, wins: dict[str, int]) -> None:
         sorted_wins = np.array(sorted(wins.values()))
         num_wins, num_ties = np.unique(sorted_wins, return_counts=True)
         points = np.searchsorted(sorted_wins, num_wins)
@@ -149,7 +149,7 @@ def _set_borda(self, wins: Dict[str, int]) -> None:
     def rank(self, report: PartialReport) -> None:
         # Implements Section 2.1.1
         # https://proceedings.mlr.press/v64/dewancker_strategy_2016.pdf
-        wins: Dict[str, int] = defaultdict(int)
+        wins: dict[str, int] = defaultdict(int)
         alpha = DewanckerRanker.pick_alpha(report)
         for metric in self._metrics:
             samples = report.sample_performance(metric)
@@ -172,42 +172,42 @@ def rank(self, report: PartialReport) -> None:
 class Solver:
     rank: int
     name: str
-    results: List[str]
+    results: list[str]
 
 
 @dataclass
 class Problem:
     number: int
     name: str
-    metrics: List[BaseMetric]
-    solvers: List[Solver]
+    metrics: list[BaseMetric]
+    solvers: list[Solver]
 
 
 class BayesmarkReportBuilder:
     def __init__(self) -> None:
-        self.solvers: Set[str] = set()
-        self.datasets: Set[str] = set()
-        self.models: Set[str] = set()
-        self.firsts: Dict[str, int] = defaultdict(int)
-        self.borda: Dict[str, int] = defaultdict(int)
+        self.solvers: set[str] = set()
+        self.datasets: set[str] = set()
+        self.models: set[str] = set()
+        self.firsts: dict[str, int] = defaultdict(int)
+        self.borda: dict[str, int] = defaultdict(int)
         self.metric_precedence = ""
-        self.problems: List[Problem] = []
+        self.problems: list[Problem] = []
 
-    def set_precedence(self, metrics: List[BaseMetric]) -> None:
+    def set_precedence(self, metrics: list[BaseMetric]) -> None:
         self.metric_precedence = " -> ".join([m.name for m in metrics])
 
     def add_problem(
         self,
         name: str,
         report: PartialReport,
         ranking: DewanckerRanker,
-        metrics: List[BaseMetric],
+        metrics: list[BaseMetric],
     ) -> "BayesmarkReportBuilder":
-        solvers: List[Solver] = list()
+        solvers: list[Solver] = list()
         positions = np.abs(ranking.borda - (max(ranking.borda) + 1))
         for pos, solver in zip(positions, ranking.solvers):
             self.solvers.add(solver)
-            results: List[str] = list()
+            results: list[str] = list()
             for metric in metrics:
                 mean, variance = report.summarize_solver(solver, metric)
                 precision = metric.precision
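
Several typing imports (Dict, Generator, List, Tuple) survive in report_bayesmark.py even though the annotations now use the built-in spellings; only Optional and Set are dropped in the import hunk above. A plausible reason, and a plausible reading of the "fixed syntax error on Python 3.7 and 3.8" bullet in the commit message (an inference, not something the diff states): the future import only defers evaluation of annotations, while type expressions evaluated at runtime, such as module-level aliases or the type passed to typing.cast, must keep the typing forms on 3.7/3.8. A hedged sketch of that distinction, using a hypothetical alias:

from __future__ import annotations

from typing import Dict, List, cast


# Fine on Python 3.7/3.8: the annotation below is never evaluated at runtime.
def summarize(values: list[float]) -> dict[str, float] | None:
    if not values:
        return None
    return {"mean": sum(values) / len(values)}


# Evaluated at import time, so the typing spellings must stay on 3.7/3.8;
# spelling this as dict[str, list[float]] would raise TypeError there.
# (Hypothetical alias for illustration, not taken from the diff.)
Samples = Dict[str, List[float]]

# Also evaluated at runtime: the type passed to typing.cast.
values = cast(List[float], [1.0, 2.0, 3.0])
print(summarize(values))  # {'mean': 2.0}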
