Skip to content

Commit

Permalink
[ASR] Confidence measure -> method renames (NVIDIA#7434)
Browse files: browse the repository at this point in the history
* measure -> method

Signed-off-by: Aleksandr Laptev <alaptev@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Aleksandr Laptev <alaptev@nvidia.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Signed-off-by: BestJuly <chntaoli@163.com>
  • Loading branch information
2 people authored and BestJuly committed Sep 23, 2023
1 parent 23896b3 commit dd1bca2
Show file tree
Hide file tree
Showing 17 changed files with 172 additions and 255 deletions.
28 changes: 14 additions & 14 deletions nemo/collections/asr/metrics/rnnt_wer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,16 +100,16 @@ class AbstractRNNTDecoding(ConfidenceMixin):
from the `token_confidence`.
aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence.
Valid options are `mean`, `min`, `max`, `prod`.
measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame
method_cfg: A dict-like object which contains the method name and settings to compute per-frame
confidence scores.
name: The measure name (str).
name: The method name (str).
Supported values:
- 'max_prob' for using the maximum token probability as a confidence.
- 'entropy' for using a normalized entropy of a log-likelihood vector.
entropy_type: Which type of entropy to use (str).
Used if confidence_measure_cfg.name is set to `entropy`.
Used if confidence_method_cfg.name is set to `entropy`.
Supported values:
- 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided,
the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)).
Expand Down Expand Up @@ -140,7 +140,7 @@ class AbstractRNNTDecoding(ConfidenceMixin):
timestep during greedy decoding. Setting to larger values allows longer sentences
to be decoded, at the cost of increased execution time.
preserve_frame_confidence: Same as above, overrides above value.
confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg.
confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg.
"beam":
beam_size: int, defining the beam size for beam search. Must be >= 1.
Expand Down Expand Up @@ -277,7 +277,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
),
preserve_alignments=self.preserve_alignments,
preserve_frame_confidence=self.preserve_frame_confidence,
confidence_measure_cfg=self.confidence_measure_cfg,
confidence_method_cfg=self.confidence_method_cfg,
)
else:
self.decoding = greedy_decode.GreedyTDTInfer(
Expand All @@ -291,7 +291,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
),
preserve_alignments=self.preserve_alignments,
preserve_frame_confidence=self.preserve_frame_confidence,
confidence_measure_cfg=self.confidence_measure_cfg,
confidence_method_cfg=self.confidence_method_cfg,
)
else:
self.decoding = greedy_decode.GreedyMultiblankRNNTInfer(
Expand All @@ -304,7 +304,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
),
preserve_alignments=self.preserve_alignments,
preserve_frame_confidence=self.preserve_frame_confidence,
confidence_measure_cfg=self.confidence_measure_cfg,
confidence_method_cfg=self.confidence_method_cfg,
)

elif self.cfg.strategy == 'greedy_batch':
Expand All @@ -320,7 +320,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
),
preserve_alignments=self.preserve_alignments,
preserve_frame_confidence=self.preserve_frame_confidence,
confidence_measure_cfg=self.confidence_measure_cfg,
confidence_method_cfg=self.confidence_method_cfg,
)
else:
self.decoding = greedy_decode.GreedyBatchedTDTInfer(
Expand All @@ -334,7 +334,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
),
preserve_alignments=self.preserve_alignments,
preserve_frame_confidence=self.preserve_frame_confidence,
confidence_measure_cfg=self.confidence_measure_cfg,
confidence_method_cfg=self.confidence_method_cfg,
)

else:
Expand All @@ -348,7 +348,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
),
preserve_alignments=self.preserve_alignments,
preserve_frame_confidence=self.preserve_frame_confidence,
confidence_measure_cfg=self.confidence_measure_cfg,
confidence_method_cfg=self.confidence_method_cfg,
)

elif self.cfg.strategy == 'beam':
Expand Down Expand Up @@ -1005,16 +1005,16 @@ class RNNTDecoding(AbstractRNNTDecoding):
from the `token_confidence`.
aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence.
Valid options are `mean`, `min`, `max`, `prod`.
measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame
method_cfg: A dict-like object which contains the method name and settings to compute per-frame
confidence scores.
name: The measure name (str).
name: The method name (str).
Supported values:
- 'max_prob' for using the maximum token probability as a confidence.
- 'entropy' for using a normalized entropy of a log-likelihood vector.
entropy_type: Which type of entropy to use (str).
Used if confidence_measure_cfg.name is set to `entropy`.
Used if confidence_method_cfg.name is set to `entropy`.
Supported values:
- 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided,
the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)).
Expand Down Expand Up @@ -1047,7 +1047,7 @@ class RNNTDecoding(AbstractRNNTDecoding):
preserve_frame_confidence: Same as above, overrides above value.
confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg.
confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg.
"beam":
beam_size: int, defining the beam size for beam search. Must be >= 1.
Expand Down
8 changes: 4 additions & 4 deletions nemo/collections/asr/metrics/rnnt_wer_bpe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,16 +100,16 @@ class RNNTBPEDecoding(AbstractRNNTDecoding):
from the `token_confidence`.
aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence.
Valid options are `mean`, `min`, `max`, `prod`.
measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame
method_cfg: A dict-like object which contains the method name and settings to compute per-frame
confidence scores.
name: The measure name (str).
name: The method name (str).
Supported values:
- 'max_prob' for using the maximum token probability as a confidence.
- 'entropy' for using a normalized entropy of a log-likelihood vector.
entropy_type: Which type of entropy to use (str).
Used if confidence_measure_cfg.name is set to `entropy`.
Used if confidence_method_cfg.name is set to `entropy`.
Supported values:
- 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided,
the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)).
Expand Down Expand Up @@ -142,7 +142,7 @@ class RNNTBPEDecoding(AbstractRNNTDecoding):
preserve_frame_confidence: Same as above, overrides above value.
confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg.
confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg.
"beam":
beam_size: int, defining the beam size for beam search. Must be >= 1.
Expand Down
18 changes: 9 additions & 9 deletions nemo/collections/asr/metrics/wer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -258,16 +258,16 @@ class AbstractCTCDecoding(ConfidenceMixin):
from the `token_confidence`.
aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence.
Valid options are `mean`, `min`, `max`, `prod`.
measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame
method_cfg: A dict-like object which contains the method name and settings to compute per-frame
confidence scores.
name: The measure name (str).
name: The method name (str).
Supported values:
- 'max_prob' for using the maximum token probability as a confidence.
- 'entropy' for using a normalized entropy of a log-likelihood vector.
entropy_type: Which type of entropy to use (str).
Used if confidence_measure_cfg.name is set to `entropy`.
Used if confidence_method_cfg.name is set to `entropy`.
Supported values:
- 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided,
the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)).
Expand Down Expand Up @@ -300,7 +300,7 @@ class AbstractCTCDecoding(ConfidenceMixin):
preserve_alignments: Same as above, overrides above value.
compute_timestamps: Same as above, overrides above value.
preserve_frame_confidence: Same as above, overrides above value.
confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg.
confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg.
"beam":
beam_size: int, defining the beam size for beam search. Must be >= 1.
Expand Down Expand Up @@ -389,7 +389,7 @@ def __init__(self, decoding_cfg, blank_id: int):
preserve_alignments=self.preserve_alignments,
compute_timestamps=self.compute_timestamps,
preserve_frame_confidence=self.preserve_frame_confidence,
confidence_measure_cfg=self.confidence_measure_cfg,
confidence_method_cfg=self.confidence_method_cfg,
)

elif self.cfg.strategy == 'beam':
Expand Down Expand Up @@ -1037,16 +1037,16 @@ class CTCDecoding(AbstractCTCDecoding):
from the `token_confidence`.
aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence.
Valid options are `mean`, `min`, `max`, `prod`.
measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame
method_cfg: A dict-like object which contains the method name and settings to compute per-frame
confidence scores.
name: The measure name (str).
name: The method name (str).
Supported values:
- 'max_prob' for using the maximum token probability as a confidence.
- 'entropy' for using a normalized entropy of a log-likelihood vector.
entropy_type: Which type of entropy to use (str).
Used if confidence_measure_cfg.name is set to `entropy`.
Used if confidence_method_cfg.name is set to `entropy`.
Supported values:
- 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided,
the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)).
Expand Down Expand Up @@ -1079,7 +1079,7 @@ class CTCDecoding(AbstractCTCDecoding):
preserve_alignments: Same as above, overrides above value.
compute_timestamps: Same as above, overrides above value.
preserve_frame_confidence: Same as above, overrides above value.
confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg.
confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg.
"beam":
beam_size: int, defining the beam size for beam search. Must be >= 1.
Expand Down
8 changes: 4 additions & 4 deletions nemo/collections/asr/metrics/wer_bpe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -74,16 +74,16 @@ class CTCBPEDecoding(AbstractCTCDecoding):
from the `token_confidence`.
aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence.
Valid options are `mean`, `min`, `max`, `prod`.
measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame
method_cfg: A dict-like object which contains the method name and settings to compute per-frame
confidence scores.
name: The measure name (str).
name: The method name (str).
Supported values:
- 'max_prob' for using the maximum token probability as a confidence.
- 'entropy' for using a normalized entropy of a log-likelihood vector.
entropy_type: Which type of entropy to use (str).
Used if confidence_measure_cfg.name is set to `entropy`.
Used if confidence_method_cfg.name is set to `entropy`.
Supported values:
- 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided,
the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)).
Expand Down Expand Up @@ -116,7 +116,7 @@ class CTCBPEDecoding(AbstractCTCDecoding):
preserve_alignments: Same as above, overrides above value.
compute_timestamps: Same as above, overrides above value.
preserve_frame_confidence: Same as above, overrides above value.
confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg.
confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg.
"beam":
beam_size: int, defining the beam size for beam search. Must be >= 1.
Expand Down
12 changes: 6 additions & 6 deletions nemo/collections/asr/models/confidence_ensemble.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,7 +25,7 @@
from nemo.collections.asr.models.hybrid_rnnt_ctc_models import EncDecHybridRNNTCTCModel
from nemo.collections.asr.parts.utils.asr_confidence_utils import (
ConfidenceConfig,
ConfidenceMeasureConfig,
ConfidenceMethodConfig,
get_confidence_aggregation_bank,
get_confidence_measure_bank,
)
Expand Down Expand Up @@ -61,7 +61,7 @@ def to_confidence_config(self) -> ConfidenceConfig:
return ConfidenceConfig(
exclude_blank=self.exclude_blank,
aggregation=self.aggregation,
measure_cfg=ConfidenceMeasureConfig(
method_cfg=ConfidenceMethodConfig(
name=name, entropy_type=entropy_type, alpha=self.alpha, entropy_norm=entropy_norm,
),
)
Expand Down Expand Up @@ -126,7 +126,7 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig)
hypothesis: generated hypothesis as returned from the transcribe
method of the ASR model.
confidence_cfg: confidence config specifying what kind of
measure/aggregation should be used.
method/aggregation should be used.
Returns:
float: confidence score.
Expand All @@ -135,12 +135,12 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig)
filtered_logprobs = get_filtered_logprobs(hypothesis, confidence_cfg.exclude_blank)
vocab_size = filtered_logprobs.shape[1]
aggr_func = get_confidence_aggregation_bank()[confidence_cfg.aggregation]
if confidence_cfg.measure_cfg.name == "max_prob":
if confidence_cfg.method_cfg.name == "max_prob":
conf_type = "max_prob"
alpha = 1.0
else:
conf_type = f"entropy_{confidence_cfg.measure_cfg.entropy_type}_{confidence_cfg.measure_cfg.entropy_norm}"
alpha = confidence_cfg.measure_cfg.alpha
conf_type = f"entropy_{confidence_cfg.method_cfg.entropy_type}_{confidence_cfg.method_cfg.entropy_norm}"
alpha = confidence_cfg.method_cfg.alpha
conf_func = get_confidence_measure_bank()[conf_type]

conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=alpha)).cpu().item()
Expand Down

0 comments on commit dd1bca2

Please sign in to comment.