Merged

Changes from all commits (22 commits)
813e969
Remove unnecessary test file
gabegma Jun 3, 2022
f69febe
Merge pull request #107 from ServiceNow/hotfix/remove-unnecessary-tes…
gabegma Jun 3, 2022
3ba4a6b
Fix unexpected borders in Performance Analysis table (#118)
JosephMarinier Jun 6, 2022
c4ded87
Update release notes to v2.1.1
gabegma Jun 6, 2022
eb82420
Merge pull request #119 from ServiceNow/hotfix/update-release-notes-v…
gabegma Jun 6, 2022
ee820a9
Fix issue where dependency caching was not done properly in the backe…
Dref360 Jun 6, 2022
f84612e
Show 20 similar utterances instead of 10 (#121)
Dref360 Jun 7, 2022
0b1efdc
Merge branch 'main' into dev
JosephMarinier Jun 8, 2022
aa1b57d
Toggle normalized confusion matrix (#86)
nandhinibsn Jun 8, 2022
1145959
Offer hiding columns from the Performance Analysis table (#129)
JosephMarinier Jun 10, 2022
94cc329
Add documentation around custom utterances (#143)
Dref360 Jun 13, 2022
ec038f5
Raise exceptions inside ValidationModule to avoid hanging without inf…
Dref360 Jun 14, 2022
071841f
Move confidence threshold out of `/dataset_info` (#61)
JosephMarinier Jun 16, 2022
579a61a
Smart tag families (#145)
JosephMarinier Jun 20, 2022
0aad890
Add F1 (#155)
JosephMarinier Jun 21, 2022
4c4e176
Merge remote-tracking branch 'origin/main' into dev
JosephMarinier Jun 22, 2022
fe896d0
Description in UI (#146)
nandhinibsn Jun 27, 2022
8173b4e
Add pipeline comparison smart tag family (#161)
JosephMarinier Jul 7, 2022
a21020a
Add visual bars to Performance Analysis table (#159)
nandhinibsn Jul 8, 2022
0df14df
Merge remote-tracking branch 'origin/main' into dev
gabegma Jul 8, 2022
18d54ab
Various fixes before releasing (#163)
JosephMarinier Jul 8, 2022
a64ef74
Update documentation for v2.2 (#162)
gabegma Jul 11, 2022
6 changes: 3 additions & 3 deletions CITATION.cff
@@ -1,4 +1,4 @@
-cff-version: 1.1.0
+cff-version: 1.0.0
 message: "If you use Azimuth in your projects, please cite it as below."
 authors:
 - family-names: "Branchaud-Charron"
@@ -18,7 +18,7 @@ authors:
 - family-names: "Babu"
   given-names: "Nandhini"
 title: "Azimuth, an open-source dataset and error analysis tool for text classification"
-version: 2.1
+version: 2.2
 doi: 10.5281/zenodo.6511558
-date-released: 2022-05-27
+date-released: 2022-07-08
 url: "https://github.com/ServiceNow/azimuth"
14 changes: 4 additions & 10 deletions azimuth/app.py
@@ -3,7 +3,7 @@
 # in the root directory of this source tree.
 import logging
 from threading import Event
-from typing import Dict, Optional, cast
+from typing import Dict, Optional

 import structlog
 from fastapi import APIRouter, Depends, FastAPI, HTTPException
@@ -18,10 +18,9 @@
 from azimuth.modules.utilities.validation import ValidationModule
 from azimuth.task_manager import TaskManager
 from azimuth.types import DatasetSplitName, ModuleOptions
-from azimuth.types.validation import ValidationResponse
 from azimuth.utils.cluster import default_cluster
 from azimuth.utils.conversion import JSONResponseIgnoreNan
-from azimuth.utils.logs import MultipleException, set_logger_config
+from azimuth.utils.logs import set_logger_config
 from azimuth.utils.project import load_dataset_split_managers_from_config
 from azimuth.utils.validation import assert_not_none

@@ -267,20 +266,15 @@ def run_validation(
         MultipleException if the validation failed.
     """

-    def raise_exception_if_needed(validation_module):
-        validation_module.result()
-        response = cast(ValidationResponse, validation_module.result()[0])
-        if response.exceptions:
-            raise MultipleException(response.exceptions)
-
     def run_validation_module(pipeline_index=None):
         validation_module = ValidationModule(
             config=config,
             dataset_split_name=dataset_split,
             mod_options=ModuleOptions(pipeline_index=pipeline_index),
         )
         validation_module.start_task_on_dataset_split(task_manager.client)
-        raise_exception_if_needed(validation_module)
+        # Will raise exceptions as needed.
+        validation_module.result()

     if config.pipelines is None:
         run_validation_module()
5 changes: 5 additions & 0 deletions azimuth/config.py
@@ -257,6 +257,11 @@ class ModelContractConfig(CommonFieldsConfig):
             kwargs={"path": "recall"},
             additional_kwargs={"average": "weighted"},
         ),
+        "F1": MetricDefinition(
+            class_name="datasets.load_metric",
+            kwargs={"path": "f1"},
+            additional_kwargs={"average": "weighted"},
+        ),
     }

     @validator("pipelines", pre=True)
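Note: the new "F1" entry mirrors the existing Precision and Recall definitions: `class_name` points at Hugging Face's `datasets.load_metric`, `kwargs` are passed at load time and `additional_kwargs` at compute time. A minimal sketch of how such a definition resolves; the dict below is illustrative, not Azimuth's actual loader:

```python
# Hedged sketch: how a MetricDefinition-style entry resolves to a computed
# metric. `datasets.load_metric` is the real HF API the config points to;
# the surrounding dict mirrors the config above but is illustrative only.
from datasets import load_metric

definition = {
    "class_name": "datasets.load_metric",
    "kwargs": {"path": "f1"},  # used at load time
    "additional_kwargs": {"average": "weighted"},  # used at compute time
}

metric = load_metric(**definition["kwargs"])  # -> datasets.Metric for "f1"
result = metric.compute(
    predictions=[0, 1, 2, 1],
    references=[0, 1, 1, 1],
    **definition["additional_kwargs"],  # weighted average across classes
)
print(result)  # {'f1': 0.85}
```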
4 changes: 3 additions & 1 deletion azimuth/modules/base_classes/dask_module.py
@@ -138,7 +138,9 @@ def start_task(self, client: Client, custom_query: Dict[str, Any]) -> "DaskModul
         log.info(f"Starting custom query {self.name}")
         # pure=false to be sure that everything is rerun.
         # Using self.name as key as we don't have indices
-        self.future = client.submit(self.compute, custom_query, key=self.name, pure=False)
+        self.future = client.submit(
+            self.compute, custom_query, key=f"{self.name}_{hash(str(custom_query))}", pure=False
+        )
         # Tell that this future is for custom use only.
         self.future.is_custom = True
         self.add_done_callback(self.on_end)
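Note: Dask deduplicates tasks by future key, so reusing the bare `self.name` as the key meant a second custom query against the same module could be served the first query's cached result (the dependency-caching bug from commit ee820a9). A minimal sketch of the idea, assuming a local `dask.distributed` client; the module name and queries are made up:

```python
# Hedged sketch of the keying issue, not Azimuth code.
from dask.distributed import Client

client = Client(processes=False)  # local in-process cluster

def compute(query):
    return f"result for {query}"

# Same key twice: while the first future is alive, dask can reuse the
# existing task for that key, so f2 may resolve to the *first* result.
f1 = client.submit(compute, {"text": "hello"}, key="MyModule", pure=False)
f2 = client.submit(compute, {"text": "world"}, key="MyModule", pure=False)

# Distinct key per query, as in the fix above: each query is computed.
g2 = client.submit(
    compute,
    {"text": "world"},
    key=f"MyModule_{hash(str({'text': 'world'}))}",
    pure=False,
)
print(g2.result())  # "result for {'text': 'world'}"
```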
20 changes: 10 additions & 10 deletions azimuth/modules/model_performance/confidence_binning.py
@@ -38,13 +38,13 @@ def compute_on_dataset_split(self) -> List[ConfidenceHistogramResponse]: # type

         ds: Dataset = assert_not_none(self.get_dataset_split())

-        result = []
         if len(ds) > 0:
             # Get the bin index for each prediction.
             confidences = np.max(self._get_confidences_from_ds(), axis=1)
             bin_indices = np.floor(confidences * CONFIDENCE_BINS_COUNT)

             # Create the records. We drop the last bin as it's the maximum.
+            result = []
             for bin_index, bin_min_value in enumerate(bins[:-1]):
                 bin_mask = bin_indices == bin_index
                 outcome_count = defaultdict(int)
@@ -65,17 +65,17 @@ def compute_on_dataset_split(self) -> List[ConfidenceHistogramResponse]: # type
             )
         else:
             # Create empty bins
-            for bin_index, bin_min_value in enumerate(bins[:-1]):
-                result.append(
-                    ConfidenceBinDetails(
-                        bin_index=bin_index,
-                        bin_confidence=0,
-                        mean_bin_confidence=0,
-                        outcome_count={outcome: 0 for outcome in ALL_OUTCOMES},
-                    )
-                )
+            result = [
+                ConfidenceBinDetails(
+                    bin_index=bin_index,
+                    bin_confidence=0,
+                    mean_bin_confidence=0,
+                    outcome_count={outcome: 0 for outcome in ALL_OUTCOMES},
+                )
+                for bin_index, bin_min_value in enumerate(bins[:-1])
+            ]

-        return [ConfidenceHistogramResponse(details_all_bins=result)]
+        return [ConfidenceHistogramResponse(bins=result, confidence_threshold=self.get_threshold())]


 class ConfidenceBinIndexModule(DatasetResultModule[ModelContractConfig]):
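Note: the histogram maps each prediction's top confidence into equal-width bins with `np.floor`. A standalone sketch of that arithmetic, assuming `CONFIDENCE_BINS_COUNT = 20` (the real constant lives in Azimuth's code):

```python
import numpy as np

CONFIDENCE_BINS_COUNT = 20  # assumption for this sketch: 20 bins of width 0.05

# One softmax row per prediction; the top confidence decides the bin.
confidences = np.max(
    np.array([[0.10, 0.90], [0.52, 0.48], [0.99, 0.01]]), axis=1
)  # -> [0.90, 0.52, 0.99]

bin_indices = np.floor(confidences * CONFIDENCE_BINS_COUNT)  # -> [18., 10., 19.]

# A confidence of exactly 1.0 would land in index 20, one past the last
# bin, which is presumably why the module iterates over bins[:-1]
# ("We drop the last bin as it's the maximum").
bins = np.linspace(0, 1, CONFIDENCE_BINS_COUNT + 1)
for bin_index, bin_min_value in enumerate(bins[:-1]):
    mask = bin_indices == bin_index
    print(bin_index, round(float(bin_min_value), 2), int(mask.sum()))
```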
4 changes: 3 additions & 1 deletion azimuth/modules/model_performance/confusion_matrix.py
@@ -38,4 +38,6 @@ def compute_on_dataset_split(self) -> List[ConfusionMatrixResponse]: # type: ig
             labels=class_ids,
             normalize="true" if self.mod_options.cf_normalized else None,
         )
-        return [ConfusionMatrixResponse(confusion_matrix=cf)]
+        return [
+            ConfusionMatrixResponse(confusion_matrix=cf, normalized=self.mod_options.cf_normalized)
+        ]
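Note: `normalize="true"` is scikit-learn's row-wise normalization: each row of the confusion matrix is divided by the count of true instances of that class, which is what the normalized/raw toggle from commit aa1b57d switches between. A quick runnable illustration:

```python
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 0, 1, 1, 2]
y_pred = [0, 0, 1, 1, 1, 0]
labels = [0, 1, 2]

cf_counts = confusion_matrix(y_true, y_pred, labels=labels, normalize=None)
# [[2 1 0]
#  [0 2 0]
#  [1 0 0]]

cf_normalized = confusion_matrix(y_true, y_pred, labels=labels, normalize="true")
# Each row divided by its true-class count; rows sum to 1:
# [[0.667 0.333 0.   ]
#  [0.    1.    0.   ]
#  [1.    0.    0.   ]]
```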
39 changes: 26 additions & 13 deletions azimuth/modules/model_performance/metrics.py
@@ -5,7 +5,7 @@
 import json
 import warnings
 from collections import Counter
-from typing import Dict, List, Optional, Sequence
+from typing import Dict, List, Optional

 import numpy as np
 from datasets import Dataset, Metric
@@ -27,7 +27,12 @@
     MetricsPerFilterValue,
 )
 from azimuth.types.outcomes import ALL_OUTCOMES
-from azimuth.types.tag import ALL_DATA_ACTION_FILTERS, ALL_SMART_TAG_FILTERS
+from azimuth.types.tag import (
+    ALL_DATA_ACTION_FILTERS,
+    SMART_TAGS_FAMILY_MAPPING,
+    SmartTag,
+    SmartTagFamily,
+)
 from azimuth.utils.ml.ece import compute_ece_from_bins
 from azimuth.utils.ml.model_performance import sorted_by_utterance_count_with_last
 from azimuth.utils.validation import assert_not_none
@@ -70,7 +75,7 @@ def compute_on_dataset_split(self) -> List[MetricsModuleResponse]: # type: igno
             config=self.config,
             mod_options=self.mod_options,
         )
-        bins = conf_hist_mod.compute_on_dataset_split()[0].details_all_bins
+        bins = conf_hist_mod.compute_on_dataset_split()[0].bins
         ece, acc, expected = compute_ece_from_bins(bins)
         count_per_bin = [sum(b.outcome_count.values()) for b in bins]
@@ -157,7 +162,7 @@ class MetricsPerFilterModule(AggregationModule[AzimuthConfig]):
     """Computes the metrics for each filter."""

     def get_metrics_for_filter(
-        self, filters_dict: Dict[str, Sequence[DatasetFilters]]
+        self, filters_dict: Dict[str, DatasetFilters]
     ) -> List[MetricsPerFilterValue]:
         """Get metrics for a list of filters.

@@ -218,16 +223,24 @@ def compute_on_dataset_split(self) -> List[MetricsPerFilterModuleResponse]: # t
            )
            pbar.update()

-           smart_tag_filters = {
-               smart_tag: self.edit_filter(self.mod_options.filters, smart_tag=smart_tag)
-               for smart_tag in ALL_SMART_TAG_FILTERS
+           smart_tag_filters: Dict[SmartTagFamily, Dict[str, DatasetFilters]] = {
+               tag_family: {
+                   smart_tag: self.edit_filter(
+                       self.mod_options.filters, smart_tag={tag_family: [smart_tag]}
+                   )
+                   for smart_tag in tags + [SmartTag.no_smart_tag]
+               }
+               for tag_family, tags in SMART_TAGS_FAMILY_MAPPING.items()
            }
-           metrics_per_smart_tag = sorted_by_utterance_count_with_last(
-               self.get_metrics_for_filter(smart_tag_filters), -1
-           )
+           metrics_per_smart_tag = {
+               tag_family.value: sorted_by_utterance_count_with_last(
+                   self.get_metrics_for_filter(filters_for_family), -1
+               )
+               for tag_family, filters_for_family in smart_tag_filters.items()
+           }
            pbar.update()

-           outcomes_filters: Dict[str, Sequence[DatasetFilters]] = {
+           outcomes_filters: Dict[str, DatasetFilters] = {
                outcome: self.edit_filter(self.mod_options.filters, outcome=outcome)
                for outcome in ALL_OUTCOMES
            }
@@ -240,7 +253,7 @@ def compute_on_dataset_split(self) -> List[MetricsPerFilterModuleResponse]: # t
                    label=metrics_per_label,
                    prediction=metrics_per_prediction,
                    data_action=metrics_per_data_action,
-                   smart_tag=metrics_per_smart_tag,
+                   **metrics_per_smart_tag,
                    outcome=metrics_per_outcome,
                ),
                utterance_count=len(ds),
@@ -279,5 +292,5 @@ def edit_filter(
         if outcome is not None:
             filter_copy.outcomes.append(outcome)
         if smart_tag is not None:
-            filter_copy.smart_tags.append(smart_tag)
+            filter_copy.smart_tags.update(smart_tag)
         return filter_copy
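Note: the `edit_filter` change at the bottom is the crux of this diff: `smart_tags` on a filter is no longer a flat list extended with `append`, but a mapping from family to tags merged with `dict.update`. A toy sketch of the difference; the enum members below are illustrative stand-ins for Azimuth's real `SmartTagFamily` and `SmartTag`:

```python
# Hedged sketch of flat-list vs per-family smart-tag filters.
from enum import Enum
from typing import Dict, List


class SmartTagFamily(Enum):
    extreme_length = "extreme_length"
    pipeline_comparison = "pipeline_comparison"  # family added in PR #161


class SmartTag(Enum):
    long_sentence = "long_sentence"
    incorrect_for_all_pipelines = "incorrect_for_all_pipelines"
    no_smart_tag = "NO_SMART_TAGS"


# Before: one flat list, so a tag could not be scoped to its family.
flat_filter: List[SmartTag] = []
flat_filter.append(SmartTag.long_sentence)

# After: one entry per family, merged with dict.update as in edit_filter.
family_filter: Dict[SmartTagFamily, List[SmartTag]] = {}
family_filter.update({SmartTagFamily.extreme_length: [SmartTag.long_sentence]})
family_filter.update(
    {SmartTagFamily.pipeline_comparison: [SmartTag.incorrect_for_all_pipelines]}
)
print(family_filter)
```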
13 changes: 11 additions & 2 deletions azimuth/modules/model_performance/outcome_count.py
@@ -18,7 +18,11 @@
     OutcomeCountPerThresholdValue,
 )
 from azimuth.types.outcomes import ALL_OUTCOMES, OutcomeName
-from azimuth.types.tag import ALL_DATA_ACTION_FILTERS, ALL_SMART_TAG_FILTERS
+from azimuth.types.tag import (
+    ALL_DATA_ACTION_FILTERS,
+    SMART_TAGS_FAMILY_MAPPING,
+    SmartTag,
+)
 from azimuth.utils.ml.model_performance import (
     sorted_by_utterance_count,
     sorted_by_utterance_count_with_last,
@@ -152,7 +156,12 @@ def compute_on_dataset_split(self) -> List[OutcomeCountPerFilterResponse]: # ty
                 ),
                 data_action=self.get_outcome_count_per_tag(dm, ds, ALL_DATA_ACTION_FILTERS),
                 outcome=self.get_outcome_count_per_outcome(ds),
-                smart_tag=self.get_outcome_count_per_tag(dm, ds, ALL_SMART_TAG_FILTERS),
+                **{
+                    family.value: self.get_outcome_count_per_tag(
+                        dm, ds, [t.value for t in tags + [SmartTag.no_smart_tag]]
+                    )
+                    for family, tags in SMART_TAGS_FAMILY_MAPPING.items()
+                },
             ),
             utterance_count=len(ds),
         )
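Note: in both this module and metrics.py, the response now carries one field per smart-tag family instead of a single `smart_tag` field, so per-family results are splatted in as keyword arguments keyed by the family enum's `.value`. A minimal sketch of that pattern with made-up names, not Azimuth's real models:

```python
# Hedged sketch: building keyword arguments per smart-tag family.
from enum import Enum


class Family(Enum):
    extreme_length = "extreme_length"
    dissimilar = "dissimilar"


class Response:
    def __init__(self, outcome, **per_family_counts):
        self.outcome = outcome
        self.per_family_counts = per_family_counts


counts = {family.value: {"correct": 10, "incorrect": 2} for family in Family}

# Equivalent to Response(outcome=..., extreme_length=..., dissimilar=...)
response = Response(outcome={"correct": 12}, **counts)
print(response.per_family_counts["extreme_length"])
```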
4 changes: 3 additions & 1 deletion azimuth/modules/utilities/validation.py
@@ -9,6 +9,7 @@
 from azimuth.modules.model_contract_task_mapping import model_contract_task_mapping
 from azimuth.types import ModuleOptions, SupportedMethod, SupportedModelContract
 from azimuth.types.validation import ValidationResponse
+from azimuth.utils.logs import MultipleExceptions
 from azimuth.utils.validation import assert_not_none


@@ -69,6 +70,8 @@ def compute_on_dataset_split(self) -> List[ValidationResponse]: # type: ignore
             can_make_saliency = False

         # Should we raise instead?
+        if exception_gatherer.exceptions:
+            raise MultipleExceptions(exceptions=exception_gatherer.exceptions)
         return [
             ValidationResponse(
                 is_cuda_available=cuda_available,
@@ -77,7 +80,6 @@ def compute_on_dataset_split(self) -> List[ValidationResponse]: # type: ignore
                 model_has_correct_type=model_has_correct_type,
                 can_make_prediction=can_make_prediction,
                 can_make_saliency=can_make_saliency,
-                exceptions=exception_gatherer.exceptions,
             )
         ]

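Note: raising inside `compute_on_dataset_split` lets the exceptions propagate through the Dask future, so the caller in `azimuth/app.py` can simply call `result()` and fail loudly instead of hanging (commit ec038f5). A self-contained sketch of the gather-then-raise pattern; the `MultipleExceptions` class below is a stand-in, the real one lives in `azimuth.utils.logs`:

```python
# Hedged sketch of "gather exceptions, then raise one aggregate error".
from typing import List


class MultipleExceptions(Exception):
    """Stand-in for azimuth.utils.logs.MultipleExceptions."""

    def __init__(self, exceptions: List[Exception]):
        self.exceptions = exceptions
        super().__init__(f"{len(exceptions)} check(s) failed: {exceptions!r}")


def run_checks() -> None:
    gathered: List[Exception] = []
    for check in (lambda: 1 / 0, lambda: int("not a number")):
        try:
            check()
        except Exception as e:  # keep going; report everything at once
            gathered.append(e)
    if gathered:
        # Raising here (instead of returning the list in a response)
        # lets callers simply call result() and get a real failure.
        raise MultipleExceptions(gathered)


try:
    run_checks()
except MultipleExceptions as err:
    print(err)  # 2 check(s) failed: [ZeroDivisionError(...), ValueError(...)]
```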
11 changes: 0 additions & 11 deletions azimuth/routers/v1/app.py
@@ -35,7 +35,6 @@
 from azimuth.utils.project import (
     perturbation_testing_available,
     postprocessing_editable,
-    postprocessing_known,
     predictions_available,
     similarity_available,
 )
@@ -96,15 +95,6 @@ def get_dataset_info(

     model_contract = task_manager.config.model_contract

-    threshold = (
-        None
-        if config.pipelines is None
-        else [
-            pipeline.threshold if postprocessing_known(task_manager.config, idx) else None
-            for idx, pipeline in enumerate(config.pipelines)
-        ]
-    )
-
     return DatasetInfoResponse(
         project_name=config.name,
         class_names=eval_dm.get_class_names(),
@@ -115,7 +105,6 @@ def get_dataset_info(
         if training_dm is not None
         else [],
         startup_tasks={k: v.status() for k, v in startup_tasks.items()},
-        default_threshold=threshold,
         model_contract=model_contract,
         prediction_available=predictions_available(task_manager.config),
         perturbation_testing_available=perturbation_testing_available(task_manager.config),
7 changes: 4 additions & 3 deletions azimuth/routers/v1/custom_utterances.py
@@ -13,7 +13,7 @@
 from azimuth.config import AzimuthConfig
 from azimuth.modules.perturbation_testing import PerturbationTestingModule
 from azimuth.task_manager import TaskManager
-from azimuth.types import DatasetSplitName, SupportedMethod
+from azimuth.types import DatasetSplitName, ModuleOptions, SupportedMethod
 from azimuth.types.perturbation_testing import (
     PRETTY_PERTURBATION_TYPES,
     PerturbationTestFailureReason,
@@ -22,7 +22,7 @@
 )
 from azimuth.types.task import SaliencyResponse
 from azimuth.utils.conversion import orjson_dumps
-from azimuth.utils.routers import get_custom_task_result, require_available_model
+from azimuth.utils.routers import get_custom_task_result, require_pipeline_index

 router = APIRouter()

@@ -92,16 +92,17 @@ def get_perturbed_utterances(
     description="Get saliency for custom utterances.",
     tags=TAGS,
     response_model=List[SaliencyResponse],
-    dependencies=[Depends(require_available_model)],
 )
 def get_saliency(
     utterances: List[str] = Query([], title="Utterances"),
+    pipeline_index: int = Depends(require_pipeline_index),
     task_manager: TaskManager = Depends(get_task_manager),
 ) -> List[SaliencyResponse]:
     task_result: List[SaliencyResponse] = get_custom_task_result(
         SupportedMethod.Saliency,
         task_manager=task_manager,
         custom_query={task_manager.config.columns.text_input: utterances},
+        mod_options=ModuleOptions(pipeline_index=pipeline_index),
     )

     return task_result
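Note: swapping the blanket `require_available_model` dependency for an injected `pipeline_index` ties the saliency computation to one configured pipeline via `ModuleOptions`. A rough sketch of the FastAPI dependency pattern in use; the validation inside `require_pipeline_index` is assumed here, as Azimuth's real version lives in `azimuth.utils.routers`:

```python
# Hedged sketch of a pipeline_index dependency, not Azimuth's actual code.
from typing import List

from fastapi import Depends, FastAPI, HTTPException, Query

app = FastAPI()

NUM_PIPELINES = 2  # assumption: the loaded config defines two pipelines


def require_pipeline_index(pipeline_index: int = Query(...)) -> int:
    # Reject indices that don't match any configured pipeline.
    if not 0 <= pipeline_index < NUM_PIPELINES:
        raise HTTPException(status_code=400, detail="Invalid pipeline_index")
    return pipeline_index


@app.get("/saliency")
def get_saliency(
    utterances: List[str] = Query([]),
    pipeline_index: int = Depends(require_pipeline_index),
):
    # The real endpoint forwards pipeline_index via ModuleOptions.
    return {"pipeline_index": pipeline_index, "utterances": utterances}
```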
13 changes: 4 additions & 9 deletions azimuth/routers/v1/model_performance/confidence_histogram.py
@@ -2,8 +2,6 @@
 # This source code is licensed under the Apache 2.0 license found in the LICENSE file
 # in the root directory of this source tree.

-from typing import List
-
 from fastapi import APIRouter, Depends, Query

 from azimuth.app import get_dataset_split_manager, get_task_manager
@@ -15,10 +13,7 @@
     NamedDatasetFilters,
     SupportedModule,
 )
-from azimuth.types.model_performance import (
-    ConfidenceBinDetails,
-    ConfidenceHistogramResponse,
-)
+from azimuth.types.model_performance import ConfidenceHistogramResponse
 from azimuth.utils.routers import (
     build_named_dataset_filters,
     get_standard_task_result,
@@ -35,7 +30,7 @@
     summary="Get confidence histogram values",
     description="Get all confidence bins with their confidence and the outcome count",
     tags=TAGS,
-    response_model=List[ConfidenceBinDetails],
+    response_model=ConfidenceHistogramResponse,
 )
 def get_confidence_histogram(
     dataset_split_name: DatasetSplitName,
@@ -46,7 +41,7 @@ def get_confidence_histogram(
     without_postprocessing: bool = Query(
         False, title="Without Postprocessing", alias="withoutPostprocessing"
     ),
-) -> List[ConfidenceBinDetails]:
+) -> ConfidenceHistogramResponse:
     mod_options = ModuleOptions(
         filters=named_filters.to_dataset_filters(dataset_split_manager.get_class_names()),
         pipeline_index=pipeline_index,
@@ -61,4 +56,4 @@ def get_confidence_histogram(
         last_update=dataset_split_manager.last_update,
     )[0]

-    return result.details_all_bins
+    return result
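Note: the endpoint now returns the full `ConfidenceHistogramResponse` (bins plus the pipeline's confidence threshold) rather than unwrapping the bare bin list, matching the module change in confidence_binning.py. A sketch of what the new shape plausibly looks like, with field names inferred from the diffs above; the real models are in `azimuth.types.model_performance`:

```python
# Hedged sketch of the new response shape, inferred from the diff.
from typing import Dict, List, Optional

from pydantic import BaseModel


class ConfidenceBinDetails(BaseModel):
    bin_index: int
    bin_confidence: float
    mean_bin_confidence: float
    outcome_count: Dict[str, int]


class ConfidenceHistogramResponse(BaseModel):
    # Previously the endpoint returned List[ConfidenceBinDetails] directly;
    # now the bins travel with the threshold used by the pipeline.
    bins: List[ConfidenceBinDetails]
    confidence_threshold: Optional[float]


response = ConfidenceHistogramResponse(
    bins=[
        ConfidenceBinDetails(
            bin_index=0,
            bin_confidence=0.025,
            mean_bin_confidence=0.0,
            outcome_count={"CorrectAndPredicted": 0},
        )
    ],
    confidence_threshold=0.5,
)
print(response.json())
```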