Changes model and dataset loading to use init_spec() and constructors.
* Changes Global Settings to provide UI elements based on the Model's or Dataset's init_spec() contained in a LitApp's metadata.
* Changes the create_dataset and create_model HTTP APIs to (see the request sketch just below this list):
    * Accept and validate config objects sent via the POST data
    * Initialize Models and Datasets by passing configs to their Python constructors
    * Raise errors instead of returning None, so that the user can see where and why instantiation failed.
* Updates the client's ApiService to pass init_spec configs from the UI to the HTTP requests.
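For illustration, here is a minimal sketch of the request shape the updated create_dataset API expects, assuming a LIT server running on localhost; the endpoint path, port, base-dataset name, and constructor parameters are assumptions for this sketch, not values taken from the commit. The server pops `new_name` from the config and passes the remaining entries as keyword arguments to the base dataset's registered constructor.

```python
# Hypothetical request sketch; the route, port, and config keys are assumed.
import requests

resp = requests.post(
    'http://localhost:5432/create_dataset',  # assumed server address and route
    params={'dataset_name': 'sst_dev'},      # base dataset whose loader (cls, init_spec) is reused
    json={
        'config': {
            'new_name': 'sst_dev_copy',      # popped server-side; names the new dataset
            'path': '/tmp/sst_dev.tsv',      # remaining keys go to the dataset constructor
        },
    },
)
resp.raise_for_status()
metadata, new_name = resp.json()  # server responds with (updated metadata, new dataset name)
```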

PiperOrigin-RevId: 523096706
RyanMullins authored and LIT team committed Apr 10, 2023
1 parent c0ff131 commit 99e78ff
Showing 11 changed files with 549 additions and 187 deletions.
135 changes: 99 additions & 36 deletions lit_nlp/app.py
@@ -17,7 +17,6 @@
import functools
import glob
import math
import os
import random
import threading
import time
@@ -56,6 +55,39 @@
ModelLoadersMap = dict[str, ModelLoader]


# TODO(b/277249726): Move this function to utils.py, add tests, and expand usage
# across HTTP and Interpreter APIs.
def _validate_config_against_spec(config: JsonDict, spec: types.Spec):
"""Validates that the provided config is compatible with the Spec.
Args:
config: The configuration parameters, typically extracted from the data of
an HTTP Request, that are to be used in a function call.
spec: A Spec defining the shape of allowed configuration parameters for the
associated LIT component.
Raises:
KeyError: Under two conditions: 1) the `config` is missing one or more
required fields defined in the `spec`, or 2) the `config` contains fields
not defined in the `spec`. Either of these conditions would likely result
in a TypeError (for missing or unexpected arguments) if the `config` was
used in a call.
"""
missing_required_keys = [
param_name for param_name, param_type in spec.items()
if param_type.required and param_name not in config
]
if missing_required_keys:
raise KeyError(f'Missing required parameters: {missing_required_keys}')

unsupported_keys = [
param_name for param_name in config
if param_name not in spec
]
if unsupported_keys:
raise KeyError(f'Received unsupported parameters: {unsupported_keys}')
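

# --- Illustrative sketch, not part of this commit: how the helper behaves. ---
# `_StubField` is a hypothetical stand-in for a LIT type with a `required`
# attribute; real callers pass the types.Spec returned by init_spec().
import dataclasses


@dataclasses.dataclass(frozen=True)
class _StubField:
  required: bool = True


_stub_spec = {
    'path': _StubField(required=True),
    'max_examples': _StubField(required=False),
}

# A config with every required key and no unknown keys passes silently.
_validate_config_against_spec({'path': '/tmp/data.tsv'}, _stub_spec)

# Missing required keys or unsupported keys raise KeyError before any
# constructor is called, so the client sees a clear error instead of a
# downstream TypeError.
try:
  _validate_config_against_spec({'path': '/tmp/data.tsv', 'foo': 1}, _stub_spec)
except KeyError as err:
  print(err)  # "Received unsupported parameters: ['foo']"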


class LitApp(object):
"""LIT WSGI application."""

@@ -300,44 +332,68 @@ def _get_dataset(self,
    return list(self._datasets[dataset_name].indexed_examples)

  def _create_dataset(self,
                      unused_data,
                      data: JsonDict,
                      dataset_name: Optional[str] = None,
                      dataset_path: Optional[str] = None,
                      **unused_kw):
    """Create dataset from a path, updating and returning the metadata."""
    """Create a dataset, updating and returning the metadata."""
    if dataset_name is None:
      raise ValueError('No base dataset specified.')

    assert dataset_name is not None, 'No dataset specified.'
    assert dataset_path is not None, 'No dataset path specified.'

    new_dataset = self._datasets[dataset_name].load(dataset_path)
    if new_dataset is not None:
      new_dataset_name = dataset_name + '-' + os.path.basename(dataset_path)
      self._datasets[new_dataset_name] = new_dataset
      self._info = self._build_metadata()
      return (self._info, new_dataset_name)
    else:
      logging.error('Not able to load: %s', dataset_name)
      return None
    config: Optional[JsonDict] = data.get('config')
    if config is None:
      raise ValueError('No config specified.')

    new_name: Optional[str] = config.pop('new_name', None)
    if new_name is None:
      raise ValueError('No name provided for the new dataset.')

    if (loader_info := self._dataset_loaders.get(dataset_name)) is None:
      raise ValueError(
          f'No loader information (Cls + init_spec) found for {dataset_name}')

    dataset_cls, dataset_init_spec = loader_info

    if dataset_init_spec is not None:
      _validate_config_against_spec(config, dataset_init_spec)

    new_dataset = dataset_cls(**config)
    annotated_dataset = self._run_annotators(new_dataset)
    self._datasets[new_name] = lit_dataset.IndexedDataset(
        base=annotated_dataset, id_fn=caching.input_hash
    )
    self._info = self._build_metadata()
    return (self._info, new_name)
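    # --- Illustrative trace, not part of this commit. ---
    # Each entry in self._dataset_loaders appears to pair a Dataset constructor
    # with its init_spec(); `MyTsvDataset` below is a hypothetical stand-in.
    #
    #   self._dataset_loaders['my_tsv_data'] = (
    #       MyTsvDataset, MyTsvDataset.init_spec())
    #
    # With dataset_name='my_tsv_data' and POST data
    #   {'config': {'new_name': 'my_tsv_data-dev', 'path': '/tmp/dev.tsv'}},
    # the method pops 'new_name', validates {'path': ...} against the
    # init_spec, calls MyTsvDataset(path='/tmp/dev.tsv'), and registers the
    # annotated, indexed result under 'my_tsv_data-dev'.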

  def _create_model(self,
                    unused_data,
                    data: JsonDict,
                    model_name: Optional[str] = None,
                    model_path: Optional[str] = None,
                    **unused_kw):
    """Create model from a path, updating and returning the metadata."""

    assert model_name is not None, 'No model specified.'
    assert model_path is not None, 'No model path specified.'
    # Load using the underlying model class, then wrap explicitly in a cache.
    new_model = self._models[model_name].wrapped.load(model_path)
    if new_model is not None:
      new_model_name = model_name + ':' + os.path.basename(model_path)
      self._models[new_model_name] = caching.CachingModelWrapper(
          new_model, new_model_name, cache_dir=self._data_dir)
      self._info = self._build_metadata()
      return (self._info, new_model_name)
    else:
      return None
    """Create a model, updating and returning the metadata."""
    if model_name is None:
      raise ValueError('No base model specified.')

    config: Optional[JsonDict] = data.get('config')
    if config is None:
      raise ValueError('No config specified.')

    new_name: Optional[str] = config.pop('new_name', None)
    if new_name is None:
      raise ValueError('No name provided for the new model.')

    if (loader_info := self._model_loaders.get(model_name)) is None:
      raise ValueError(
          f'No loader information (Cls + init_spec) found for {model_name}')

    model_cls, model_init_spec = loader_info

    if model_init_spec is not None:
      _validate_config_against_spec(config, model_init_spec)

    new_model = model_cls(**config)
    self._models[new_name] = caching.CachingModelWrapper(
        new_model, new_name, cache_dir=self._data_dir)
    self._info = self._build_metadata()
    return (self._info, new_name)
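    # Note (added for readability; not part of this commit): this mirrors
    # _create_dataset() above, but looks the base name up in
    # self._model_loaders and wraps the new model in a CachingModelWrapper so
    # its predictions are cached under the new name.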

def _get_generated(self, data, model: str, dataset_name: str, generator: str,
**unused_kw):
@@ -505,12 +561,19 @@ def _handler(app: wsgi_app.App, request, environ):
      # but for requests from Python we may want to use the invertible encoding
      # so that datatypes from remote models are the same as local ones.
      response_simple_json = utils.coerce_bool(
          kw.pop('response_simple_json', True))
          kw.pop('response_simple_json', True)
      )
      data = serialize.from_json(request.data) if len(request.data) else None
      # Special handling to dereference IDs.
      if data and 'inputs' in data.keys() and 'dataset_name' in kw:
        data['inputs'] = self._reconstitute_inputs(data['inputs'],
                                                   kw['dataset_name'])
      if (
          data
          and 'inputs' in data.keys()
          and len(data.get('inputs'))
          and 'dataset_name' in kw
      ):
        data['inputs'] = self._reconstitute_inputs(
            data['inputs'], kw['dataset_name']
        )

      outputs = fn(data, **kw)
      response_body = serialize.to_json(outputs, simple=response_simple_json)
70 changes: 69 additions & 1 deletion lit_nlp/client/core/global_settings.css
@@ -160,9 +160,9 @@ a:hover {

.config-list {
  border: 1px solid #dadce0;
  flex: 1 1;
  margin: 12pt;
  overflow-y: scroll;
  height: 100%;
}

.config-line {
@@ -200,6 +200,74 @@ a:hover {
  margin-left: 40pt;
}

.model-dataset-loading {
  flex: 1 1;
  margin: 0 12pt;
  width: calc(100% - 24pt);
}

.model-dataset-loading .controls {
  border: 1px solid var(--lit-neutral-300);
  display: flex;
  flex-grow: 1;
  flex-direction: row;
  padding: 16px;
}

.model-dataset-loading .controls .actions {
  align-items: end;
  row-gap: 4px;
}

.model-dataset-loading .controls .actions,
.model-dataset-loading .controls .selector {
  display: flex;
  flex-direction: column;
  flex-shrink: 0;
}

.model-dataset-loading .controls .options {
  display: flex;
  flex-direction: column;
  flex-grow: 1;
  height: 100%;
  max-height: 100%;
  overflow-y: scroll;
  padding: 0 12pt;
  row-gap: 8px;
}

.model-dataset-loading .controls .options .option-entry {
  align-items: center;
  column-gap: 4px;
  display: flex;
  flex-direction: row;
}

/* The first child of .options is always the new_name field, which is special */
.model-dataset-loading .controls .options .option-entry:first-child {
  border-bottom: 1px solid var(--lit-neutral-300);
  padding-bottom: 8px;
}

.model-dataset-loading .controls .options .option-entry label {
  flex: 0 0 200px;
}

.model-dataset-loading .controls .options .option-entry lit-input-field {
  flex-grow: 1;
}

.model-dataset-loading .header {
  color: var(--lit-neutral-700);
  font-size: 16px;
  margin-bottom: 12pt;
}

#bottombar .status-area {
  color: var(--google-red-500);
}

.expanded-info {
  display: flex;
  max-height: 0;
