Skip to content

Commit

Permalink
Refactor rest server implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
blythed committed May 23, 2024
1 parent 6499ab4 commit 9b541ee
Show file tree
Hide file tree
Showing 39 changed files with 663 additions and 590 deletions.
10 changes: 3 additions & 7 deletions .github/workflows/ci_code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,9 @@ jobs:
run: |
make ext_testing
- name: Upload code coverage to Codecov
uses: codecov/codecov-action@v3.1.4
with:
env_vars: RUNNER_OS,PYTHON_VERSION
file: ./coverage.xml
fail_ci_if_error: false
name: codecov-umbrella
- name: Rest Testing
run: |
make rest_testing
# ---------------------------------
# Integration Testing
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Force load vector indices during backfill
- Fix pandas database (in-memory)
- Add and update docstrings in component classes and methods.
- Changed the REST implementation to use the new serialization

#### New Features & Functionality

- Add nightly image for pre-release testing in the cloud environment
- Fix torch model fit and make schedule_jobs at db add
- Add requires functionality for all extension modules
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -222,5 +222,13 @@ ext_testing: ## Execute integration testing
find ./test -type f -name "*.pyc" -delete
pytest $(PYTEST_ARGUMENTS) ./test/integration/ext

rest_testing: ## Execute REST server smoke testing
echo "starting rest server"
SUPERDUPERDB_CONFIG=deploy/testenv/env/rest/rest_mock.yaml python -m superduperdb rest &
sleep 10
SUPERDUPERDB_CONFIG=deploy/testenv/env/rest/rest_mock.yaml pytest test/rest/test_rest.py
echo "stopping rest server"
lsof -ti:8002 | xargs kill -9

smoke_testing: ## Execute smoke testing
SUPERDUPERDB_CONFIG=deploy/testenv/env/smoke/config.yaml pytest $(PYTEST_ARGUMENTS) ./test/smoke
2 changes: 1 addition & 1 deletion deploy/images/superduperdb/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ VOLUME /artifacts
RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
python3 python3-pip python-is-python3 \
python3 python3-pip python-is-python3 gcc python3-dev \
# Required for downloading code/data from the internet \
wget curl unzip git \
# DevOps
Expand Down
231 changes: 231 additions & 0 deletions deploy/rest/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
components:
- model
- listener
- vector_index
- datatype

leaves:
query:
MongoQuery:
_path: superduperdb/backends/mongodb/query/parse_query
query:
type: code
documents:
type: json
default: []
code:
Code:
_path: superduperdb/Code
identifier:
type: str
code:
type: str
default: |
from superduperdb import code
@code
def my_code(x):
return x
lazy_artifact:
LazyArtifact:
_path: superduperdb/components/datatype/LazyArtifact
identifier:
type: str
file_id:
type: blob
vector_index:
VectorIndex:
_path: superduperdb/VectorIndex
identifier:
type: str
measure:
type: str
choices:
- cosine
- dot
- l2
indexing_listener:
type: listener
compatible_listener:
type: listener
optional: True
datatype:
image:
_path: superduperdb/ext/pillow/image_type
identifier:
type: str
media_type:
type: str
default: image/png
vector:
_path: superduperdb/vector
identifier:
type: str
shape:
type: int
stack:
Stack:
_path: superduperdb/Stack
identifier:
type: str
components:
type: [model, listener, vector_index]
listener:
Listener:
_path: superduperdb/Listener
identifier:
type: str
key:
type: str
select:
type: query
optional: True
model:
ObjectModel:
_path: superduperdb/ObjectModel
identifier:
type: str
object:
type: lazy_artifact
datatype:
type: datatype
optional: True
predict_kwargs:
type: json
optional: True
default: {}
signature:
type: str
optional: True
default: "*args,**kwargs"
SequentialModel:
_path: superduperdb/SequentialModel
identifier:
type: str
models:
type: model
sequence: True
QueryModel:
_path: superduperdb/QueryModel
identifier:
type: str
select:
type: query
optional: True
default:
documents:
- {"<key-1>": "$my_value"}
- {"_outputs": 0, "_id": 0}
query: |
<collection_name>.like(documents[0], vector_index='<index_id>').find({}, documents[1]).limit(10)
CodeModel:
_path: superduperdb/CodeModel
identifier:
type: str
object:
type: code
datatype:
type: datatype
optional: True
predict_kwargs:
type: json
optional: True
default: {}
signature:
type: str
optional: True
default: "*args,**kwargs"
RetrievalPrompt:
_path: superduperdb/ext/llm/prompt/RetrievalPrompt
select:
type: query
prompt_explanation:
type: str
default: |
HERE ARE SOME FACTS SEPARATED BY '---' IN OUR DATA
REPOSITORY WHICH WILL HELP YOU ANSWER THE QUESTION.
prompt_introduction:
type: str
default: |
HERE IS THE QUESTION WHICH YOU SHOULD ANSWER BASED
ONLY ON THE PREVIOUS FACTS
join:
type: str
default: "\n---\n"
SklearnEstimator:
_path: superduperdb/ext/sklearn/Estimator
identifier:
type: str
object:
type: lazy_artifact
preprocess:
type: code
optional: True
postprocess:
type: code
optional: True
OpenAIEmbedding:
_path: superduperdb/ext/openai/OpenAIEmbedding
identifier:
type: str
model:
type: str
openai_api_key:
type: str
optional: True
openai_api_base:
type: str
optional: True
OpenAIChatCompletion:
_path: superduperdb/ext/openai/OpenAIChatCompletion
identifier:
type: str
model:
type: str
openai_api_key:
type: str
optional: True
openai_api_base:
type: str
optional: True
SentenceTransformer:
_path: superduperdb/ext/sentence_transformers/SentenceTransformer
identifier:
type: str
model:
type: str
device:
type: str
default: cpu
predict_kwargs:
type: json
default:
show_progress_bar: true
postprocess:
type: code
default: |
from superduperdb import code
@code
def my_code(x):
return x.tolist()
signature:
type: str
default: singleton

presets:
datatype:
pickle:
_path: superduperdb/components/datatype/get_serializer
identifier: pickle_lazy
method: pickle
encodable: lazy_artifact
dill:
_path: superduperdb/components/datatype/get_serializer
identifier: dill_lazy
method: dill
encodable: lazy_artifact
image:
_path: superduperdb/ext/pillow/encoder/image_type
identifier: image
media_type: image/png
1 change: 1 addition & 0 deletions deploy/testenv/env/rest/rest_mock.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ bytes_encoding: Bytes
cluster:
rest:
uri: http://localhost:8002
config: deploy/rest/config.yaml
data_backend: mongomock://test
downloads:
folder: null
Expand Down
1 change: 1 addition & 0 deletions deploy/testenv/env/smoke/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ cluster:
backfill_batch_size: 100
rest:
uri: http://rest:8002
config: deploy/rest/config.yaml
data_backend: mongodb://superduper:superduper@mongodb:27017/test_db
downloads:
folder: null
Expand Down
19 changes: 9 additions & 10 deletions superduperdb/backends/base/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,15 @@ def exists(
return self._exists(file_id)

@abstractmethod
def _save_bytes(self, serialized: bytes, file_id: str):
def put_bytes(self, serialized: bytes, file_id: str):
"""Save bytes in artifact store""" ""
pass

@abstractmethod
def _save_file(self, file_path: str, file_id: str) -> str:
def put_file(self, file_path: str, file_id: str) -> str:
"""Save file in artifact store and return file_id."""
pass

#
def save_artifact(self, r: t.Dict):
"""Save serialized object in the artifact store.
Expand All @@ -117,13 +116,13 @@ def save_artifact(self, r: t.Dict):

for file_id, blob in blobs.items():
try:
self._save_bytes(blob, file_id=file_id)
self.put_bytes(blob, file_id=file_id)
except FileExistsError:
continue

for file_id, file_path in files.items():
try:
self._save_file(file_path, file_id=file_id)
self.put_file(file_path, file_id=file_id)
except FileExistsError:
continue

Expand Down Expand Up @@ -154,7 +153,7 @@ def update_artifact(self, old_r: t.Dict, new_r: t.Dict):
return self.save_artifact(new_r)

@abstractmethod
def _load_bytes(self, file_id: str) -> bytes:
def get_bytes(self, file_id: str) -> bytes:
"""
Load bytes from artifact store.
Expand All @@ -163,7 +162,7 @@ def _load_bytes(self, file_id: str) -> bytes:
pass

@abstractmethod
def _load_file(self, file_id: str) -> str:
def get_file(self, file_id: str) -> str:
"""
Load file from artifact store and return path.
Expand All @@ -180,14 +179,14 @@ def load_artifact(self, r):
datatype = self.serializers[r['datatype']]
file_id = r.get('file_id')
if r.get('encodable') == 'file':
x = self._load_file(file_id)
x = self.get_file(file_id)
else:
# We should always have file_id available at load time (because saved)
# TODO We should always have file_id available at load time (because saved)
uri = r.get('uri')
if file_id is None:
assert uri is not None, '"uri" and "file_id" can\'t both be None'
file_id = _construct_file_id_from_uri(uri)
x = self._load_bytes(file_id)
x = self.get_bytes(file_id)
return datatype.decode_data(x)

def save(self, r: t.Dict) -> t.Dict:
Expand Down
Loading

0 comments on commit 9b541ee

Please sign in to comment.