Skip to content

Commit

Permalink
Refactor rest server implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
blythed committed May 23, 2024
1 parent 6499ab4 commit 9b541ee
Show file tree
Hide file tree
Showing 39 changed files with 663 additions and 590 deletions.
10 changes: 3 additions & 7 deletions .github/workflows/ci_code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,9 @@ jobs:
run: |
make ext_testing
- name: Upload code coverage to Codecov
uses: codecov/codecov-action@v3.1.4
with:
env_vars: RUNNER_OS,PYTHON_VERSION
file: ./coverage.xml
fail_ci_if_error: false
name: codecov-umbrella
- name: Rest Testing
run: |
make rest_testing
# ---------------------------------
# Integration Testing
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Force load vector indices during backfill
- Fix pandas database (in-memory)
- Add and update docstrings in component classes and methods.
- Changed the REST implementation to use the new serialization

#### New Features & Functionality

- Add nightly image for pre-release testing in the cloud environment
- Fix torch model fit and make schedule_jobs at db add
- Add requires functionality for all extension modules
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -222,5 +222,13 @@ ext_testing: ## Execute integration testing
find ./test -type f -name "*.pyc" -delete
pytest $(PYTEST_ARGUMENTS) ./test/integration/ext

rest_testing: ## Execute REST server smoke testing
echo "starting rest server"
SUPERDUPERDB_CONFIG=deploy/testenv/env/rest/rest_mock.yaml python -m superduperdb rest &
sleep 10
SUPERDUPERDB_CONFIG=deploy/testenv/env/rest/rest_mock.yaml pytest test/rest/test_rest.py
echo "stopping rest server"
lsof -ti:8002 | xargs kill -9

smoke_testing: ## Execute smoke testing
SUPERDUPERDB_CONFIG=deploy/testenv/env/smoke/config.yaml pytest $(PYTEST_ARGUMENTS) ./test/smoke
2 changes: 1 addition & 1 deletion deploy/images/superduperdb/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ VOLUME /artifacts
RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
python3 python3-pip python-is-python3 \
python3 python3-pip python-is-python3 gcc python3-dev \
# Required for downloading code/data from the internet \
wget curl unzip git \
# DevOps
Expand Down
231 changes: 231 additions & 0 deletions deploy/rest/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
components:
- model
- listener
- vector_index
- datatype

leaves:
query:
MongoQuery:
_path: superduperdb/backends/mongodb/query/parse_query
query:
type: code
documents:
type: json
default: []
code:
Code:
_path: superduperdb/Code
identifier:
type: str
code:
type: str
default: |
from superduperdb import code
@code
def my_code(x):
return x
lazy_artifact:
LazyArtifact:
_path: superduperdb/components/datatype/LazyArtifact
identifier:
type: str
file_id:
type: blob
vector_index:
VectorIndex:
_path: superduperdb/VectorIndex
identifier:
type: str
measure:
type: str
choices:
- cosine
- dot
- l2
indexing_listener:
type: listener
compatible_listener:
type: listener
optional: True
datatype:
image:
_path: superduperdb/ext/pillow/image_type
identifier:
type: str
media_type:
type: str
default: image/png
vector:
_path: superduperdb/vector
identifier:
type: str
shape:
type: int
stack:
Stack:
_path: superduperdb/Stack
identifier:
type: str
components:
type: [model, listener, vector_index]
listener:
Listener:
_path: superduperdb/Listener
identifier:
type: str
key:
type: str
select:
type: query
optional: True
model:
ObjectModel:
_path: superduperdb/ObjectModel
identifier:
type: str
object:
type: lazy_artifact
datatype:
type: datatype
optional: True
predict_kwargs:
type: json
optional: True
default: {}
signature:
type: str
optional: True
default: "*args,**kwargs"
SequentialModel:
_path: superduperdb/SequentialModel
identifier:
type: str
models:
type: model
sequence: True
QueryModel:
_path: superduperdb/QueryModel
identifier:
type: str
select:
type: query
optional: True
default:
documents:
- {"<key-1>": "$my_value"}
- {"_outputs": 0, "_id": 0}
query: |
<collection_name>.like(documents[0], vector_index='<index_id>').find({}, documents[1]).limit(10)
CodeModel:
_path: superduperdb/CodeModel
identifier:
type: str
object:
type: code
datatype:
type: datatype
optional: True
predict_kwargs:
type: json
optional: True
default: {}
signature:
type: str
optional: True
default: "*args,**kwargs"
RetrievalPrompt:
_path: superduperdb/ext/llm/prompt/RetrievalPrompt
select:
type: query
prompt_explanation:
type: str
default: |
HERE ARE SOME FACTS SEPARATED BY '---' IN OUR DATA
REPOSITORY WHICH WILL HELP YOU ANSWER THE QUESTION.
prompt_introduction:
type: str
default: |
HERE IS THE QUESTION WHICH YOU SHOULD ANSWER BASED
ONLY ON THE PREVIOUS FACTS
join:
type: str
default: "\n---\n"
SklearnEstimator:
_path: superduperdb/ext/sklearn/Estimator
identifier:
type: str
object:
type: lazy_artifact
preprocess:
type: code
optional: True
postprocess:
type: code
optional: True
OpenAIEmbedding:
_path: superduperdb/ext/openai/OpenAIEmbedding
identifier:
type: str
model:
type: str
openai_api_key:
type: str
optional: True
openai_api_base:
type: str
optional: True
OpenAIChatCompletion:
_path: superduperdb/ext/openai/OpenAIChatCompletion
identifier:
type: str
model:
type: str
openai_api_key:
type: str
optional: True
openai_api_base:
type: str
optional: True
SentenceTransformer:
_path: superduperdb/ext/sentence_transformers/SentenceTransformer
identifier:
type: str
model:
type: str
device:
type: str
default: cpu
predict_kwargs:
type: json
default:
show_progress_bar: true
postprocess:
type: code
default: |
from superduperdb import code
@code
def my_code(x):
return x.tolist()
signature:
type: str
default: singleton

presets:
datatype:
pickle:
_path: superduperdb/components/datatype/get_serializer
identifier: pickle_lazy
method: pickle
encodable: lazy_artifact
dill:
_path: superduperdb/components/datatype/get_serializer
identifier: dill_lazy
method: dill
encodable: lazy_artifact
image:
_path: superduperdb/ext/pillow/encoder/image_type
identifier: image
media_type: image/png
1 change: 1 addition & 0 deletions deploy/testenv/env/rest/rest_mock.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ bytes_encoding: Bytes
cluster:
rest:
uri: http://localhost:8002
config: deploy/rest/config.yaml
data_backend: mongomock://test
downloads:
folder: null
Expand Down
1 change: 1 addition & 0 deletions deploy/testenv/env/smoke/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ cluster:
backfill_batch_size: 100
rest:
uri: http://rest:8002
config: deploy/rest/config.yaml
data_backend: mongodb://superduper:superduper@mongodb:27017/test_db
downloads:
folder: null
Expand Down
19 changes: 9 additions & 10 deletions superduperdb/backends/base/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,15 @@ def exists(
return self._exists(file_id)

@abstractmethod
def _save_bytes(self, serialized: bytes, file_id: str):
def put_bytes(self, serialized: bytes, file_id: str):
"""Save bytes in artifact store""" ""
pass

@abstractmethod
def _save_file(self, file_path: str, file_id: str) -> str:
def put_file(self, file_path: str, file_id: str) -> str:
"""Save file in artifact store and return file_id."""
pass

#
def save_artifact(self, r: t.Dict):
"""Save serialized object in the artifact store.
Expand All @@ -117,13 +116,13 @@ def save_artifact(self, r: t.Dict):

for file_id, blob in blobs.items():
try:
self._save_bytes(blob, file_id=file_id)
self.put_bytes(blob, file_id=file_id)
except FileExistsError:
continue

for file_id, file_path in files.items():
try:
self._save_file(file_path, file_id=file_id)
self.put_file(file_path, file_id=file_id)
except FileExistsError:
continue

Expand Down Expand Up @@ -154,7 +153,7 @@ def update_artifact(self, old_r: t.Dict, new_r: t.Dict):
return self.save_artifact(new_r)

@abstractmethod
def _load_bytes(self, file_id: str) -> bytes:
def get_bytes(self, file_id: str) -> bytes:
"""
Load bytes from artifact store.
Expand All @@ -163,7 +162,7 @@ def _load_bytes(self, file_id: str) -> bytes:
pass

@abstractmethod
def _load_file(self, file_id: str) -> str:
def get_file(self, file_id: str) -> str:
"""
Load file from artifact store and return path.
Expand All @@ -180,14 +179,14 @@ def load_artifact(self, r):
datatype = self.serializers[r['datatype']]
file_id = r.get('file_id')
if r.get('encodable') == 'file':
x = self._load_file(file_id)
x = self.get_file(file_id)
else:
# We should always have file_id available at load time (because saved)
# TODO We should always have file_id available at load time (because saved)
uri = r.get('uri')
if file_id is None:
assert uri is not None, '"uri" and "file_id" can\'t both be None'
file_id = _construct_file_id_from_uri(uri)
x = self._load_bytes(file_id)
x = self.get_bytes(file_id)
return datatype.decode_data(x)

def save(self, r: t.Dict) -> t.Dict:
Expand Down
Loading

0 comments on commit 9b541ee

Please sign in to comment.