Skip to content

Commit

Permalink
Built-in Karton integration (#298)
Browse files Browse the repository at this point in the history
  • Loading branch information
psrok1 committed May 10, 2021
1 parent b05a8c1 commit a964e03
Show file tree
Hide file tree
Showing 29 changed files with 1,254 additions and 42 deletions.
14 changes: 14 additions & 0 deletions dev/karton.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[redis]
host=redis

[minio]
access_key = mwdb-test-access
secret_key = mwdb-test-key
address = minio:9000
bucket = karton
secure = 0

[mwdb]
api_url = http://mwdb-web.:3000/api/
username = admin
password = admin
105 changes: 105 additions & 0 deletions docker-compose-dev-karton.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Docker Compose file for mwdb-core development environment

version: "3.3"
services:
minio:
image: minio/minio
command: server /data
volumes:
- /tmp/minio:/data
ports:
- "127.0.0.1:9000:9000"
environment:
- MINIO_ACCESS_KEY=mwdb-test-access
- MINIO_SECRET_KEY=mwdb-test-key
mwdb:
build:
context: .
dockerfile: deploy/docker/Dockerfile
depends_on:
- postgres
- redis
- minio
restart: on-failure
env_file:
# NOTE: use gen_vars.sh in order to generate this file
- mwdb-vars.env
environment:
UWSGI_PY_AUTORELOAD: 1
UWSGI_ENABLE_THREADS: 1
MWDB_MAIL_SMTP: "mailhog:1025"
MWDB_MAIL_FROM: "noreply@mwdb.dev"
MWDB_RECAPTCHA_SITE_KEY: "6LeIxAcTAAAAAJcZVRqyHh71UMIEGNQ_MXjiZKhI"
MWDB_RECAPTCHA_SECRET: "6LeIxAcTAAAAAGG-vFI1TnRWxMZNFuojJ4WifJWe"
MWDB_ENABLE_REGISTRATION: 1
MWDB_ENABLE_KARTON: 1
# Uncomment if you want to test S3 functions
# MWDB_STORAGE_PROVIDER: s3
# MWDB_HASH_PATHING: 0
# MWDB_S3_STORAGE_ENDPOINT: "minio:9000"
# MWDB_S3_STORAGE_ACCESS_KEY: "mwdb-test-access"
# MWDB_S3_STORAGE_SECRET_KEY: "mwdb-test-key"
# MWDB_S3_STORAGE_BUCKET_NAME: "mwdb"
volumes:
- "./docker/mail_templates:/app/mail_templates"
- "./mwdb:/app/mwdb"
- "./dev/karton.ini:/etc/karton/karton.ini"
mwdb-web:
depends_on:
- mwdb
build:
context: .
dockerfile: deploy/docker/Dockerfile-web-dev
ports:
- "80:3000"
environment:
HOST: 0.0.0.0
REACT_APP_API_URL: "/api/"
volumes:
- "./mwdb/web/public:/app/public"
- "./mwdb/web/src:/app/src"
restart: on-failure
postgres:
image: postgres
restart: always
env_file:
# NOTE: use gen_vars.sh in order to generate this file
- postgres-vars.env
redis:
image: redis:alpine
mailhog:
image: mailhog/mailhog:latest
ports:
- "127.0.0.1:8025:8025"
karton-system:
image: certpl/karton-system:158d07960d8cd39dee8771af719f560df93b3212
depends_on:
- redis
- minio
volumes:
- "./dev/karton.ini:/etc/karton/karton.ini"
entrypoint: karton-system
command: --setup-bucket
karton-classifier:
image: certpl/karton-classifier:320c99de8e05ff3d029609e73ac31bcf2d0ba56c
depends_on:
- redis
- minio
volumes:
- "./dev/karton.ini:/etc/karton/karton.ini"
karton-dashboard:
image: certpl/karton-dashboard:2c35fb3cae399dba8913f8ad1be9dd6b820b1fd6
depends_on:
- redis
- minio
volumes:
- "./dev/karton.ini:/etc/karton/karton.ini"
ports:
- "127.0.0.1:8030:5000"
karton-mwdb-reporter:
image: certpl/karton-mwdb-reporter:a4a5a2ece14073e3a25a38bd5f1b0fa14ba725d3
depends_on:
- redis
- minio
volumes:
- "./dev/karton.ini:/etc/karton/karton.ini"
11 changes: 11 additions & 0 deletions mwdb/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from mwdb.resources.download import DownloadResource, RequestSampleDownloadResource
from mwdb.resources.file import FileDownloadResource, FileItemResource, FileResource
from mwdb.resources.group import GroupListResource, GroupMemberResource, GroupResource
from mwdb.resources.karton import KartonAnalysisResource, KartonObjectResource
from mwdb.resources.metakey import (
MetakeyDefinitionManageResource,
MetakeyListDefinitionManageResource,
Expand Down Expand Up @@ -296,6 +297,16 @@ def require_auth():
MetakeyPermissionResource, "/meta/manage/<key>/permissions/<group_name>"
)

# Karton endpoints
api.add_resource(
KartonObjectResource,
"/<any(file, config, blob, object):type>/<hash64:identifier>/karton",
)
api.add_resource(
KartonAnalysisResource,
"/<any(file, config, blob, object):type>/<hash64:identifier>/karton/<analysis_id>",
)

# User endpoints
api.add_resource(UserListResource, "/user")
api.add_resource(UserResource, "/user/<login>")
Expand Down
4 changes: 4 additions & 0 deletions mwdb/core/capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ class Capabilities(object):
manage_profile = "manage_profile"
# Can personalize own profile (mark as favorite, manage quick queries)
personalize = "personalize"
# Can assign existing Karton analysis to the object
karton_assign = "karton_assign"
# Can resubmit object to Karton
karton_reanalyze = "karton_reanalyze"

@classmethod
def all(cls):
Expand Down
7 changes: 7 additions & 0 deletions mwdb/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class MWDBConfig(Config):
enable_registration = key(cast=intbool, required=False, default=False)
enable_maintenance = key(cast=intbool, required=False, default=False)
enable_hooks = key(cast=intbool, required=False, default=True)
enable_karton = key(cast=intbool, required=False, default=False)

mail_smtp = key(cast=str, required=False)
mail_from = key(cast=str, required=False, default="noreply@mwdb")
Expand All @@ -115,8 +116,14 @@ class MWDBConfig(Config):
enable_json_logger = key(cast=intbool, required=False, default=False)


@section("karton")
class KartonConfig(Config):
    """Settings read from the ``karton`` configuration section."""

    # Optional path to a Karton configuration file; defaults to None when unset.
    # NOTE(review): presumably None lets karton.core.Config fall back to its
    # own default locations - confirm against the Karton documentation.
    config_path = key(cast=str, required=False, default=None)


class AppConfig(Config):
    """Top-level application configuration grouping the individual sections."""

    # Core MWDB settings (MWDBConfig section)
    mwdb = group_key(MWDBConfig)
    # Karton integration settings (KartonConfig section)
    karton = group_key(KartonConfig)


def _config_sources():
Expand Down
100 changes: 100 additions & 0 deletions mwdb/core/karton.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import logging
import shutil
import tempfile

from flask import g
from karton.core import Config as KartonConfig
from karton.core import Producer, Resource, Task
from karton.core.backend import KartonBackend
from karton.core.inspect import KartonState
from karton.core.task import TaskPriority

from .config import app_config

logger = logging.getLogger("mwdb.karton")


def get_karton_producer() -> Producer:
    """Create a Karton producer that identifies itself as ``karton.mwdb``.

    The Karton configuration is built from the path stored in
    ``app_config.karton.config_path``.

    :return: a new ``Producer`` instance
    """
    karton_config = KartonConfig(app_config.karton.config_path)
    return Producer(identity="karton.mwdb", config=karton_config)


def send_file_to_karton(file) -> str:
    """Send a file to Karton as a ``sample``/``raw`` task.

    The sample content is handed to Karton by filesystem path. If
    ``file.get_path()`` raises ``ValueError`` or ``IOError``, the content is
    copied from ``file.open()`` into a temporary file that is kept until the
    task has been sent.

    The task priority is ``TaskPriority.NORMAL`` when the authenticated user's
    ``feed_quality`` is ``"high"`` and ``TaskPriority.LOW`` otherwise.

    :param file: object providing ``get_path()``, ``open()``, ``file_name``,
        ``sha256`` and ``get_metakeys()`` (presumably ``mwdb.model.file.File``)
    :return: ``root_uid`` of the task that was sent
    """
    # Imported locally, as in the original (presumably to avoid an import cycle)
    from mwdb.model.file import File

    tmpfile = None

    try:
        # TODO: Use file.open() directly when Resource(fd=...)
        # is implemented in Karton
        try:
            # If file contents are available via path: just use the path
            path = file.get_path()
        except (ValueError, IOError):
            # If get_path doesn't work: download content to NamedTemporaryFile
            tmpfile = tempfile.NamedTemporaryFile()
            file_stream = file.open()
            try:
                shutil.copyfileobj(file_stream, tmpfile)
            finally:
                # Release the source stream even when the copy fails
                File.close(file_stream)
            # The file is re-read by name below, so buffered bytes must be
            # written out first; otherwise the sample may be truncated.
            tmpfile.flush()
            path = tmpfile.name

        producer = get_karton_producer()
        feed_quality = g.auth_user.feed_quality
        task_priority = (
            TaskPriority.NORMAL if feed_quality == "high" else TaskPriority.LOW
        )
        task = Task(
            headers={"type": "sample", "kind": "raw", "quality": feed_quality},
            payload={
                "sample": Resource(file.file_name, path=path, sha256=file.sha256),
                "attributes": file.get_metakeys(as_dict=True, check_permissions=False),
            },
            priority=task_priority,
        )
        producer.send_task(task)
    finally:
        # Closing the NamedTemporaryFile also removes it from disk
        if tmpfile is not None:
            tmpfile.close()

    logger.info("File sent to Karton with %s", task.root_uid)
    return task.root_uid


def send_config_to_karton(config) -> str:
    """Send a configuration object to Karton as a ``config`` task.

    The headers carry the configuration type and family; the payload carries
    the configuration itself, its ``dhash`` and all of its attributes
    (fetched without permission checks).

    :param config: object providing ``config_type``, ``family``, ``cfg``,
        ``dhash`` and ``get_metakeys()``
    :return: ``root_uid`` of the task that was sent
    """
    producer = get_karton_producer()

    task_headers = {
        "type": "config",
        "kind": config.config_type,
        "family": config.family,
    }
    task_payload = {
        "config": config.cfg,
        "dhash": config.dhash,
        "attributes": config.get_metakeys(as_dict=True, check_permissions=False),
    }
    task = Task(headers=task_headers, payload=task_payload)
    producer.send_task(task)

    logger.info("Configuration sent to Karton with %s", task.root_uid)
    return task.root_uid


def send_blob_to_karton(blob) -> str:
    """Send a text blob to Karton as a ``blob`` task.

    The headers carry the blob type; the payload carries the blob content,
    its ``dhash`` and all of its attributes (fetched without permission
    checks).

    :param blob: object providing ``blob_type``, ``content``, ``dhash``
        and ``get_metakeys()``
    :return: ``root_uid`` of the task that was sent
    """
    producer = get_karton_producer()

    task_headers = {"type": "blob", "kind": blob.blob_type}
    task_payload = {
        "content": blob.content,
        "dhash": blob.dhash,
        "attributes": blob.get_metakeys(as_dict=True, check_permissions=False),
    }
    task = Task(headers=task_headers, payload=task_payload)
    producer.send_task(task)

    logger.info("Blob sent to Karton with %s", task.root_uid)
    return task.root_uid


def get_karton_state():
    """Build a ``KartonState`` inspector on top of a fresh Karton backend.

    The backend is configured from ``app_config.karton.config_path``.

    :return: a new ``KartonState`` instance
    """
    backend = KartonBackend(KartonConfig(app_config.karton.config_path))
    return KartonState(backend)
27 changes: 27 additions & 0 deletions mwdb/core/search/fields.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import uuid
from datetime import datetime, timedelta
from typing import Any, List, Type, Union

Expand Down Expand Up @@ -150,11 +151,37 @@ def get_condition(self, expression: Expression, remainder: List[str]) -> Any:
value = get_term_value(expression)

if expression.has_wildcard():
if not self.support_wildcards:
raise UnsupportedGrammarException("Wildcards are not allowed here")
return self.column.any(self.value_column.like(value))
else:
return self.column.any(self.value_column == value)


class UUIDField(BaseField):
    """Search field that matches related rows by an exact UUID value.

    ``column`` is the attribute on which ``.any()`` is called and
    ``value_column`` is the column compared against the parsed UUID.
    Subfields and wildcards are rejected.
    """

    def __init__(self, column, value_column):
        super().__init__(column)
        self.value_column = value_column

    def get_condition(self, expression: Expression, remainder: List[str]) -> Any:
        """Return the ``column.any(value_column == <uuid>)`` condition.

        :raises FieldNotQueryableException: when subfields are requested
        :raises UnsupportedGrammarException: when the term contains
            a wildcard or is not a valid UUID
        """
        if remainder:
            subfields = ".".join(remainder)
            raise FieldNotQueryableException(
                f"Field doesn't have subfields: {subfields}"
            )

        term = get_term_value(expression)

        if expression.has_wildcard():
            raise UnsupportedGrammarException("Wildcards are not allowed here")

        try:
            parsed_uuid = uuid.UUID(term)
        except ValueError:
            raise UnsupportedGrammarException("Field accepts only correct UUID values")

        matches_uuid = self.value_column == parsed_uuid
        return self.column.any(matches_uuid)


class FavoritesField(BaseField):
def get_condition(self, expression: Expression, remainder: List[str]) -> Any:
if remainder:
Expand Down
4 changes: 3 additions & 1 deletion mwdb/core/search/mappings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, List, Tuple, Type

from mwdb.model import Comment, Config, File, Object, Tag, TextBlob
from mwdb.model import Comment, Config, File, KartonAnalysis, Object, Tag, TextBlob

from .exceptions import FieldNotQueryableException, MultipleObjectsQueryException
from .fields import (
Expand All @@ -15,6 +15,7 @@
SizeField,
StringField,
UploaderField,
UUIDField,
)

object_mapping: Dict[str, Type[Object]] = {
Expand All @@ -37,6 +38,7 @@
"parent": RelationField(Object.parents),
"child": RelationField(Object.children),
"favorites": FavoritesField(Object.followers),
"karton": UUIDField(Object.analyses, KartonAnalysis.id),
},
File.__name__: {
"name": StringField(File.file_name),
Expand Down
3 changes: 3 additions & 0 deletions mwdb/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .config import Config, StaticConfig # noqa: E402
from .file import File # noqa: E402
from .group import Group, Member # noqa: E402
from .karton import KartonAnalysis, karton_object # noqa: E402
from .metakey import Metakey, MetakeyDefinition, MetakeyPermission # noqa: E402
from .object import Object, ObjectPermission, relation # noqa: E402
from .quick_query import QuickQuery # noqa: E402
Expand All @@ -25,6 +26,8 @@
"StaticConfig",
"File",
"Group",
"KartonAnalysis",
"karton_object",
"Member",
"Metakey",
"MetakeyDefinition",
Expand Down

0 comments on commit a964e03

Please sign in to comment.