Skip to content

Commit

Permalink
Work on passing execute field to karton (#904)
Browse files Browse the repository at this point in the history
Co-authored-by: msm <msm@cert.pl>
Co-authored-by: Paweł Srokosz <pawel.srokosz@cert.pl>
  • Loading branch information
3 people committed Feb 1, 2024
1 parent 606f7c7 commit ee5cb12
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 40 deletions.
58 changes: 40 additions & 18 deletions mwdb/core/karton.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Optional
from typing import TYPE_CHECKING, Any, Dict, Optional

from flask import g
from karton.core import Config as KartonConfig
Expand All @@ -10,6 +10,12 @@
from ..version import app_version
from .config import app_config

if TYPE_CHECKING:
from ..model.blob import TextBlob
from ..model.config import Config
from ..model.file import File
from ..model.object import Object

logger = logging.getLogger("mwdb.karton")


Expand All @@ -35,27 +41,47 @@ def get_karton_producer() -> Optional[Producer]:
return karton_producer


def send_file_to_karton(file) -> str:
def prepare_headers(obj: "Object", arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Prepare headers to use when submitting this object to Karton.
Takes into account object arguments to this analysis, some attributes,
and share_3rd_party field (in this order of precedence).
"""
headers = {
"share_3rd_party": obj.share_3rd_party,
}

ALLOWED_HEADERS = ["execute"]
for attribute in obj.attributes:
if attribute.key in ALLOWED_HEADERS:
headers[attribute.key] = attribute.value

for argument, value in arguments.items():
if argument not in ALLOWED_HEADERS:
raise RuntimeError(f"Argument {argument} is not allowed")
headers[argument] = value

return headers


def send_file_to_karton(file: "File", arguments: Dict[str, Any]) -> str:
producer = get_karton_producer()

if producer is None:
raise RuntimeError("Karton is not enabled or failed to load properly")

feed_quality = g.auth_user.feed_quality
headers_persistent = prepare_headers(file, arguments)
headers_persistent["quality"] = feed_quality
task_priority = TaskPriority.NORMAL if feed_quality == "high" else TaskPriority.LOW

file_stream = file.open()
try:
feed_quality = g.auth_user.feed_quality
task_priority = (
TaskPriority.NORMAL if feed_quality == "high" else TaskPriority.LOW
)
task = Task(
headers={
"type": "sample",
"kind": "raw",
},
headers_persistent={
"quality": feed_quality,
"share_3rd_party": file.share_3rd_party,
},
headers_persistent=headers_persistent,
payload={
"sample": Resource(file.file_name, fd=file_stream, sha256=file.sha256),
"attributes": file.get_attributes(
Expand All @@ -72,7 +98,7 @@ def send_file_to_karton(file) -> str:
return task.root_uid


def send_config_to_karton(config) -> str:
def send_config_to_karton(config: "Config", arguments: Dict[str, Any]) -> str:
producer = get_karton_producer()

if producer is None:
Expand All @@ -84,9 +110,7 @@ def send_config_to_karton(config) -> str:
"kind": config.config_type,
"family": config.family,
},
headers_persistent={
"share_3rd_party": config.share_3rd_party,
},
headers_persistent=prepare_headers(config, arguments),
payload={
"config": config.cfg,
"dhash": config.dhash,
Expand All @@ -99,7 +123,7 @@ def send_config_to_karton(config) -> str:
return task.root_uid


def send_blob_to_karton(blob) -> str:
def send_blob_to_karton(blob: "TextBlob", arguments: Dict[str, Any]) -> str:
producer = get_karton_producer()

if producer is None:
Expand All @@ -110,9 +134,7 @@ def send_blob_to_karton(blob) -> str:
"type": "blob",
"kind": blob.blob_type,
},
headers_persistent={
"share_3rd_party": blob.share_3rd_party,
},
headers_persistent=prepare_headers(blob, arguments),
payload={
"content": blob.content,
"dhash": blob.dhash,
Expand Down
5 changes: 3 additions & 2 deletions mwdb/model/blob.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import hashlib
from typing import Any, Dict

from sqlalchemy.ext.hybrid import hybrid_property

Expand Down Expand Up @@ -63,5 +64,5 @@ def get_or_create(

return blob_obj, is_new

def _send_to_karton(self):
return send_blob_to_karton(self)
def _send_to_karton(self, arguments: Dict[str, Any]):
return send_blob_to_karton(self, arguments)
6 changes: 4 additions & 2 deletions mwdb/model/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any, Dict

from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.hybrid import hybrid_property

Expand Down Expand Up @@ -57,8 +59,8 @@ def get_or_create(
tags=tags,
)

def _send_to_karton(self):
return send_config_to_karton(self)
def _send_to_karton(self, arguments: Dict[str, Any]):
return send_config_to_karton(self, arguments)


# Compatibility reasons
Expand Down
5 changes: 3 additions & 2 deletions mwdb/model/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import shutil
import tempfile
from typing import Any, Dict

import pyzipper
from Cryptodome.Util.strxor import strxor_c
Expand Down Expand Up @@ -356,5 +357,5 @@ def get_by_download_token(download_token):
return None
return File.get(download_req["identifier"]).first()

def _send_to_karton(self):
return send_file_to_karton(self)
def _send_to_karton(self, arguments: Dict[str, Any]):
return send_file_to_karton(self, arguments)
8 changes: 4 additions & 4 deletions mwdb/model/object.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from collections import namedtuple
from typing import Optional
from typing import Any, Dict, Optional
from uuid import UUID

from flask import g
Expand Down Expand Up @@ -988,14 +988,14 @@ def get_shares(self):
.order_by(ObjectPermission.access_time.asc())
).all()

def _send_to_karton(self):
def _send_to_karton(self, arguments: Dict[str, Any]):
raise NotImplementedError

def spawn_analysis(self, arguments, commit=True):
def spawn_analysis(self, arguments: Dict[str, Any], commit=True):
"""
Spawns new KartonAnalysis for this object
"""
analysis_id = self._send_to_karton()
analysis_id = self._send_to_karton(arguments)
analysis = KartonAnalysis.create(
analysis_id=UUID(analysis_id),
initial_object=self,
Expand Down
3 changes: 2 additions & 1 deletion mwdb/schema/attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class AttributeValueSchema(Schema):

@validates("value")
def validate_value(self, value):
if not value:
# Currently only truthy values and False are allowed as values
if not value and value is not False:
raise ValidationError("Value shouldn't be empty")


Expand Down
2 changes: 1 addition & 1 deletion mwdb/schema/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class ObjectCreateRequestSchemaBase(Schema):
attributes = fields.Nested(AttributeItemRequestSchema, many=True, missing=[])
upload_as = fields.Str(missing="*", allow_none=False)
karton_id = fields.UUID(missing=None)
karton_arguments = fields.Dict(missing={}, keys=fields.Str(), values=fields.Str())
karton_arguments = fields.Dict(missing={}, keys=fields.Str())
tags = fields.Nested(TagRequestSchema, many=True, missing=[])
share_3rd_party = fields.Boolean(missing=True)

Expand Down
14 changes: 4 additions & 10 deletions tests/backend/test_karton.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,24 @@ def test_karton_analysis_after_adding_sample(admin_session):
assert len(analyses) == 1


def test_karton_reanalyze_object_with_args(admin_session):
def test_karton_reanalyze_blob(admin_session):
test = admin_session
blob_name = rand_string(15)
argument_key = rand_string(5)
argument_value = rand_string(5)
blob = test.add_blob(None, blobname=blob_name, blobtype="inject", content="""
Binary junk: \x00\x01\x02\x03\x04\x05\x07
HELLO WORLD!
========""" + random_name())
blob_dhash = blob["id"]
new_analysis = test.reanalyze_object(blob_dhash, arguments={argument_key: argument_value})
test.reanalyze_object(blob_dhash)
analyses = test.get_analyses(blob_dhash)["analyses"]
assert new_analysis["arguments"] == {argument_key: argument_value}
assert len(analyses) == 2

incorrect_object_dhash = "abcdefghi"
with ShouldRaise(status_code=404):
test.reanalyze_object(incorrect_object_dhash)


def test_karton_reanalyze_object_without_args(admin_session):
def test_karton_reanalyze_file(admin_session):
test = admin_session
file_name = rand_string(15)
file_content = rand_string()
Expand All @@ -77,18 +74,15 @@ def test_karton_reanalyze_object_without_args(admin_session):
def test_get_karton_analysis_info(admin_session):
test = admin_session
blob_name = rand_string(15)
argument_key = rand_string(5)
argument_value = rand_string(5)
blob = test.add_blob(None, blobname=blob_name, blobtype="inject", content="""
Binary junk: \x00\x01\x02\x03\x04\x05\x07
HELLO WORLD!
========""" + random_name())
blob_dhash = blob["id"]
new_analysis = test.reanalyze_object(blob_dhash, arguments={argument_key: argument_value})
new_analysis = test.reanalyze_object(blob_dhash)
analysis_id = new_analysis["id"]
analysis_info = test.get_analysis_info(blob_dhash, analysis_id)
assert analysis_id == analysis_info["id"]
assert analysis_info["arguments"] == {argument_key: argument_value}


def test_assign_analysis_to_object(admin_session):
Expand Down

0 comments on commit ee5cb12

Please sign in to comment.