Skip to content

Commit

Permalink
Merge pull request #2 from GSA-TTS/peewee-sqlite
Browse files Browse the repository at this point in the history
Track scan results, timestamps in a SQLite database
  • Loading branch information
timoballard committed May 7, 2024
2 parents 8b6376a + cd86de6 commit 504ff59
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 41 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
venv/

.env
.env

scanner.db
160 changes: 120 additions & 40 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from boto3 import client as boto3_client
from botocore.client import ClientError, Config

from datetime import datetime
from enum import Enum
import environs
from io import BytesIO
Expand All @@ -19,29 +19,50 @@

from config import S3Config, EnvS3Config, ClamAVConfig, EnvClamAVConfig

from peewee import *

env = environs.Env()

dictConfig({
'version': 1,
'formatters': {'default': {
'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
}},
'handlers': {'default': {
'class': 'logging.StreamHandler',
'stream': sys.stdout,
'formatter': 'default'
}},
'root': {
'level': 'INFO',
'handlers': ['default']
dictConfig(
{
"version": 1,
"formatters": {
"default": {
"format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
}
},
"handlers": {
"default": {
"class": "logging.StreamHandler",
"stream": sys.stdout,
"formatter": "default",
}
},
"root": {"level": "INFO", "handlers": ["default"]},
}
})
)

FILE_REFRESH_INTERVAL_SECS = 600
FILE_SCAN_INTERVAL_SECS = 1

db = SqliteDatabase("scanner.db")


class ScannedFile(Model):
filename = CharField(unique=True)
last_scan_timestamp = DateTimeField(null=True)
last_scan_result = CharField()

class Meta:
database = db


logger = logging.getLogger(__name__)


class ScanResult(Enum):
CLEAN = 1,
INFECTED = 2,
CLEAN = (1,)
INFECTED = (2,)
UNKNOWN = 3

@classmethod
Expand All @@ -66,6 +87,7 @@ def construct_s3_client(config: S3Config) -> boto3_client:

return s3_client


def scan_file(clamav_config: ClamAVConfig, file) -> ScanResult:
response = requests.post(
clamav_config.endpoint_url,
Expand All @@ -78,8 +100,8 @@ def scan_file(clamav_config: ClamAVConfig, file) -> ScanResult:

def prepare_env():
# read .env file if there is one
env.read_env(recurse=False)
env.read_env(recurse=False, override=True)

# load VCAP_SERVICES into env if defined
try:
vcap_services = json.loads(env.str("VCAP_SERVICES"))
Expand All @@ -102,49 +124,107 @@ def prepare_env():
logger.info("no VCAP_SERVICES defined in env")


def scan_loop():
prepare_env()
def create_scanned_file(filename, scan_timestamp, scan_result):
ScannedFile.insert(
filename=filename,
last_scan_timestamp=scan_timestamp,
last_scan_result=scan_result,
).on_conflict_ignore().execute()


def upsert_scanned_file(filename, scan_timestamp, scan_result):
ScannedFile.insert(
filename=filename,
last_scan_timestamp=scan_timestamp,
last_scan_result=scan_result,
).on_conflict(
conflict_target=[ScannedFile.filename],
preserve=[ScannedFile.last_scan_timestamp, ScannedFile.last_scan_result],
).execute()


def refresh_files():
"""
Periodically scan the target S3 bucket and created a ScannedFile entry in the database for file
"""
s3_config = EnvS3Config(env)
s3_client = construct_s3_client(s3_config)

while True:
s3_config = EnvS3Config(env)
s3_client = construct_s3_client(s3_config)

clamav_config = EnvClamAVConfig(env)
logger.info("refreshing files...")

paginator = s3_client.get_paginator('list_objects_v2')
paginator = s3_client.get_paginator("list_objects_v2")
pages = paginator.paginate(Bucket=s3_config.bucket)

if pages:
for page in pages:
if "Contents" in page:
for object_summary in page["Contents"]:
object_name = object_summary["Key"]
scan_timestamp = None
scan_result = ScanResult.UNKNOWN

file = BytesIO()
s3_client.download_fileobj(s3_config.bucket, object_name, file)
file.seek(0)
create_scanned_file(object_name, scan_timestamp, scan_result)

scan_result = scan_file(clamav_config, file)
logger.info("done refreshing files...")

logger.info(f"{object_name}: scan result: {scan_result}")
sleep(FILE_REFRESH_INTERVAL_SECS)


def scan_files():
"""
Fetch the least recently scanned file from the database, scan it, and update its database record with the new scan result & timestamp
"""
while True:
s3_config = EnvS3Config(env)

s3_client = construct_s3_client(s3_config)

clamav_config = EnvClamAVConfig(env)

# scan the least recently scanned file in the database
for scanned_file in (
ScannedFile.select().order_by(ScannedFile.last_scan_timestamp).limit(1)
):
file = BytesIO()
s3_client.download_fileobj(s3_config.bucket, scanned_file.filename, file)
file.seek(0)

scan_result = scan_file(clamav_config, file)

upsert_scanned_file(scanned_file.filename, datetime.utcnow(), scan_result)

logger.info(f"{scanned_file.filename}: scan result: {scan_result}")

sleep(FILE_SCAN_INTERVAL_SECS)

sleep(1)

app = Flask(__name__)

@app.route('/')

@app.route("/")
def health_check():
logger.info('handling health check')
return 'healthy'
logger.info("handling health check")
return "healthy"


def create_app():
worker = Thread(target=scan_loop, daemon=True)
worker.start()
prepare_env()

db.connect()
db.create_tables([ScannedFile])

file_refresh_worker = Thread(target=refresh_files, daemon=True)
file_refresh_worker.start()

scan_worker = Thread(target=scan_files, daemon=True)
scan_worker.start()

return app

if __name__ == '__main__':

if __name__ == "__main__":
logger.info("starting up...")

port = int(os.getenv('PORT', '8080'))
create_app().run(host='0.0.0.0', port=port)
port = int(os.getenv("PORT", "8080"))
create_app().run(host="0.0.0.0", port=port)
1 change: 1 addition & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import environs


class S3Config:
def __init__(self):
self.region_name = ""
Expand Down
8 changes: 8 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
black==24.4.2
blinker==1.7.0
boto3==1.34.92
botocore==1.34.92
Expand All @@ -14,12 +15,19 @@ Jinja2==3.1.3
jmespath==1.0.1
MarkupSafe==2.1.5
marshmallow==3.21.1
mypy-extensions==1.0.0
newrelic==9.9.0
packaging==24.0
pathspec==0.12.1
peewee==3.17.3
platformdirs==4.2.1
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
requests==2.31.0
s3transfer==0.10.1
six==1.16.0
tomli==2.0.1
typing_extensions==4.11.0
urllib3==1.26.18
Werkzeug==3.0.2
zipp==3.18.1

0 comments on commit 504ff59

Please sign in to comment.