Skip to content

Commit

Permalink
Add atomic scanner for pip, npm and gem package managers
Browse files Browse the repository at this point in the history
  • Loading branch information
dharmit committed Jan 4, 2017
1 parent 7b3953e commit 03099bc
Show file tree
Hide file tree
Showing 10 changed files with 417 additions and 1 deletion.
9 changes: 9 additions & 0 deletions atomic_scanners/misc-package-updates/Dockerfile
@@ -0,0 +1,9 @@
# Base image for the scanner container.
FROM registry.centos.org/centos/centos

# Install command for this image (the README installs it with `atomic install`).
# It runs install.sh with the host's /etc/atomic.d mounted at
# /host/etc/atomic.d/ so the scanner configuration can be copied onto the host.
LABEL INSTALL='docker run -it --rm --privileged -v /etc/atomic.d:/host/etc/atomic.d/ $IMAGE sh /install.sh'

# scanner.py imports the `docker` module; presumably python-docker-py
# provides it -- confirm against the package contents.
RUN yum -y update && \
    yum -y install python-docker-py && \
    yum clean all

# COPY rather than ADD: these are plain local files, so ADD's archive
# extraction and remote-URL handling are not wanted.
COPY misc-package-updates scanner.py install.sh /
30 changes: 30 additions & 0 deletions atomic_scanners/misc-package-updates/README.md
@@ -0,0 +1,30 @@
Atomic scanner: misc-package-updates
--------------------------------

This is a container image scanner based on `atomic scan`. It scans CentOS-based
container images in the CentOS Community Container Pipeline and reports the
packages installed through pip, npm or gem for which updates are available.

Steps to use:

- Pull container image from **registry.centos.org**:

```
$ docker pull registry.centos.org/pipeline-images/misc-package-updates
```

- Install it using `atomic`:

```
$ atomic install registry.centos.org/pipeline-images/misc-package-updates
```


- Run the scanner on CentOS based images:

```
$ IMAGE_NAME=registry.centos.org/centos/centos atomic scan --scanner misc-package-updates registry.centos.org/centos/centos
```

The scanner needs the environment variable `IMAGE_NAME` to be set on the host
system so that it can scan the image and report the results.
1 change: 1 addition & 0 deletions atomic_scanners/misc-package-updates/cccp.yml
@@ -0,0 +1 @@
job-id: misc-package-managers
3 changes: 3 additions & 0 deletions atomic_scanners/misc-package-updates/install.sh
@@ -0,0 +1,3 @@
#!/bin/bash
# Copy the scanner configuration file from the image into the host's
# atomic.d directory, which the INSTALL label mounts at /host/etc/atomic.d/.
echo "Copying misc-package-updates scanner configuration file to host filesystem..."
cp -v /misc-package-updates /host/etc/atomic.d/
20 changes: 20 additions & 0 deletions atomic_scanners/misc-package-updates/misc-package-updates
@@ -0,0 +1,20 @@
# Scanner configuration; install.sh copies this file to the host's
# /etc/atomic.d/ directory.
type: scanner
scanner_name: misc-package-updates
image_name: registry.centos.org/pipeline-images/misc-package-updates
# Scan that is run when none is selected explicitly (presumably -- confirm
# against the atomic scan documentation).
default_scan: pip-updates
# Extra arguments for the scanner container: scanner.py talks to the Docker
# daemon through /var/run/docker.sock and reads IMAGE_NAME from its environment.
custom_args: ["-v", "/var/run/docker.sock:/var/run/docker.sock", "-e", "IMAGE_NAME=$IMAGE_NAME"]
# Each scan runs scanner.py with the package manager to check as its only
# command-line argument.
scans: [
{ name: pip-updates,
args: ['python', 'scanner.py', 'pip'],
description: "Check for updates from pip package managers"
},
{ name: gem-updates,
args: ['python', 'scanner.py', 'gem'],
description: "Check for updates from gem package managers"
},
{ name: npm-updates,
args: ['python', 'scanner.py', 'npm'],
description: "Check for updates from npm package managers"
}

]
208 changes: 208 additions & 0 deletions atomic_scanners/misc-package-updates/scanner.py
@@ -0,0 +1,208 @@
#!/usr/bin/env python

from datetime import datetime
import docker
import json
import logging
import os
import sys

# Directory under which the scan results are written; the results end up in
# a sub-directory named after the image ID.
OUTDIR = "/scanout"
# Name of the image to scan, read from the environment (None when unset).
IMAGE_NAME = os.environ.get("IMAGE_NAME")

# set up logging
logger = logging.getLogger("container-pipeline")
logger.setLevel(logging.DEBUG)

# Send every record, DEBUG and above, to stdout.
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
ch.setFormatter(formatter)
logger.addHandler(ch)

# Client connecting to Docker socket
client = docker.Client(base_url="unix:///var/run/docker.sock")

# Argument passed to script. Decides package manager to check for.
# NOTE(review): raises IndexError when the script is run without arguments.
cli_arg = sys.argv[1]

# image UUID
# The "Id" field reported by Docker for IMAGE_NAME; used in the report and as
# the name of the output sub-directory.
UUID = client.inspect_image(IMAGE_NAME)["Id"]


def binary_does_not_exist(response):
    """
    Tell whether the exec output reports that the requested binary (npm, pip
    or gem) is missing from the container image.

    :param response: output of the command executed inside the container
    :return: True when the "executable file not found" marker is present
    """
    missing_binary_marker = 'executable file not found in'
    return missing_binary_marker in response


def split_by_newline(response):
    """
    Break the response into its lines.

    :param response: string to split
    :return: list with the pieces found between newline characters
    """
    newline = "\n"
    lines = response.split(newline)
    return lines


def remove_last_blank_character(response):
    """
    Drop the trailing '' entry that splitting newline-terminated output
    leaves at the end of the list.

    :param response: list of lines
    :return: the list without its last element when that element is '',
        otherwise the list unchanged; an empty list is returned as is
    """
    # Guard against an empty list: indexing [-1] on it would raise IndexError.
    if response and response[-1] == '':
        return response[:-1]
    return response


def list_of_outdated_packages(response):
    """
    Return a list containing outdated packages for a package manager.

    :param response: list of output lines, one package per line
    :return: list holding the first whitespace-separated token of every
        non-blank line
    """
    # Blank lines carry no package name and would make split()[0] fail.
    return [line.split()[0] for line in response if line.strip()]


def pip_list_of_outdated_packages(response):
    """
    List of outdated packages for pip package manager.

    :param response: raw output of `pip list --outdated`
    :return: list of package names; empty when nothing is outdated
    """
    # pip returns a string output separated by '\n'
    lines = remove_last_blank_character(split_by_newline(response))

    outdated_packages = list_of_outdated_packages(lines)

    # if newer version of pip is being used inside the container under scan, a
    # DEPRECATION warning is raised by pip; we must ignore this.
    # The emptiness check keeps an image with no outdated packages from
    # raising IndexError.
    if outdated_packages and "DEPRECATION:" in outdated_packages[0]:
        return outdated_packages[1:]

    return outdated_packages


def npm_list_of_outdated_packages(response):
    """
    Extract the outdated package names from the output of `npm outdated`.

    :param response: raw output of the npm command
    :return: list of package names
    """
    all_lines = split_by_newline(response)

    # npm output is newline separated like pip's; its first line is skipped.
    package_lines = remove_last_blank_character(all_lines[1:])

    return list_of_outdated_packages(package_lines)


def gem_list_of_outdated_packages(response):
    """
    Extract the outdated package names from the output of `gem outdated`.

    :param response: raw output of the gem command
    :return: list of package names
    """
    # gem output is newline separated, one package per line
    return list_of_outdated_packages(
        remove_last_blank_character(split_by_newline(response))
    )


def format_response(cli_arg, response):
    """
    Hand the raw command output to the parser that matches the package
    manager named on the command line.

    :param cli_arg: 'pip', 'npm' or 'gem'
    :param response: raw output of the package manager command
    :return: list of outdated packages, or None for any other cli_arg
    """
    if cli_arg == 'pip':
        return pip_list_of_outdated_packages(response)

    if cli_arg == "npm":
        return npm_list_of_outdated_packages(response)

    if cli_arg == "gem":
        return gem_list_of_outdated_packages(response)

    return None


def template_json_data(scan_type):
    """
    Template data on top of which additional data that is provided by different
    functions is added.

    :param scan_type: package manager being checked, e.g. "pip"
    :return: dict with the report metadata and an empty result list stored
        under the "<scan_type> package updates" key
    """
    current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    json_out = {
        "Start Time": current_time,
        "Successful": "",
        "Scan Type": scan_type + "-updates",
        "UUID": UUID,
        "CVE Feed Last Updated": "NA",
        "Scanner": "Misc Package Updates",
        # Derive the key from the parameter rather than the global cli_arg so
        # that "Scan Type" and the result key always agree.
        "Scan Results": {"{} package updates".format(scan_type): []}
    }
    return json_out

json_out = template_json_data(cli_arg)

# Command that lists the outdated packages for each supported package manager.
OUTDATED_COMMANDS = {
    "pip": "pip list --outdated",
    "gem": "gem outdated",
    "npm": "npm outdated",
}

# Stays None until the container exists, so the cleanup below knows whether
# there is anything to remove.
container = None

try:
    if cli_arg not in OUTDATED_COMMANDS:
        raise ValueError(
            "Unsupported package manager: {}".format(cli_arg)
        )

    # Create the container before starting/running it. `tail -f /dev/null`
    # keeps it running so that a command can be executed inside it.
    container = client.create_container(image=IMAGE_NAME,
                                        command="tail -f /dev/null")

    # Running the container
    client.start(container.get('Id'))

    # Run the package manager inside the container and collect its output
    exe = client.exec_create(
        container=container.get("Id"),
        cmd=OUTDATED_COMMANDS[cli_arg]
    )
    response = client.exec_start(exe)

    if binary_does_not_exist(response):
        json_out["Scan Results"] = \
            "Could not find {} executable in the image".format(cli_arg)
    else:
        json_out["Scan Results"]["{} package updates".format(cli_arg)] = \
            format_response(cli_arg, response)
    json_out["Finished Time"] = \
        datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
except Exception as e:
    # The template report is still written further down, so a failure is
    # logged rather than re-raised.
    logger.log(
        level=logging.ERROR,
        msg="Scanner failed: {}".format(e)
    )
finally:
    # remove the container, even when the scan itself failed
    if container is not None:
        try:
            client.remove_container(container=container.get("Id"), force=True)
        except Exception as e:
            logger.log(
                level=logging.ERROR,
                msg="Could not remove the scan container: {}".format(e)
            )


# Results are written to <OUTDIR>/<image id>/image_scan_results.json
output_dir = os.path.join(OUTDIR, UUID)
try:
    os.makedirs(output_dir)
except OSError:
    # A directory that already exists is fine; anything else is a real error.
    if not os.path.isdir(output_dir):
        raise

output_file_relative = "image_scan_results.json"

output_file_absoulte = os.path.join(output_dir, output_file_relative)

with open(output_file_absoulte, "w") as f:
    f.write(json.dumps(json_out, indent=4))
7 changes: 7 additions & 0 deletions beanstalk_worker/constants.py
@@ -0,0 +1,7 @@
# Maps the full image name of each scanner to the list of result file names
# that are read back after a scan.
SCANNERS_OUTPUT = {
    "registry.centos.org/pipeline-images/pipeline-scanner": [
        "image_scan_results.json",
    ],
    "registry.centos.org/pipeline-images/misc-package-updates": [
        "image_scan_results.json",
    ],
    "registry.centos.org/pipeline-images/scanner-rpm-verify": [
        "RPMVerify.json",
    ],
}
95 changes: 95 additions & 0 deletions beanstalk_worker/scanner.py
@@ -0,0 +1,95 @@
#!/usr/bin/env python

# This file contains a class `Scanner` that can be used as super class for
# other scanners in the project to avoid duplication of code

import json
import logging
import os
import subprocess
import sys

from Atomic import Atomic
from constants import SCANNERS_OUTPUT


# Logger for this module; it uses the name "container-pipeline".
logger = logging.getLogger("container-pipeline")
logger.setLevel(logging.DEBUG)

# Send every record, DEBUG and above, to stdout.
# NOTE(review): the handler is added at import time; if another module in the
# same process also adds one to this logger, records may be printed more than
# once -- confirm.
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
ch.setFormatter(formatter)
logger.addHandler(ch)


class Scanner(object):
    """
    Base class that runs a scanner command and loads the JSON results it
    leaves on disk. Child classes provide the scanner names and the command.
    """

    def __init__(self, image_under_test, scanner_name, full_scanner_name):
        """
        :param image_under_test: image that is going to be scanned
        :param scanner_name: short scanner name used in messages and output
        :param full_scanner_name: key used to look the scanner's result file
            up in SCANNERS_OUTPUT
        """
        # to be provided by child class
        self.scanner_name = scanner_name
        self.full_scanner_name = full_scanner_name
        self.image_under_test = image_under_test

        # Scanner class's own attributes
        self.atomic_obj = Atomic()
        self.image_id = self.atomic_obj.get_input_id(self.image_under_test)

    def run_atomic_scanner(self, cmd):
        """
        Run the scanner with the cmd provided by child class.

        :param cmd: command handed to subprocess.Popen
        :return: (stdout, stderr) tuple of the finished process
        """
        process = subprocess.Popen(
            cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE
        )

        # returns out, err
        return process.communicate()

    def run(self, cmd):
        """
        Run the scanner and load the results file it produced.

        :param cmd: command handed to run_atomic_scanner
        :return: (True, processed results) on success, (False, {}) when the
            scanner printed nothing or the results file does not exist
        """
        json_data = {}

        out, err = self.run_atomic_scanner(cmd)

        # Whitespace-only output has no token to parse, so treat it like
        # empty output instead of failing with IndexError below.
        if not out.strip():
            logger.log(
                level=logging.FATAL,
                msg="Error running the scanner {}. Error {}".format(
                    self.scanner_name,
                    err
                )
            )
            return False, json_data

        # The last whitespace-separated token of stdout, cut at its first
        # '.', is used as the results directory.
        # NOTE(review): presumably the path printed at the end of the
        # `atomic scan` output -- confirm.
        output_json_file = os.path.join(
            out.strip().split()[-1].split('.')[0],
            self.image_id,
            SCANNERS_OUTPUT[self.full_scanner_name][0]
        )

        if not os.path.exists(output_json_file):
            logger.log(
                level=logging.FATAL,
                msg="No scan results found at {}".format(output_json_file)
            )
            return False, json_data

        # The context manager closes the results file once it has been read.
        with open(output_json_file) as results_file:
            json_data = json.loads(results_file.read())

        return True, self.process_output(json_data)

    def process_output(self, json_data):
        """
        Process the output from scanner.

        :param json_data: parsed contents of the scanner's results file
        :return: dict with the scanner name, a short message and the results
            stored under "logs"
        """
        data = {}

        data["scanner_name"] = self.scanner_name

        data["msg"] = "{} results.".format(self.scanner_name)
        data["logs"] = json_data
        return data

0 comments on commit 03099bc

Please sign in to comment.