diff --git a/Dockerfile b/Dockerfile index f26d558e..0ed74d48 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # USAGE -FROM ubuntu:14.04.4 +FROM ubuntu:16.04 MAINTAINER V. David Zvenyach ### @@ -34,7 +34,6 @@ RUN \ unzip \ wget \ zlib1g-dev \ - autoconf \ automake \ bison \ @@ -46,7 +45,6 @@ RUN \ libtool \ pkg-config \ sqlite3 \ - # Additional dependencies for python-build libbz2-dev \ llvm \ @@ -57,10 +55,8 @@ RUN apt-get install \ --yes \ --no-install-recommends \ --no-install-suggests \ - nodejs \ - npm \ - python3-dev \ - python3-pip + nodejs \ + npm # Clean up packages. RUN apt-get clean \ @@ -81,12 +77,29 @@ RUN wget ${PYENV_REPO}/archive/v${PYENV_RELEASE}.zip \ && mv $PYENV_ROOT/pyenv-$PYENV_RELEASE/* $PYENV_ROOT/ \ && rm -r $PYENV_ROOT/pyenv-$PYENV_RELEASE -ENV PATH $PYENV_ROOT/bin:$PATH - +# +# Uncomment these lines if you just want to install python... +# +# ENV PATH $PYENV_ROOT/bin:$PYENV_ROOT/versions/${PYENV_PYTHON_VERSION}/bin:$PATH +# RUN echo 'eval "$(pyenv init -)"' >> /etc/profile \ +# && eval "$(pyenv init -)" \ +# && pyenv install $PYENV_PYTHON_VERSION \ +# && pyenv local ${PYENV_PYTHON_VERSION} + +# +# ...uncomment these lines if you want to also debug python code in GDB +# +ENV PATH $PYENV_ROOT/bin:$PYENV_ROOT/versions/${PYENV_PYTHON_VERSION}-debug/bin:$PATH RUN echo 'eval "$(pyenv init -)"' >> /etc/profile \ && eval "$(pyenv init -)" \ - && pyenv install $PYENV_PYTHON_VERSION \ - && pyenv local $PYENV_PYTHON_VERSION + && pyenv install --debug --keep $PYENV_PYTHON_VERSION \ + && pyenv local ${PYENV_PYTHON_VERSION}-debug +RUN ln -s /opt/pyenv/sources/${PYENV_PYTHON_VERSION}-debug/Python-${PYENV_PYTHON_VERSION}/python-gdb.py /opt/pyenv/versions/${PYENV_PYTHON_VERSION}-debug/bin/python3.6-gdb.py +RUN ln -s /opt/pyenv/sources/${PYENV_PYTHON_VERSION}-debug/Python-${PYENV_PYTHON_VERSION}/python-gdb.py /opt/pyenv/versions/${PYENV_PYTHON_VERSION}-debug/bin/python3-gdb.py +RUN ln -s 
/opt/pyenv/sources/${PYENV_PYTHON_VERSION}-debug/Python-${PYENV_PYTHON_VERSION}/python-gdb.py /opt/pyenv/versions/${PYENV_PYTHON_VERSION}-debug/bin/python-gdb.py +RUN apt-get -qq update && \ + apt-get -qq --yes --no-install-recommends --no-install-suggests install gdb +RUN echo add-auto-load-safe-path /opt/pyenv/sources/${PYENV_PYTHON_VERSION}-debug/Python-${PYENV_PYTHON_VERSION}/ >> /etc/gdb/gdbinit COPY requirements.txt requirements.txt RUN pip3 install --upgrade pip @@ -112,16 +125,6 @@ ENV PATH /go/bin:$PATH # Node RUN ln -s /usr/bin/nodejs /usr/bin/node -### -# ssllabs-scan - -RUN mkdir -p /go/src /go/bin \ - && chmod -R 777 /go -RUN go get github.com/ssllabs/ssllabs-scan -RUN cd /go/src/github.com/ssllabs/ssllabs-scan/ \ - && git checkout stable \ - && go install -ENV SSLLABS_PATH /go/bin/ssllabs-scan ### # phantomas @@ -136,7 +139,12 @@ RUN npm install \ ### # pshtt -RUN pip3 install pshtt==0.2.1 +RUN apt-get install -qq --yes locales +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 +RUN pip3 install pshtt ### @@ -145,8 +153,6 @@ RUN pip3 install pshtt==0.2.1 ENV SCANNER_HOME /home/scanner RUN mkdir $SCANNER_HOME -COPY . $SCANNER_HOME - RUN groupadd -r scanner \ && useradd -r -c "Scanner user" -g scanner scanner \ && chown -R scanner:scanner ${SCANNER_HOME} @@ -161,3 +167,5 @@ WORKDIR $SCANNER_HOME VOLUME /data ENTRYPOINT ["./scan_wrap.sh"] + +COPY . 
$SCANNER_HOME diff --git a/requirements.txt b/requirements.txt index 6d7bea46..63326e1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ pyyaml # to support sslyze scanner sslyze cryptography +timeout-decorator # to support censys gatherer censys diff --git a/scanners/pshtt.py b/scanners/pshtt.py index daffb55a..99d856f3 100644 --- a/scanners/pshtt.py +++ b/scanners/pshtt.py @@ -50,7 +50,7 @@ def scan(domain, options): if (force is False) and (os.path.exists(cache_pshtt)): logging.debug("\tCached.") - raw = open(cache_pshtt).read() + raw = utils.read(cache_pshtt) data = json.loads(raw) if (data.__class__ is dict) and data.get('invalid'): return None diff --git a/scanners/sslyze.py b/scanners/sslyze.py index 2202a556..3e39c7e1 100644 --- a/scanners/sslyze.py +++ b/scanners/sslyze.py @@ -2,9 +2,16 @@ from scanners import utils import os +import sslyze +from sslyze.synchronous_scanner import SynchronousScanner +from sslyze.concurrent_scanner import ConcurrentScanner, PluginRaisedExceptionScanResult +from sslyze.plugins.openssl_cipher_suites_plugin import Tlsv10ScanCommand, Tlsv11ScanCommand, Tlsv12ScanCommand, Sslv20ScanCommand, Sslv30ScanCommand +from sslyze.plugins.certificate_info_plugin import CertificateInfoScanCommand + import json import cryptography import cryptography.hazmat.backends.openssl +from cryptography.hazmat.primitives.serialization import Encoding from cryptography.hazmat.primitives.asymmetric import ec, dsa, rsa ### @@ -14,8 +21,17 @@ # # If data exists for a domain from `pshtt`, will check results # and only process domains with valid HTTPS, or broken chains. +# +# Supported options: +# +# --sslyze-serial - If set, will use a synchronous (single-threaded +# in-process) scanner. Defaults to false. ### +# Number of seconds to wait during sslyze connection check. +# Not much patience here, and very willing to move on. 
+network_timeout = 5 + command = os.environ.get("SSLYZE_PATH", "sslyze") @@ -44,7 +60,7 @@ def scan(domain, options): if (force is False) and (os.path.exists(cache_json)): logging.debug("\tCached.") - raw_json = open(cache_json).read() + raw_json = utils.read(cache_json) try: data = json.loads(raw_json) if (data.__class__ is dict) and data.get('invalid'): @@ -57,42 +73,27 @@ def scan(domain, options): # use scan_domain (possibly www-prefixed) to do actual scan logging.debug("\t %s %s" % (command, scan_domain)) - # This is --regular minus --heartbleed - # See: https://github.com/nabla-c0d3/sslyze/issues/217 - raw_response = utils.scan([ - command, - "--sslv2", "--sslv3", "--tlsv1", "--tlsv1_1", "--tlsv1_2", - "--reneg", "--resum", "--certinfo", - "--http_get", "--hide_rejected_ciphers", - "--compression", "--openssl_ccs", - "--fallback", "--quiet", - scan_domain, "--json_out=%s" % cache_json - ]) - - if raw_response is None: + data = run_sslyze(scan_domain, options) + + if data is None: # TODO: save standard invalid JSON data...? utils.write(utils.invalid({}), cache_json) logging.warn("\tBad news scanning, sorry!") return None - raw_json = utils.scan(["cat", cache_json]) - if not raw_json: - logging.warn("\tBad news reading JSON, sorry!") - return None - + # not so raw... 
+ raw_json = utils.json_for(data) utils.write(raw_json, cache_json) - data = parse_sslyze(raw_json) - if data is None: logging.warn("\tNo valid target for scanning, couldn't connect.") return None yield [ scan_domain, - data['protocols']['sslv2'], data['protocols']['sslv3'], - data['protocols']['tlsv1.0'], data['protocols']['tlsv1.1'], - data['protocols']['tlsv1.2'], + data['protocols'].get('sslv2'), data['protocols'].get('sslv3'), + data['protocols'].get('tlsv1.0'), data['protocols'].get('tlsv1.1'), + data['protocols'].get('tlsv1.2'), data['config'].get('any_dhe'), data['config'].get('all_dhe'), data['config'].get('weakest_dh'), @@ -129,34 +130,16 @@ def scan(domain, options): "Errors" ] + # Get the relevant fields out of sslyze's JSON format. # # Certificate PEM data must be separately parsed using # the Python cryptography module. -# -# If we were using the sslyze Python API, this would be -# done for us automatically, but serializing the results -# to disk for caching would be prohibitively complex. - -def parse_sslyze(raw_json): - - data = json.loads(raw_json) - - # 1. Isolate first successful scanned IP. - if len(data['accepted_targets']) == 0: - return None - target = data['accepted_targets'][0]['commands_results'] - - # Protocol version support. +def run_sslyze(hostname, options): + # Parse the results into a dict, which will also be cached as JSON. data = { - 'protocols': { - 'sslv2': supported_protocol(target, 'sslv2'), - 'sslv3': supported_protocol(target, 'sslv3'), - 'tlsv1.0': supported_protocol(target, 'tlsv1'), - 'tlsv1.1': supported_protocol(target, 'tlsv1_1'), - 'tlsv1.2': supported_protocol(target, 'tlsv1_2') - }, + 'protocols': {}, 'config': {}, @@ -165,19 +148,35 @@ def parse_sslyze(raw_json): 'errors': None } - # TODO: Whether OCSP stapling is enabled. 
- # Relevant fields: https://nabla-c0d3.github.io/sslyze/documentation/available-scan-commands.html#sslyze.plugins.certificate_info_plugin.CertificateInfoScanResult.ocsp_response + sync = options.get("sslyze-serial", False) + + # Initialize either a synchronous or concurrent scanner. + server_info, scanner = init_sslyze(hostname, options, sync=sync) - # ocsp = target.select_one('ocspStapling') - # if ocsp: - # data['config']['ocsp_stapling'] = (ocsp["isSupported"] == 'True') + if server_info is None: + data['errors'] = "Connectivity not established." + return data + + # Whether sync or concurrent, get responses for all scans. + if sync: + sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, certs = scan_serial(scanner, server_info, options) + else: + sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, certs = scan_parallel(scanner, server_info, options) + + data['protocols'] = { + 'sslv2': supported_protocol(sslv2), + 'sslv3': supported_protocol(sslv3), + 'tlsv1.0': supported_protocol(tlsv1), + 'tlsv1.1': supported_protocol(tlsv1_1), + 'tlsv1.2': supported_protocol(tlsv1_2) + } accepted_ciphers = ( - target['sslv2'].get("accepted_cipher_list", []) + - target['sslv3'].get("accepted_cipher_list", []) + - target['tlsv1'].get("accepted_cipher_list", []) + - target['tlsv1_1'].get("accepted_cipher_list", []) + - target['tlsv1_2'].get("accepted_cipher_list", []) + (sslv2.accepted_cipher_list or []) + + (sslv3.accepted_cipher_list or []) + + (tlsv1.accepted_cipher_list or []) + + (tlsv1_1.accepted_cipher_list or []) + + (tlsv1_2.accepted_cipher_list or []) ) if len(accepted_ciphers) > 0: @@ -191,7 +190,7 @@ def parse_sslyze(raw_json): any_3des = False for cipher in accepted_ciphers: - name = cipher["openssl_name"] + name = cipher.openssl_name if "RC4" in name: any_rc4 = True else: @@ -214,8 +213,8 @@ def parse_sslyze(raw_json): # Find the weakest available DH group size, if any are available. 
weakest_dh = 1234567890 # nonsense maximum for cipher in accepted_ciphers: - if cipher.get('dh_info', None) is not None: - size = int(cipher['dh_info']['GroupSize']) + if cipher.dh_info is not None: + size = int(cipher.dh_info['GroupSize']) if size < weakest_dh: weakest_dh = size @@ -224,80 +223,83 @@ def parse_sslyze(raw_json): data['config']['weakest_dh'] = weakest_dh - # If there was an exception parsing the certificate, catch it before fetching cert info. - if False: - data['errors'] = "TODO" + if certs: + data['certs'] = analyze_certs(certs) - else: + return data - # Served chain. - served_chain = target['certinfo']['certificate_chain'] - # Constructed chain may not be there if it didn't validate. - constructed_chain = target['certinfo']['verified_certificate_chain'] +def analyze_certs(certs): + data = {'certs': {}} - highest_served = parse_cert(served_chain[-1]) - issuer = cert_issuer_name(highest_served) + # Served chain. + served_chain = certs.certificate_chain - if issuer: - data['certs']['served_issuer'] = issuer - else: - data['certs']['served_issuer'] = "(None found)" + # Constructed chain may not be there if it didn't validate. 
+ constructed_chain = certs.verified_certificate_chain - if (constructed_chain and (len(constructed_chain) > 0)): - highest_constructed = parse_cert(constructed_chain[-1]) - issuer = cert_issuer_name(highest_constructed) - if issuer: - data['certs']['constructed_issuer'] = issuer - else: - data['certs']['constructed_issuer'] = "(None constructed)" + highest_served = parse_cert(served_chain[-1]) + issuer = cert_issuer_name(highest_served) - leaf = parse_cert(served_chain[0]) - leaf_key = leaf.public_key() + if issuer: + data['certs']['served_issuer'] = issuer + else: + data['certs']['served_issuer'] = "(None found)" - if hasattr(leaf_key, "key_size"): - data['certs']['key_length'] = leaf_key.key_size - elif hasattr(leaf_key, "curve"): - data['certs']['key_length'] = leaf_key.curve.key_size - else: - data['certs']['key_length'] = None - - if isinstance(leaf_key, rsa.RSAPublicKey): - leaf_key_type = "RSA" - elif isinstance(leaf_key, dsa.DSAPublicKey): - leaf_key_type = "DSA" - elif isinstance(leaf_key, ec.EllipticCurvePublicKey): - leaf_key_type = "ECDSA" + if (constructed_chain and (len(constructed_chain) > 0)): + highest_constructed = parse_cert(constructed_chain[-1]) + issuer = cert_issuer_name(highest_constructed) + if issuer: + data['certs']['constructed_issuer'] = issuer else: - leaf_key_type == str(leaf_key.__class__) + data['certs']['constructed_issuer'] = "(None constructed)" - data['certs']['key_type'] = leaf_key_type + leaf = parse_cert(served_chain[0]) + leaf_key = leaf.public_key() - # Signature of the leaf certificate only. 
- data['certs']['leaf_signature'] = leaf.signature_hash_algorithm.name + if hasattr(leaf_key, "key_size"): + data['certs']['key_length'] = leaf_key.key_size + elif hasattr(leaf_key, "curve"): + data['certs']['key_length'] = leaf_key.curve.key_size + else: + data['certs']['key_length'] = None + + if isinstance(leaf_key, rsa.RSAPublicKey): + leaf_key_type = "RSA" + elif isinstance(leaf_key, dsa.DSAPublicKey): + leaf_key_type = "DSA" + elif isinstance(leaf_key, ec.EllipticCurvePublicKey): + leaf_key_type = "ECDSA" + else: + leaf_key_type = str(leaf_key.__class__) - # Beginning and expiration dates of the leaf certificate - data['certs']['not_before'] = leaf.not_valid_before - data['certs']['not_after'] = leaf.not_valid_after + data['certs']['key_type'] = leaf_key_type - any_sha1_served = False - for cert in served_chain: - if parse_cert(cert).signature_hash_algorithm.name == "sha1": - any_sha1_served = True + # Signature of the leaf certificate only. + data['certs']['leaf_signature'] = leaf.signature_hash_algorithm.name - data['certs']['any_sha1_served'] = any_sha1_served + # Beginning and expiration dates of the leaf certificate + data['certs']['not_before'] = leaf.not_valid_before + data['certs']['not_after'] = leaf.not_valid_after - if data['certs'].get('constructed_issuer'): - data['certs']['any_sha1_constructed'] = target['certinfo']['has_sha1_in_certificate_chain'] + any_sha1_served = False + for cert in served_chain: + if parse_cert(cert).signature_hash_algorithm.name == "sha1": + any_sha1_served = True - return data + data['certs']['any_sha1_served'] = any_sha1_served + + if data['certs'].get('constructed_issuer'): + data['certs']['any_sha1_constructed'] = certs.has_sha1_in_certificate_chain + + return data['certs'] # Given the cert sub-obj from the sslyze JSON, use # the cryptography module to parse its PEM contents. 
def parse_cert(cert): backend = cryptography.hazmat.backends.openssl.backend - pem_bytes = cert['as_pem'].encode('utf-8') + pem_bytes = cert.public_bytes(Encoding.PEM).decode('ascii').encode('utf-8') return cryptography.x509.load_pem_x509_certificate(pem_bytes, backend) @@ -312,12 +314,133 @@ def cert_issuer_name(parsed): return attrs[0].value -# examines whether the protocol version turned out ot be supported -def supported_protocol(target, protocol): - if target[protocol].get("error_message", None) is not None: - logging.debug("Error connecting to %s: %s" % (protocol, target[protocol]["error_message"])) - return False - elif target[protocol].get("accepted_cipher_list", None) is None: - return False +# Given CipherSuiteScanResult, whether the protocol is supported +def supported_protocol(result): + return (len(result.accepted_cipher_list) > 0) + + +# SSlyze initialization boilerplate +def init_sslyze(hostname, options, sync=False): + global network_timeout + + network_timeout = int(options.get("network_timeout", network_timeout)) + + try: + server_info = sslyze.server_connectivity.ServerConnectivityInfo(hostname=hostname, port=443) + except sslyze.server_connectivity.ServerConnectivityError as error: + logging.warn("\tServer connectivity not established during initialization.") + return None, None + except Exception as err: + utils.notify(err) + logging.warn("\tUnknown exception when initializing server connectivity info.") + return None, None + + try: + logging.debug("\tTesting connectivity with timeout of %is." 
% network_timeout) + server_info.test_connectivity_to_server(network_timeout=network_timeout) + except sslyze.server_connectivity.ServerConnectivityError as err: + logging.warn("\tServer connectivity not established during test.") + return None, None + except Exception as err: + utils.notify(err) + logging.warn("\tUnknown exception when performing server connectivity info.") + return None, None + + if sync: + scanner = SynchronousScanner() + else: + scanner = ConcurrentScanner() + + return server_info, scanner + + +# Run each scan in-process, one at a time. +# Takes longer, but no multi-process funny business. +def scan_serial(scanner, server_info, options): + logging.debug("\tRunning scans in serial.") + logging.debug("\t\tSSLv2 scan.") + sslv2 = scanner.run_scan_command(server_info, Sslv20ScanCommand()) + logging.debug("\t\tSSLv3 scan.") + sslv3 = scanner.run_scan_command(server_info, Sslv30ScanCommand()) + logging.debug("\t\tTLSv1.0 scan.") + tlsv1 = scanner.run_scan_command(server_info, Tlsv10ScanCommand()) + logging.debug("\t\tTLSv1.1 scan.") + tlsv1_1 = scanner.run_scan_command(server_info, Tlsv11ScanCommand()) + logging.debug("\t\tTLSv1.2 scan.") + tlsv1_2 = scanner.run_scan_command(server_info, Tlsv12ScanCommand()) + + # Default to cert info on + if options.get("sslyze-no-certs", False) is False: + logging.debug("\t\tCertificate information scan.") + certs = scanner.run_scan_command(server_info, CertificateInfoScanCommand()) else: - return (len(target[protocol]["accepted_cipher_list"]) > 0) + certs = None + + logging.debug("\tDone scanning.") + + return sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, certs + + +# Run each scan in parallel, using multi-processing. +# Faster, but can generate many processes. 
+def scan_parallel(scanner, server_info, options): + logging.debug("\tRunning scans in parallel.") + + def queue(command): + try: + return scanner.queue_scan_command(server_info, command) + except Exception as err: + utils.notify(err) + logging.warn("Unknown exception queueing sslyze command.") + return None + + # Initialize commands and result containers + sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, certs = None, None, None, None, None, None + + # Queue them all up + queue(Sslv20ScanCommand()) + queue(Sslv30ScanCommand()) + queue(Tlsv10ScanCommand()) + queue(Tlsv11ScanCommand()) + queue(Tlsv12ScanCommand()) + + # Default to cert info on. + if options.get("sslyze-no-certs", False) is False: + queue(CertificateInfoScanCommand()) + + # Reassign them back to predictable places after they're all done + was_error = False + for result in scanner.get_results(): + try: + if isinstance(result, PluginRaisedExceptionScanResult): + logging.warn(u'Scan command failed: {}'.format(result.as_text())) + return None + + if type(result.scan_command) == Sslv20ScanCommand: + sslv2 = result + elif type(result.scan_command) == Sslv30ScanCommand: + sslv3 = result + elif type(result.scan_command) == Tlsv10ScanCommand: + tlsv1 = result + elif type(result.scan_command) == Tlsv11ScanCommand: + tlsv1_1 = result + elif type(result.scan_command) == Tlsv12ScanCommand: + tlsv1_2 = result + elif type(result.scan_command) == CertificateInfoScanCommand: + certs = result + else: + logging.warn("\tCouldn't match scan result with command! %s" % result) + was_error = True + + except Exception as err: + logging.warn("\t Exception inside async scanner result processing.") + was_error = True + utils.notify(err) + + # There was an error during async processing. 
+ if was_error: + return None + + logging.debug("\tDone scanning.") + + return sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, certs diff --git a/scanners/utils.py b/scanners/utils.py index ba068d83..739a9c48 100644 --- a/scanners/utils.py +++ b/scanners/utils.py @@ -111,6 +111,12 @@ def write(content, destination, binary=False): f.close() +def read(source): + with open(source) as f: + contents = f.read() + return contents + + def report_dir(): return options().get("output", "./") @@ -191,7 +197,7 @@ def cache_single(filename): def data_for(domain, operation): path = cache_path(domain, operation) if os.path.exists(path): - raw = open(path).read() + raw = read(path) data = json.loads(raw) if isinstance(data, dict) and (data.get('invalid', False)): return None