# CERTUTIL hunt

This notebook collects all `cmd.exe` and `certutil.exe` process executions in order to find suspicious activity.

This example demonstrates how to find suspicious executions in which certutil.exe is used to download a payload and is then used again to carry out the attack.

In [8]:
# Install dependencies.
!pip3 install git+https://github.com/IBM/ibm-security-notebooks.git --user

Collecting git+https://github.com/IBM/ibm-security-notebooks.git
  Cloning https://github.com/IBM/ibm-security-notebooks.git to /private/var/folders/_0/cmjj5d1d0y95znfyc3fw43sm0000gn/T/pip-req-build-dqazp9wc
  Running command git clone -q https://github.com/IBM/ibm-security-notebooks.git /private/var/folders/_0/cmjj5d1d0y95znfyc3fw43sm0000gn/T/pip-req-build-dqazp9wc


Building wheels for collected packages: ibm-security-notebooks
  Building wheel for ibm-security-notebooks (setup.py) ... done
  Created wheel for ibm-security-notebooks: filename=ibm_security_notebooks-0.0.1-cp38-none-any.whl size=13271 sha256=28f4cbddf7ba27dc139a6024d316e2218dd273ab24127f32bdc269e5632aa9d8
  Stored in directory: /private/var/folders/_0/cmjj5d1d0y95znfyc3fw43sm0000gn/T/pip-ephem-wheel-cache-t86spauv/wheels/9e/ef/24/3e71756937f716b442b11861bcbc6633e8910eb6ba39edae7a
Successfully built ibm-security-notebooks
You should consider upgrading via the 'pip install --upgrade pip' command.


In [11]:
from pyclient.stix_shifter_dataframe import StixShifterDataFrame
from dateutil import parser
import re
import pandas as pd
from collections import Counter
import math
import hashlib


ModuleNotFoundError: No module named 'pyclient'

In [2]:
# Ready-made STIX bundle (Carbon Black observables) used as the first data source.
carbon_black_stix_bundle_1 = 'https://raw.githubusercontent.com/opencybersecurityalliance/stix-shifter/master/data/cybox/carbon_black/carbon_black_observable.json'

# Connector configuration: the connection host is the bundle URL above and
# no credentials are supplied (username and password are both None).
sb_config_1 = dict(
    translation_module='stix_bundle',
    transmission_module='stix_bundle',
    connection=dict(host=carbon_black_stix_bundle_1, port=443),
    configuration=dict(auth=dict(username=None, password=None)),
    data_source='{"type": "identity", "id": "identity--3532c56d-ea72-48be-a2ad-1a53f4c9c6d3", "name": "stix_boundle", "identity_class": "events"}',
)

In [3]:
# Second ready-made STIX bundle (Carbon Black observed data).
carbon_black_stix_bundle_2 = 'https://raw.githubusercontent.com/opencybersecurityalliance/stix-shifter/develop/data/cybox/carbon_black/cb_observed_156.json'

# Connector configuration: the connection host is the bundle URL above and
# no credentials are supplied (username and password are both None).
sb_config_2 = dict(
    translation_module='stix_bundle',
    transmission_module='stix_bundle',
    connection=dict(host=carbon_black_stix_bundle_2, port=443),
    configuration=dict(auth=dict(username=None, password=None)),
    data_source='{"type": "identity", "id": "identity--3532c56d-ea72-48be-a2ad-1a53f4c9c6d3", "name": "stix_boundle", "identity_class": "events"}',
)

In [4]:
def get_duration(duration):
    """Format a timedelta-like value as ``"<days>d <hours>h <minutes>m <seconds>.<millis>s"``.

    Args:
        duration: object exposing ``days``, ``seconds`` and ``microseconds``
            attributes, e.g. ``datetime.timedelta``.

    Returns:
        str: human-readable duration such as ``"1d 2h 3m 4.005s"``. The
        millisecond part is always three digits; microseconds below one
        millisecond are truncated.
    """
    hours, remainder = divmod(duration.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    milliseconds = duration.microseconds // 1000
    # Zero-pad the fraction: without padding 5 ms would print as ".5s",
    # which reads as 500 ms.
    return f"{duration.days}d {hours}h {minutes}m {seconds}.{milliseconds:03d}s"

In [5]:
def defang(url):
    """Defang a URL string by rewriting every ``http`` as ``hxxp``.

    Matching is case-insensitive so that ``HTTP://`` or ``Http://`` are
    rewritten as well; the case of each replaced letter is preserved
    (``t`` -> ``x``, ``T`` -> ``X``). Every occurrence in the string is
    rewritten, not only a leading scheme.

    Args:
        url: text containing zero or more URLs.

    Returns:
        str: ``url`` with each ``http`` occurrence rewritten.
    """
    t_to_x = {ord('t'): 'x', ord('T'): 'X'}
    return re.sub('http', lambda match: match.group(0).translate(t_to_x), url, flags=re.IGNORECASE)

# Fetch data for processes spawned by cmd

In [6]:
# Data sources to register, keyed by the name they are queried under.
bundle_configs = {
    'cb_stix_bundle_1': sb_config_1,
    'cb_stix_bundle_2': sb_config_2,
}

ssdf = StixShifterDataFrame()
for config_name, bundle_config in bundle_configs.items():
    ssdf.add_config(config_name, bundle_config)

# stix-shifter uses STIX patterning as its query language
# See http://docs.oasis-open.org/cti/stix/v2.0/cs01/part5-stix-patterning/stix-v2.0-cs01-part5-stix-patterning.html
cmd_query = "[process:name = 'cmd.exe']"

# Run the same pattern against every registered bundle.
df = ssdf.search_df(query=cmd_query, config_names=list(bundle_configs))

NameError: name 'StixShifterDataFrame' is not defined

# Prepare features

In [7]:
import math
def is_prime(n):
    """Return True if the integer ``n`` is prime, False otherwise.

    Args:
        n: integer to test.

    Returns:
        bool: False for every ``n < 2`` (including negatives), otherwise the
        result of trial division by 2 and by the odd numbers up to ``sqrt(n)``.
    """
    if n < 2:
        # 0, 1 and negatives are not prime; math.sqrt would also raise on negatives.
        return False
    if n % 2 == 0:
        # 2 is the only even prime.
        return n == 2
    return all(n % i for i in range(3, int(math.sqrt(n)) + 1, 2))

# def get_str_hash(x, divisor):
#     return hash(x) % divisor

def get_str_hash(x, divisor):
    """Return the MD5 digest of ``x``, read as an integer, modulo ``divisor``.

    Args:
        x: string to hash; it is UTF-8 encoded before hashing.
        divisor: integer modulus applied to the digest value.

    Returns:
        int: ``int(md5(x).hexdigest(), 16) % divisor``.
    """
    digest_hex = hashlib.md5(x.encode("utf-8")).hexdigest()
    return int(digest_hex, 16) % divisor

def entropy(s):
    """Return the Shannon entropy (base 2) of the characters in string ``s``.

    Each distinct character contributes ``-p * log2(p)``, where ``p`` is its
    relative frequency in ``s``. An empty string yields 0.
    """
    total = float(len(s))
    probabilities = [occurrences / total for occurrences in Counter(s).values()]
    return -sum(p * math.log(p, 2) for p in probabilities)

# Build the model input columns on `df` and record their names, in order, in `features`.
# Series.apply is used throughout (instead of row-wise DataFrame.apply) so each
# result stays a Series even when the search returned no rows.
command_lines = df['process:command_line']

# One hash feature per prime divisor below 50: md5(command line) modulo that prime.
features = []
primes = [i for i in range(2, 50) if is_prime(i)]
for i in primes:
    feature_name = 'cmd_hash_dv' + str(i)
    df[feature_name] = command_lines.apply(get_str_hash, divisor=i)
    features.append(feature_name)

features.extend(['suspicion_score', 'command_line_entropy', 'command_line_len'])

# 'suspicion_score' is set to the constant 0 for every row.
df['suspicion_score'] = 0
df['command_line_entropy'] = command_lines.apply(entropy)
df['command_line_len'] = command_lines.apply(len)

In [8]:
# Feature matrix, with columns in the order recorded in `features`.
test_df = df[features]

# Request body: column names under "fields", one list of values per row under "values".
scoring_payload = dict(
    input_data=[
        dict(
            fields=test_df.columns.tolist(),
            values=test_df.values.tolist(),
        ),
    ],
)

# Get prediction

In [12]:
import getpass
import os

import requests

# Credentials must not be stored in the notebook: read the API key from the
# IBM_CLOUD_API_KEY environment variable, or prompt for it without echoing.
# NOTE(review): a key was previously committed in this cell; treat it as leaked and revoke it.
API_KEY = os.environ.get("IBM_CLOUD_API_KEY") or getpass.getpass("IBM Cloud API key: ")

# Upper bound, in seconds, for each HTTP call so a stalled endpoint cannot hang the kernel.
REQUEST_TIMEOUT_SECONDS = 60

# Exchange the API key for a bearer token.
token_response = requests.post(
    'https://iam.eu-gb.bluemix.net/identity/token',
    data={"apikey": API_KEY, "grant_type": 'urn:ibm:params:oauth:grant-type:apikey'},
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Surface an authentication failure as an HTTP error rather than a KeyError on "access_token".
token_response.raise_for_status()
mltoken = token_response.json()["access_token"]

header = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + mltoken}

# Send the feature payload to the deployed model.
response_scoring = requests.post(
    'https://eu-gb.ml.cloud.ibm.com/ml/v4/deployments/57fbd180-1e5f-46d1-aa86-d50f66360d03/predictions?version=2020-10-19',
    json=scoring_payload,
    headers=header,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
print("Scoring response")
response_scoring.json()

Scoring response


{'predictions': [{'fields': ['prediction', 'probability'],
   'values': [[2,
     [0.3793103448275862, 0.1724137931034483, 0.4482758620689655]],
    [2, [0.2988505747126437, 0.3448275862068966, 0.3563218390804598]],
    [0, [0.7816091954022989, 0.022988505747126436, 0.19540229885057472]],
    [0, [0.7586206896551724, 0.034482758620689655, 0.20689655172413793]],
    [0, [0.4482758620689655, 0.2988505747126437, 0.25287356321839083]]]}]}

# Get prediction using WML API

In [13]:
from ibm_watson_machine_learning import APIClient

# Replace every FILLME placeholder before running this cell.
wml_credentials = {
    "apikey":"FILLME",
    "url": "https://eu-gb.ml.cloud.ibm.com"
}
sus_c64_deployment_id = 'FILLME'
# Previously `space_guid` was used below without ever being defined, so the
# cell raised NameError on a fresh kernel.
space_guid = 'FILLME'

wml_client = APIClient(wml_credentials)

# Set space id
# NOTE(review): `spaces_detail` is not used below; presumably it is fetched so
# the id to put in `space_guid` can be looked up by inspecting it. Confirm.
spaces_detail = wml_client.spaces.get_details()
wml_client.set.default_space(space_guid)

# Score the feature payload against the deployment.
pred = wml_client.deployments.score(sus_c64_deployment_id, scoring_payload)

DEPRECATED!! Python 3.6 framework is deprecated and will be removed on Jan 20th, 2021.It will be read-only mode starting Nov 20th, 2020. i.e you won't be able to create new assets using this client.Use Python 3.7 instead. For details, see https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/pm_service_supported_frameworks.html


In [14]:
# Display the result returned by wml_client.deployments.score.
pred

{'predictions': [{'fields': ['prediction', 'probability'],
   'values': [[2,
     [0.3793103448275862, 0.1724137931034483, 0.4482758620689655]],
    [2, [0.2988505747126437, 0.3448275862068966, 0.3563218390804598]],
    [0, [0.7816091954022989, 0.022988505747126436, 0.19540229885057472]],
    [0, [0.7586206896551724, 0.034482758620689655, 0.20689655172413793]],
    [0, [0.4482758620689655, 0.2988505747126437, 0.25287356321839083]]]}]}