# SEC LAB

In [None]:
# Install dependencies.
!pip install git+https://github.com/IBM/ibm-security-notebooks.git 

In [None]:
from pyclient.stix_shifter_dataframe import StixShifterDataFrame
from dateutil import parser
import re
import pandas as pd
from collections import Counter
import math
import hashlib


In [None]:
# Ready-made STIX bundles to use.
# First bundle: Carbon Black observables from the stix-shifter repository (master branch).
carbon_black_stix_bundle_1 = 'https://raw.githubusercontent.com/opencybersecurityalliance/stix-shifter/master/data/cybox/carbon_black/carbon_black_observable.json'

# Configuration for the first bundle: the 'stix_bundle' module is used for both
# translation and transmission, the bundle URL acts as the host, and the auth
# fields are left unset.
sb_config_1 = dict(
    translation_module='stix_bundle',
    transmission_module='stix_bundle',
    connection=dict(host=carbon_black_stix_bundle_1, port=443),
    configuration=dict(auth=dict(username=None, password=None)),
    data_source='{"type": "identity", "id": "identity--3532c56d-ea72-48be-a2ad-1a53f4c9c6d3", "name": "stix_boundle", "identity_class": "events"}',
)

In [None]:
# Second bundle: Carbon Black observed data from the stix-shifter repository (develop branch).
carbon_black_stix_bundle_2 = 'https://raw.githubusercontent.com/opencybersecurityalliance/stix-shifter/develop/data/cybox/carbon_black/cb_observed_156.json'

# Configuration for the second bundle: the 'stix_bundle' module is used for both
# translation and transmission, the bundle URL acts as the host, and the auth
# fields are left unset.
sb_config_2 = dict(
    translation_module='stix_bundle',
    transmission_module='stix_bundle',
    connection=dict(host=carbon_black_stix_bundle_2, port=443),
    configuration=dict(auth=dict(username=None, password=None)),
    data_source='{"type": "identity", "id": "identity--3532c56d-ea72-48be-a2ad-1a53f4c9c6d3", "name": "stix_boundle", "identity_class": "events"}',
)

In [None]:
def get_duration(duration):
    """Format a ``datetime.timedelta`` as ``"<d>d <h>h <m>m <s>.<mmm>s"``.

    Args:
        duration: object exposing ``days``, ``seconds`` and ``microseconds``
            attributes (a ``datetime.timedelta``).

    Returns:
        A string such as ``"2d 3h 4m 5.678s"``. The fractional part is the
        millisecond count, truncated (not rounded) from the microseconds and
        always three digits wide.
    """
    days, seconds = duration.days, duration.seconds
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    millis = duration.microseconds // 1000
    # Zero-pad the milliseconds: without padding 5 ms would render as ".5s",
    # which reads as 500 ms.
    return f"{days}d {hours}h {minutes}m {seconds}.{millis:03d}s"

In [None]:
def defang(url):
    """Return ``url`` with every occurrence of ``http`` rewritten to ``hxxp``.

    The match is case-sensitive and is not restricted to the scheme; no other
    part of the string (dots, host, path) is altered.
    """
    live_token = 'http'
    inert_token = 'hxxp'
    return re.sub(live_token, inert_token, url)

# Fetch data for processes spawned by cmd using stix-shifter

In [None]:
# Register both ready-made bundles as named data sources.
ssdf = StixShifterDataFrame()
for bundle_name, bundle_config in (
    ('cb_stix_bundle_1', sb_config_1),
    ('cb_stix_bundle_2', sb_config_2),
):
    ssdf.add_config(bundle_name, bundle_config)


# STIX patterning is the query language used by stix-shifter.
# See http://docs.oasis-open.org/cti/stix/v2.0/cs01/part5-stix-patterning/stix-v2.0-cs01-part5-stix-patterning.html
cmd_query = "[process:name = 'cmd.exe']"

# Run the same pattern against both registered bundles; the result is kept in `df`.
df = ssdf.search_df(
    query=cmd_query,
    config_names=['cb_stix_bundle_1', 'cb_stix_bundle_2'],
)

# Prepare features

In [None]:
# Need to compute some fields used in feature extraction like command_line_entropy.

import math
def is_prime(n):
    """Return True if integer ``n`` is prime, by trial division.

    Args:
        n: integer to test.

    Returns:
        False for every ``n`` below 2 (including 0, 1 and negatives),
        otherwise whether ``n`` has no divisor other than 1 and itself.
    """
    # 0, 1 and negative numbers are not prime; this guard also keeps
    # math.sqrt from being called on a negative value.
    if n < 2:
        return False
    if n % 2 == 0:
        # 2 is the only even prime.
        return n == 2
    # Only odd candidates up to sqrt(n) need checking.
    return all(n % i for i in range(3, int(math.sqrt(n)) + 1, 2))

# def get_str_hash(x, divisor):
#     return hash(x) % divisor

def get_str_hash(x, divisor):
    """Hash string ``x`` with MD5 and reduce the digest modulo ``divisor``.

    Args:
        x: text to hash; it is encoded as UTF-8 first.
        divisor: modulus applied to the 128-bit digest read as an integer.

    Returns:
        An int in ``range(divisor)`` for a positive ``divisor``.
    """
    # Presumably MD5 is used instead of the built-in hash() so the value is
    # the same on every run; verify if that matters for the model.
    digest = hashlib.md5(x.encode("utf-8")).digest()
    # Big-endian interpretation matches parsing the hex digest as base 16.
    return int.from_bytes(digest, "big") % divisor

def entropy(s):
    """Shannon entropy of ``s`` in bits, over its character frequencies.

    An empty ``s`` yields 0 because there are no terms to sum.
    """
    total_chars = float(len(s))
    probabilities = [occurrences / total_chars for occurrences in Counter(s).values()]
    return -sum(p * math.log(p, 2) for p in probabilities)

# Build the model's input columns on `df` and record their names, in order,
# in `features`.
cmd_lines = df['process:command_line']

# One hashed feature per prime below 50: MD5 of the command line modulo that prime.
primes = [i for i in range(2, 50) if is_prime(i)]
features = []
for prime in primes:
    feature_name = 'cmd_hash_dv' + str(prime)
    # The divisor is forwarded by Series.apply as a keyword argument, so no
    # lambda closing over the loop variable is needed.
    df[feature_name] = cmd_lines.apply(get_str_hash, divisor=prime)
    features.append(feature_name)

features.extend(['suspicion_score', 'command_line_entropy', 'command_line_len'])

# 'suspicion_score' is filled with a constant 0 -- presumably a placeholder
# column the deployed model expects among its inputs (TODO confirm).
df['suspicion_score'] = 0
# Element-wise Series.apply instead of row-wise DataFrame.apply(axis=1): only
# one column is read, it avoids building a Series per row, and it always
# returns a Series (row-wise apply can hand back a DataFrame when the frame is empty).
df['command_line_entropy'] = cmd_lines.apply(entropy)
df['command_line_len'] = cmd_lines.apply(len)

In [None]:
# Restrict the frame to the model inputs, keeping the column order from `features`.
test_df = df[features]

# Request body with a single input_data entry: column names under "fields",
# row values (as nested lists) under "values".
scoring_payload = dict(
    input_data=[
        dict(
            fields=list(test_df.columns),
            values=test_df.values.tolist(),
        ),
    ]
)

# Get prediction

# Get prediction using WML API

In [None]:
# Score the prepared payload against a Watson Machine Learning deployment.
# Both "FILLME" placeholders below must be replaced before this cell can run;
# avoid saving or committing the notebook with a real API key in it.
from ibm_watson_machine_learning import APIClient
wml_credentials = {
    "apikey":"FILLME",
    "url": "https://eu-gb.ml.cloud.ibm.com"
}
# Identifier of the deployment that `scoring_payload` is sent to.
sus_c64_deployment_id = 'FILLME'

wml_client = APIClient(wml_credentials)

# Use the first space listed for these credentials as the default space.
# NOTE(review): this raises if the account has no spaces, and picks whichever
# space is listed first when there are several -- confirm that is intended.
spaces_detail = wml_client.spaces.get_details()
wml_client.set.default_space(spaces_detail['resources'][0]['metadata']['id'])

# Send the payload to the deployment and keep the response in `pred`.
pred = wml_client.deployments.score(sus_c64_deployment_id, scoring_payload)

In [None]:
# Show the raw response returned by the scoring call.
pred

In [None]:
# The prediction above corresponds to the suspicion score for the rows in df.
# Only the last expression of a cell is rendered automatically, so the preview
# is shown explicitly (`display` is provided by the IPython kernel).
display(df.head())
df.iloc[0]["process:command_line"]
# Notice certutil connecting to an IP address and downloading c64.exe, which eventually runs with f64.data.
# This is classic APT41 behaviour.