In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

---

In [1]:
from functional import seq

In [2]:
feed = seq.json("../data/hellokitty.feeds.json").to_dict()
stix = seq.json("../data/hellokitty.stix21.6047944bd5bd6b6f323e59fa.json").to_dict()

In [3]:
feed

{'id': '6047944bd5bd6b6f323e59fa',
 'name': 'HelloKitty Ransomware Lacks Stealth, But Still Strikes Home',
 'description': 'HelloKitty is a ransomware family that emerged in late 2020. While it lacks the sophistication of some of the more well-known families such as Ryuk, REvil, and Conti, it has nevertheless struck some notable targets, including CEMIG0. In this post, Sentinel Labs analyzes a recent HelloKitty sample and outlines the basic behaviors and traits associated with this family of ransomware.',
 'author_name': 'AlienVault',
 'modified': '2021-03-09T15:29:14.999000',
 'created': '2021-03-09T15:29:14.999000',
 'revision': 1,
 'tlp': 'white',
 'public': 1,
 'adversary': '',
 'indicators': [{'id': 2892282605,
   'indicator': '136bd70f7aa98f52861879d7dca03cf2',
   'type': 'FileHash-MD5',
   'created': '2021-03-09T15:29:16',
   'content': '',
   'title': 'Ransom:Win32/Death.DB!MTB',
   'description': 'MD5 of fadd8d7c13a18c251ded1f645ffea18a37f1c2de',
   'expiration': None,
   'is_

In [5]:
stix

{'id': 'bundle--027a551d-bdba-4d19-b75d-ccc9b7f8f0c5',
 'objects': [{'created': '2021-03-09T15:29:14.999Z',
   'created_by_ref': 'identity--ab072f15-9b87-4ee1-898f-b584d41f29b0',
   'description': 'HelloKitty is a ransomware family that emerged in late 2020. While it lacks the sophistication of some of the more well-known families such as Ryuk, REvil, and Conti, it has nevertheless struck some notable targets, including CEMIG0. In this post, Sentinel Labs analyzes a recent HelloKitty sample and outlines the basic behaviors and traits associated with this family of ransomware.',
   'external_references': [{'source_name': 'web',
     'url': 'https://labs.sentinelone.com/hellokitty-ransomware-lacks-stealth-but-still-strikes-home/'}],
   'id': 'report--027a551d-bdba-4d19-b75d-ccc9b7f8f0c5',
   'labels': ['threat-report'],
   'modified': '2021-03-09T15:29:14.999Z',
   'name': 'HelloKitty Ransomware Lacks Stealth, But Still Strikes Home',
   'object_refs': ['identity--ab072f15-9b87-4ee1-898f

<IPython.core.display.Javascript object>

---

---

# Path

In [6]:
from pathlib import Path, PosixPath, WindowsPath
from pathlib import PurePath, PureWindowsPath

<IPython.core.display.Javascript object>

---

---

# pyparsing



- YARA
- ~~FileHash-SHA256~~  (Length=64)
- ~~FileHash-MD5 / FileHash-IMPHASH / JA3~~ (Length=32)
- ~~FileHash-SHA1 / FileHash-PEHASH~~ (Length=40)
- ~~BitcoinAddress~~ (Length=34)

- ~~domain / hostname~~
- ~~URL / URI~~
- Mutex
- FilePath

- ~~email~~
- ~~CVE~~
- ~~SSLCertFingerprint~~
- ~~IPv4~~
- ~~IPv6~~


---

# Modulize Domain / Hostname / Email / URL / URI Parser


In [10]:
from pyparsing import (
    Combine,
    Word,
    Keyword,
    Literal,
    alphas,
    nums,
    hexnums,
    alphanums,
    oneOf,
    OneOrMore,
    ZeroOrMore,
    Optional,
    printables,
    pyparsing_common,
)

## Domain

- Get tlds list

In [7]:
import requests

# get top layer domains
response = requests.get("https://data.iana.org/TLD/tlds-alpha-by-domain.txt")

regular_domains = response.text.lower().splitlines()

# add special domains
# https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
special_use_domains = ["example", "invalid", "local", "localhost", "onion", "test"]
blackchain_registered_domains = ["eth", "zil", "crypto", "bit"]

# merge all domains to text
tlds = regular_domains + special_use_domains + blackchain_registered_domains
tlds_text = (
    seq(tlds)
    .drop(1)
    .filter_not(lambda tld: tld.startswith("xn--"))
    .sorted()
    .make_string(" ")
)

- Verify domain and hostname

In [8]:
import tldextract

In [11]:
tld = OneOrMore("." + oneOf(tlds_text, asKeyword=True))
prefix = Word(alphanums.lower() + "-", excludeChars="/")

In [14]:
def verify_domain(string, loc, tokens):
    # tokens is pyparsing.ParseResults
    extraction = tldextract.extract(tokens[0])
    verification = tokens[0] == "{}.{}".format(extraction.domain, extraction.suffix)
    return verification

In [15]:
domain = (
    Combine(prefix + ZeroOrMore("." + prefix, stopOn=tld) + tld)
    .addCondition(verify_domain)
    .setResultsName("domain")
)

In [16]:
domain

Combine:({{W:(-0-9a-z) [{'.' W:(-0-9a-z)}]...} {{'.' aaa | aarp | abarth | abbott | abbvie | abb | abc | able | abogado | abudhabi | academy | accenture | accountants | accountant | aco | actor | ac | ads | adult | ad | aeg | aero | aetna | ae | afl | africa | af | agakhan | agency | ag | aig | airbus | airforce | airtel | ai | akdn | alfaromeo | alibaba | alipay | allfinanz | allstate | ally | alsace | alstom | al | amazon | americanexpress | americanfamily | amex | amfam | amica | amsterdam | am | analytics | android | anquan | anz | aol | ao | apartments | apple | app | aquarelle | aq | arab | aramco | archi | army | arpa | arte | art | ar | asda | asia | associates | as | athleta | attorney | at | auction | audible | audio | audi | auspost | author | autos | auto | au | avianca | aws | aw | axa | ax | azure | az | baby | baidu | banamex | bananarepublic | band | bank | barcelona | barclaycard | barclays | barefoot | bargains | bar | baseball | basketball | bauhaus | bayern | ba | b

## Hostname

In [13]:
def verify_hostname(string, loc, tokens):

    # tokens is pyparsing.ParseResults
    extraction = tldextract.extract(tokens[0][0])
    verification = tokens[0][0] == "{}.{}.{}".format(
        extraction.subdomain, extraction.domain, extraction.suffix
    )
    return verification

<IPython.core.display.Javascript object>

In [14]:
hostname = (
    Combine(prefix + OneOrMore("." + domain | "." + prefix))
    .addCondition(verify_hostname)
    .setResultsName("hostname")
)

<IPython.core.display.Javascript object>

---

In [15]:
seq(hostname.searchString("https://attack.mitre.org/techniques/T1003/")).map(
    lambda r: (r.getName(), r.asDict())
)

0,1
hostname,{'hostname': {'domain': 'mitre.org'}}


<IPython.core.display.Javascript object>

---

## IP

In [16]:
# IPv4
ipv4 = pyparsing_common.ipv4_address.setResultsName("IPv4")

# IPv6
ipv6 = pyparsing_common.ipv6_address.setResultsName("IPv6")

<IPython.core.display.Javascript object>

## URL

In [17]:
url = Combine(
    oneOf("https:// http://")
    + (hostname | domain | ipv4 | ipv6)
    + "/"
    + ZeroOrMore(Word(alphanums + "-.") + "/")
    # + Optional(Word(alphanums))
).setResultsName("URL")

<IPython.core.display.Javascript object>

## URI

In [18]:
uri = Combine(url + Word(printables)).setResultsName("URI")

<IPython.core.display.Javascript object>

## Email

In [19]:
email = Combine(Word(alphanums + "-_.") + "@" + (domain | hostname)).setResultsName(
    "email"
)

<IPython.core.display.Javascript object>

## FilePath

In [20]:
filename = Combine(
    Word(alphanums + "-_") + OneOrMore("." + Word(alphanums))
).setResultsName("FileName")

windows_filepath = Combine(
    Optional(oneOf(alphas.upper()) + ":\\")
    + OneOrMore(Word(printables, excludeChars="\\") + "\\")
    + filename
)

unixlike_filepath = Combine(
    "/" + OneOrMore(Word(printables, excludeChars="/") + "/") + filename
)

filepath = (windows_filepath | unixlike_filepath | filename).setResultsName("FilePath")

<IPython.core.display.Javascript object>

---

## Hash-based

In [21]:
from scipy.stats import entropy
from collections import Counter


def entropy_threshold(string, loc, tokens, base=22, threshold=0.8):
    char_counts = Counter(tokens[0])
    # length of hexnums (A-Fa-f0-9) is 22, length of hexnums lowercase (a-f0-9) is 17
    char_entropy = entropy(list(char_counts.values()), base=base)
    # print(char_entropy)
    return char_entropy > threshold

<IPython.core.display.Javascript object>

In [22]:
# FileHash-SHA256
# length of hexnums lowercase (a-f0-9) is 17
sha256 = (
    Word(hexnums.lower(), exact=64)
    .addCondition(
        lambda string, loc, tokens: entropy_threshold(
            string, loc, tokens, base=17, threshold=0.9
        )
    )
    .setResultsName("FileHash-SHA256")
)

<IPython.core.display.Javascript object>

---

### Length = 32

In [23]:
# FileHash-MD5

md5 = (
    Word(hexnums.lower(), exact=32)
    .addCondition(
        lambda string, loc, tokens: entropy_threshold(
            string, loc, tokens, base=17, threshold=0.8
        )
    )
    .setResultsName("FileHash-MD5")
)

<IPython.core.display.Javascript object>

In [24]:
# JA3

ja3 = (
    Word(hexnums.lower(), exact=32)
    .addCondition(
        lambda string, loc, tokens: entropy_threshold(
            string, loc, tokens, base=17, threshold=0.8
        )
    )
    .setResultsName("JA3")
)

<IPython.core.display.Javascript object>

In [25]:
# FileHash-IMPHASH

imphash = (
    Word(hexnums.lower(), exact=32)
    .addCondition(
        lambda string, loc, tokens: entropy_threshold(
            string, loc, tokens, base=17, threshold=0.8
        )
    )
    .setResultsName("FileHash-IMPHASH")
)

<IPython.core.display.Javascript object>

### Length = 40

In [26]:
# FileHash-SHA1

sha1 = (
    Word(hexnums.lower(), exact=40)
    .addCondition(
        lambda string, loc, tokens: entropy_threshold(
            string, loc, tokens, base=17, threshold=0.8
        )
    )
    .setResultsName("FileHash-SHA1")
)

<IPython.core.display.Javascript object>

In [27]:
# FileHash-PEHASH

pehash = (
    Word(hexnums.lower(), exact=40)
    .addCondition(
        lambda string, loc, tokens: entropy_threshold(
            string, loc, tokens, base=17, threshold=0.8
        )
    )
    .setResultsName("FileHash-PEHASH")
)

<IPython.core.display.Javascript object>

### Length = 34

In [28]:
# BitcoinAddress

bitcoin_address = (
    Word(alphanums, exact=34)
    .addCondition(
        lambda string, loc, tokens: entropy_threshold(
            string, loc, tokens, base=62, threshold=0.7
        )
    )
    .setResultsName("BitcoinAddress")
)

<IPython.core.display.Javascript object>

---

## Others

In [29]:
# CVE
cve = Combine(
    Keyword("CVE") + "-" + Word(nums, exact=4) + "-" + Word(nums, min=4)
).setResultsName("CVE")

<IPython.core.display.Javascript object>

In [30]:
# SSLCertFingerprint
ssl_cert_fingerprint = Combine(
    Word(hexnums.lower(), exact=2) + (":" + Word(hexnums.lower(), exact=2)) * 19
).setResultsName("SSLCertFingerprint")

<IPython.core.display.Javascript object>

In [31]:
# ATT&CK Technique
mitre_attack_technique = Combine(
    Literal("T1") + Word(nums, exact=3) + Optional(".0" + Word(nums, exact=2))
).setResultsName("MitreAttackTechnique")

<IPython.core.display.Javascript object>

---

---

# Parser

In [32]:
parser = (
    uri
    | url
    | email
    | hostname
    | domain
    | ssl_cert_fingerprint
    | ipv6
    | ipv4
    | cve
    | mitre_attack_technique
    | filepath
    | sha256  # len = 64
    | sha1  # len = 40
    | pehash  # len = 40
    | bitcoin_address  # len = 34
    | md5  # len = 32
    | imphash  # len = 32
    | ja3  # len = 32
)

<IPython.core.display.Javascript object>

---

# Parsing Result

In [33]:
sample_text = (
    "email, adonilifranky@gmail.com "
    "Once launched, HelloKitty will attempt to disable and terminate a number of processes and services so as to reduce interference with the encryption process. This includes processes and services associated with IIS, MSSQL, Quickbooks, Sharepoint, and more. These actions are carried out via taskkill.exe and net.exe."
    "It is also important to note that as of this writing, the onion address associated with HelloKitty ransom notes is not active. 6x7dp6h3w6q3ugjv4yv5gycj3femb24kysgry5b44hhgfwc5ml5qrdad.onion Conclusion HelloKitty may be easier to spot than other modern ransomware families, but upon execution it is no less dangerous. There are currently no known ‘weaknesses’ in the encryption routines, and there are no thirdy-party decrypters available for the HelloKitty ransomware. Therefore, the only true defense is prevention. While this family does not appear to be actively leaking victim data at the moment, that could change at any point, in addition to them choosing to adopt some of the more recent extortion methods that go along with ransomware (DDoS). Actors behind the more recent campaign(s) are reportedly attempting to auction the CD Projekt data off in various ‘underground’ forums. At present this sale of this data does appear to be legitimate. Time will tell if additional victim data is dealt with in the same way. To protect yourself against HelloKitty, make sure you are armed with a modern Endpoint Security platform, which is configured correctly and up to date. The SentinelOne Singularity Platform is fully capable of preventing and detecting all malicious behaviors associated with the HelloKitty ransomware family. IOCs SHA1 fadd8d7c13a18c251ded1f645ffea18a37f1c2de SHA256 501487b025f25ddf1ca32deb57a2b4db43ccf6635c1edc74b9cff54ce0e5bcfe MITRE ATT&CK Data from Local System – T1005 Modify Registry – T1112 Query Registry – T1012 System Information Discovery – T1082 Data Encrypted for Impact – T1486 File Deletion – T1070.004 Command and Scripting Interpreter: Windows Command Shell – T1059.003 Windows Management Instrumentation – T1047"
)

<IPython.core.display.Javascript object>

In [34]:
parser.searchString(sample_text)

([([(['adonilifranky@gmail.com'], {'domain': ['gmail.com']})], {'email': [(['adonilifranky@gmail.com'], {'domain': ['gmail.com']})]}), (['taskkill.exe'], {'FileName': ['taskkill.exe'], 'FilePath': ['taskkill.exe']}), (['net.exe.It'], {'FileName': ['net.exe.It'], 'FilePath': ['net.exe.It']}), (['6x7dp6h3w6q3ugjv4yv5gycj3femb24kysgry5b44hhgfwc5ml5qrdad.onion'], {'domain': ['6x7dp6h3w6q3ugjv4yv5gycj3femb24kysgry5b44hhgfwc5ml5qrdad.onion']}), (['fadd8d7c13a18c251ded1f645ffea18a37f1c2de'], {'FileHash-SHA1': ['fadd8d7c13a18c251ded1f645ffea18a37f1c2de']}), (['501487b025f25ddf1ca32deb57a2b4db43ccf6635c1edc74b9cff54ce0e5bcfe'], {'FileHash-SHA256': ['501487b025f25ddf1ca32deb57a2b4db43ccf6635c1edc74b9cff54ce0e5bcfe']}), (['T1005'], {'MitreAttackTechnique': ['T1005']}), (['T1112'], {'MitreAttackTechnique': ['T1112']}), (['T1012'], {'MitreAttackTechnique': ['T1012']}), (['T1082'], {'MitreAttackTechnique': ['T1082']}), (['T1486'], {'MitreAttackTechnique': ['T1486']}), (['T1070.004'], {'MitreAttackTe

<IPython.core.display.Javascript object>

In [35]:
result, start, end = list(parser.scanString(sample_text))[0]

<IPython.core.display.Javascript object>

In [36]:
result.asDict()

{'email': {'domain': 'gmail.com'}}

<IPython.core.display.Javascript object>

In [37]:
parsing_results = parser.scanString(sample_text)

<IPython.core.display.Javascript object>

In [38]:
seq(parser.scanString(sample_text)).starmap(
    lambda result, start, end: (result, result.asDict(), result.getName(), (start, end))
)

0,1,2,3
[['adonilifranky@gmail.com']],{'email': {'domain': 'gmail.com'}},email,"(7, 30)"
['taskkill.exe'],"{'FileName': 'taskkill.exe', 'FilePath': 'taskkill.exe'}",FilePath,"(321, 333)"
['net.exe.It'],"{'FileName': 'net.exe.It', 'FilePath': 'net.exe.It'}",FilePath,"(338, 348)"
['6x7dp6h3w6q3ugjv4yv5gycj3femb24kysgry5b44hhgfwc5ml5qrdad.onion'],{'domain': '6x7dp6h3w6q3ugjv4yv5gycj3femb24kysgry5b44hhgfwc5ml5qrdad.onion'},domain,"(473, 535)"
['fadd8d7c13a18c251ded1f645ffea18a37f1c2de'],{'FileHash-SHA1': 'fadd8d7c13a18c251ded1f645ffea18a37f1c2de'},FileHash-SHA1,"(1687, 1727)"
['501487b025f25ddf1ca32deb57a2b4db43ccf6635c1edc74b9cff54ce0e5bcfe'],{'FileHash-SHA256': '501487b025f25ddf1ca32deb57a2b4db43ccf6635c1edc74b9cff54ce0e5bcfe'},FileHash-SHA256,"(1735, 1799)"
['T1005'],{'MitreAttackTechnique': 'T1005'},MitreAttackTechnique,"(1838, 1843)"
['T1112'],{'MitreAttackTechnique': 'T1112'},MitreAttackTechnique,"(1862, 1867)"
['T1012'],{'MitreAttackTechnique': 'T1012'},MitreAttackTechnique,"(1885, 1890)"
['T1082'],{'MitreAttackTechnique': 'T1082'},MitreAttackTechnique,"(1922, 1927)"


<IPython.core.display.Javascript object>

---

---

# Testing

## Generate Indicator Testset for Regex

```python
import joblib

feeds = joblib.load("../pulse_20210310.pkl")

# get test indicators for regex
test_indicators = (
    seq(feeds)
    .map(lambda feed: feed.get("indicators"))
    .flatten()
    .map(lambda indicator: (indicator.get("type"), indicator.get("indicator")))
    .group_by_key()
    .starmap(
        lambda _type, _indicators: seq(_indicators)
        .take(10)
        .map(lambda _indicator: "{}, {}\n".format(_type, _indicator))
    )
    .flatten()
    .to_list()
)
```

```python
with open("../data/indicator_testset.csv", "w") as f:
    f.writelines(test_indicators)
```

In [145]:
indicator_testset = seq.open("../data/indicator_testset.csv").to_list()
indicator_testset

['FileHash-SHA256, 097549cf7d0f76f0d99edf8b2d91c60977fd6a96e4b8c3c94b0b1733dc026d3e\n',
 'FileHash-SHA256, 1631a90eb5395c4e19c7dbcbf611bbe6444ff312eb7937e286e4637cb9e72944\n',
 'FileHash-SHA256, 2b6f1ebb2208e93ade4a6424555d6a8341fd6d9f60c25e44afe11008f5c1aad1\n',
 'FileHash-SHA256, 4edc7770464a14f54d17f36dc9d0fe854f68b346b27b35a6f5839adf1f13f8ea\n',
 'FileHash-SHA256, 511df0e2df9bfa5521b588cc4bb5f8c5a321801b803394ebc493db1ef3c78fa1\n',
 'FileHash-SHA256, 65149e036fff06026d80ac9ad4d156332822dc93142cf1a122b1841ec8de34b5\n',
 'FileHash-SHA256, 811157f9c7003ba8d17b45eb3cf09bef2cecd2701cedb675274949296a6a183d\n',
 'FileHash-SHA256, b75f163ca9b9240bf4b37ad92bc7556b40a17e27c2b8ed5c8991385fe07d17d0\n',
 'FileHash-SHA256, 893cd3583b49cb706b3e55ecb2ed0757b977a21f5c72e041392d1256f31166e2\n',
 'FileHash-SHA256, 2fa06333188795110bba14a482020699a96f76fb1ceb80cbfa2df9d3008b5b0a\n',
 'CVE, CVE-2021-26858\n',
 'CVE, CVE-2021-26855\n',
 'CVE, CVE-2021-27065\n',
 'CVE, CVE-2021-26857\n',
 'CVE, CVE-2019-

<IPython.core.display.Javascript object>

In [38]:
seq(indicator_testset).map(lambda i: (i, uri.searchString(i))).filter(
    lambda x: len(x[1]) > 0
).show(40)

-----------------------------------------------------------  --------------------------------------------------------------
URL, https://morrislibraryconsulting.com/favicam/gertnm.php  [[['https://morrislibraryconsulting.com/favicam/gertnm.php']]]
URL, http://45.145.185.83/S1eJ3/IObeENwjarm                  [[['http://45.145.185.83/S1eJ3/IObeENwjarm']]]
URL, http://45.145.185.83/S1eJ3/IObeENwjarm4                 [[['http://45.145.185.83/S1eJ3/IObeENwjarm4']]]
URI, http://adminpanel.000a.biz/rec.php                      [[['http://adminpanel.000a.biz/rec.php']]]
-----------------------------------------------------------  --------------------------------------------------------------


<IPython.core.display.Javascript object>

---

---

# DNS

In [39]:
import dns

n = dns.name.from_text("yahoo.co.jp")

n.is_absolute()
n.is_wild()
n.labels
n.parent()

AttributeError: module 'dns' has no attribute 'name'

<IPython.core.display.Javascript object>

## https://lookup.icann.org/

In [None]:
import dns.resolver

In [None]:
for rdata in dns.resolver.resolve("www.yahoo.com", "CNAME"):
    print(rdata.target)

In [None]:
!dig www.pythondns.org

# answers = dns.resolver.resolve("yahoo.com.tw", "A")
answers = dns.resolver.resolve("www.dnspython.org", "A")

for rdata in answers:
    print(rdata)
    # print(rdata.address)
    # print(rdata.covers())

# answers = dns.resolver.resolve("yahoo.com.tw", "A")
answers = dns.resolver.resolve("dnspython.org", "A")

for rdata in answers:
    print(rdata)
    # print(rdata.address)
    # print(rdata.covers())

answers = dns.resolver.resolve("dnspython.org", "MX")

for rdata in answers:
    print(rdata)
    print(rdata.exchange)
    print(rdata.preference)
    # print(rdata.address)
    # print(rdata.covers())

answers = dns.resolver.resolve("www.dnspython.org", "CNAME")

for rdata in answers:
    # print(dir(rdata))
    print(rdata.rdclass)
    print(rdata.covers())
    print(rdata.target)