# Training a NER on Potential Host Indicators of Compromise
## A Feasibility Study

This notebook only performs setup; running it is not required to continue.

In [1]:
####
# Define Variables & Setup Paths
import sys
from pathlib import (Path, PureWindowsPath, PurePosixPath)


####
# Notebook-wide settings container. Later cells read GLOBAL["PATHS"] to find
# the data and custom-code directories. (A module-level `global` statement is
# a no-op, so a plain assignment is all that is needed here.)
GLOBAL = {}

# Important directories, resolved to absolute paths against the working
# directory in effect when this cell runs.
GLOBAL["PATHS"] = {
  # "PROJECT": Path().resolve(),
  "DATA":    Path("./data").resolve(),       # Store for logs and data
  "IMPORTS": Path("../imports").resolve(),   # Store for custom code
}


####
# Add project paths to python path.
# Use the path's native string form. Forcing it through PureWindowsPath
# rewrites the separators to backslashes on POSIX hosts, producing an entry
# the import system cannot resolve.
_imports_path = str(GLOBAL["PATHS"]["IMPORTS"])
if _imports_path in sys.path:
  # Re-running the cell must not stack duplicate entries.
  sys.path.remove(_imports_path)
sys.path.insert(0, _imports_path)

####
# Create the project directories. `exist_ok=True` makes this a no-op for
# directories that are already present, so no prior existence check is needed.
for _path in GLOBAL["PATHS"].values():
  _path.mkdir(parents=True, exist_ok=True)


In [3]:
####
# Download necessary libraries and resources

# TODO: Cleanup any old instances of the library.
# Evict a previously imported `github` module so the import below is
# resolved again from the current sys.path.
sys.modules.pop('github', None)
from github import GithubDownloader

# Everything is fetched into <DATA>/downloads.
downloads_dir = Path(GLOBAL['PATHS']['DATA'], "downloads")

# Repositories to fetch.
_downloads = [
  "https://github.com/SigmaHQ/sigma",
  "https://github.com/joesecurity/sigma-rules",
  "https://github.com/Neo23x0/evt2sigma",
  "https://github.com/sbousseaden/EVTX-ATTACK-SAMPLES",
  "https://github.com/redcanaryco/atomic-red-team",
]

# NOTE(review): presumably constructing GithubDownloader performs the
# download as a side effect -- confirm against the `github` module.
for url in _downloads:
  GithubDownloader(url, downloads_dir)


In [None]:
####
# RULES: Sigma - Setup Rules & Tools

def move_and_setup(sigma_dest, data_dir=None):
  """Arrange the downloaded Sigma repositories under `sigma_dest`.

  Two optional sources are looked up below `<data_dir>/downloads`:

  * `sigma`       -- every entry is moved into `sigma_dest`, then the emptied
                     source directory is removed.
  * `sigma-rules` -- every entry of its `rules` sub-directory is moved into
                     `sigma_dest/rules`, then the whole source tree is removed.

  A source that does not exist is skipped, so calling the function again
  after a successful run does nothing.

  Args:
    sigma_dest: Directory that receives the Sigma files. It is created
      (with parents) when missing.
    data_dir: Directory that contains `downloads`. Defaults to
      `GLOBAL['PATHS']['DATA']`.

  Raises:
    FileNotFoundError: `sigma-rules` exists but has no `rules` directory.
    shutil.Error: an entry of the same name already exists at the destination.
  """
  import shutil

  def _move_children(src, dest):
    # Move every entry of `src` into the directory `dest`.
    entries = list(src.iterdir())   # snapshot first: the loop mutates `src`
    # The destination must exist as a directory; otherwise shutil.move
    # renames the first entry *to* `dest` instead of moving it inside.
    dest.mkdir(parents=True, exist_ok=True)
    for entry in entries:
      # str(): shutil.move only accepts path-like arguments from Python 3.9.
      shutil.move(str(entry), str(dest))

  data_root  = GLOBAL['PATHS']['DATA'] if data_dir is None else data_dir
  downloads  = Path(data_root, "downloads")
  sigma_dest = Path(sigma_dest)

  # Move Sigma to top level directory.
  sigma_src = downloads / "sigma"
  if sigma_src.exists():
    _move_children(sigma_src, sigma_dest)
    sigma_src.rmdir()

  # Move rules to Sigma directory.
  rules_src = downloads / "sigma-rules"
  if rules_src.exists():
    _move_children(rules_src / "rules", sigma_dest / "rules")
    shutil.rmtree(rules_src)


# Sigma ends up directly under the data directory, in <DATA>/sigma.
move_and_setup(Path(GLOBAL['PATHS']['DATA'], "sigma"))


In [9]:
####
# RULES: Atomic Red - Setup Rules & Tools

import shutil

# Destination for the atomics, and where the downloaded repository lives.
atomic_dir      = Path(GLOBAL['PATHS']['DATA'], "atomic")
atomic_download = Path(GLOBAL['PATHS']['DATA'], "downloads", "atomic-red-team")
atomics_dir     = atomic_download / "atomics"

# Run only once (destination absent) AND only when the download is really
# there. Checking the source first avoids leaving behind an empty `atomic`
# directory that would make every later run skip this cell.
if not atomic_dir.exists() and atomics_dir.is_dir():
  atomic_dir.mkdir(parents=True, exist_ok=True)

  # Move every sub-directory of `atomics` except "Indexes".
  for _dir in [d for d in atomics_dir.iterdir() if d.is_dir() and d.name != "Indexes"]:
    # str(): shutil.move only accepts path-like arguments from Python 3.9.
    shutil.move(str(_dir), str(atomic_dir))

  # Whatever is left of the download is no longer needed.
  shutil.rmtree(atomic_download)



In [12]:
####
# Events: Move files

# Source (downloaded repository) and destination for the event samples.
downloads_dir = Path(GLOBAL['PATHS']['DATA'], "downloads")
evtx_src      = downloads_dir / "EVTX-ATTACK-SAMPLES"
evtx_dest     = Path(GLOBAL['PATHS']['DATA'], "evtx")

# Run only once (destination absent) AND only when the download is really
# there. Checking the source first avoids leaving behind an empty `evtx`
# directory that would make every later run skip this cell.
if not evtx_dest.exists() and evtx_src.is_dir():
  evtx_dest.mkdir(parents=True, exist_ok=True)

  # Keep the sub-directories, skipping hidden ones and any whose name
  # contains "EVTX-ATTACK".
  tactics = [
    e for e in evtx_src.iterdir()
    if e.is_dir() and not e.name.startswith(".") and "EVTX-ATTACK" not in e.name
  ]

  for tactic in tactics:
    # str(): shutil.move only accepts path-like arguments from Python 3.9.
    shutil.move(str(tactic), str(evtx_dest))

  # Whatever is left of the download is no longer needed.
  shutil.rmtree(evtx_src)


## Tools & Resources

- [Sigma](https://github.com/SigmaHQ/sigma) - Generic Signature Format for SIEM Systems.
- [JoeSandbox + Sigma Rules](https://github.com/joesecurity/sigma-rules) - Community written Sigma Rules for threat hunting.
- [EVT2Sigma](https://github.com/Neo23x0/evt2sigma)
- [EVTX Attack Samples](https://github.com/sbousseaden/EVTX-ATTACK-SAMPLES)
- [Atomic Threat Coverage](https://github.com/atc-project/atomic-threat-coverage) - Actionable analytics designed to combat threats based on MITRE's ATT&CK.
- [Atomic Red Team](https://github.com/redcanaryco/atomic-red-team) - Library of tests mapped to the MITRE ATT&CK® framework.
- [Chainsaw](https://github.com/countercept/chainsaw)
- [MITRE ATT&CK](https://attack.mitre.org/)
- [What are IOCs](https://attacksimulator.com/blog/how-to-recognize-indicators-of-compromise/)
- [MITRE STIX Data](https://github.com/mitre-attack/attack-stix-data)
- [MITRE CAR](https://github.com/mitre-attack/car)
- [ATT&CK Data Sources](https://github.com/mitre-attack/attack-datasources)