## Import modules

In [2]:
from neo4j import GraphDatabase
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd

## Establish Connection with Database

In [4]:
class Neo4jConnection:
    """Thin wrapper around a Neo4j driver that runs one query per session.

    The driver is created in the constructor. If creation fails, the error is
    printed and the instance is left without a driver; ``query`` then raises
    ``AssertionError`` and ``close`` does nothing.
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` using ``(user, pwd)`` as auth."""
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def close(self):
        """Close the driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run ``query`` in a fresh session and return its records as a list.

        Args:
            query: Cypher statement to execute.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.
            parameters: Optional mapping of query parameters (referenced as
                ``$name`` in the statement). Prefer this over building the
                statement by string concatenation.

        Returns:
            The list of records produced by the query, or ``None`` if running
            it failed (the error is printed, not raised).

        Raises:
            AssertionError: If the driver could not be created.
        """
        # Raised explicitly rather than via ``assert`` so the check is still
        # performed when Python runs with -O.
        if self.__driver is None:
            raise AssertionError("Driver not initialized!")
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            # Always release the session, even when the query failed.
            if session is not None:
                session.close()
        return response

In [10]:
# Connection to the local Neo4j instance; reused by every query cell below.
conn = Neo4jConnection(
    uri="bolt://localhost:7687",
    user="neo4j",
    pwd="team",
)

## Create Attack nodes

In [11]:
# URL of the MITRE ATT&CK Enterprise v10.1 techniques spreadsheet.
attack_path = (
    'https://attack.mitre.org/docs/enterprise-attack-v10.1/'
    'enterprise-attack-v10.1-techniques.xlsx'
)
#defend_path = 'https://d3fend.mitre.org/ontologies/d3fend.csv'

In [12]:
# Read the techniques spreadsheet located at attack_path into a DataFrame.
attack_df = pd.read_excel(attack_path)
#defend_df = pd.read_csv(defend_path)

In [13]:
#parent_attacks = attack_df[attack_df["ID"].str.len() < 6]
#child_attacks = attack_df[attack_df["ID"].str.len() > 5]

In [14]:
# Create one :Attack node per spreadsheet row: the first column becomes the
# node's ID property and the second column its name property. Columns are
# selected by position with iloc, not by integer keys on a label-indexed row.
for attack_id, attack_name in zip(attack_df.iloc[:, 0], attack_df.iloc[:, 1]):
    # Escape backslashes and single quotes so a value containing "'" cannot
    # end the Cypher string literal early and break (or alter) the statement.
    attack_id = attack_id.replace("\\", "\\\\").replace("'", "\\'")
    attack_name = attack_name.replace("\\", "\\\\").replace("'", "\\'")
    conn.query(f"CREATE (n:Attack{{ID:'{attack_id}', name:'{attack_name}'}})")

## Scrape Relation Data

In [15]:
# First column of the techniques sheet; the scrape loop below puts each value
# into a D3FEND URL as the ATT&CK technique ID.
attack_techniques = attack_df.iloc[:, 0]

In [16]:
# Accumulators filled by the scrape loop, one list per relation direction:
# attack -> artifact, defend -> artifact and defend -> attack.
attack_artifact, defend_artifact, defend_attack = [], [], []

In [17]:
# For every ATT&CK technique, fetch its D3FEND page and sort the edges found
# in the page's hidden <div> into the three relation lists.
for attack_t in attack_techniques:

    url = f'https://d3fend.mitre.org/offensive-technique/attack/{attack_t}'
    try:
        # The timeout stops a single stalled request from hanging the crawl.
        page = requests.get(url, timeout=30).content
    except requests.RequestException as e:
        print(f"Request failed for {attack_t}:", e)
        continue
    soup = BeautifulSoup(page, 'html.parser')

    hidden_div = soup.find('div', class_='hidden')
    if hidden_div is None:
        # No hidden <div> on this page, so there is nothing to extract.
        continue

    # Remove all whitespace, then split the text into ';'-separated statements.
    statements = re.sub(r'\s+', '', hidden_div.text).split(';')
    for statement in statements:
        if '-->' not in statement:
            continue  # only statements containing an arrow describe an edge

        edge = re.sub(r'\[[^]]*\]', '', statement)  # drop every '[...]' part
        edge = edge.replace('-->', '')               # drop the arrow itself
        edge = edge.replace('-', '_')                # '-' becomes '_'
        parts = edge.split('|')                      # source | relation | target

        if len(parts) < 3:
            # An edge without a '|relation|' label cannot be stored as a
            # (source, relation, target) row; skip it instead of failing here
            # or later when the row is turned into a query.
            continue

        # IDs longer than five characters get a '.' after the fifth character
        # (e.g. 'T1003001' -> 'T1003.001'); presumably this restores the
        # sub-technique notation used in the spreadsheet -- TODO confirm.
        if parts[0].startswith('T1'):
            if len(parts[0]) > 5:
                parts[0] = parts[0][:5] + '.' + parts[0][5:]
            attack_artifact.append(parts)

        elif parts[2].startswith('T1'):
            if len(parts[2]) > 5:
                parts[2] = parts[2][:5] + '.' + parts[2][5:]
            defend_attack.append(parts)

        else:
            defend_artifact.append(parts)

## Create Defend and Artifact nodes and their relations

Attack -> Artifact

In [21]:
# Link each Attack node to an Artifact node. Every row is
# [attack ID, relation type, artifact name]; MERGE creates whatever node or
# relationship does not exist yet.
for row in attack_artifact:
    # The values come from scraped text, so escape them before splicing them
    # into the statement: backslash and single quote inside string literals,
    # and a doubled backtick inside the backtick-quoted relationship type.
    attack_id = row[0].replace("\\", "\\\\").replace("'", "\\'")
    artifact = row[2].replace("\\", "\\\\").replace("'", "\\'")
    relation = row[1].replace("`", "``")
    conn.query(
        f"MERGE (at:Attack{{ID:'{attack_id}'}}) "
        f"MERGE (ar:Artifact{{name:'{artifact}'}}) "
        f"MERGE (at)-[:`{relation}`]->(ar)"
    )

Defend -> Artifact

In [22]:
# Link each Defend node to an Artifact node. Every row is
# [defend name, relation type, artifact name]; MERGE creates whatever node or
# relationship does not exist yet.
for row in defend_artifact:
    # The values come from scraped text, so escape them before splicing them
    # into the statement: backslash and single quote inside string literals,
    # and a doubled backtick inside the backtick-quoted relationship type.
    defend = row[0].replace("\\", "\\\\").replace("'", "\\'")
    artifact = row[2].replace("\\", "\\\\").replace("'", "\\'")
    relation = row[1].replace("`", "``")
    conn.query(
        f"MERGE (de:Defend{{name:'{defend}'}}) "
        f"MERGE (ar:Artifact{{name:'{artifact}'}}) "
        f"MERGE (de)-[:`{relation}`]->(ar)"
    )

Defend -> Attack

In [23]:
# Link each Defend node to an Attack node. Every row is
# [defend name, relation type, attack ID]; MERGE creates whatever node or
# relationship does not exist yet.
for row in defend_attack:
    # The values come from scraped text, so escape them before splicing them
    # into the statement: backslash and single quote inside string literals,
    # and a doubled backtick inside the backtick-quoted relationship type.
    defend = row[0].replace("\\", "\\\\").replace("'", "\\'")
    attack_id = row[2].replace("\\", "\\\\").replace("'", "\\'")
    relation = row[1].replace("`", "``")
    conn.query(
        f"MERGE (de:Defend{{name:'{defend}'}}) "
        f"MERGE (at:Attack{{ID:'{attack_id}'}}) "
        f"MERGE (de)-[:`{relation}`]->(at)"
    )