In [56]:
import json
import sys
sys.path.append('../')
import config

In [57]:
# Read attack.json from the configured data directory into `data`.
data_dir = config.DATA_DIR
print("data dir: ", data_dir)
attack_json_path = data_dir + 'attack.json'
with open(attack_json_path, encoding='utf-8') as attack_file:
    data = json.load(attack_file)

data dir:  /home/afarhan/post-doc/AWEB_GCL/datasets/enterprise_attack/


In [58]:
# Show the top-level keys of the loaded JSON document.
data_keys = data.keys()
print(data_keys)

dict_keys(['type', 'id', 'spec_version', 'objects'])


#### `data_keys` contains the top-level elements of this data file. The `objects` element appears to be the one of interest.

How many objects are in the file?

In [59]:
# Length of the list stored under the top-level 'objects' key.
print("Number of objects: ", len(data['objects']))

Number of objects:  21542


#### Objects have a **type** field. How many distinct types are there?
How many objects of each type?

In [60]:
from collections import Counter

# Get the list of objects
objects = data['objects']

# Get the 'type' of each object
types = [obj['type'] for obj in objects]

# Count the occurrences of each type
type_counts = Counter(types)

# Width of the longest type name, used to align the counts in a column.
# default=0 avoids a ValueError from max() when there are no objects at all.
max_length = max((len(type_name) for type_name in type_counts), default=0)

# Print the counts in two columns.
# The loop variable is deliberately not called `type`: a top-level loop
# variable stays bound after the cell runs and would shadow the builtin
# type() for every later cell in the kernel.
for type_name, count in type_counts.items():
    print(f'{type_name.ljust(max_length)}: {count}')

x-mitre-collection    : 1
attack-pattern        : 780
campaign              : 28
course-of-action      : 284
identity              : 1
intrusion-set         : 165
malware               : 596
marking-definition    : 1
relationship          : 19438
tool                  : 86
x-mitre-data-component: 109
x-mitre-data-source   : 38
x-mitre-matrix        : 1
x-mitre-tactic        : 14


#### The goal is to build the graph from the objects of type `relationship`

In [61]:
def getObjectsOfSpecificType(objects, theType='relationship'):
    """Return the entries of `objects` whose 'type' value equals `theType`.

    Entries that have no 'type' key are skipped. The result is a new list
    that keeps the entries in their original order.
    """
    return [
        entry
        for entry in objects
        if 'type' in entry and entry['type'] == theType
    ]

def idVSobjects(objects):
    """Build a lookup dict mapping each entry's 'id' value to the entry itself.

    Entries without an 'id' key are left out. If several entries share the
    same id, the last one in `objects` is the one kept.
    """
    return {entry['id']: entry for entry in objects if 'id' in entry}

In [62]:
# Keep only the objects whose 'type' is 'relationship'.
relationObjects = getObjectsOfSpecificType(objects, 'relationship')
# Index every object by its 'id' so a ref string can be looked up directly.
idVsObj=idVSobjects(objects)

In [63]:
# Number of distinct ids in the lookup table.
len(idVsObj)

21542

In [64]:
# Inspect the first relationship object to see which fields it carries.
relationObjects[0]

{'object_marking_refs': ['marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168'],
 'id': 'relationship--00038d0e-7fc7-41c3-9055-edb4d87ea912',
 'type': 'relationship',
 'created': '2021-04-27T01:56:35.810Z',
 'created_by_ref': 'identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5',
 'external_references': [{'source_name': 'CheckPoint Volatile Cedar March 2015',
   'url': 'https://media.kasperskycontenthub.com/wp-content/uploads/sites/43/2015/03/20082004/volatile-cedar-technical-report.pdf',
   'description': 'Threat Intelligence and Research. (2015, March 30). VOLATILE CEDAR. Retrieved February 8, 2021.'}],
 'modified': '2021-04-27T01:56:35.810Z',
 'description': " [Explosive](https://attack.mitre.org/software/S0569) has collected the MAC address from the victim's machine.(Citation: CheckPoint Volatile Cedar March 2015) ",
 'relationship_type': 'uses',
 'source_ref': 'malware--6a21e3a4-5ffe-4581-af9a-6a54c7536f44',
 'target_ref': 'attack-pattern--707399d6-ab3e-4963-9315-d9d3818cd6a0',
 '

In [65]:
from sklearn.metrics.pairwise import cosine_similarity

# Construct the nodes and links of the graph from the relationship objects.
links = []
count = 0  # relationships skipped because an endpoint id is not in idVsObj

# Collect the ids of all endpoints in first-seen order. A dict is used as an
# ordered set: membership/insertion is O(1) (a list would make this loop
# quadratic) and dicts preserve insertion order.
node_ids = {}
for relation in relationObjects:
    source_ref = relation['source_ref']
    target_ref = relation['target_ref']
    # Drop relationships that point at an object that is not in the file.
    if source_ref not in idVsObj or target_ref not in idVsObj:
        count += 1
        continue
    # Re-assigning an existing key keeps its original position.
    node_ids[source_ref] = None
    node_ids[target_ref] = None
    links.append({'source': source_ref, 'target': target_ref})

print("missing relationship: ", count)

# Resolve each node id to its full object; links keep referring to nodes by id.
nodes = [idVsObj[node_id] for node_id in node_ids]

graph = {'nodes': nodes, 'links': links}



missing relationship:  2


In [66]:
# Preview only the first few links; displaying the whole list floods the
# notebook output with one entry per relationship.
links[:5]

[{'source': 'malware--6a21e3a4-5ffe-4581-af9a-6a54c7536f44',
  'target': 'attack-pattern--707399d6-ab3e-4963-9315-d9d3818cd6a0'},
 {'source': 'course-of-action--21da4fd4-27ad-4e9c-b93d-0b9b14d02c96',
  'target': 'attack-pattern--43c9bc06-715b-42db-972f-52d25c09a20c'},
 {'source': 'x-mitre-data-component--a7f22107-02e5-4982-9067-6625d4a1765a',
  'target': 'attack-pattern--18cffc21-3260-437e-80e4-4ab8bf2ba5e9'},
 {'source': 'intrusion-set--01e28736-2ffc-455b-9880-ed4d1407ae07',
  'target': 'attack-pattern--65f2d882-3f41-4d48-8a06-29af77ec9f90'},
 {'source': 'malware--425771c5-48b4-4ecd-9f95-74ed3fc9da59',
  'target': 'attack-pattern--bf176076-b789-408e-8cba-7275e81c0ada'},
 {'source': 'malware--b7010785-699f-412f-ba49-524da6033c76',
  'target': 'attack-pattern--132d5b37-aac5-4378-a8dc-3127b18a73dc'},
 {'source': 'x-mitre-data-component--45fd904d-6eb0-4b50-8478-a961f09f898b',
  'target': 'attack-pattern--144e007b-e638-431d-a894-45d90c54ab90'},
 {'source': 'malware--a7881f21-e978-4fe4-af56

In [67]:
# Save the graph as attack_graph.json in the data directory. The encoding is
# passed explicitly so the write does not depend on the platform default and
# matches how attack.json was opened for reading.
with open(data_dir+'attack_graph.json', 'w', encoding='utf-8') as f:
    json.dump(graph, f)