In [1]:
import json

In [33]:
# Read the attack JSON document from the dataset directory into a Python object
data_dir = "../datasets/ics_attack/"
attack_path = data_dir + 'attack.json'
with open(attack_path, encoding='utf-8') as f:
    data = json.load(f)

In [34]:
# List the top-level keys of the loaded JSON document
data_keys = data.keys()
print(data_keys)

dict_keys(['type', 'id', 'objects', 'spec_version'])


#### `data_keys` contains the top-level elements of this data file. The `objects` element appears to be the one of interest.

How many objects are in the file?

In [35]:
# Count the entries in the top-level 'objects' list
object_count = len(data['objects'])
print("Number of objects: ", object_count)

Number of objects:  1040


#### Objects have a **type** field. How many distinct types are there?
How many objects are there of each type?

In [36]:
from collections import Counter

# Get the list of objects
objects = data['objects']

# Collect the 'type' of each object. Objects without a 'type' key are skipped
# rather than raising KeyError.
types = [obj['type'] for obj in objects if 'type' in obj]

# Count the occurrences of each type
type_counts = Counter(types)

# Width of the longest type name, used to align the counts in a column.
# default=0 keeps max() from raising ValueError when there are no types at all.
max_length = max((len(type_name) for type_name in type_counts), default=0)

# Print the counts in two columns. The loop variable is deliberately not named
# `type`: a module-level loop variable survives the loop and would shadow the
# builtin type() for every later cell.
for type_name, count in type_counts.items():
    print(f'{type_name.ljust(max_length)}: {count}')

x-mitre-matrix        : 1
course-of-action      : 52
malware               : 28
x-mitre-tactic        : 12
attack-pattern        : 93
relationship          : 781
intrusion-set         : 15
x-mitre-data-component: 36
campaign              : 3
x-mitre-data-source   : 17
identity              : 1
marking-definition    : 1


#### The goal is to build the graph from the objects of type `relationship`

In [37]:
def getObjectsOfSpecificType(objects, theType='relationship'):
    """Return the objects whose 'type' field equals ``theType``.

    Args:
        objects: Iterable of dict-like objects. Any iterable is accepted
            (list, tuple, generator, ...); it is traversed once.
        theType: Value the 'type' field must equal. Defaults to
            'relationship'.

    Returns:
        A new list holding the matching objects in their original order.
        Objects that have no 'type' key are skipped.
    """
    # Iterate directly instead of indexing through range(len(...)), so inputs
    # that do not support len()/indexing work as well.
    return [obj for obj in objects if 'type' in obj and obj['type'] == theType]

def idVSobjects(objects):
    """Build a lookup table that maps each object's 'id' to the object itself.

    Args:
        objects: Iterable of dict-like objects. Any iterable is accepted
            (list, tuple, generator, ...); it is traversed once.

    Returns:
        A dict keyed by 'id'. Objects without an 'id' key are left out. If
        several objects share an id, the last one encountered is kept.
    """
    # Iterate directly instead of indexing through range(len(...)), so inputs
    # that do not support len()/indexing work as well.
    return {obj['id']: obj for obj in objects if 'id' in obj}

In [38]:
# Keep only the objects whose type is 'relationship'
relationObjects = getObjectsOfSpecificType(objects, 'relationship')
# Index every object by its 'id' so that ids can be resolved to full objects
idVsObj=idVSobjects(objects)

In [41]:
# Sanity check: this equals the number of objects only if every object has a unique 'id'
len(idVsObj)

1040

In [31]:
# Look at one relationship object to see which fields it carries
relationObjects[0]

{'type': 'relationship',
 'id': 'relationship--228b9a13-0545-4ecf-99ff-be02addaf7fe',
 'created': '2018-10-17T00:14:20.652Z',
 'created_by_ref': 'identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5',
 'revoked': False,
 'external_references': [{'source_name': 'ESET',
   'description': 'ESET   ACAD/Medre.A: 10000s of AutoCAD Designs Leaked in Suspected Industrial Espionage Retrieved. 2021/04/13 ',
   'url': 'https://www.welivesecurity.com/wp-content/uploads/200x/white-papers/ESET_ACAD_Medre_A_whitepaper.pdf'}],
 'object_marking_refs': ['marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168'],
 'modified': '2022-10-12T17:10:58.645Z',
 'description': '[ACAD/Medre.A](https://attack.mitre.org/software/S1000) can collect AutoCad files with drawings. These drawings may contain operational information. (Citation: ESET)\n',
 'relationship_type': 'uses',
 'source_ref': 'malware--a4a98eab-b691-45d9-8c48-869ef8fefd57',
 'target_ref': 'attack-pattern--b7e13ee8-182c-4f19-92a4-a88d7d855d54',
 'x_mitre_

In [42]:
from sklearn.metrics.pairwise import cosine_similarity

# Construct the nodes and links of the graph from the relationship objects.
links = []
count = 0  # relationships skipped because an endpoint is not in idVsObj

# Node ids in first-seen order, plus a set for constant-time "already added?"
# checks (testing membership on the list alone is a linear scan per lookup).
node_ids = []
seen_ids = set()

for relation in relationObjects:
    source_ref = relation['source_ref']
    target_ref = relation['target_ref']
    # Skip relationships that reference an object missing from the lookup table.
    if source_ref not in idVsObj or target_ref not in idVsObj:
        count += 1
        continue
    # Register each endpoint once, source before target.
    for ref in (source_ref, target_ref):
        if ref not in seen_ids:
            seen_ids.add(ref)
            node_ids.append(ref)
    links.append({'source': source_ref, 'target': target_ref})

print("missing relationship: ", count)

# Resolve every node id to its full object. A separate list is built so that
# `nodes` only ever holds objects, never a mix of ids and objects.
nodes = [idVsObj[node_id] for node_id in node_ids]

graph = {'nodes': nodes, 'links': links}

# Writing the graph to disk is currently disabled; uncomment to export it.
# with open(data_dir+'attack-graph.json', 'w') as f:
#     json.dump(graph, f)

missing relationship:  0


In [43]:
# Display the links built from the relationship objects
# NOTE(review): this shows the entire list; links[:5] would keep the output short
links

[{'source': 'malware--a4a98eab-b691-45d9-8c48-869ef8fefd57',
  'target': 'attack-pattern--b7e13ee8-182c-4f19-92a4-a88d7d855d54'},
 {'source': 'course-of-action--aadac250-bcdc-44e3-a4ae-f52bd0a7a16a',
  'target': 'attack-pattern--1c478716-71d9-46a4-9a53-fa5d576adb60'},
 {'source': 'x-mitre-data-component--9c2fa0ae-7abc-485a-97f6-699e3b6cf9fa',
  'target': 'attack-pattern--097924ce-a9a9-4039-8591-e0deedfb8722'},
 {'source': 'intrusion-set--381fcf73-60f6-4ab2-9991-6af3cbc35192',
  'target': 'attack-pattern--40b300ba-f553-48bf-862e-9471b220d455'},
 {'source': 'course-of-action--97f33c84-8508-45b9-8a1d-cac921828c9e',
  'target': 'attack-pattern--35392fb4-a31d-4c6a-b9f2-1c65b7f5e6b9'},
 {'source': 'x-mitre-data-component--9c2fa0ae-7abc-485a-97f6-699e3b6cf9fa',
  'target': 'attack-pattern--be69c571-d746-4b1f-bdd0-c0c9817e9068'},
 {'source': 'x-mitre-data-component--3d20385b-24ef-40e1-9f56-f39750379077',
  'target': 'attack-pattern--fa3aa267-da22-4bdd-961f-03223322a8d5'},
 {'source': 'course-o