In [10]:
import os
import json
import numpy as np
import torch
import networkx as nx
from gensim.models import Word2Vec
from torch_geometric.data import Data
from node2vec import Node2Vec


In [11]:
# Expose only physical GPU 1 to this process (set before the first CUDA query
# below), then select CUDA when it is usable and fall back to the CPU otherwise.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using device: {device}')

Using device: cuda


In [12]:
# Filesystem locations, relative to the notebook's working directory.
# embeddings_dir: where the generated .npy embedding matrices are written.
embeddings_dir = "../../model_outputs/ics_attack/embeddings/"
# data_dir: where the input JSON files are read from.
data_dir = "../../datasets/ics_attack/"



In [13]:
# Load data
def load_data(data_dir):
    """Load the two JSON inputs of the embedding pipeline from ``data_dir``.

    Args:
        data_dir: Directory containing ``attack_weak_range.json`` and
            ``combined_edges.json``.

    Returns:
        A ``(attack_weak_range, edges_json)`` tuple with the parsed contents
        of the two files, in that order.

    Raises:
        FileNotFoundError: If either file is missing.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # locale default encoding.
    with open(os.path.join(data_dir, 'attack_weak_range.json'), encoding='utf-8') as fp:
        attack_weak_range = json.load(fp)
    with open(os.path.join(data_dir, 'combined_edges.json'), encoding='utf-8') as fp:
        edges_json = json.load(fp)
    return attack_weak_range, edges_json

In [None]:
# Generate random walks for DeepWalk
def generate_random_walks(G, num_walks, walk_length, seed=None):
    """Generate uniform random walks over ``G`` (the DeepWalk corpus).

    ``num_walks`` passes are made over the node set. In each pass the start
    nodes are visited in a freshly shuffled order and one walk is started from
    every node. At each step the next node is drawn uniformly from the current
    node's neighbors.

    Args:
        G: Graph-like object exposing ``nodes()`` and ``neighbors(node)``.
        num_walks: Number of walks started from each node.
        walk_length: Maximum number of nodes in a walk, start node included.
            A walk stops early when it reaches a node without neighbors.
        seed: Optional integer seed. When given, a private
            ``np.random.RandomState(seed)`` drives all random choices so the
            result is reproducible. When ``None`` (default) the global
            ``np.random`` state is used.

    Returns:
        A list of walks; each walk is a list of node objects exactly as they
        are stored in ``G`` (no conversion to numpy scalars).
    """
    # Both the np.random module and a RandomState expose shuffle()/randint().
    rng = np.random if seed is None else np.random.RandomState(seed)
    walks = []
    nodes = list(G.nodes())
    for _ in range(num_walks):
        rng.shuffle(nodes)
        for node in nodes:
            walk = [node]
            while len(walk) < walk_length:
                neighbors = list(G.neighbors(walk[-1]))
                if not neighbors:
                    # Dead end (isolated node): keep the truncated walk.
                    break
                # Pick by index rather than np.random.choice(neighbors):
                # choice() converts the list to an array first, which fails
                # for tuple-valued nodes and changes the type of int nodes.
                walk.append(neighbors[rng.randint(len(neighbors))])
            walks.append(walk)
    return walks

In [None]:
# Create and save node2vec embeddings
def create_node2vec_embeddings(G, dimensions, walk_length, num_walks, window, min_count, batch_words, embeddings_dir, workers=4):
    """Train node2vec on ``G`` and save the embedding matrix as ``node2vec.npy``.

    Args:
        G: Graph passed to ``Node2Vec``; its ``nodes()`` order defines the row
            order of the saved matrix.
        dimensions: Embedding size, passed to ``Node2Vec``.
        walk_length: Walk length, passed to ``Node2Vec``.
        num_walks: Number of walks, passed to ``Node2Vec``.
        window: Passed to ``Node2Vec.fit``.
        min_count: Passed to ``Node2Vec.fit``.
        batch_words: Passed to ``Node2Vec.fit``.
        embeddings_dir: Output directory; created if it does not exist.
        workers: Worker count passed to ``Node2Vec`` (default 4, the value
            that was previously hard-coded).

    Returns:
        None. The array is written to ``<embeddings_dir>/node2vec.npy`` with
        one row per node of ``G``, looked up in the model by ``str(node)``.
    """
    # Create the output directory up front so a missing path fails (or is
    # fixed) before the expensive training rather than at np.save time.
    os.makedirs(embeddings_dir, exist_ok=True)
    node2vec = Node2Vec(G, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, workers=workers)
    model = node2vec.fit(window=window, min_count=min_count, batch_words=batch_words)
    embeddings = np.array([model.wv[str(n)] for n in G.nodes()])
    np.save(os.path.join(embeddings_dir, 'node2vec.npy'), embeddings)

In [None]:
# Create and save DeepWalk embeddings
def create_deepwalk_embeddings(G, num_walks, walk_length, vector_size, window, min_count, sg, workers, epochs, embeddings_dir):
    """Train DeepWalk embeddings on ``G`` and save them as ``deepwalk.npy``.

    Random walks from ``generate_random_walks`` are turned into sentences of
    string tokens and fed to ``Word2Vec``.

    Args:
        G: Graph to embed; its ``nodes()`` order defines the row order of the
            saved matrix.
        num_walks: Passed to ``generate_random_walks``.
        walk_length: Passed to ``generate_random_walks``.
        vector_size: Passed to ``Word2Vec``.
        window: Passed to ``Word2Vec``.
        min_count: Passed to ``Word2Vec``.
        sg: Passed to ``Word2Vec``.
        workers: Passed to ``Word2Vec``.
        epochs: Passed to ``Word2Vec``.
        embeddings_dir: Output directory; created if it does not exist.

    Returns:
        None. The array is written to ``<embeddings_dir>/deepwalk.npy`` with
        one row per node of ``G``, looked up in the model by ``str(node)``.
    """
    # Create the output directory before training so a missing path cannot
    # throw away the trained model at np.save time.
    os.makedirs(embeddings_dir, exist_ok=True)
    walks = generate_random_walks(G, num_walks, walk_length)
    # Word2Vec tokens are strings, so every node id is stringified.
    sentences = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(sentences=sentences, vector_size=vector_size, window=window, min_count=min_count, sg=sg, workers=workers, epochs=epochs)
    embeddings_array = np.array([model.wv[str(node)] for node in G.nodes()])
    np.save(os.path.join(embeddings_dir, 'deepwalk.npy'), embeddings_array)

In [None]:
def main():
    """Build the graph from the JSON inputs and write both embedding files.

    Reads ``attack_weak_range.json`` and ``combined_edges.json`` from
    ``data_dir``, builds an undirected graph, and then trains and saves
    node2vec and DeepWalk embeddings into ``embeddings_dir``.

    Node ids ``0 .. n_nodes - 1`` are added to the graph before any edge.
    Because the embedding functions emit one row per node in ``G.nodes()``
    order, row ``i`` of each saved matrix then belongs to node id ``i``, and
    nodes that appear in no edge still get a row. This holds as long as every
    edge endpoint is below ``n_nodes``; larger ids would be appended after.
    """
    # Load data
    attack_weak_range, edges_json = load_data(data_dir)

    n_nodes = attack_weak_range['n_nodes']
    edge_list = [(int(e[0]), int(e[1])) for e in edges_json]

    # Create graph. Nodes go in first: adding edges alone would order nodes by
    # first appearance in edge_list and drop isolated nodes entirely.
    G = nx.Graph()
    G.add_nodes_from(range(n_nodes))
    G.add_edges_from(edge_list)

    # Create and save node2vec embeddings
    create_node2vec_embeddings(G, dimensions=64, walk_length=30, num_walks=100, window=10, min_count=1, batch_words=4, embeddings_dir=embeddings_dir)

    # Create and save DeepWalk embeddings
    create_deepwalk_embeddings(G, num_walks=200, walk_length=30, vector_size=256, window=5, min_count=0, sg=1, workers=4, epochs=10, embeddings_dir=embeddings_dir)


In [None]:
# Entry point: run the whole embedding pipeline. __name__ is "__main__" both
# when this code is executed as a script and when the cell is run in a
# notebook kernel, so main() is called in either case.
if __name__ == "__main__":
    main()

In [2]:

# attack_range = (0,203)
# weak_range = (203,1136)
# # Generate embeddings for both sets of nodes
# attack_nodes = list(range(attack_range[0],attack_range[1]))
# weakness_nodes = list(range(weak_range[0],weak_range[1]))
# attack_embeddings = node2vec_emb[attack_nodes]
# weakness_embeddings = node2vec_emb[weakness_nodes]

In [3]:
# # Compute cosine similarity between all pairs of nodes
# weak_attack_matrix = cosine_similarity(weakness_embeddings, attack_embeddings)
# # Compute cosine similarity between all pairs of nodes
# attack_weak_matrix = cosine_similarity(attack_embeddings, weakness_embeddings)

In [4]:
# import matplotlib.pyplot as plt
# import matplotlib.colors as mcolors
# def histogram2(attack_weak_matrix):
#     # print(len(cosine_sim_pairs))
#     # print(len(cosine_sim_pairs[0]))
#     # for row in cosine_sim_pairs:
#     #     print(row[0][1], row[1][1])
#     #Extract the cosine similarity values from the filtered results
#     #cosine_sim_values = [pair[1] for row in cosine_sim_pairs for pair in row[-30:]]
#     cosine_sim_values=[]
#     for i in range(len(attack_weak_matrix)):
#         for j in range(len(attack_weak_matrix[0])):
#             cosine_sim_values.append(attack_weak_matrix[i][j])
#     #print(cosine_sim_values)
#     # Define the bins for the histogram
#     bins = np.arange(0, 1.1, 0.05)  # Bin edges starting at 0 with step size 0.05
    
#     # Create the histogram
#     plt.hist(cosine_sim_values, bins=bins, edgecolor='black')
    
#     # Set the x-axis and y-axis labels
#     plt.xlabel('Cosine Similarity')
#     plt.ylabel('Frequency')
    
#     # Set the title of the histogram
#     plt.title('Attack & Weak Positive')
#     # plt.savefig(dir2+'/histogram_pos_t_40.png',dpi=300)
#     # Show the plot
#     plt.show()

In [5]:
# histogram2(attack_weak_matrix)

In [6]:
# def get_anchor_pos_neg(positive_threshold,negative_threshold):
#     anchor_pos_pair = []
#     anchor_neg_pair = []
#     # positive_threshold = 0.65  # Similarity threshold for positive pairs
#     # negative_threshold = 0.45  # Similarity threshold for negative pairs
#     for i, attack_node in enumerate(attack_nodes):
#         for j, weakness_node in enumerate(weakness_nodes):
#             if attack_weak_matrix[i, j] > positive_threshold:
#                 anchor_pos_pair.append((attack_node,weakness_node, attack_weak_matrix[i, j]))
#             if attack_weak_matrix[i, j] < negative_threshold:
#                 anchor_neg_pair.append((attack_node,weakness_node, attack_weak_matrix[i, j]))
            
#     for i, weakness_node in enumerate(weakness_nodes):
#         for j, attack_node in enumerate(attack_nodes):
#             if weak_attack_matrix[i, j] > positive_threshold:
#                 anchor_pos_pair.append((weakness_node,attack_node, weak_attack_matrix[i, j]))
#             if weak_attack_matrix[i, j] < negative_threshold:
#                 anchor_neg_pair.append((weakness_node,attack_node, weak_attack_matrix[i, j]))
    
#     anchor_pos_pair.sort(reverse=True, key=lambda x:x[2])
#     anchor_neg_pair.sort(key=lambda x:x[2])
#     print(len(anchor_pos_pair))
#     print(len(anchor_neg_pair))
#     anchor_pos_neg_triple = []
#     anchor_for_neg = [pair[0] for pair in anchor_neg_pair]
#     pos_pair = []
#     neg_pair = []
#     for anchor,pos,val in anchor_pos_pair:
#         if(anchor in anchor_for_neg):
#             idx = anchor_for_neg.index(anchor)
#             anchor_pos_neg_triple.append((anchor,pos,anchor_neg_pair[idx][1]))
#             pos_pair.append((pos,val))
#             neg_pair.append((anchor_neg_pair[idx][1],anchor_neg_pair[idx][2]))
#             anchor_for_neg.pop(idx)
#             anchor_neg_pair.pop(idx)
#     return anchor_pos_neg_triple

In [7]:
# anchor_pos_neg_triple=get_anchor_pos_neg(positive_threshold=0.30,negative_threshold=0.15)

In [8]:
# ass = set()
# ps=set()
# ns=set()
# for a,p,n in anchor_pos_neg_triple:
#     ass.add(a)
#     ps.add(p)
#     ns.add(n)
# print(len(ass))
# print(len(ps))
# print(len(ns))

In [9]:
# import pickle
# with open(graph_path+'graph_features/anchor_pos_neg_triple_4_node2vec.npy', 'wb') as f:
#     pickle.dump(anchor_pos_neg_triple, f)