In [1]:
import json
import networkx as nx
from groq import Groq

In [2]:
# Read graph data from JSON: the file is parsed as one JSON object per line
file_path = 'cybersecurity-json-data.json'
# An explicit encoding keeps the result independent of the platform's locale default.
with open(file_path, 'r', encoding='utf-8') as file:
    # Blank lines (e.g. an extra empty line at the end of the file) are skipped,
    # because json.loads raises on an empty string.
    data = [json.loads(line) for line in file if line.strip()]

In [31]:
# Create a graph using NetworkX
# NOTE(review): nx.Graph is undirected and keeps a single edge per node pair, so the
# start/end direction and any parallel relationships in the input are not preserved —
# confirm this is intended.
graph = nx.Graph()

# Adding nodes and edges into graph
for item in data:
    if item['type'] == 'node':
        node_id = item['id']
        # Read optional fields with defaults, the same way edge properties are read
        # below, so a node record without labels/properties does not raise KeyError.
        node_labels = item.get('labels', [])
        node_properties = item.get('properties', {})
        graph.add_node(node_id, labels=node_labels, **node_properties)
    elif item['type'] == 'relationship':
        source = item['start']['id']
        target = item['end']['id']
        edge_properties = item.get('properties', {})
        graph.add_edge(source, target, id=item['id'], label=item['label'], **edge_properties)


In [22]:
def create_node_string(graph):
    """Describe every node of ``graph`` as one comma-separated string.

    Each node is rendered as ``<id> [<label>, ...] (<key>: <value>, ...)``, where
    the labels come from the node's ``labels`` attribute (empty if absent) and the
    key/value pairs are all of its other attributes.

    Args:
        graph: Object whose ``nodes(data=True)`` yields ``(node, attribute_dict)``
            pairs, such as a NetworkX graph.

    Returns:
        The node descriptions joined with ``', '``; an empty string if the graph
        has no nodes.
    """
    node_descriptions = []
    for node, props in graph.nodes(data=True):
        # Read the labels without pop(): `props` is the graph's own attribute dict,
        # so removing the key would strip labels from the graph and change the
        # output of every later call.
        labels_str = ', '.join(props.get('labels', []))
        prop_desc = ', '.join(
            f"{key}: {value}" for key, value in props.items() if key != 'labels'
        )
        node_descriptions.append(f"{node} [{labels_str}] ({prop_desc})")
    return ', '.join(node_descriptions)

In [16]:
# Neighbor-based edge description: one sentence per node that has at least one neighbor.
# Note: another cell in this notebook defines a function with the same name;
# whichever definition was executed last is the one in effect.
def create_edge_string(graph):
    """Describe the connections of each node together with their edge attributes.

    For every node returned by ``graph.nodes()`` that has neighbors, one sentence
    is produced listing each neighbor followed by the attributes of the connecting
    edge in parentheses. Nodes without neighbors produce no sentence.

    Returns:
        The sentences joined with newlines (an empty string if there are none).
    """
    sentences = []
    for origin in graph.nodes():
        neighbor_parts = []
        for neighbor in graph.neighbors(origin):
            attributes = graph.get_edge_data(origin, neighbor)
            attribute_text = ', '.join(f"{name}: {val}" for name, val in attributes.items())
            neighbor_parts.append(f"{neighbor} ({attribute_text})")

        if not neighbor_parts:
            continue

        # Singular wording for exactly one neighbor, plural otherwise.
        noun = "nodes" if len(neighbor_parts) > 1 else "node"
        listing = ', '.join(neighbor_parts)
        sentences.append(f"Node {origin} is connected to {noun} {listing}.")
    return '\n'.join(sentences)

In [17]:
def create_edge_string(graph):
    """Render every edge of ``graph`` as one sentence, one sentence per line.

    Each edge yielded by ``graph.edges(data=True)`` becomes
    ``Node <source> is connected to node <target> (<key>: <value>, ...).``

    Returns:
        The sentences joined with newlines (an empty string if there are no edges).
    """
    lines = []
    for edge in graph.edges(data=True):
        start, end, attributes = edge
        pairs = [f"{name}: {val}" for name, val in attributes.items()]
        lines.append(
            "Node {} is connected to node {} ({}).".format(start, end, ', '.join(pairs))
        )
    return '\n'.join(lines)


In [32]:
def encode_graph(graph):
    """Return the textual node and edge descriptions of ``graph`` as a pair.

    The first element is produced by ``create_node_string`` and the second by
    ``create_edge_string``, called in that order.
    """
    return create_node_string(graph), create_edge_string(graph)

# Encode the graph as two text fragments: the node list and the edge sentences
nodes_string, edges_string = encode_graph(graph)

# Full description: node list first, then the edge sentences
encoded_graph = (
    "G describes a graph among nodes: \n"
    + nodes_string
    + ".\n\nIn this graph:\n"
    + edges_string
)

# Single-line variants of each part
nodes_string_encode = f"G describes a graph among nodes: {nodes_string}"
edges_string_encode = f"In this graph: {edges_string}"

# Only the node part is shown here
print(nodes_string_encode)

G describes a graph among nodes: 0 [Group, HighValue] (highvalue: True, neo4jImportId: 0, domain: TestCompany.Local, name: DOMAIN ADMINS@TestCompany.Local, objectid: S-1-5-21-883232822-274137685-4173207997-512), 1 [Group] (neo4jImportId: 1, domain: TestCompany.Local, name: DOMAIN COMPUTERS@TestCompany.Local, objectid: S-1-5-21-883232822-274137685-4173207997-515), 2 [Group] (neo4jImportId: 2, domain: TestCompany.Local, name: DOMAIN USERS@TestCompany.Local, objectid: S-1-5-21-883232822-274137685-4173207997-513), 3 [Group, HighValue] (highvalue: True, neo4jImportId: 3, domain: TestCompany.Local, name: DOMAIN CONTROLLERS@TestCompany.Local, objectid: S-1-5-21-883232822-274137685-4173207997-516), 4 [Group, HighValue] (highvalue: True, neo4jImportId: 4, domain: TestCompany.Local, name: ENTERPRISE DOMAIN CONTROLLERS@TestCompany.Local, objectid: TestCompany.Local-S-1-5-9), 5 [Group] (neo4jImportId: 5, domain: TestCompany.Local, name: ENTERPRISE READ-ONLY DOMAIN CONTROLLERS@TestCompany.Local, ob

In [1]:
import openai
import time
import os
from openai import OpenAI

# Read the API key from the environment instead of embedding it in the notebook.
# A key that has appeared in source or in a shared notebook should be treated as
# compromised and revoked.
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
)

In [2]:
# Take the key from the environment rather than hardcoding it; a key that has
# appeared in source should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [82]:
# The question to ask the model
question = "What are the possible inconsistencies (nodes and edges) that could be detected in the relationships and interactions within this graph?"

In [41]:
# Build the prompt: the first 500 characters of the encoded graph, a blank line,
# then the question
prompt = f"{encoded_graph[:500]}\n\n{question}"

In [6]:
# Retry logic with exponential backoff
def get_response_with_retry(prompt, model="gpt-4", max_retries=5):
    """Request a chat completion for ``prompt``, retrying transient failures.

    Up to ``max_retries`` attempts are made. After a failed attempt the function
    sleeps for 1, 2, 4, ... seconds (doubling each time) before trying again.

    Args:
        prompt: Text sent as the user message.
        model: Name of the chat model to use.
        max_retries: Total number of attempts; must be at least 1.

    Returns:
        The response object returned by ``client.chat.completions.create``.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        openai.RateLimitError, openai.APIConnectionError, openai.InternalServerError:
            Re-raised from the final attempt when every attempt failed.
        Any other exception from the client is raised immediately, without retry.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    # Only failures that a later attempt can plausibly fix are retried; other
    # errors (e.g. an invalid request or bad credentials) propagate at once.
    transient_errors = (
        openai.RateLimitError,
        openai.APIConnectionError,
        openai.InternalServerError,
    )
    for attempt in range(max_retries):
        try:
            return client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=550,
            )
        except transient_errors:
            # Out of attempts: surface the last error to the caller.
            if attempt == max_retries - 1:
                raise
            time.sleep(2 ** attempt)
       

In [5]:
import pandas as pd

# Define the data for each rule: three parallel lists (name, description, query),
# one entry per rule
# NOTE(review): the "GPO linkage" description names the relationship `GP_LINKS`
# while its Cypher query matches `GP_LINK` — confirm which one the database uses.
data = {
    "Rule Name": [
        "Unique neo4jImportId per node",
        "Unique objectid per node",
        "Allowed node labels",
        "Computer node membership",
        "GPO linkage"
    ],
    "Description": [
        "Each node in the database should have a unique `neo4jImportId` value.",
        "Each node in the database should have a unique `objectid` value.",
        "Only the allowed node labels (`User`, `Group`, `Domain`, `OU`, `GPO`, and `Computer`) should be present in the database.",
        "A `Computer` node should not have a relationship with a `Domain` node using the `MEMBER_OF` relationship.",
        "A `GPO` node should be linked to either a `Domain` node or an `OU` node through a relationship of `GP_LINKS`."
    ],
    "Cypher Query": [
        "MATCH (n) WITH n.neo4jImportId AS id, COUNT(n) AS countNodes WHERE id IS NOT NULL AND countNodes = 1 RETURN COUNT(id) AS support",
        "MATCH (n) WITH n.objectid AS id, COUNT(n) AS countNodes WHERE id IS NOT NULL AND countNodes = 1 RETURN COUNT(id) AS support",
        "MATCH (n) WHERE (n:User OR n:Group OR n:Domain OR n:OU OR n:GPO OR n:Computer) RETURN COUNT(n) AS support",
        "MATCH (c:Computer)-[r:MEMBER_OF]->(d:Domain) RETURN COUNT(*) AS support",
        "MATCH (g:GPO)-[:GP_LINK]->(target) WHERE (target:Domain OR target:OU) RETURN COUNT(DISTINCT g) AS support"
    ]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Save to CSV
# The file name is held in one variable so the confirmation message below always
# names the file that was actually written.
output_csv = 'evaluation_rules_3.csv'
df.to_csv(output_csv, index=False)

print(f"CSV file '{output_csv}' has been created.")


CSV file 'evaluation_rules.csv' has been created.
