## Import required libraries

In [None]:
import json
import networkx as nx
import openai
from groq import Groq
from neo4j import GraphDatabase
import json
import json
import networkx as nx
import time

## Connect to Neo4j

In [None]:
import os

# Neo4j connection details. Each value can be overridden through an
# environment variable so real credentials do not have to live in the
# notebook; the fallbacks are the original local-development values.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "123456789")

# Initialize the Neo4j driver
driver = GraphDatabase.driver(uri, auth=(user, password))

# Function to run a query and return the results
def run_query(query, parameters=None):
    """Run a Cypher query in a fresh session and return all rows.

    Args:
        query: Cypher statement to execute.
        parameters: Optional mapping of query parameters. Pass values here
            rather than formatting them into ``query``.

    Returns:
        A list holding ``record.data()`` for every record of the result.
    """
    with driver.session() as session:
        result = session.run(query, parameters)
        # Consume the result while the session is still open.
        return [record.data() for record in result]

## Get data

In [None]:
# Get all nodes
def get_all_nodes(tx):
    """Return the labels and properties of every node matched through ``tx``.

    Args:
        tx: Object exposing ``run(query)``; each row it yields is indexed by
            the keys ``"labels"`` and ``"n"``.

    Returns:
        A list of dicts, one per row, with the keys ``"labels"`` (taken as-is
        from the row) and ``"properties"`` (``dict(...)`` of the row's node).
    """
    cypher = """
    MATCH (n)
    RETURN labels(n) AS labels, n
    """
    return [
        {"labels": row["labels"], "properties": dict(row["n"])}
        for row in tx.run(cypher)
    ]

# Perform query
with driver.session() as session:
    all_nodes = session.read_transaction(get_all_nodes)

# NOTE: the driver is intentionally left open here. It is used again further
# down to read the relationships and is closed there after its last use;
# closing it at this point would hand that later query a closed driver.

# Result
for node in all_nodes:
    print(f"Labels: {node['labels']}, Properties: {node['properties']}")

## Graph Encoding

In [None]:
def encode_data(data):
    """Render node dicts as one comma-separated English description.

    Args:
        data: Iterable of dicts with a ``'labels'`` list of strings and a
            ``'properties'`` mapping.

    Returns:
        A string of the form ``"Node 0 has labels [...] and (...), Node 1 ..."``;
        the empty string when ``data`` is empty.
    """
    sentences = []
    for position, entry in enumerate(data):
        label_text = ', '.join(entry['labels'])
        property_text = ', '.join(
            f"{name}: {val}" for name, val in entry['properties'].items()
        )
        sentences.append(
            f"Node {position} has labels [{label_text}] and ({property_text})"
        )
    # Joining with ", " leaves no trailing comma to strip afterwards.
    return ', '.join(sentences)

# Encode all fetched nodes into a single text string and print it
encoded_all_data = encode_data(all_nodes)
print(encoded_all_data)

In [None]:
# Number of characters in the encoded node text
len(encoded_all_data)

### Get relationships and properties

In [None]:
def get_nodes_and_relationships(tx):
    """Return the distinct (start labels, type, end labels, properties) rows.

    Args:
        tx: Object exposing ``run(query)``; each row it yields is indexed by
            the column aliases used in the query below.

    Returns:
        A list of dicts with the keys ``"StartLabels"``,
        ``"RelationshipType"``, ``"EndLabels"`` and
        ``"RelationshipProperties"`` (the last one copied into a plain dict).
    """
    cypher = (
        "MATCH (a)-[r]->(b) "
        "RETURN DISTINCT labels(a) AS StartLabels, type(r) AS RelationshipType, labels(b) AS EndLabels, properties(r) AS RelationshipProperties "
        "ORDER BY StartLabels, RelationshipType, EndLabels"
    )
    return [
        {
            "StartLabels": row["StartLabels"],
            "RelationshipType": row["RelationshipType"],
            "EndLabels": row["EndLabels"],
            # Convert the properties to a dictionary
            "RelationshipProperties": dict(row["RelationshipProperties"]),
        }
        for row in tx.run(cypher)
    ]


# Collect the relationship summary through a read transaction
with driver.session() as session:
    nodes_and_relationships = session.read_transaction(get_nodes_and_relationships)

# Close connection to driver (this is the last query issued through it here)
driver.close()

In [None]:
# Display the collected relationship summary
nodes_and_relationships

### Edges encoding

In [None]:
def encode_relationship(relationship):
    """Describe one relationship summary row as an English sentence.

    Args:
        relationship: Dict with the keys ``'StartLabels'`` and
            ``'EndLabels'`` (lists of strings), ``'RelationshipType'`` and
            ``'RelationshipProperties'`` (a mapping, possibly empty).

    Returns:
        The sentence describing the connection; an empty or missing-valued
        property mapping is rendered as ``No properties``.
    """
    source = ', '.join(relationship['StartLabels'])
    target = ', '.join(relationship['EndLabels'])
    kind = relationship['RelationshipType']
    props = relationship['RelationshipProperties']

    if props:
        props_text = ', '.join(f"{name}: {val}" for name, val in props.items())
    else:
        props_text = "No properties"

    return f"Node [{source}] connect to node  [{target}] with Relationship Type {kind} and Properties {{{props_text}}}"

# Build one sentence per relationship row, each terminated by ". "
encoded_relationship = "".join(
    encode_relationship(relationship) + ". "
    for relationship in nodes_and_relationships
)

encoded_relationship

In [None]:
# Keep at most the first 100000 characters of the node text, then append the
# relationship text under an "In this graph:" separator
encoded_data = "\nIn this graph: \n".join(
    [encoded_all_data[:100000], encoded_relationship]
)
encoded_data

## Rules generation

In [None]:
import os

# Read the Groq API key from the environment instead of embedding the secret
# in the notebook; export GROQ_API_KEY before running this cell.
# NOTE: a key that has been committed to source control should be treated as
# leaked and rotated.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

In [None]:
def generator(encoded_graph, query, model, size):
    """Run the encoded graph through the chat model in chunks, then ask ``query``.

    ``encoded_graph`` is split on whitespace into chunks, each chunk is sent
    to the model as its own single user message, the replies are joined with
    spaces, and a final request is made with ``<joined replies>\\n<query>``.

    Uses the module-level ``client`` for every request.

    Args:
        encoded_graph: Text encoding of the graph.
        query: Prompt appended after the processed graph text.
        model: Model identifier passed to the chat completion call.
        size: Maximum chunk length in characters. A single word longer than
            this still becomes a chunk of its own.

    Returns:
        The message content of the first choice of the final completion.
    """
    # Function to divide the text into smaller chunks
    def chunk_text(text, max_length):
        words = text.split()
        chunks = []
        current_chunk = []
        current_length = 0

        for word in words:
            # Flush only a non-empty chunk: without this guard a first word
            # longer than max_length produced an empty chunk, which was then
            # sent to the model as an empty message.
            if current_chunk and current_length + len(word) + 1 > max_length:
                chunks.append(' '.join(current_chunk))
                current_chunk = []
                current_length = 0
            current_chunk.append(word)
            # +1 accounts for the separating space
            current_length += len(word) + 1

        if current_chunk:
            chunks.append(' '.join(current_chunk))

        return chunks

    # Function to process each chunk
    def process_chunk(chunk):
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": chunk,
                }
            ],
            model=model,
        )

        return chat_completion.choices[0].message.content

    # Split encoded_graph into chunks
    graph_chunks = chunk_text(encoded_graph, size)

    # Process each chunk of encoded_graph (one request per chunk, in order)
    processed_graph_parts = [process_chunk(chunk) for chunk in graph_chunks]

    # Combine processed parts back into a single string
    processed_graph = ' '.join(processed_graph_parts)

    # Append query to the processed graph and process the final combined context
    context = processed_graph + "\n" + query
    return process_chunk(context)

## Zero-shot prompt

In [1]:
# Zero-shot prompt: task description followed by the graph schema (node label
# sets, relationship types and property names).
# NOTE(review): "WRITE_OWNER.sacl" is kept as written; every other ACL edge
# lists "isacl" - confirm against the database.
zero_prompt = """
Based on the following graph properties, generate detailed consistency rules (graph functional dependency and graph entity dependency).
Consider the structure, node information and relationships in the graph, and provide a set of rules that can be applied to maintain consistent 
and accurate data.

For each consistency rule you identify, provide a clear description of the rule and generate the corresponding Cypher query to check the 
number of nodes or relationships that satisfy the rule.
Your query should return the count of entities (nodes or relationships) that match the rule described. 
Provide the query in the format of valid Cypher syntax, simple and ready for execution in a Neo4j database.

Below is the input data:
Graph Information:

- Nodes: ["Group", "HighValue"], ["Group"], ["Domain", "HighValue"], ["GPO"], ["OU"], ["Computer"], ["User"]. 
- Relationships: "GENERIC_ALL", "WRITE_OWNER", "WRITE_DACL", "DC_SYNC", "GET_CHANGES", "GET_CHANGES_ALL", "ADMIN_TO", "OWNS", "GP_LINK", "CONTAINS", "HAS_SESSION", "MEMBER_OF", "EXECUTE_DCOM", "CAN_RDP", "ALLOWED_TO_DELEGATE", "GENERIC_WRITE". 
- Properties: ["Group", "HighValue"].name, ["Group", "HighValue"].domain, ["Group", "HighValue"].objectid,  ["Group", "HighValue"].neo4jImportId, ["Group", "HighValue"].highvalue, ["Group"].name, ["Group"].domain, ["Group"].objectid, ["Group"].neo4jImportId,  ["Domain", "HighValue"].name, ["Domain", "HighValue"].domain,  ["Domain", "HighValue"].neo4jImportId,  ["Domain", "HighValue"].highvalue,  ["GPO"].name,  ["GPO"].neo4jImportId,  ["GPO"].domain,  ["GPO"].objectid,  ["OU"].blocksInheritance,  ["OU"].name,  ["OU"].neo4jImportId, ["OU"].domain,  ["OU"].objectid,  ["Computer"].operatingsystem, ["Computer"].name,  ["Computer"].domain, ["Computer"].objectid, ["Computer"].neo4jImportId, ["Computer"].owned,  ["Computer"].enabled, ["User"].displayname,  ["User"].owned,  ["User"].enabled,  ["User"].pwdlastset, ["User"].neo4jImportId, ["User"].domain, ["User"].lastlogon, ["User"].name, ["User"].objectid,  ["User"].hasspn,  ["Group"].enabled,  ["Group"].pwdlastset, ["Group"].displayname, ["Group"].owned, ["Group"].lastlogon, GENERIC_ALL.isacl, WRITE_OWNER.sacl,  WRITE_DACL.isacl, DC_SYNC.isacl,  GET_CHANGES.isacl,  GET_CHANGES_ALL.isacl, OWNS.isacl,  CONTAINS.isacl,  GP_LINK.enforced,  GP_LINK.isacl, 
 GENERIC_WRITE.isacl.
"""

### LLAMA

In [None]:
import time
time_start = time.time()
# generator() takes (encoded_graph, query, model, size); the stray fifth
# positional argument that used to be passed here raised a TypeError.
rules_llama = generator(encoded_data, zero_prompt, "llama3-70b-8192", 8000)
time_end = time.time()
execution_time = time_end-time_start
print(f"Execution time: {execution_time}")

In [None]:
# Show the rules generated by the Llama model for the zero-shot prompt
print(rules_llama)

### Mixtral

In [None]:
time_start_z_mixtral = time.time()
# generator() takes (encoded_graph, query, model, size); the stray fifth
# positional argument that used to be passed here raised a TypeError.
rules_mixtral = generator(encoded_data, zero_prompt, "mixtral-8x7b-32768", 8000)
time_end_z_mixtral = time.time()
execution_time_z_mixtral = time_end_z_mixtral -time_start_z_mixtral
print(f"Execution time: {execution_time_z_mixtral}")

In [None]:
# Show the rules generated by the Mixtral model for the zero-shot prompt
print(rules_mixtral)

## Few-shot prompt

In [2]:
# Few-shot prompt: three example rules, the task description and the graph
# schema (node label sets, relationship types and property names).
# NOTE(review): "WRITE_OWNER.sacl" is kept as written; every other ACL edge
# lists "isacl" - confirm against the database.
few_shot_prompt = """
    Examples of consistency Rules:
    
    1. Unique `neo4jImportId` per node: Each node should have a unique `neo4jImportId` property
    2. Unique `objectid` per node: Each node should have a unique `objectid` property.
    3. Only allowed node labels are `User`, `Group`, `Domain`, `OU`, `GPO`, and `Computer`

    Task: Generate new rules to ensure consistency and accuracy in the graph database, considering all node types and relationships. 
    For each consistency rule you identify, provide a clear description of the rule and generate the corresponding Cypher query to check the 
    number of nodes or relationships that satisfy the rule.
    Your query should return the count of entities (nodes or relationships) that match the rule described. 
    Provide the query in the format of valid Cypher syntax, simple and ready for execution in a Neo4j database.

    Below is the input data:
    Graph Information:
    
    - Nodes: ["Group", "HighValue"], ["Group"], ["Domain", "HighValue"], ["GPO"], ["OU"], ["Computer"], ["User"]. 
    - Relationships: "GENERIC_ALL", "WRITE_OWNER", "WRITE_DACL", "DC_SYNC", "GET_CHANGES", "GET_CHANGES_ALL", "ADMIN_TO", "OWNS", "GP_LINK", "CONTAINS", "HAS_SESSION", "MEMBER_OF", "EXECUTE_DCOM", "CAN_RDP", "ALLOWED_TO_DELEGATE", "GENERIC_WRITE". 
    - Properties: ["Group", "HighValue"].name, ["Group", "HighValue"].domain, ["Group", "HighValue"].objectid,  ["Group", "HighValue"].neo4jImportId, ["Group", "HighValue"].highvalue, ["Group"].name, ["Group"].domain, ["Group"].objectid, ["Group"].neo4jImportId,  ["Domain", "HighValue"].name, ["Domain", "HighValue"].domain,  ["Domain", "HighValue"].neo4jImportId,  ["Domain", "HighValue"].highvalue,  ["GPO"].name,  ["GPO"].neo4jImportId,  ["GPO"].domain,  ["GPO"].objectid,  ["OU"].blocksInheritance,  ["OU"].name,  ["OU"].neo4jImportId, ["OU"].domain,  ["OU"].objectid,  ["Computer"].operatingsystem, ["Computer"].name,  ["Computer"].domain, ["Computer"].objectid, ["Computer"].neo4jImportId, ["Computer"].owned,  ["Computer"].enabled, ["User"].displayname,  ["User"].owned,  ["User"].enabled,  ["User"].pwdlastset, ["User"].neo4jImportId, ["User"].domain, ["User"].lastlogon, ["User"].name, ["User"].objectid,  ["User"].hasspn,  ["Group"].enabled,  ["Group"].pwdlastset, ["Group"].displayname, ["Group"].owned, ["Group"].lastlogon, GENERIC_ALL.isacl, WRITE_OWNER.sacl,  WRITE_DACL.isacl, DC_SYNC.isacl,  GET_CHANGES.isacl,  GET_CHANGES_ALL.isacl, OWNS.isacl,  CONTAINS.isacl,  GP_LINK.enforced,  GP_LINK.isacl, 
     GENERIC_WRITE.isacl.
"""

### LLAMA

In [None]:
time_start_fs_llama = time.time()
# generator() takes (encoded_graph, query, model, size); the stray fifth
# positional argument that used to be passed here raised a TypeError.
rules_fs_llama = generator(encoded_data, few_shot_prompt, "llama3-70b-8192", 8000)
time_end_fs_llama = time.time()
execution_time_fs_llama = time_end_fs_llama - time_start_fs_llama
print(f"Execution time: {execution_time_fs_llama}")

In [None]:
# Show the rules generated by the Llama model for the few-shot prompt
print(rules_fs_llama)

### Mixtral

In [None]:
time_start_fs_mixtral = time.time()
# generator() takes (encoded_graph, query, model, size); the stray fifth
# positional argument that used to be passed here raised a TypeError.
rules_fs_mixtral = generator(encoded_data, few_shot_prompt, "mixtral-8x7b-32768", 8000)
time_end_fs_mixtral = time.time()
execution_time_fs_mixtral = time_end_fs_mixtral - time_start_fs_mixtral
print(f"Execution time: {execution_time_fs_mixtral}")