In [10]:
from rdflib import Graph, RDF, RDFS, OWL

# Build an empty RDF graph and populate it from the Turtle-serialised
# ontology file; `g` is the graph every later cell queries.
ontology_path = "../ontologies/sepses_logs.ttl"
g = Graph()
g.parse(ontology_path, format="turtle")

def get_local_name(uri):
    """Return the trailing fragment of a URI-like string.

    The local name is whatever follows the last "/" and, within that
    segment, the last "#". For example ".../log#Process" yields "Process".

    Args:
        uri: Value to shorten. Only ``str`` values are processed.

    Returns:
        The shortened name for strings containing "/" or "#"; any other
        value (non-strings, or strings without a separator) is returned
        unchanged.
    """
    # Non-string inputs are passed through untouched.
    if not isinstance(uri, str):
        return uri
    # Nothing to strip when neither separator is present.
    if "/" not in uri and "#" not in uri:
        return uri
    last_path_segment = uri.rsplit("/", 1)[-1]
    return last_path_segment.rsplit("#", 1)[-1]

# SPARQL query selecting every distinct subject typed as owl:Class or
# rdfs:Class. No PREFIX lines are declared in the query text itself.
query = """
    SELECT DISTINCT ?class WHERE {
        { ?class a owl:Class . }
        UNION
        { ?class a rdfs:Class . }
    }
"""

# Run the query and keep only the local name of each class URI
# (first column of every result row).
classes = []
for row in g.query(query):
    class_uri = str(row[0])
    classes.append(get_local_name(class_uri))

# Show the extracted class names, one per line.
for class_name in classes:
    print(class_name)

Executable
File
Host
IPAddress
LocalUser
Process
RootUser
Socket
SystemObject
SystemUser
User
Technique


In [11]:
# (domain, property, range) triples of local names, one per object property
# declaration found in the ontology graph `g`.
allowed_relations = []

# Iterate over every subject declared as an owl:ObjectProperty.
for prop in g.subjects(RDF.type, OWL.ObjectProperty):
    property_name = get_local_name(str(prop))

    # Collect *all* declared domains and ranges. Previously each extra
    # rdfs:domain / rdfs:range overwrote the one before it, so only the last
    # value in graph iteration order was kept and the rest were silently lost.
    # Sorting makes the emitted order deterministic when there are several.
    domains = sorted(get_local_name(str(d)) for d in g.objects(prop, RDFS.domain))
    ranges = sorted(get_local_name(str(r)) for r in g.objects(prop, RDFS.range))

    # Emit one relation per (domain, range) pair. A property lacking either
    # a domain or a range yields no pairs and is therefore skipped, as before;
    # empty local names are skipped too, matching the original truthiness check.
    for domain in domains:
        for range_ in ranges:
            if domain and range_:
                allowed_relations.append((domain, property_name, range_))

# Print extracted allowed relations
for relation in allowed_relations:
    print(relation)

('Process', 'forks', 'Process')
('Process', 'hasExe', 'Executable')
('Host', 'hasHostIP', 'IPAddress')
('Socket', 'hasSocketIP', 'IPAddress')
('Process', 'hasUser', 'User')
('File', 'isExecutedBy', 'Process')
('File', 'isReadBy', 'Process')
('Socket', 'isReceivedBy', 'Process')
('Process', 'originatesFrom', 'Host')
('SystemObject', 'provRel', 'SystemObject')
('Process', 'sends', 'Socket')
('Process', 'writes', 'File')


In [12]:
import os

from langchain_neo4j import Neo4jGraph

# Neo4j connection settings. `setdefault` only fills in a value when the
# variable is not already defined, so credentials exported in the shell (or
# injected by a secrets manager) are respected instead of being overwritten
# every time this cell runs. The fallbacks below are local-development
# placeholders only; do not put real credentials in the notebook.
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")
os.environ.setdefault("NEO4J_PASSWORD", "password")

# No connection arguments are passed here; presumably Neo4jGraph picks up the
# NEO4J_* variables set above (confirm against the langchain_neo4j docs).
# Schema refresh is disabled at construction time.
graph = Neo4jGraph(refresh_schema=False)

In [18]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_ollama import ChatOllama

# Chat model wrapper configured with the "qwen2.5-coder:7b" model name.
llm = ChatOllama(model="qwen2.5-coder:7b")

# Graph transformer constrained to the schema extracted from the ontology:
# node labels come from `classes`, relationship triples from
# `allowed_relations`.
llm_transformer = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=classes,
    allowed_relationships=allowed_relations,
)

In [19]:
import pandas as pd
from langchain_core.documents import Document

# Load the log sample; the "text" column holds one log line per row.
log_df = pd.read_csv("../data/test.csv")

# pandas represents empty CSV cells as float NaN, and str.join raises
# TypeError on any non-string item. Drop missing rows and coerce the rest to
# str so the join cannot fail on incomplete data; for a fully populated
# string column the result is unchanged.
log_lines = log_df["text"].dropna().astype(str)

# Wrap all log lines, newline-separated, in a single Document.
document = Document(page_content="\n".join(log_lines))

print(document.page_content)

2022-01-21 00:09:11 jhall/192.168.230.165:46011 TLS: soft reset sec=3308/3308 bytes=45748/-1 pkts=649/0
2022-01-21 00:09:11 jhall/192.168.230.165:46011 VERIFY OK: depth=1, C=AT, ST=Vienna, L=Vienna, O=Some Organisation GmbH, CN=OpenVPN CA, emailAddress=admin@organisation.cyberrange.at
2022-01-21 00:09:11 jhall/192.168.230.165:46011 VERIFY KU OK
2022-01-21 00:09:11 jhall/192.168.230.165:46011 Validating certificate extended key usage
2022-01-21 00:09:11 jhall/192.168.230.165:46011 ++ Certificate has EKU (str) TLS Web Client Authentication, expects TLS Web Client Authentication
2022-01-21 00:09:11 jhall/192.168.230.165:46011 VERIFY EKU OK
2022-01-21 00:09:11 jhall/192.168.230.165:46011 VERIFY OK: depth=0, CN=jhall
2022-01-21 00:09:11 jhall/192.168.230.165:46011 peer info: IV_VER=2.4.4
2022-01-21 00:09:11 jhall/192.168.230.165:46011 peer info: IV_PLAT=linux
2022-01-21 00:09:11 jhall/192.168.230.165:46011 peer info: IV_PROTO=2
2022-01-21 00:09:11 jhall/192.168.230.165:46011 peer info: IV_L

In [21]:
# Run the transformer over the single log document; the result is indexed
# below, one entry per input document.
graph_documents = llm_transformer.convert_to_graph_documents([document])

# Inspect what was extracted from the first (and only) document.
first_graph_document = graph_documents[0]
print(f"Nodes:{first_graph_document.nodes}")
print(f"Relationships:{first_graph_document.relationships}")

RemoteProtocolError: Server disconnected without sending a response.