In [9]:
import json
import pandas as pd
from py2neo import Graph, Node, Relationship

In [13]:
from neo4j import GraphDatabase

# Define connection details
uri = "bolt://localhost:7687"
username = "neo4j"
password = "logneo123"  # Replace with the password you set

# Create a driver instance
driver = GraphDatabase.driver(uri, auth=(username, password))

# Function to test connection
def test_connection():
    with driver.session() as session:
        result = session.run("RETURN 'Connection Successful!' AS message")
        for record in result:
            print(record["message"])

# Run the test function
if __name__ == "__main__":
    test_connection()

# Close the driver when done
driver.close()


Connection Successful!


In [10]:
# Connect to Neo4j
from py2neo import Graph

graph = Graph("bolt://localhost:7687", auth=("neo4j", "qwertyuiop"))

In [11]:
import os
# Function to parse a single log entry
def parse_log_entry(log_entry):
    parts = log_entry.split(',', 3)
    timestamp, message_code, event_id, payload = parts[0], parts[1], parts[2], parts[3]
    payload_json = json.loads(payload)
    b64_payload = json.loads(payload_json['b64Payload'])
    return {
        'EventId': payload_json['EventId'],
        'EventTime': payload_json['EventTime'],
        'IngestTime': payload_json['IngestTime'],
        'RequestId': b64_payload['requestId'],
        'SourceIp': b64_payload['sourceIp'],
        'HttpMethod': b64_payload['httpMethod'],
        'HttpUrl': b64_payload['httpUrl'],
        'HttpAuth': b64_payload['httpAuth'],
        'HttpAuthHash': b64_payload['httpAuthHash'],
        'Resource': b64_payload['resource'],
        'ResourceClass': b64_payload['resourceClass'],
        'ResourceMethod': b64_payload['resourceMethod'],
        'Organization': b64_payload['organization'],
        'App': b64_payload['app'],
        'User': b64_payload['user'],
        'Entity': b64_payload['entity'],
        'TimestampReq': b64_payload['timestamp_req'],
        'TimestampResp': b64_payload['timestamp_resp']
    }

# Function to process a single log file
def process_log_file(file_path):
    parsed_data_list = []
    with open(file_path, 'r') as file:
        for line in file:
            parsed_data = parse_log_entry(line.strip())
            parsed_data_list.append(parsed_data)
    return parsed_data_list

# Function to process all log files in the given directory
def process_log_directory(log_directory):
    all_parsed_data = []
    for root, dirs, files in os.walk(log_directory):
        for file in files:
            if file.endswith('.log'):
                file_path = os.path.join(root, file)
                parsed_data = process_log_file(file_path)
                all_parsed_data.extend(parsed_data)
    return all_parsed_data

# Main execution
log_directory = '/Users/omkarsadekar/Documents/NEU Study Material/NEU Study Material/XN Project/server_logs_customer'  
all_parsed_data = process_log_directory(log_directory)

# Example: print out the first 5 parsed log entries
for entry in all_parsed_data[:5]:
    print(entry)

{'EventId': '1TQ24VGTQ0QF1HVN7Q3EVR0RO', 'EventTime': '2022-12-01T09:48:02.82901', 'IngestTime': '2022-12-01T09:48:55.0758696Z', 'RequestId': '1TQ24VGTQ0QF1HVN7Q3EVR0RO', 'SourceIp': '3.224.122.159', 'HttpMethod': 'GET', 'HttpUrl': 'https://api.docdigitizer.com/api/v1/documents/7ad28068-6f38-4c1b-8c59-2207dd50b485', 'HttpAuth': 'API_KEY 9ffb', 'HttpAuthHash': '74dd77154e7f3618b72e47c501265152', 'Resource': 'DocumentResource.get', 'ResourceClass': 'DocumentResource.get', 'ResourceMethod': 'DocumentResource.get', 'Organization': 'f00716b5-e120-4699-b827-791d6e2a2565', 'App': 'f00716b5-e120-4699-b827-791d6e2a2565_app', 'User': None, 'Entity': 'f00716b5-e120-4699-b827-791d6e2a2565_app', 'TimestampReq': '2022-12-01 09:48:02.573556', 'TimestampResp': '2022-12-01 09:48:02.829010'}
{'EventId': '1F3XGJX8M29G66C91A4R1GS2Y', 'EventTime': '2022-12-01T09:48:03.435022', 'IngestTime': '2022-12-01T09:48:55.0758723Z', 'RequestId': '1F3XGJX8M29G66C91A4R1GS2Y', 'SourceIp': '80.172.211.18', 'HttpMethod': 

# Graph Model Description
## Nodes
### Event
**Properties:**
- `EventId`: Unique identifier for the event.
- `EventTime`: Timestamp when the event occurred.
- `IngestTime`: Timestamp when the event was ingested.
### Request
**Properties:**
- `RequestId`: Unique identifier for the request.
- `SourceIp`: IP address from which the request originated.
- `HttpMethod`: HTTP method used (e.g., GET, POST).
- `HttpUrl`: URL accessed by the request.
- `HttpAuth`: Authentication method used.
- `HttpAuthHash`: Hash of the authentication method.
- `TimestampReq`: Timestamp when the request was made.
- `TimestampResp`: Timestamp when the response was made.
#### Organization
**Properties:**
- `OrganizationId`: Unique identifier for the organization.
#### App
**Properties:**
- `AppId`: Unique identifier for the app.
#### User
**Properties:**
- `UserId`: Unique identifier for the user.
#### Entity
**Properties:**
- `EntityId`: Unique identifier for the entity.
### Relationships
### GENERATED_BY
- **From:** Event
- **To:** Request
- **Description:** Indicates that an event was generated by a specific request.
### BELONGS_TO
- **From:** Request
- **To:** Organization
- **Description:** Indicates that a request belongs to a specific organization.
### USED_BY
- **From:** Request
- **To:** App
- **Description:** Indicates that a request was made by a specific app.
### INITIATED_BY
- **From:** Request
- **To:** User
- **Description:** Indicates that a request was initiated by a specific user.
### ASSOCIATED_WITH
- **From:** Request
- **To:** Entity
- **Description:** Indicates that a request is associated with a specific entity.

In [18]:
# Function to create nodes and relationships in Neo4j
def populate_neo4j(data):
    with driver.session() as session:
        for entry in data:
            session.write_transaction(create_nodes_and_relationships, entry)

def create_nodes_and_relationships(tx, entry):
    # Create Event node
    tx.run("""
    MERGE (e:Event {EventId: $EventId})
    SET e.EventTime = $EventTime, e.IngestTime = $IngestTime
    """, entry)

    # Create Request node
    tx.run("""
    MERGE (r:Request {RequestId: $RequestId})
    SET r.SourceIp = $SourceIp, r.HttpMethod = $HttpMethod, r.HttpUrl = $HttpUrl,
        r.HttpAuth = $HttpAuth, r.HttpAuthHash = $HttpAuthHash, r.TimestampReq = $TimestampReq, 
        r.TimestampResp = $TimestampResp
    """, entry)

    # Conditionally create Organization node if OrganizationId is not null
    if entry.get("Organization"):
        tx.run("""
        MERGE (o:Organization {OrganizationId: $Organization})
        """, {"Organization": entry["Organization"]})

    # Conditionally create App node if AppId is not null
    if entry.get("App"):
        tx.run("""
        MERGE (a:App {AppId: $App})
        """, {"App": entry["App"]})

    # Conditionally create User node if UserId is not null
    if entry.get("User"):
        tx.run("""
        MERGE (u:User {UserId: $User})
        """, {"User": entry["User"]})

    # Conditionally create Entity node if EntityId is not null
    if entry.get("Entity"):
        tx.run("""
        MERGE (e:Entity {EntityId: $Entity})
        """, {"Entity": entry["Entity"]})

    # Create relationships
    query = """
    MATCH (e:Event {EventId: $EventId})
    MATCH (r:Request {RequestId: $RequestId})
    MERGE (e)-[:GENERATED_BY]->(r)
    """
    if entry.get("Organization"):
        query += """
        WITH r
        MATCH (o:Organization {OrganizationId: $Organization})
        MERGE (r)-[:BELONGS_TO]->(o)
        """
    if entry.get("App"):
        query += """
        WITH r
        MATCH (a:App {AppId: $App})
        MERGE (r)-[:USED_BY]->(a)
        """
    if entry.get("User"):
        query += """
        WITH r
        MATCH (u:User {UserId: $User})
        MERGE (r)-[:INITIATED_BY]->(u)
        """
    if entry.get("Entity"):
        query += """
        WITH r
        MATCH (e:Entity {EntityId: $Entity})
        MERGE (r)-[:ASSOCIATED_WITH]->(e)
        """
    tx.run(query, entry)
# Main execution
log_directory = '/Users/omkarsadekar/Documents/NEU Study Material/NEU Study Material/XN Project/server_logs_customer'
all_parsed_data = process_log_directory(log_directory)

# Populate Neo4j with parsed data
populate_neo4j(all_parsed_data)

# # Close the driver when done
# driver.close()

  with driver.session() as session:
  session.write_transaction(create_nodes_and_relationships, entry)


KeyboardInterrupt: 

In [14]:
# Define the app_id you want to query
app_id = 'lara.andrade-org_app'  # Replace with the actual AppId you want to query

# Cypher query
query = """
MATCH (u:User)-[:INITIATED_BY]->(r:Request)-[:USED_BY]->(a:App {AppId: $app_id})
RETURN u.UserId AS UserId
"""

# Execute the query and get the results
results = graph.run(query, app_id=app_id).data()

print(results)

[]
