In [None]:
# Install the Neo4j driver if it is not already available (uncomment the next line):
# %pip install neo4j

## Import

In [None]:
import json
import logging
import os

import pandas as pd
from neo4j import GraphDatabase

## Neo4j handler class (implements basic loading and shortest-path queries)

In [None]:
class Neo4jHandler:
    """
    Small wrapper around a Neo4j driver for loading and querying `Feedback` nodes.

    The handler stores the connection details, opens/closes the driver on demand,
    creates `Feedback` nodes and `SIMILAR_TO` relationships, and exports query
    results to JSON or CSV files.
    """

    def __init__(self, uri, username, password):
        """
        Initializes the Neo4jHandler object with the connection details.

        No connection is opened here; call `open_connection()` before querying.

        :param uri: The Neo4j Aura URI.
        :param username: Neo4j username.
        :param password: Neo4j password.
        """
        self.uri = uri
        self.username = username
        self.password = password
        self.driver = None

        # Set up logging
        self.logger = logging.getLogger("Neo4jHandler")
        self.logger.setLevel(logging.INFO)
        # logging.getLogger() returns the same logger object for a given name, so a
        # handler must only be attached once. Without this guard every new instance
        # (e.g. each re-run of the notebook cell) adds another handler and every
        # message is printed once per handler.
        if not self.logger.handlers:
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            ch.setFormatter(formatter)
            self.logger.addHandler(ch)

    def open_connection(self):
        """
        Opens the driver for the configured URI and verifies the server is reachable.

        A driver that is already open is closed first so it is not leaked.

        :raises Exception: whatever the driver raises when the URI, credentials or
            network connection are invalid; the half-opened driver is closed first.
        """
        if self.driver:
            self.close_connection()
        self.logger.info(f"Opening connection to Neo4j at {self.uri}")
        driver = GraphDatabase.driver(self.uri, auth=(self.username, self.password))
        try:
            # Creating the driver object alone does not prove the server is reachable;
            # verify before reporting success.
            driver.verify_connectivity()
        except Exception:
            driver.close()
            raise
        self.driver = driver
        self.logger.info("Connection opened successfully.")

    def close_connection(self):
        """Closes the connection to the Neo4j database (no-op if none is open)."""
        if self.driver:
            try:
                self.driver.close()
            finally:
                # Drop the reference so a closed driver is never reused by later calls.
                self.driver = None
            self.logger.info("Connection closed.")

    def _session(self):
        """
        Returns a new driver session.

        :raises RuntimeError: if `open_connection()` has not been called (or the
            connection has been closed).
        """
        if self.driver is None:
            raise RuntimeError("No open Neo4j connection; call open_connection() first.")
        return self.driver.session()

    def execute_query(self, session, query, parameters=None):
        """
        Helper function to execute Cypher queries.

        :param session: An open driver session.
        :param query: The Cypher query text.
        :param parameters: Optional dict of query parameters (defaults to empty).
        :return: The result object returned by `session.run`.
        """
        if parameters is None:
            parameters = {}
        self.logger.info(f"Executing query: {query}")
        return session.run(query, parameters)

    def create_feedback_nodes(self, id_ref, text_ref, id_similar_1, text_similar_1, id_similar_2, text_similar_2):
        """
        Create or merge three feedback nodes in Neo4j.

        Each node is merged on its `id`; `text` is only set when the node is created,
        so the text of an existing node is left untouched.

        :param id_ref: Id of the reference feedback.
        :param text_ref: Text of the reference feedback.
        :param id_similar_1: Id of the first similar feedback.
        :param text_similar_1: Text of the first similar feedback.
        :param id_similar_2: Id of the second similar feedback.
        :param text_similar_2: Text of the second similar feedback.
        :raises RuntimeError: if no connection is open.
        """
        query = """
        MERGE (f1:Feedback {id: $id_ref})
        ON CREATE SET f1.text = $text_ref
        MERGE (f2:Feedback {id: $id_similar_1})
        ON CREATE SET f2.text = $text_similar_1
        MERGE (f3:Feedback {id: $id_similar_2})
        ON CREATE SET f3.text = $text_similar_2
        """
        with self._session() as session:
            self.logger.info(f"Creating or merging feedback nodes for {id_ref}, {id_similar_1}, {id_similar_2}.")
            # consume() waits for the query to finish so that a server-side failure
            # is raised here, before the success message is logged.
            self.execute_query(session, query, {
                "id_ref": id_ref,
                "text_ref": text_ref,
                "id_similar_1": id_similar_1,
                "text_similar_1": text_similar_1,
                "id_similar_2": id_similar_2,
                "text_similar_2": text_similar_2
            }).consume()
            self.logger.info(f"Feedback nodes for {id_ref}, {id_similar_1}, {id_similar_2} created successfully.")

    def create_similarity_relationships(self, id_ref, id_similar_1, sim1, id_similar_2, sim2):
        """
        Create SIMILAR_TO relationships from the reference node to up to two others.

        A relationship is only written for a similar id that is truthy; the
        relationship's `score` is overwritten on every call.

        :param id_ref: Id of the reference feedback (source of both relationships).
        :param id_similar_1: Id of the first similar feedback, or a falsy value to skip.
        :param sim1: Score stored on the relationship to `id_similar_1`.
        :param id_similar_2: Id of the second similar feedback, or a falsy value to skip.
        :param sim2: Score stored on the relationship to `id_similar_2`.
        :raises RuntimeError: if no connection is open.
        """
        query_1 = """
        MATCH (f1:Feedback {id: $id_ref}), (f2:Feedback {id: $id_similar_1})
        MERGE (f1)-[r1:SIMILAR_TO]->(f2)
        SET r1.score = $sim1
        """
        query_2 = """
        MATCH (f1:Feedback {id: $id_ref}), (f3:Feedback {id: $id_similar_2})
        MERGE (f1)-[r2:SIMILAR_TO]->(f3)
        SET r2.score = $sim2
        """
        with self._session() as session:
            self.logger.info(f"Creating SIMILAR_TO relationships for {id_ref} with {id_similar_1} and {id_similar_2}.")
            if id_similar_1:
                self.execute_query(session, query_1, {
                    "id_ref": id_ref,
                    "id_similar_1": id_similar_1,
                    "sim1": sim1
                }).consume()
            if id_similar_2:
                self.execute_query(session, query_2, {
                    "id_ref": id_ref,
                    "id_similar_2": id_similar_2,
                    "sim2": sim2
                }).consume()
            self.logger.info(f"SIMILAR_TO relationships for {id_ref} created successfully.")

    def find_shortest_path(self, start_id, min_score=0.7):
        """
        Find shortest SIMILAR_TO paths (at most 2 hops, either direction) from a node.

        Only paths on which every relationship has `score > min_score` are returned.

        :param start_id: Id of the starting Feedback node.
        :param min_score: Exclusive lower bound for every relationship score on the
            path (default 0.7).
        :return: List of dicts as produced by `path_to_dict`, one per path.
        :raises RuntimeError: if no connection is open.
        """
        query = """
        MATCH (start:Feedback {id: $start_id})
        MATCH path = shortestPath((start)-[:SIMILAR_TO*..2]-(other))
        WHERE other <> start AND ALL(rel IN relationships(path) WHERE rel.score > $min_score)
        RETURN path
        """
        with self._session() as session:
            self.logger.info(f"Finding shortest path for start ID {start_id}.")
            result = self.execute_query(session, query, {"start_id": start_id, "min_score": min_score})
            # Records must be read while the session is still open.
            paths = [self.path_to_dict(record["path"]) for record in result]
            self.logger.info(f"Found {len(paths)} shortest paths for start ID {start_id}.")
            return paths

    def path_to_dict(self, path):
        """
        Convert the path to a dictionary suitable for JSON export.

        :param path: Object exposing iterable `nodes` and `relationships`, each item
            convertible with `dict(...)`.
        :return: `{"nodes": [...], "relationships": [...]}` with one dict per item.
        """
        return {
            "nodes": [dict(node) for node in path.nodes],
            "relationships": [dict(rel) for rel in path.relationships],
        }

    def save_paths_to_json(self, paths, filename="shortest_paths.json"):
        """
        Save paths to a JSON file.

        :param paths: List of path dicts (as returned by `find_shortest_path`).
        :param filename: Destination file; overwritten if it exists.
        """
        self.logger.info(f"Saving {len(paths)} paths to JSON file {filename}.")
        with open(filename, "w") as json_file:
            json.dump(paths, json_file, indent=4)
        self.logger.info(f"Paths saved to {filename}.")

    def get_feedback_data(self, start_id):
        """
        Get the feedback texts connected to a node within 1-2 SIMILAR_TO hops.

        For each connected node and path length, the score is the largest sum of
        relationship scores over the paths of that length. Rows are ordered by
        distance ascending, then score descending.

        :param start_id: Id of the starting Feedback node.
        :return: List of dicts with keys `feedbackText`, `distance` and `score`.
        :raises RuntimeError: if no connection is open.
        """
        query = """
        MATCH (start:Feedback {id: $start_id})
        MATCH path = (start)-[:SIMILAR_TO*1..2]-(connected)
        WHERE connected <> start

        WITH connected, relationships(path) AS rels, length(path) AS dist

        WITH connected, dist, reduce(totalScore = 0.0, r IN rels | totalScore + r.score) AS sumScore

        WITH connected, dist, max(sumScore) AS maxScore

        RETURN connected.text AS feedbackText, dist AS distance, maxScore AS score
        ORDER BY distance ASC, score DESC
        """
        with self._session() as session:
            self.logger.info(f"Getting feedback data for start ID {start_id}.")
            result = self.execute_query(session, query, {"start_id": start_id})
            # Records must be read while the session is still open.
            result_list = [
                {
                    "feedbackText": record["feedbackText"],
                    "distance": record["distance"],
                    "score": record["score"]
                }
                for record in result
            ]
            self.logger.info(f"Retrieved {len(result_list)} feedback records for start ID {start_id}.")
            return result_list

    def save_feedback_to_csv(self, feedback_data, filename="feedback_paths.csv"):
        """
        Save feedback data to a CSV file.

        :param feedback_data: List of record dicts (as returned by `get_feedback_data`).
        :param filename: Destination file; overwritten if it exists.
        """
        self.logger.info(f"Saving {len(feedback_data)} feedback records to CSV file {filename}.")
        df = pd.DataFrame(feedback_data)
        df.to_csv(filename, index=False)
        self.logger.info(f"Feedback data saved to {filename}.")

## Example usage:

In [None]:
# Initialize the Neo4jHandler with your credentials.
# The values are read from the NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD environment
# variables so real secrets never have to be typed into (or saved with) the notebook.
# The placeholders are only fallbacks that keep this cell usable as a template.
neo4j_handler = Neo4jHandler(
    uri=os.environ.get("NEO4J_URI", "neo4j+s://<your_aura_instance_url>"),
    username=os.environ.get("NEO4J_USERNAME", "<your_username>"),
    password=os.environ.get("NEO4J_PASSWORD", "<your_password>"),
)

In [None]:
# Open the connection to the database.
# Run this before any cell below: every query method needs the driver it creates.
neo4j_handler.open_connection()

## (Artificial) node and relationship creation

In [None]:
# Seed three sample Feedback nodes, then link the reference node ("1478.0")
# to the two others with SIMILAR_TO relationships carrying a score.
neo4j_handler.create_feedback_nodes(
    id_ref="1478.0",
    text_ref="Text for 1478",
    id_similar_1="11456.0",
    text_similar_1="Text for 11456",
    id_similar_2="1145678.0",
    text_similar_2="Text for 1145678",
)
neo4j_handler.create_similarity_relationships(
    id_ref="1478.0",
    id_similar_1="11456.0",
    sim1=0.85,
    id_similar_2="1145678.0",
    sim2=0.90,
)

## Shortest path (path and text extraction)

In [None]:
# Query the shortest SIMILAR_TO paths starting at one feedback ID and export them as JSON.
# NOTE(review): this start ID is not one of the sample nodes created in the cell above, so
# on a database containing only those samples the result is an empty list — presumably it
# refers to data that is already loaded; confirm.
paths = neo4j_handler.find_shortest_path(start_id="119929630.0")
neo4j_handler.save_paths_to_json(paths=paths, filename="shortest_paths.json")

In [None]:
# Collect the feedback texts reachable within 1-2 SIMILAR_TO hops of one feedback ID
# (with distance and score) and export them as CSV.
feedback_data = neo4j_handler.get_feedback_data(start_id="119929630.0")
neo4j_handler.save_feedback_to_csv(feedback_data=feedback_data, filename="feedback_paths.csv")

In [None]:
# Close the connection (close_connection does nothing if no driver was opened)
neo4j_handler.close_connection()