In [None]:
from neo4j import GraphDatabase
import os
import pandas as pd
import json

In [None]:
# Directory where the edge lists (.tsv) created by the visibility-graph algorithm are stored.
# Set the HEALTHY_ASLEEP_DIR environment variable to point at another location;
# the original local path is kept as the fallback so existing setups keep working.
dir_healthy_a = os.environ.get('HEALTHY_ASLEEP_DIR', 'D:/Documents/Projects/Nodes/healthyAsleep')

In [None]:
# Connecting to the Neo4j server.
# URI, user and password can be supplied through the NEO4J_URI / NEO4J_USER /
# NEO4J_PASSWORD environment variables so real credentials do not have to be
# written into the notebook; the previous literals remain as fallbacks.
uri = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
driver = GraphDatabase.driver(
    uri,
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "your-password"),
    ),
)
def get_nodes_count(tx):
    """Return the total number of nodes, queried through the given transaction.

    Args:
        tx: object exposing ``run(query)``; the result must provide ``single()``
            whose return value provides ``value()``.
    """
    count_record = tx.run("MATCH (n) RETURN COUNT(n)").single()
    return count_record.value()

# Sanity check: open a session, count the nodes through get_nodes_count and
# print the result.
with driver.session() as session:
    nodes_count = session.read_transaction(get_nodes_count)
    print(f"Number of nodes: {nodes_count}")

In [None]:
# function to create the edges in the graph
def _create_edges(tx, edges):
    """Create (or reuse) the two nodes and the relationship described by each edge.

    This is a plain module-level function: it is handed directly to the
    session's transaction runner, so it must be callable as-is (a bare
    ``staticmethod`` object is not callable before Python 3.10).

    Args:
        tx: transaction object exposing ``run(query, **parameters)``.
        edges: iterable of indexable items where item[0] is the source node
            name, item[1] the relationship type and item[2] the target node name.
    """
    for edge in edges:
        # Relationship types cannot be sent as Cypher parameters, so the type is
        # backtick-quoted (embedded backticks doubled) rather than spliced in raw.
        rel_type = str(edge[1]).replace("`", "``")
        query = (
            "MERGE (a:Node {name: $source}) "
            "MERGE (b:Node {name: $target}) "
            f"MERGE (a)-[:`{rel_type}`]->(b)"
        )
        # Node names travel as query parameters: quotes in a name can neither
        # break the statement nor inject Cypher. They are stored as strings,
        # exactly as the quoted literals were before.
        tx.run(query, source=str(edge[0]), target=str(edge[2]))

In [None]:
def create_graph_from_edges(edges):
    """Write the given edges to the database inside a single write transaction.

    Args:
        edges: passed unchanged to ``_create_edges``.
    """
    with driver.session() as db_session:
        db_session.write_transaction(_create_edges, edges)

In [None]:
# main code to create the graph in the database

class GraphLoader:
    """Small helper that owns a Neo4j driver and loads edge lists through it."""

    def __init__(self, uri, user, password):
        """Open a driver for ``uri`` authenticated with ``user`` / ``password``."""
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self._driver.close()

    def create_graph_from_edges(self, edges):
        """Write all ``edges`` to the database inside one write transaction."""
        with self._driver.session() as session:
            session.write_transaction(self._create_edges, edges)

    @staticmethod
    def _create_edges(tx, edges):
        """Create (or reuse) the two nodes and the relationship described by each edge.

        Args:
            tx: transaction object exposing ``run(query, **parameters)``.
            edges: iterable of indexable items where item[0] is the source node
                name, item[1] the relationship type and item[2] the target node name.
        """
        for edge in edges:
            # Relationship types cannot be sent as Cypher parameters, so the type
            # is backtick-quoted (embedded backticks doubled) rather than spliced in raw.
            rel_type = str(edge[1]).replace("`", "``")
            query = (
                "MERGE (a:Node {name: $source}) "
                "MERGE (b:Node {name: $target}) "
                f"MERGE (a)-[:`{rel_type}`]->(b)"
            )
            # Node names travel as query parameters: quotes in a name can neither
            # break the statement nor inject Cypher. They are stored as strings,
            # exactly as the quoted literals were before.
            tx.run(query, source=str(edge[0]), target=str(edge[2]))

# Use the GraphLoader.
# Connection settings come from NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD when
# those environment variables are set, so the real password does not have to
# live in the notebook; the previous literals remain as fallbacks.
loader = GraphLoader(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "123456789"),
)


In [None]:
# Generic helper: run a Cypher query and return the single metric it yields.
def get_metric(query):
    """Run ``query`` and return the first column of its single result record.

    Args:
        query: Cypher text, passed to ``session.run`` unchanged.
    """
    with driver.session() as db_session:
        only_record = db_session.run(query).single()
        return only_record[0]

# Number of edges and nodes 

In [None]:
def get_number_of_nodes_and_edges():
    """Count the nodes and the relationships currently stored in the database.

    Returns:
        tuple: ``(number_of_nodes, number_of_relationships)`` as returned by
        ``get_metric`` for the two counting queries.
    """
    node_count_cypher = """
    MATCH (n) 
    RETURN COUNT(n) AS NumberOfNodes;
    """
    edge_count_cypher = """
    MATCH ()-[r]->() 
    RETURN COUNT(r) AS NumberOfRelationships;
    """

    # Nodes are counted first, then relationships.
    node_total = get_metric(node_count_cypher)
    edge_total = get_metric(edge_count_cypher)
    return node_total, edge_total

# List of Node Degrees

In [None]:
def get_node_degrees():
    """Return each node's internal id together with its degree.

    Returns:
        list[dict]: one ``{"node_id": ..., "degree": ...}`` entry per record, in
        the order produced by the query (``ORDER BY degree DESC``).
    """
    degree_cypher = """
        MATCH (n)
        RETURN ID(n) AS node_id, SIZE([(n)--() | 1]) AS degree
        ORDER BY degree DESC;
        """
    degree_rows = []
    with driver.session() as db_session:
        # Records are consumed while the session is still open.
        for record in db_session.run(degree_cypher):
            degree_rows.append({"node_id": record["node_id"], "degree": record["degree"]})
    return degree_rows




# Number of triangles

In [None]:
def get_number_of_triangles():
    """Return the number of triangles (3-cycles) in the graph.

    The ``ID(a) < ID(b) < ID(c)`` filter fixes one ordering of the three
    vertices, so every triangle already produces exactly one row (assuming at
    most one relationship per node pair). The row count is therefore the
    triangle count and must not be divided any further: an integer division by
    3 would turn a single triangle into 0.

    Returns:
        The value of the ``numberOfTriangles`` column as returned by ``get_metric``.
    """
    number_of_triangles_query = """
    MATCH (a)-[r1]-(b)-[r2]-(c)-[r3]-(a)
    WHERE ID(a) < ID(b) AND ID(b) < ID(c)
    RETURN count(*) AS numberOfTriangles;
    """

    return get_metric(number_of_triangles_query)


# clustering Index

In [None]:
def get_clustering_index():
    """Return the average local clustering coefficient over all nodes.

    For every node ``a`` the query counts
      * ``degree``: its ``SEES`` relationships to other nodes, and
      * ``triangles``: the triangles that pass through ``a``; the
        ``ID(b) < ID(c)`` filter keeps one of the two mirror-image matches.
    The local coefficient is ``2 * triangles / (degree * (degree - 1))``; nodes
    with fewer than two neighbours contribute 0 so that they are part of the
    average instead of silently disappearing from it.

    Node ids are compared through ``ID()`` because nodes themselves are not
    ordered by the ``<`` operator.

    Returns:
        The value of the ``clustering_coefficient`` column as returned by
        ``get_metric`` (``AVG`` yields null when there are no nodes).
    """
    clustering_index_query = """
    MATCH (a)
    WITH a, SIZE([(a)-[:SEES]-(d) WHERE d <> a | 1]) AS degree
    OPTIONAL MATCH (a)-[:SEES]-(b)-[:SEES]-(c)-[:SEES]-(a)
    WHERE ID(b) < ID(c)
    WITH a, degree, COUNT(c) AS triangles
    RETURN AVG(
        CASE
            WHEN degree < 2 THEN 0.0
            ELSE 2.0 * triangles / (degree * (degree - 1))
        END
    ) AS clustering_coefficient
    """

    return get_metric(clustering_index_query)



# Average path length

In [None]:
def get_average_path_length():
    """Return the mean shortest-path length over node pairs.

    The query pairs nodes with ``id(a) < id(b)`` and searches shortest paths of
    at most 100 hops between them; the average of those lengths is returned
    via ``get_metric``.
    """
    path_length_cypher = """
    MATCH (a), (b)
    WHERE id(a) < id(b)
    MATCH p=shortestPath((a)-[*..100]-(b))
    WITH length(p) AS pathLength
    RETURN avg(pathLength) AS average_path_length;
    """

    return get_metric(path_length_cypher)

# Diameter 

In [None]:
def get_diameter():
    """Return the longest shortest-path length found between node pairs.

    The query pairs nodes with ``id(a) < id(b)`` and searches shortest paths of
    at most 100 hops between them; the maximum of those lengths is returned
    via ``get_metric``.
    """
    diameter_cypher = """
    MATCH (a), (b)
    WHERE id(a) < id(b)
    MATCH p=shortestPath((a)-[*..100]-(b))
    RETURN max(length(p)) AS diameter;
    """

    return get_metric(diameter_cypher)


# Assortativity 

In [None]:
def fetch_degree_data(tx):
    """Fetch, for every matched node pair, both node ids and both degrees.

    Args:
        tx: object exposing ``run(query)`` whose result provides ``data()``.

    Returns:
        Whatever ``data()`` returns for the query; the query's columns are
        ``Node1``, ``Node2``, ``degreeN`` and ``degreeM``.
    """
    degree_pairs_cypher = """
        MATCH (n)
        WITH n, SIZE([(n)--() | 1]) AS degreeN
        MATCH (n)-[]-(m)
        WITH n, m, degreeN, SIZE([(m)--() | 1]) AS degreeM
        RETURN id(n) AS Node1, id(m) AS Node2, degreeN, degreeM
    """
    query_result = tx.run(degree_pairs_cypher)
    return query_result.data()

In [None]:
def get_assortativity():
    """Compute the degree assortativity of the graph currently in the database.

    The coefficient is the Pearson correlation between the ``degreeN`` and
    ``degreeM`` values of the rows returned by ``fetch_degree_data``.

    Returns:
        float | None: the coefficient, or ``None`` when it is undefined, i.e.
        when no rows were returned or the degrees on either side do not vary
        (the correlation would require a division by zero).
    """
    with driver.session() as session:
        data = session.read_transaction(fetch_degree_data)

    assortativity = None
    if data:
        n = len(data)
        mean_product = sum(row["degreeN"] * row["degreeM"] for row in data) / n
        mean_degree1 = sum(row["degreeN"] for row in data) / n
        mean_degree2 = sum(row["degreeM"] for row in data) / n
        variance1 = sum(row["degreeN"] ** 2 for row in data) / n - mean_degree1 ** 2
        variance2 = sum(row["degreeM"] ** 2 for row in data) / n - mean_degree2 ** 2

        numerator = mean_product - mean_degree1 * mean_degree2
        variance_product = variance1 * variance2
        # A non-positive product means at least one side has no spread, so the
        # correlation is undefined; leave the result as None instead of raising.
        if variance_product > 0:
            assortativity = numerator / variance_product ** 0.5

    print(f"Assortativity: {assortativity}")
    return assortativity

In [None]:
# Fetch the degree rows once and keep them in the notebook-level `data`
# variable, which the following cell reads.
with driver.session() as session:
    data = session.read_transaction(fetch_degree_data)


In [None]:
# Compute assortativity as the Pearson correlation between the `degreeN` and
# `degreeM` values of the rows held in `data`.
degree_product_sum = degree1_sum = degree2_sum = 0
degree1_square_sum = degree2_square_sum = 0

# Accumulate the raw sums needed for means, variances and the cross term.
for row in data:
    degree1, degree2 = row["degreeN"], row["degreeM"]

    degree_product_sum = degree_product_sum + degree1 * degree2
    degree1_sum = degree1_sum + degree1
    degree2_sum = degree2_sum + degree2
    degree1_square_sum = degree1_square_sum + degree1**2
    degree2_square_sum = degree2_square_sum + degree2**2

n = len(data)
avg_degree_product = degree_product_sum / n
avg_degree1 = degree1_sum / n
avg_degree2 = degree2_sum / n

# Covariance over the square root of the product of both variances.
numerator = avg_degree_product - avg_degree1 * avg_degree2
denominator = (
    (degree1_square_sum/n - avg_degree1**2) * (degree2_square_sum/n - avg_degree2**2)
)**0.5

assortativity = numerator / denominator

print(f"Assortativity: {assortativity}")

# main functionality

### Generating a graph from a list of edges

In [None]:
def delete_all(tx):
    """Remove every node, together with its relationships, via the given transaction.

    Args:
        tx: object exposing ``run(query)``.
    """
    wipe_cypher = "MATCH (n) DETACH DELETE n"
    tx.run(wipe_cypher)

In [None]:
# List all .tsv files in the specified directory (sorted, so that the order of
# the results does not depend on the file system)
tsv_files = sorted(f for f in os.listdir(dir_healthy_a) if f.endswith('.tsv'))

graph_dic_list = []

# Process each .tsv file: load it into Neo4j, compute its metrics, then empty
# the database again so the next file starts from a clean graph.
for tsv_file in tsv_files:
    # Read the .tsv file using pandas
    df = pd.read_csv(os.path.join(dir_healthy_a, tsv_file), sep='\t')
    # Build the (source, "SEES", target) triples column-wise; each value keeps
    # the text form of its own column instead of going through per-row Series.
    edges = [
        (source, "SEES", target)
        for source, target in zip(df['source'].astype(str), df['target'].astype(str))
    ]

    try:
        # The loader is deliberately not closed inside the loop: closing it here
        # would leave every following iteration with a closed driver.
        loader.create_graph_from_edges(edges)

        num_of_nodes, num_of_edges = get_number_of_nodes_and_edges()
        print(num_of_nodes, num_of_edges)
        graph_dic = {
            "file_name": tsv_file,
            "num_of_nodes": num_of_nodes,
            "num_of_edges": num_of_edges,
            # An empty edge list yields an empty graph; avoid dividing by zero.
            "avg_degree": num_of_edges * 2 / num_of_nodes if num_of_nodes else 0.0,
            "node_degrees": sorted(get_node_degrees(), key=lambda x: x["node_id"]),
            "num_of_triangles": get_number_of_triangles(),
            "clustering_index": get_clustering_index(),
            "assortativity": get_assortativity(),
            "diameter": get_diameter(),
            "average_path_length": get_average_path_length()
        }

        print(tsv_file)
        graph_dic_list.append(graph_dic)
    finally:
        # Always empty the database, even when a metric failed, so that a
        # leftover graph can never leak into the next file's numbers.
        with driver.session() as session:
            session.write_transaction(delete_all)
            print("nodes deleted")
    print(get_number_of_nodes_and_edges())

In [None]:
# Manual cleanup: delete every node and relationship, then print the node and
# relationship counts to check the result.
with driver.session() as session:
        session.write_transaction(delete_all)
        print("nodes deleted")
print(get_number_of_nodes_and_edges())

In [None]:
# Write the collected per-file metrics (`graph_dic_list`) to a single JSON file
# in the current working directory.
with open('healthyAsleep_dic.json', 'w') as file:
    json.dump(graph_dic_list, file)


## Generating the json files with descriptive analytics for each graph/time-series

In [None]:
# Re-list the .tsv edge files found in `dir_healthy_a`.
tsv_files = [f for f in os.listdir(dir_healthy_a) if f.endswith('.tsv')]

In [None]:
# Single-file version of the analysis.
# NOTE: `tsv_file` is not assigned in this cell; it must already hold the name
# of the file to analyse.
graph_dic_list = []
# Read the .tsv file using pandas
df = pd.read_csv(os.path.join(dir_healthy_a, tsv_file), sep='\t')
# Build the (source, "SEES", target) triples column-wise; each value keeps the
# text form of its own column instead of going through per-row Series.
edges = [
    (source, "SEES", target)
    for source, target in zip(df['source'].astype(str), df['target'].astype(str))
]

# The loader is left open so this cell can be re-run; close it once, at the
# very end of the session, when no more graphs will be loaded.
loader.create_graph_from_edges(edges)


num_of_nodes, num_of_edges = get_number_of_nodes_and_edges()
print(num_of_nodes, num_of_edges)
graph_dic = {
        "file_name": tsv_file,
        "num_of_nodes": num_of_nodes,
        "num_of_edges": num_of_edges,
        # An empty edge list yields an empty graph; avoid dividing by zero.
        "avg_degree": num_of_edges * 2 / num_of_nodes if num_of_nodes else 0.0,
        "node_degrees": sorted(get_node_degrees(), key=lambda x: x["node_id"]),
        "num_of_triangles": get_number_of_triangles(),
        "clustering_index": get_clustering_index(),
        "assortativity": get_assortativity(),
        "diameter": get_diameter(),
        "average_path_length": get_average_path_length()
    }
print(tsv_file)
graph_dic_list.append(graph_dic)

with driver.session() as session:
    session.write_transaction(delete_all)
    print("nodes deleted")
print(get_number_of_nodes_and_edges())


# Get a list of dictionaries with each node and its degree.
# No graph object `G` is defined in this notebook, so reuse the degrees that
# were fetched from Neo4j above, before the graph was deleted.
result = graph_dic["node_degrees"]

In [None]:
# Save `result` as JSON; the output file name is derived from the current
# value of `tsv_file`.
with open(f'{tsv_file}_test.json', 'w') as file:
    json.dump(result, file)


In [None]:
# Save `graph_dic_list` as JSON.
# NOTE(review): the f-string has no placeholders, and the file name does not
# match the healthyAsleep data directory used above - confirm the intended name.
with open(f'congestiveestivosAsleep_diclist_test.json', 'w') as file:
    json.dump(graph_dic_list, file)
