## Trying neo4j instead of py2neo

In [None]:
import os

from neo4j import GraphDatabase

# Bolt endpoint of the Neo4j instance this notebook talks to.
uri = "bolt://localhost:7687"

# Credentials are read from the environment; each lookup yields None when the
# corresponding variable is not set.
user = os.environ.get("NEO4J_USER")
# NOTE(review): the variable name ends with an underscore ("NEO4J_PASSWORD_");
# confirm this matches the name actually exported in the environment.
password = os.environ.get("NEO4J_PASSWORD_")

# Shared driver object, reused by the query cells further down.
driver = GraphDatabase.driver(uri, auth=(user, password))

In [None]:
import pandas as pd

# Fetch the type and value of every NamedEntity node, sorted by type.
q = "match (n:NamedEntity) return n.type, n.value order by n.type"

# Run the query against the "spacyner" database with routing_="r" and unpack
# the result into its records, summary and keys.
records, summary, keys = driver.execute_query(q, routing_="r", database_="spacyner")

# One-line report: the query text, the number of records and the timing value
# taken from the summary.
print(
    f"The query `{summary.query}` returned {len(records)} records "
    f"in {summary.result_available_after} ms."
)

# Print each record converted to a plain dict.
for record in records:
    print(record.data())

# Tabular view of the same records with friendlier column names; the bare
# `df` on the last line lets the notebook render the frame.
df = pd.DataFrame(records, columns=["type", "value"])
df

## Clustering keywords and topic identification

In [None]:
# textrank-spacy
# NOTE(review): presumably the Keyword nodes used below come from a
# TextRank/spaCy keyword-extraction step -- confirm against the ingestion code.
#
# The three triple-quoted strings in this cell are Cypher kept for reference.
# Nothing in this cell passes them to the driver, so they are not executed
# from here.

# Step 1: create a virtual (projected) graph named 'keywordsGraph' from the
# knowledge graph. It contains the Keyword nodes and the CO_OCCUR relationships
# in their NATURAL orientation, and carries the 'normalizedWeight'
# relationship property.
"""
CALL gds.graph.project(
    'keywordsGraph',
    'Keyword',
    {
        CO_OCCUR: {
            orientation: 'NATURAL'
        }
    },
    {
        relationshipProperties: 'normalizedWeight'
    }
)
"""
# Step 2: reveal communities by running Louvain on 'keywordsGraph', weighted by
# 'normalizedWeight'. The result is written to the 'community' property, and
# the call yields nodePropertiesWritten, communityCount and modularity.
"""
CALL gds.louvain.write('keywordsGraph', {
    relationshipWeightProperty: 'normalizedWeight',
    writeProperty: 'community'
}) YIELD nodePropertiesWritten,  communityCount, modularity
RETURN nodePropertiesWritten,  communityCount, modularity
"""

# Step 3: list the communities and the top 25 keywords of each one.
# The community assigned to each keyword is stored as the `community` property
# of the Keyword node; it holds the identifier of the community.
# - weight = number of AnnotatedText nodes the keyword DESCRIBES; only keywords
#   with weight > 5 are kept.
# - rows are ordered by community and by weight (descending) before the
#   keywords are collected per community.
# - communities are returned largest first, each with its first 25 collected
#   members and its total member count.
"""
MATCH (k:Keyword)-[:DESCRIBES]->(text:AnnotatedText)
WITH k, count(text) as weight
WHERE weight > 5
with k.community as community, k.id as keyword, weight
order by community, weight desc
WITH community, collect(keyword) as communityMembers
order by size(communityMembers) desc
RETURN community as communityId, communityMembers[0..25] as topMembers, size(communityMembers) as size
"""