In [26]:

# Step 1: Ensure the fraud detection database is set up in Neo4j Desktop with APOC and GDS plugins installed.
# Start (activate) the database before proceeding.

# Step 2: Install and import necessary libraries
from neo4j import GraphDatabase
from neo4j.exceptions import Neo4jError
import pandas as pd

# Step 3: Connect to the Neo4j database

# Replace 'your_password_here' with your actual Neo4j password
uri = "bolt://192.52.220.32:7687"  # Default URI for Neo4j
user = "neo4j"
password = "79327932"

driver = GraphDatabase.driver(uri, auth=(user, password))

from neo4j.exceptions import Neo4jError


def run_query(query, db="Assignment3"):
    with driver.session(database=db) as session:
        result = session.run(query)
        return [record.data() for record in result]



# Step 5: Create the SHARED_PII relationships between clients

query = '''
// Match clients that share the same PII (Email, Phone, or SSN)
MATCH (c1:Client)-[:HAS_EMAIL|:HAS_PHONE|:HAS_SSN]->(p)
MATCH (p)<-[:HAS_EMAIL|:HAS_PHONE|:HAS_SSN]-(c2:Client)
WHERE id(c1) < id(c2)
MERGE (c1)-[:SHARED_PII]->(c2)
'''

run_query(query)
print("SHARED_PII relationships created.")

# Step 6: Create the in-memory graph projection 'clientClusters'

query = '''
CALL gds.graph.project(
    'clientClusters',
    'Client',
    'SHARED_PII'
)
'''

run_query(query)
print("In-memory graph projection 'clientClusters' created.")

# Check the in-memory graphs

query = '''
CALL gds.graph.list()
'''

result = run_query(query)
df = pd.DataFrame(result)
df
# Step 7: Run the WCC algorithm to identify clusters of clients

query = '''
CALL gds.wcc.stream('clientClusters', {consecutiveIds: true})
YIELD nodeId, componentId
RETURN gds.util.asNode(nodeId).id AS clientId, componentId AS clusterId
'''

result = run_query(query)
df = pd.DataFrame(result)
df.head()  # Display the first few rows


# Step 8: Mark clients in clusters of size >= 2 as possibly belonging to a fraud ring

query = '''
CALL gds.wcc.stream('clientClusters', {consecutiveIds: true})
YIELD nodeId, componentId
WITH gds.util.asNode(nodeId) AS client, componentId AS clusterId
WITH clusterId, collect(client.id) AS clients
WITH clusterId, clients, size(clients) AS clusterSize
WHERE clusterSize >= 2
UNWIND clients AS clientId
MATCH (c:Client) WHERE c.id = clientId
SET c.secondPartyFraudRing = clusterId
'''

run_query(query)
print("Clients in clusters of size >= 2 have been marked.")


# Step 9: Create the bipartite graph using a Cypher projection

query = '''
// Find clients in fraud rings
MATCH (c:Client) WHERE c.secondPartyFraudRing IS NOT NULL
WITH collect(c) AS clients
// Find PII nodes
MATCH (n) WHERE n:Email OR n:Phone OR n:SSN
// Combine clients and PII nodes
WITH clients, collect(n) AS piis
WITH clients + piis AS nodes
// Relationships between clients and their PIIs
MATCH (c:Client)-[:HAS_EMAIL|:HAS_PHONE|:HAS_SSN]->(p)
WHERE c.secondPartyFraudRing IS NOT NULL
WITH nodes, collect({source: c, target: p}) AS relationships
// Create the 'similarity' graph using a Cypher projection
CALL gds.graph.project.cypher(
    'similarity',
    "UNWIND $nodes AS n RETURN id(n) AS id, labels(n) AS labels",
    "UNWIND $relationships AS r RETURN id(r['source']) AS source, id(r['target']) AS target, 'HAS_PII' AS type",
    {parameters: {nodes: nodes, relationships: relationships}}
)
YIELD graphName, nodeCount, relationshipCount
RETURN graphName, nodeCount, relationshipCount
'''

result = run_query(query)
df = pd.DataFrame(result)
df

# Step 10: Compute node similarity using the Node Similarity algorithm

query = '''
CALL gds.nodeSimilarity.mutate('similarity', {
    mutateProperty: 'jaccardScore',
    mutateRelationshipType: 'SIMILAR_TO',
    topK: 15
})
YIELD nodesCompared, relationshipsWritten
RETURN nodesCompared, relationshipsWritten
'''

result = run_query(query)
df = pd.DataFrame(result)
df

# Step 11: Write the 'SIMILAR_TO' relationships back to the database

query = '''
CALL gds.graph.writeRelationship('similarity', {
    writeRelationshipType: 'SIMILAR_TO',
    writeProperty: 'jaccardScore'
})
YIELD relationshipsWritten
RETURN relationshipsWritten
'''

result = run_query(query)
df = pd.DataFrame(result)
df


# Step 12: Compute degree centrality and write it to 'secondPartyFraudScore'

# Create a new graph projection that includes the 'SIMILAR_TO' relationships
query = '''
CALL gds.graph.project(
    'clientSimilarity',
    'Client',
    {
        SIMILAR_TO: {
            type: 'SIMILAR_TO',
            orientation: 'UNDIRECTED',
            properties: 'jaccardScore'
        }
    }
)
'''

run_query(query)
print("Graph 'clientSimilarity' created for degree centrality computation.")


# Compute degree centrality
query = '''
CALL gds.degree.write('clientSimilarity', {
    writeProperty: 'secondPartyFraudScore',
    relationshipWeightProperty: 'jaccardScore'
})
YIELD nodePropertiesWritten
RETURN nodePropertiesWritten
'''

result = run_query(query)
df = pd.DataFrame(result)
df

# Step 13: Label clients as 'SecondPartyFraudster' based on their fraud score

query = '''
MATCH (c:Client)
WHERE c.secondPartyFraudScore IS NOT NULL
WITH percentileCont(c.secondPartyFraudScore, 0.95) AS threshold
MATCH (c:Client)
WHERE c.secondPartyFraudScore > threshold
SET c:SecondPartyFraudster
'''

run_query(query)
print("Clients with high fraud scores have been labeled as 'SecondPartyFraudster'.")

# Step 14: List the names and IDs of potential fraudsters

query = '''
MATCH (c:Client:SecondPartyFraudster)
RETURN c.name AS name, c.id AS id
'''

result = run_query(query)
df = pd.DataFrame(result)
df

# Close the database connection
driver.close()




General error: The result is out of scope. The associated transaction has been closed. Results can only be used while the transaction is open.
SHARED_PII relationships created.
Neo4j error: {code: Neo.ClientError.Procedure.ProcedureNotFound} {message: There is no procedure with the name `gds.graph.project` registered for this database instance. Please ensure you've spelled the procedure name correctly and that the procedure is properly deployed.}
In-memory graph projection 'clientClusters' created.
Neo4j error: {code: Neo.ClientError.Procedure.ProcedureNotFound} {message: There is no procedure with the name `gds.graph.list` registered for this database instance. Please ensure you've spelled the procedure name correctly and that the procedure is properly deployed.}
Neo4j error: {code: Neo.ClientError.Procedure.ProcedureNotFound} {message: There is no procedure with the name `gds.wcc.stream` registered for this database instance. Please ensure you've spelled the procedure name correctly 