# Assignment_3_CS350

<ins>Objective</ins>: Analyze the fraud detection database for evidence of possible second-party fraud.

<ins>Create</ins>: Jupyter notebook for the analysis, in Python

<ins>Steps</ins>: For the list of steps required to complete this assignment, see the Class 20 slides; there are 14 steps.

## Step 4: Write the appropriate notebook cells that will allow you to connect to the Neo4j server and to use Neo4j GDS.

In [None]:
# Install required libraries
!pip install neo4j graphdatascience

In [None]:
from neo4j import GraphDatabase
from graphdatascience import GraphDataScience

import os

# Connection details. Each value can be overridden with an environment
# variable so credentials do not have to be edited into the notebook; the
# fallbacks are the values this notebook was originally written with.
bolt_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")  # Default Bolt URI
username = os.environ.get("NEO4J_USERNAME", "neo4j")             # Default username
password = os.environ.get("NEO4J_PASSWORD", "Koolook1")          # Replace with your password

# Connect to Neo4j: one plain driver for Cypher, one GDS client for algorithms
driver = GraphDatabase.driver(bolt_uri, auth=(username, password))
gds = GraphDataScience(bolt_uri, auth=(username, password))

# Test connection by running a trivial query and printing its single value.
# The broad except is deliberate: any failure is reported in the cell output.
try:
    with driver.session() as session:
        result = session.run("RETURN 'Connection successful!' AS message")
        print(result.single()["message"])
except Exception as e:
    print("Connection failed:", e)


## Step 5: Create the SHARED_PII relationship (between clients) and the resulting subgraph.

In [None]:
# Sample data to create Clients and SHARED_PII relationships
def create_shared_pii_relationships(session):
    """Create two sample Client nodes and link them with SHARED_PII.

    Every statement uses MERGE rather than CREATE, so running the cell more
    than once does not add duplicate nodes or duplicate relationships.

    Args:
        session: Object exposing ``run(query)``, e.g. a Neo4j session.
    """
    # Make sure both client nodes exist
    session.run("MERGE (c1:Client {id: 1, name: 'John Doe'})")
    session.run("MERGE (c2:Client {id: 2, name: 'Jane Smith'})")

    # Link them, reusing the relationship if it is already there
    session.run("""
        MATCH (c1:Client {id: 1}), (c2:Client {id: 2})
        MERGE (c1)-[:SHARED_PII]->(c2)
    """)

# Run the sample-data creation inside a session and report the outcome
try:
    with driver.session() as db_session:
        create_shared_pii_relationships(db_session)
        print("SHARED_PII relationship created between clients.")
except Exception as err:
    print("Error creating relationship:", err)


In [None]:
# Print the database schema as returned by Cypher
def visualize_schema(session):
    """Print every record produced by ``CALL db.schema.visualization()``.

    Args:
        session: Object whose ``run(query)`` returns an iterable of records.
    """
    schema_records = session.run("CALL db.schema.visualization()")
    for schema_record in schema_records:
        print(schema_record)

# Open a session, dump the schema, and report any failure
try:
    with driver.session() as db_session:
        visualize_schema(db_session)
except Exception as err:
    print("Error visualizing schema:", err)

## Step 6: Make the in-memory projection of the graph in STEP 5.

In [None]:
# Create the in-memory projection of the graph
def create_in_memory_projection(gds):
    """Project Client nodes and SHARED_PII relationships as 'clientClusters'.

    A projection already stored under that name is dropped first, so the
    cell can be re-run and always reflects the current database contents.

    Args:
        gds: GraphDataScience client used to manage the graph catalog.

    Returns:
        The projected graph object, or None when the projection failed
        (the error is printed rather than raised).
    """
    graph_name = "clientClusters"
    try:
        # Projecting under a name that is already in the catalog fails,
        # so remove the stale projection before creating a new one.
        if gds.graph.exists(graph_name)["exists"]:
            gds.graph.drop(gds.graph.get(graph_name))

        # project() returns a (graph object, projection statistics) pair
        graph, _ = gds.graph.project(
            graph_name,    # Name of the projection
            "Client",      # Node label to include (Client nodes)
            "SHARED_PII",  # Relationship type to include
        )
        print("In-memory projection 'clientClusters' created.")
        return graph
    except Exception as e:
        print("Error creating in-memory projection:", e)
        return None

# Execute the in-memory projection. The projection goes through the `gds`
# client, so no driver session needs to be opened here.
try:
    create_in_memory_projection(gds)
except Exception as e:
    print("Error executing projection:", e)


## Step 7: Use the WCC (Weakly Connected Components) algorithm, in stream mode, to identify clusters of Client nodes in the above projection graph.

In [None]:
import pandas as pd
from neo4j import GraphDatabase
from graphdatascience import GraphDataScience

# Connection details
# NOTE(review): the Step 4 cell connects to bolt://localhost:7687 with a
# different password. An in-memory projection lives on the server where it
# was created, so confirm these values point at the server used in Step 6.
bolt_uri = "bolt://localhost:7689"  # Adjust with your own URI
username = "neo4j" # Adjust to your own username
password = "password" # Adjust to your own password

# Initialize the Neo4j driver
driver = GraphDatabase.driver(bolt_uri, auth=(username, password))

# Initialize GraphDataScience instance
gds = GraphDataScience(bolt_uri, auth=(username, password))

# Run WCC algorithm in stream mode: one row per Client node, carrying the
# client's `id` property and the id of the component it belongs to
query = """
CALL gds.wcc.stream(
  'clientClusters', 
  {
    nodeLabels: ['Client'],
    relationshipTypes: ['SHARED_PII'],
    consecutiveIds: true
  }
)
YIELD nodeId, componentId
RETURN gds.util.asNode(nodeId).id AS clientId, componentId AS clusterId
"""

# Execute the query and convert results to Pandas DataFrame.
# The columns are named explicitly so that an empty result still yields a
# DataFrame with the expected clientId / clusterId columns.
result_columns = ["clientId", "clusterId"]
with driver.session() as session:
    result = session.run(query)
    data = [{column: record[column] for column in result_columns} for record in result]
    df = pd.DataFrame(data, columns=result_columns)

# Display the DataFrame
print(df)

## Step 8: Mark each client that belongs to a cluster of size at least 2 as possibly (not provably) belonging to a fraud ring.


In [None]:
from neo4j import GraphDatabase
from graphdatascience import GraphDataScience

# Connection details
# NOTE(review): the Step 4 cell connects to bolt://localhost:7687 with a
# different password. An in-memory projection lives on the server where it
# was created, so confirm these values point at the server used in Step 6.
bolt_uri = "bolt://localhost:7689"  # Adjust with your own URI
username = "neo4j"
password = "password"

# Initialize the Neo4j driver
driver = GraphDatabase.driver(bolt_uri, auth=(username, password))

# Initialize GraphDataScience instance
gds = GraphDataScience(bolt_uri, auth=(username, password))

# Step 8 - Mark clients in clusters with size >= 2.
# The query groups client ids by WCC component, keeps components with at
# least two members, and stores the component id on each member node as
# the `secondPartyFraudRing` property.
query = """
CALL gds.wcc.stream(
  'clientClusters', 
  {
    nodeLabels: ['Client'],
    relationshipTypes: ['SHARED_PII'],
    consecutiveIds: true
  }
)
YIELD nodeId, componentId
WITH gds.util.asNode(nodeId) AS clientId, componentId AS clusterId
WITH clusterId, collect(clientId.id) AS clients
WITH clusterId, clients, size(clients) AS clusterSize
WHERE clusterSize >= 2
UNWIND clients AS client
MATCH (c:Client) WHERE c.id = client
SET c.secondPartyFraudRing = clusterId
"""

# Execute the query to mark the clients. Consuming the result makes sure the
# write has fully run (and any error surfaces here) and yields the update
# counters used in the report below.
with driver.session() as session:
    summary = session.run(query).consume()

print("Clients in clusters of size >= 2 have been marked as possible fraudsters.")
print("Properties set:", summary.counters.properties_set)