# Graph-Based Visualization of Fentanyl Trafficking Networks using AWS Neptune

This notebook demonstrates two visualization styles:

1. **High-Weight Company Graph** – for strong linkages only.
2. **Threshold-Based Graph Explorer** – filtered and interactive.


## Visualization 1: High-Weight Company Graph
**Purpose**: Visualize companies with `Company_Evidence_Weight ≥ 3000`

**Output**: Compact network highlighting strong linkage relationships.

In [None]:
# Clear the Neptune graph by dropping every vertex.
# NOTE(review): send_gremlin is defined in the cells further down, so this cell
# raises NameError unless one of those cells has already been run in the kernel.
send_gremlin("g.V().drop()")


In [None]:
import pandas as pd
import requests
import json
from tqdm import tqdm

# ---- Connection and filter settings ----
# Minimum Company_Evidence_Weight (inclusive, the filter uses >=) a row must
# have to be loaded into the graph.
threshold = 3000
# Every Gremlin request body is sent as a JSON document.
headers = {"Content-Type": "application/json"}
# HTTPS Gremlin endpoint that receives the requests.
neptune_endpoint = "https://db-neptune-2.cluster-c8qttgkgfep5.us-east-1.neptune.amazonaws.com:8182/gremlin"

def send_gremlin(query, timeout=120):
    """Submit a Gremlin query string to the Neptune HTTP endpoint.

    Args:
        query: Gremlin traversal text; sent as the ``gremlin`` field of a
            JSON request body.
        timeout: Seconds to wait for Neptune before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive
            endpoint. The default is meant to line up with Neptune's default
            two-minute query timeout -- adjust if the cluster is configured
            differently.

    Returns:
        True when Neptune answers with HTTP 200; False on any other status
        code or when the request itself fails (connection error, timeout,
        ...). The response body is only printed on failure, never returned.
    """
    payload = {"gremlin": query}
    try:
        response = requests.post(
            neptune_endpoint,
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Only transport-level failures are swallowed; programming errors
        # (e.g. an undefined endpoint variable) are allowed to surface.
        print(f"❌ Exception: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ {query}\n{response.text}")
        return False
    return True

# ==== Load and Filter Dataset ====
# Every column read in the build loop below must hold a value: a NaN in a text
# column would break .strip(), and a NaN in a numeric column would break int().
required_columns = [
    "Base_Consolidation_Name",
    "Associated_Company_Name",
    "Linkage_Method",
    "Linkage_Value",
    "Company_Evidence_Weight",
    "Company_Evidence_Count",
    "Association_companies_Count",
]
df = pd.read_csv("Updated_Top_Companies_with_Evidence_Clean(in).csv")
df_above_threshold = df[df['Company_Evidence_Weight'] >= threshold]
df_filtered = df_above_threshold.dropna(subset=required_columns)
skipped_rows = len(df_above_threshold) - len(df_filtered)
if skipped_rows:
    print(f"⚠️ Skipped {skipped_rows} row(s) with missing values in required columns.")


def _gremlin_text(value):
    """Return *value* as text that can sit inside a single-quoted Gremlin literal.

    Surrounding whitespace is trimmed, backslashes are doubled so they cannot
    start an escape sequence (or swallow the closing quote), and single quotes
    are removed, as this notebook has always done.
    """
    return str(value).strip().replace("\\", "\\\\").replace("'", "")


# Queries that Neptune rejected or that could not be sent at all.
failed_queries = []


def _submit(query):
    """Send *query* through send_gremlin and record it if it did not succeed."""
    if not send_gremlin(query):
        failed_queries.append(query)


# ==== Reset Graph ====
print("🧹 Clearing existing Neptune graph...")
_submit("g.V().drop()")

# ==== Tracking Sets ====
base_companies = {}
associated_companies = {}
linkage_types = set()
linkage_values = {}
edges = set()

# ==== Build Nodes and Relationships ====
for _, row in df_filtered.iterrows():
    base = _gremlin_text(row['Base_Consolidation_Name'])
    assoc = _gremlin_text(row['Associated_Company_Name'])
    method = _gremlin_text(row['Linkage_Method'])
    value = _gremlin_text(row['Linkage_Value'])
    weight = int(row['Company_Evidence_Weight'])
    count = int(row['Company_Evidence_Count'])
    assoc_count = int(row['Association_companies_Count'])

    # A linkage value is only unique together with its method.
    lv_key = f"{method}::{value}"

    # Register unique nodes. Dict assignment means the last row seen for a
    # given name decides the properties that get uploaded.
    base_companies[base] = (base, weight, count, assoc_count)
    associated_companies[assoc] = (assoc, weight, count, assoc_count)
    linkage_types.add(method)
    linkage_values[lv_key] = (method, value)

    # The set removes repeated (source, label, target) triples.
    # NOTE(review): LinkageType vertices are keyed by method name only and are
    # therefore shared by all base companies, so a base -> type -> value ->
    # assoc walk can reach associated companies that came from other bases.
    edges.add((base, 'HAS_TYPE', method))
    edges.add((method, 'HAS_VALUE', lv_key))
    edges.add((lv_key, 'SHARED_WITH', assoc))

# All vertex inserts below are "get or create" keyed on the name property
# alone: if a vertex with that name already exists (under any label) it is
# reused and no new vertex is added.

# ==== Upload Base Company Nodes ====
for n, w, c, a in tqdm(base_companies.values(), desc="Base Companies"):
    q = (
        f"g.V().has('name', '{n}').fold().coalesce(unfold(), addV('BaseCompany')"
        f".property('name', '{n}').property('evidence_weight', {w})"
        f".property('evidence_count', {c}).property('assoc_count', {a}))"
    )
    _submit(q)

# ==== Upload Associated Company Nodes ====
for n, w, c, a in tqdm(associated_companies.values(), desc="Associated Companies"):
    q = (
        f"g.V().has('name', '{n}').fold().coalesce(unfold(), addV('AssociatedCompany')"
        f".property('name', '{n}').property('evidence_weight', {w})"
        f".property('evidence_count', {c}).property('assoc_count', {a}))"
    )
    _submit(q)

# ==== Upload Linkage Type Nodes ====
for lt in tqdm(linkage_types, desc="Linkage Types"):
    q = (
        f"g.V().has('name', '{lt}').fold().coalesce(unfold(), addV('LinkageType')"
        f".property('name', '{lt}'))"
    )
    _submit(q)

# ==== Upload Linkage Value Nodes ====
for lv_key, (method, val) in tqdm(linkage_values.items(), desc="Linkage Values"):
    q = (
        f"g.V().has('name', '{lv_key}').fold().coalesce(unfold(), addV('LinkageValue')"
        f".property('name', '{lv_key}').property('method', '{method}').property('value', '{val}'))"
    )
    _submit(q)

# ==== Upload All Edges (no duplicates) ====
for src, label, dst in tqdm(edges, desc="Edges"):
    q = f"g.V().has('name', '{src}').as('a').V().has('name', '{dst}').addE('{label}').from('a')"
    _submit(q)

# ==== Expand Full Paths ====
# NOTE(review): send_gremlin only reports success or failure, so the paths
# returned by these traversals are discarded here; the loop effectively checks
# that each 3-hop traversal executes.
print("📡 Expanding Base ➝ Type ➝ Value ➝ Assoc in Explorer...")
for base in tqdm(base_companies, desc="Auto-expanding Graph Explorer"):
    q = (
        f"g.V().hasLabel('BaseCompany').has('name', '{base}')"
        f".repeat(out().simplePath()).times(3).path()"
    )
    _submit(q)

# Only claim success when every request was accepted.
if failed_queries:
    print(f"\n⚠️ FINISHED WITH ERRORS: {len(failed_queries)} Gremlin request(s) failed; the graph may be incomplete.")
else:
    print("\n✅ COMPLETE: Neptune graph is fully interactive and properly hierarchical.")

## Visualization 2: Threshold-Based Graph Explorer
**Purpose**: Focus on companies with evidence weight ≥ 2000

**Output**: Interactive Neptune visualization for investigative navigation.

In [None]:
import pandas as pd
import requests
import json
from tqdm import tqdm

# ---- Connection and filter settings ----
# Minimum Company_Evidence_Weight (inclusive, the filter uses >=) a row must
# have to be loaded into the graph for this visualization.
threshold = 2000
# Every Gremlin request body is sent as a JSON document.
headers = {"Content-Type": "application/json"}
# HTTPS Gremlin endpoint that receives the requests.
neptune_endpoint = "https://db-neptune-2.cluster-c8qttgkgfep5.us-east-1.neptune.amazonaws.com:8182/gremlin"

def send_gremlin(query, timeout=120):
    """Submit a Gremlin query string to the Neptune HTTP endpoint.

    Args:
        query: Gremlin traversal text; sent as the ``gremlin`` field of a
            JSON request body.
        timeout: Seconds to wait for Neptune before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive
            endpoint. The default is meant to line up with Neptune's default
            two-minute query timeout -- adjust if the cluster is configured
            differently.

    Returns:
        True when Neptune answers with HTTP 200; False on any other status
        code or when the request itself fails (connection error, timeout,
        ...). The response body is only printed on failure, never returned.
    """
    payload = {"gremlin": query}
    try:
        response = requests.post(
            neptune_endpoint,
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Only transport-level failures are swallowed; programming errors
        # (e.g. an undefined endpoint variable) are allowed to surface.
        print(f"❌ Exception: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ {query}\n{response.text}")
        return False
    return True

# ==== Load and Filter Dataset ====
# Every column read in the build loop below must hold a value: a NaN in a text
# column would break .strip(), and a NaN in a numeric column would break int().
required_columns = [
    "Base_Consolidation_Name",
    "Associated_Company_Name",
    "Linkage_Method",
    "Linkage_Value",
    "Company_Evidence_Weight",
    "Company_Evidence_Count",
    "Association_companies_Count",
]
df = pd.read_csv("Updated_Top_Companies_with_Evidence_Clean(in).csv")
df_above_threshold = df[df['Company_Evidence_Weight'] >= threshold]
df_filtered = df_above_threshold.dropna(subset=required_columns)
skipped_rows = len(df_above_threshold) - len(df_filtered)
if skipped_rows:
    print(f"⚠️ Skipped {skipped_rows} row(s) with missing values in required columns.")


def _gremlin_text(value):
    """Return *value* as text that can sit inside a single-quoted Gremlin literal.

    Surrounding whitespace is trimmed, backslashes are doubled so they cannot
    start an escape sequence (or swallow the closing quote), and single quotes
    are removed, as this notebook has always done.
    """
    return str(value).strip().replace("\\", "\\\\").replace("'", "")


# Queries that Neptune rejected or that could not be sent at all.
failed_queries = []


def _submit(query):
    """Send *query* through send_gremlin and record it if it did not succeed."""
    if not send_gremlin(query):
        failed_queries.append(query)


# ==== Clear Existing Graph ====
print("🧹 Dropping old graph...")
_submit("g.V().drop()")

# ==== Track All Nodes and Edges ====
base_companies = {}
associated_companies = {}
linkage_types = set()
linkage_values = {}
edges = set()

# ==== Build Graph ====
for _, row in df_filtered.iterrows():
    base = _gremlin_text(row['Base_Consolidation_Name'])
    assoc = _gremlin_text(row['Associated_Company_Name'])
    method = _gremlin_text(row['Linkage_Method'])
    value = _gremlin_text(row['Linkage_Value'])
    weight = int(row['Company_Evidence_Weight'])
    count = int(row['Company_Evidence_Count'])
    assoc_count = int(row['Association_companies_Count'])

    # A linkage value is only unique together with its method.
    lv_key = f"{method}::{value}"

    # Dict assignment means the last row seen for a given name decides the
    # properties that get uploaded.
    base_companies[base] = (base, weight, count, assoc_count)
    associated_companies[assoc] = (assoc, weight, count, assoc_count)
    linkage_types.add(method)
    linkage_values[lv_key] = (method, value)

    # Edges: the set removes repeated (source, label, target) triples.
    # NOTE(review): LinkageType vertices are keyed by method name only and are
    # therefore shared by all base companies, so a base -> type -> value ->
    # assoc walk can reach associated companies that came from other bases.
    edges.add((base, 'HAS_TYPE', method))
    edges.add((method, 'HAS_VALUE', lv_key))
    edges.add((lv_key, 'SHARED_WITH', assoc))

# All vertex inserts below are "get or create" keyed on the name property
# alone: if a vertex with that name already exists (under any label) it is
# reused and no new vertex is added.

# ==== Insert Base Companies ====
for n, w, c, a in tqdm(base_companies.values(), desc="Base Companies"):
    q = (
        f"g.V().has('name', '{n}').fold().coalesce(unfold(), addV('BaseCompany')"
        f".property('name', '{n}').property('evidence_weight', {w})"
        f".property('evidence_count', {c}).property('assoc_count', {a}))"
    )
    _submit(q)

# ==== Insert Associated Companies ====
for n, w, c, a in tqdm(associated_companies.values(), desc="Associated Companies"):
    q = (
        f"g.V().has('name', '{n}').fold().coalesce(unfold(), addV('AssociatedCompany')"
        f".property('name', '{n}').property('evidence_weight', {w})"
        f".property('evidence_count', {c}).property('assoc_count', {a}))"
    )
    _submit(q)

# ==== Insert Linkage Types ====
for lt in tqdm(linkage_types, desc="Linkage Types"):
    q = (
        f"g.V().has('name', '{lt}').fold().coalesce(unfold(), addV('LinkageType')"
        f".property('name', '{lt}'))"
    )
    _submit(q)

# ==== Insert Linkage Values ====
for lv_key, (method, val) in tqdm(linkage_values.items(), desc="Linkage Values"):
    q = (
        f"g.V().has('name', '{lv_key}').fold().coalesce(unfold(), addV('LinkageValue')"
        f".property('name', '{lv_key}').property('method', '{method}').property('value', '{val}'))"
    )
    _submit(q)

# ==== Insert All Edges (deduped) ====
for src, label, dst in tqdm(edges, desc="Edges"):
    q = f"g.V().has('name', '{src}').as('a').V().has('name', '{dst}').addE('{label}').from('a')"
    _submit(q)

# ==== Push to Graph Explorer ====
# NOTE(review): send_gremlin only reports success or failure, so the paths
# returned by these traversals are discarded here; the loop effectively checks
# that each 3-hop traversal executes.
print("📡 Expanding Base ➝ Type ➝ Value ➝ Assoc...")
for base in tqdm(base_companies, desc="Graph Explorer Path Expansion"):
    q = (
        f"g.V().hasLabel('BaseCompany').has('name', '{base}')"
        f".repeat(out().simplePath()).times(3).path()"
    )
    _submit(q)

# Only claim success when every request was accepted, and report the threshold
# that was actually applied (the old message hard-coded "200").
if failed_queries:
    print(f"\n⚠️ FINISHED WITH ERRORS: {len(failed_queries)} Gremlin request(s) failed; the graph may be incomplete.")
else:
    print(f"\n✅ COMPLETE: Graph with threshold ≥ {threshold} is now live in Explorer with full expansion!")