# 📘 Graph-Based Visualization of Fentanyl Trafficking Networks using AWS Neptune

This notebook demonstrates three visualization styles:

1. **High-Weight Company Graph** – for strong linkages only.
2. **Threshold-Based Graph Explorer** – filtered and interactive.
3. **Evidence-Tiered Multi-Layer Graph** – hierarchical by weight class.

## 1️⃣ Visualization 1: High-Weight Company Graph
**Purpose**: Visualize companies with `Company_Evidence_Weight ≥ 3000`

**Output**: Compact network highlighting strong linkage relationships.

In [None]:
# Drop every vertex currently stored in the graph.
# NOTE(review): send_gremlin is only defined in a later cell of this notebook,
# so that cell must already have been executed or this call raises NameError.
send_gremlin("g.V().drop()")


In [None]:
import pandas as pd
import requests
import json
from tqdm import tqdm

# ==== Neptune Config ====
# Gremlin HTTP endpoint that send_gremlin POSTs every query to.
neptune_endpoint = "https://db-neptune-2.cluster-c8qttgkgfep5.us-east-1.neptune.amazonaws.com:8182/gremlin"
# Header sent with every request; the request body is a JSON document.
headers = {"Content-Type": "application/json"}
threshold = 3000  # Rows are kept when Company_Evidence_Weight >= this value

def send_gremlin(query, timeout=(5, 120)):
    """Submit one Gremlin query string to the Neptune HTTP endpoint.

    Args:
        query: Gremlin traversal text; it is sent as the ``gremlin`` field of
            a JSON request body.
        timeout: ``requests`` timeout, either a number of seconds or a
            ``(connect, read)`` tuple. Without one, ``requests`` waits
            forever, so a single stalled connection would hang the whole
            load. NOTE(review): the read value is deliberately generous for
            slow queries such as a full graph drop; confirm it against the
            cluster's own query timeout.

    Returns:
        True when the endpoint answered with HTTP 200, otherwise False. The
        response body is not returned; failures are printed, never raised.
    """
    payload = {"gremlin": query}
    try:
        response = requests.post(
            neptune_endpoint,
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Connection errors and timeouts are reported but do not abort the load.
        print(f"❌ Exception: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ {query}\n{response.text}")
        return False
    return True

# ==== Load and Filter Dataset ====
# Keep rows at or above the weight threshold that also carry an evidence count.
df = pd.read_csv("Updated_Top_Companies_with_Evidence_Clean(in).csv")
meets_threshold = df['Company_Evidence_Weight'] >= threshold
df_filtered = df[meets_threshold].dropna(subset=["Company_Evidence_Count"])

# ==== Reset Graph ====
print("🧹 Clearing existing Neptune graph...")
send_gremlin("g.V().drop()")

# ==== Tracking Sets ====
# Dicts are keyed by vertex name, so a later row for the same name overwrites
# the stored attributes while the first-seen ordering is kept.
base_companies, associated_companies = {}, {}
linkage_types, linkage_values = set(), {}
edges = set()


def _clean(text):
    """Trim surrounding whitespace, then remove single quotes.

    The result is embedded inside single-quoted Gremlin string literals.
    """
    return text.strip().replace("'", "")


def _company_query(label, name, weight, count, assoc_count):
    """Return the create-if-missing query for a company vertex of `label`."""
    return (
        f"g.V().has('name', '{name}').fold().coalesce(unfold(), addV('{label}')"
        f".property('name', '{name}').property('evidence_weight', {weight})"
        f".property('evidence_count', {count}).property('assoc_count', {assoc_count}))"
    )


# ==== Build Nodes and Relationships ====
for _, record in df_filtered.iterrows():
    base, assoc, method, value = (
        _clean(record[column])
        for column in (
            'Base_Consolidation_Name',
            'Associated_Company_Name',
            'Linkage_Method',
            'Linkage_Value',
        )
    )
    weight, count, assoc_count = (
        int(record[column])
        for column in (
            'Company_Evidence_Weight',
            'Company_Evidence_Count',
            'Association_companies_Count',
        )
    )

    # A linkage value is only unique together with its method.
    lv_key = f"{method}::{value}"

    # Register unique nodes
    base_companies[base] = (base, weight, count, assoc_count)
    associated_companies[assoc] = (assoc, weight, count, assoc_count)
    linkage_types.add(method)
    linkage_values[lv_key] = (method, value)

    # The set keeps one copy of each Base -> Type -> Value -> Assoc hop.
    edges.update((
        (base, 'HAS_TYPE', method),
        (method, 'HAS_VALUE', lv_key),
        (lv_key, 'SHARED_WITH', assoc),
    ))

# ==== Upload Base Company Nodes ====
for n, w, c, a in tqdm(base_companies.values(), desc="Base Companies"):
    send_gremlin(_company_query('BaseCompany', n, w, c, a))

# ==== Upload Associated Company Nodes ====
for n, w, c, a in tqdm(associated_companies.values(), desc="Associated Companies"):
    send_gremlin(_company_query('AssociatedCompany', n, w, c, a))

# ==== Upload Linkage Type Nodes ====
for lt in tqdm(linkage_types, desc="Linkage Types"):
    send_gremlin(
        f"g.V().has('name', '{lt}').fold().coalesce(unfold(), addV('LinkageType')"
        f".property('name', '{lt}'))"
    )

# ==== Upload Linkage Value Nodes ====
for lv_key, (method, val) in tqdm(linkage_values.items(), desc="Linkage Values"):
    send_gremlin(
        f"g.V().has('name', '{lv_key}').fold().coalesce(unfold(), addV('LinkageValue')"
        f".property('name', '{lv_key}').property('method', '{method}').property('value', '{val}'))"
    )

# ==== Upload All Edges (no duplicates) ====
# Both endpoints are located through their 'name' property.
for src, label, dst in tqdm(edges, desc="Edges"):
    send_gremlin(
        f"g.V().has('name', '{src}').as('a').V().has('name', '{dst}').addE('{label}').from('a')"
    )

# ==== Expand Full Paths ====
# send_gremlin only reports success or failure, so the returned paths
# themselves are not kept on the Python side.
print("📡 Expanding Base ➝ Type ➝ Value ➝ Assoc in Explorer...")
for base in tqdm(base_companies, desc="Auto-expanding Graph Explorer"):
    send_gremlin(
        f"g.V().hasLabel('BaseCompany').has('name', '{base}')"
        f".repeat(out().simplePath()).times(3).path()"
    )

print("\n✅ COMPLETE: Neptune graph is fully interactive and properly hierarchical.")

## 2️⃣ Visualization 2: Threshold-Based Graph Explorer
**Purpose**: Focus on companies with `Company_Evidence_Weight ≥ 2000`

**Output**: Interactive Neptune visualization for investigative navigation.

In [None]:
import pandas as pd
import requests
import json
from tqdm import tqdm

# ==== Neptune Config ====
# Gremlin HTTP endpoint that send_gremlin POSTs every query to.
neptune_endpoint = "https://db-neptune-2.cluster-c8qttgkgfep5.us-east-1.neptune.amazonaws.com:8182/gremlin"
# Header sent with every request; the request body is a JSON document.
headers = {"Content-Type": "application/json"}
threshold = 2000  # Rows are kept when Company_Evidence_Weight >= this value

def send_gremlin(query, timeout=(5, 120)):
    """Submit one Gremlin query string to the Neptune HTTP endpoint.

    Args:
        query: Gremlin traversal text; it is sent as the ``gremlin`` field of
            a JSON request body.
        timeout: ``requests`` timeout, either a number of seconds or a
            ``(connect, read)`` tuple. Without one, ``requests`` waits
            forever, so a single stalled connection would hang the whole
            load. NOTE(review): the read value is deliberately generous for
            slow queries such as a full graph drop; confirm it against the
            cluster's own query timeout.

    Returns:
        True when the endpoint answered with HTTP 200, otherwise False. The
        response body is not returned; failures are printed, never raised.
    """
    payload = {"gremlin": query}
    try:
        response = requests.post(
            neptune_endpoint,
            headers=headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Connection errors and timeouts are reported but do not abort the load.
        print(f"❌ Exception: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ {query}\n{response.text}")
        return False
    return True

# ==== Load and Filter Dataset ====
# Keep rows at or above the weight threshold that also carry an evidence count.
df = pd.read_csv("Updated_Top_Companies_with_Evidence_Clean(in).csv")
df_filtered = df[df['Company_Evidence_Weight'] >= threshold].dropna(subset=["Company_Evidence_Count"])

# ==== Clear Existing Graph ====
print("🧹 Dropping old graph...")
send_gremlin("g.V().drop()")

# ==== Track All Nodes and Edges ====
# Dicts are keyed by vertex name, so a later row for the same name overwrites
# the stored attributes; the sets de-duplicate linkage types and edges.
base_companies = {}
associated_companies = {}
linkage_types = set()
linkage_values = {}
edges = set()

# ==== Build Graph ====
for _, row in df_filtered.iterrows():
    # Single quotes are removed because every value is embedded inside a
    # single-quoted Gremlin string literal below.
    base = row['Base_Consolidation_Name'].strip().replace("'", "")
    assoc = row['Associated_Company_Name'].strip().replace("'", "")
    method = row['Linkage_Method'].strip().replace("'", "")
    value = row['Linkage_Value'].strip().replace("'", "")
    weight = int(row['Company_Evidence_Weight'])
    count = int(row['Company_Evidence_Count'])
    assoc_count = int(row['Association_companies_Count'])

    # A linkage value is only unique together with its method.
    lv_key = f"{method}::{value}"

    base_companies[base] = (base, weight, count, assoc_count)
    associated_companies[assoc] = (assoc, weight, count, assoc_count)
    linkage_types.add(method)
    linkage_values[lv_key] = (method, value)

    # Edges: Base -> Type -> Value -> Associated company
    edges.add((base, 'HAS_TYPE', method))
    edges.add((method, 'HAS_VALUE', lv_key))
    edges.add((lv_key, 'SHARED_WITH', assoc))

# ==== Insert Base Companies ====
# fold()/coalesce() creates the vertex only when no vertex has that name yet.
for n, w, c, a in tqdm(base_companies.values(), desc="Base Companies"):
    q = f"g.V().has('name', '{n}').fold().coalesce(unfold(), addV('BaseCompany')" \
        f".property('name', '{n}').property('evidence_weight', {w})" \
        f".property('evidence_count', {c}).property('assoc_count', {a}))"
    send_gremlin(q)

# ==== Insert Associated Companies ====
for n, w, c, a in tqdm(associated_companies.values(), desc="Associated Companies"):
    q = f"g.V().has('name', '{n}').fold().coalesce(unfold(), addV('AssociatedCompany')" \
        f".property('name', '{n}').property('evidence_weight', {w})" \
        f".property('evidence_count', {c}).property('assoc_count', {a}))"
    send_gremlin(q)

# ==== Insert Linkage Types ====
for lt in tqdm(linkage_types, desc="Linkage Types"):
    q = f"g.V().has('name', '{lt}').fold().coalesce(unfold(), addV('LinkageType')" \
        f".property('name', '{lt}'))"
    send_gremlin(q)

# ==== Insert Linkage Values ====
for lv_key, (method, val) in tqdm(linkage_values.items(), desc="Linkage Values"):
    q = f"g.V().has('name', '{lv_key}').fold().coalesce(unfold(), addV('LinkageValue')" \
        f".property('name', '{lv_key}').property('method', '{method}').property('value', '{val}'))"
    send_gremlin(q)

# ==== Insert All Edges (deduped) ====
# Both endpoints are located through their 'name' property.
for src, label, dst in tqdm(edges, desc="Edges"):
    q = f"g.V().has('name', '{src}').as('a').V().has('name', '{dst}').addE('{label}').from('a')"
    send_gremlin(q)

# ==== Push to Graph Explorer ====
# send_gremlin only reports success or failure, so the returned paths
# themselves are not kept on the Python side.
print("📡 Expanding Base ➝ Type ➝ Value ➝ Assoc...")
for base in tqdm(base_companies, desc="Graph Explorer Path Expansion"):
    q = f"g.V().hasLabel('BaseCompany').has('name', '{base}')" \
        f".repeat(out().simplePath()).times(3).path()"
    send_gremlin(q)

# Report the threshold actually applied instead of a hard-coded number.
print(f"\n✅ COMPLETE: Graph with threshold ≥ {threshold} is now live in Explorer with full expansion!")

## 3️⃣ Visualization 3: Evidence-Tiered Multi-Layer Graph
**Purpose**: Categorize companies based on evidence weight ranges

**Output**: Hierarchical graph with 3 tiers:
- Tier 1: `< 2000`
- Tier 2: `2000 - 2999`
- Tier 3: `3000+`

In [None]:
import pandas as pd
import requests
import json
import time
from tqdm import tqdm

# === Neptune Config ===
# Gremlin HTTP endpoint that send_gremlin POSTs every query to.
neptune_endpoint = "https://db-neptune-2.cluster-c8qttgkgfep5.us-east-1.neptune.amazonaws.com:8182/gremlin"
# Header sent with every request; the request body is a JSON document.
headers = {"Content-Type": "application/json"}

# === Dataset Load ===
df = pd.read_csv("Updated_Top_Companies_with_Evidence_Clean(in).csv")
# Only rows lacking an evidence count are discarded; no weight threshold is
# applied in this cell.
df = df.dropna(subset=["Company_Evidence_Count"])

# === Create Evidence Tiers ===
# Left-closed bins [-inf, 2000), [2000, 3000), [3000, inf) line up exactly with
# the labels. Right-closed bins starting at 0 would leave a weight of 0
# without a tier (NaN) and would file a fractional weight such as 1999.5
# under "2000 - 2999".
df['Weight_Tier'] = pd.cut(
    df['Company_Evidence_Weight'],
    bins=[float('-inf'), 2000, 3000, float('inf')],
    labels=["< 2000", "2000 - 2999", "3000+"],
    right=False,
)

# === Gremlin Sender ===
def send_gremlin(query, delay=0.1, retries=3):
    """POST a Gremlin query to Neptune, trying again after a failure.

    Args:
        query: Gremlin traversal text, sent as the ``gremlin`` field of a
            JSON request body.
        delay: Seconds slept after every unsuccessful attempt.
        retries: Total number of attempts made before giving up.

    Returns:
        True as soon as one attempt gets HTTP 200; False when every attempt
        failed. Errors are printed rather than raised.
    """
    for _attempt in range(retries):
        try:
            reply = requests.post(
                neptune_endpoint,
                headers=headers,
                data=json.dumps({"gremlin": query}),
                timeout=10,
            )
            if reply.status_code != 200:
                print(f"Error {reply.status_code}: {reply.text}")
            else:
                return True
        except Exception as e:
            print("Exception:", e)
        # Reached only when this attempt did not succeed.
        time.sleep(delay)
    return False

# === Drop Graph ===
send_gremlin("g.V().drop()")


def _upsert_vertex(label, name, properties=""):
    """Return a query that creates the vertex `name` only if it is missing.

    Edges below locate their endpoints by the 'name' property alone, so two
    vertices sharing a name would each receive a copy of every edge. The
    fold()/coalesce() form keeps one vertex per name.

    Args:
        label: Vertex label used when the vertex has to be created.
        name: Value of the 'name' property (already stripped of quotes).
        properties: Extra ``.property(...)`` steps appended on creation.
    """
    return (
        f"g.V().has('name', '{name}').fold()"
        f".coalesce(unfold(), addV('{label}').property('name', '{name}'){properties})"
    )


# === Track Nodes ===
tiers, base_nodes, assoc_nodes = set(), {}, {}
linkage_types, linkage_values, edges = set(), {}, set()

for _, row in df.iterrows():
    tier = row['Weight_Tier']
    tiers.add(tier)

    # Single quotes are removed because every value is embedded inside a
    # single-quoted Gremlin string literal below.
    base = row['Base_Consolidation_Name'].strip().replace("'", "")
    assoc = row['Associated_Company_Name'].strip().replace("'", "")
    method = row['Linkage_Method'].strip().replace("'", "")
    value = row['Linkage_Value'].strip().replace("'", "")
    # A linkage value is only unique together with its method.
    lv_key = f"{method}::{value}"

    weight = int(row['Company_Evidence_Weight'])
    count = int(row['Company_Evidence_Count'])
    assoc_count = int(row['Association_companies_Count'])

    base_nodes[(tier, base)] = (base, weight, count, assoc_count)
    assoc_nodes[(tier, assoc)] = (assoc, weight, count, assoc_count)
    linkage_types.add((tier, method))
    linkage_values[(tier, lv_key)] = (method, value)

    # Tier -> Base edges are NOT collected here: tier vertices are named
    # 'Evidence: <tier>', so a name lookup on the bare tier label would match
    # nothing. They are created next to the base-company insert instead.
    edges.add((base, 'HAS_TYPE', method))
    edges.add((method, 'HAS_VALUE', lv_key))
    edges.add((lv_key, 'SHARED_WITH', assoc))

# === Insert Nodes ===
for tier in tiers:
    send_gremlin(_upsert_vertex('EvidenceTier', f"Evidence: {tier}"))

for (tier, name), (n, w, c, a) in tqdm(base_nodes.items(), desc="Base Companies"):
    send_gremlin(_upsert_vertex(
        'BaseCompany', n,
        f".property('evidence_weight', {w}).property('evidence_count', {c}).property('assoc_count', {a})"))
    # One HAS_BASE edge per (tier, base) pair.
    send_gremlin(f"g.V().has('name', 'Evidence: {tier}').as('t')" +
                 f".V().has('name', '{n}').addE('HAS_BASE').from('t')")

for (tier, name), (n, w, c, a) in tqdm(assoc_nodes.items(), desc="Associated Companies"):
    send_gremlin(_upsert_vertex(
        'AssociatedCompany', n,
        f".property('evidence_weight', {w}).property('evidence_count', {c}).property('assoc_count', {a})"))

# The same method or value can occur under several tiers; the upsert turns the
# repeated inserts into no-ops instead of duplicate vertices.
for (tier, method) in tqdm(linkage_types, desc="Linkage Types"):
    send_gremlin(_upsert_vertex('LinkageType', method))

for (tier, lv_key), (method, val) in tqdm(linkage_values.items(), desc="Linkage Values"):
    send_gremlin(_upsert_vertex(
        'LinkageValue', lv_key,
        f".property('method', '{method}').property('value', '{val}')"))

for src, label, dst in tqdm(edges, desc="Edges"):
    send_gremlin(f"g.V().has('name', '{src}').as('a').V().has('name', '{dst}').addE('{label}').from('a')")
    time.sleep(0.1)  # fixed pause between edge inserts

# === Auto Expand from Tier ➝ Base ➝ Type ➝ Value ➝ Assoc
# send_gremlin only reports success or failure, so the returned paths
# themselves are not kept on the Python side.
for tier in tqdm(tiers, desc="Expand From Tier"):
    send_gremlin(f"g.V().hasLabel('EvidenceTier').has('name', 'Evidence: {tier}')" +
                 ".out('HAS_BASE').out().out().out().path()")