In [None]:
import re
import pandas as pd
import requests
import json
from tqdm import tqdm
 
# ─── CONFIGURATION ───────────────────────────────────────────────────────────────
 
# Gremlin HTTP endpoint, assembled from its host and port parts.
_NEPTUNE_HOST = "db-neptune-3.cluster-c8qttgkgfep5.us-east-1.neptune.amazonaws.com"
_NEPTUNE_PORT = 8182
NEPTUNE_ENDPOINT = f"https://{_NEPTUNE_HOST}:{_NEPTUNE_PORT}/gremlin"

# Sent with every request; the query travels as a JSON body.
HEADERS = {
    "Content-Type": "application/json",
}

# Spreadsheet columns whose cells are melted into (company, col, value) rows.
VALUE_COLS = [
    # identifiers / business details
    'Unified social credit code',
    'Payment method',
    'Website',
    # chat handles
    'Chat:WhatsApp',
    'Chat:Wechat',
    'Chat:Skype',
    'Chat:Wickrme',
    # e-mail addresses
    'Email1',
    'Email2',
    'Email3',
    # phone numbers
    'Phone1',
    'Phone2',
]
 
# ─── HELPERS ─────────────────────────────────────────────────────────────────────
 
def sanitize_edge(colname: str) -> str:
    """Turn a spreadsheet column name into a ``has_<slug>`` edge label.

    The name is lower-cased, every run of characters outside ``a-z0-9`` is
    replaced by a single underscore, and leading/trailing underscores are
    trimmed before the ``has_`` prefix is added.
    """
    lowered = colname.lower()
    # One pass: a whole run of non-alphanumerics becomes a single '_'.
    slug = re.sub(r'[^a-z0-9]+', '_', lowered).strip('_')
    return "has_" + slug
 
def send_gremlin(query: str, timeout: float = 120.0) -> bool:
    """POST a Gremlin script to ``NEPTUNE_ENDPOINT`` and report success.

    Args:
        query: Gremlin script text, sent as ``{"gremlin": query}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled connection
            would hang the whole load.

    Returns:
        True when the server answered with HTTP 200, False otherwise (the
        status, response body and offending query are printed).

    Raises:
        requests.exceptions.RequestException: on connection failures or when
            the timeout expires.
    """
    resp = requests.post(
        NEPTUNE_ENDPOINT,
        headers=HEADERS,
        json={"gremlin": query},
        timeout=timeout,
    )
    ok = resp.status_code == 200
    if not ok:
        print(f"❌ {resp.status_code} error:\n{resp.text}\n→ {query}\n")
    return ok
 
# ─── 1) WIPE EXISTING GRAPH ─────────────────────────────────────────────────────
 
print("🗑 Dropping existing graph…")
# Abort if the wipe fails: loading on top of a graph that was not cleared
# would silently mix stale and fresh data.
if not send_gremlin("g.V().drop().iterate()"):
    raise RuntimeError("Dropping the existing graph failed; aborting load.")
 
# ─── 2) LOAD & NORMALIZE DATA ───────────────────────────────────────────────────
 
print("📥 Loading contact_2.xlsx…")
df = pd.read_excel("contact_2.xlsx")
# Header cells may carry stray whitespace; trim it so the column lookups match.
df.columns = df.columns.str.strip()
# Rows without a company cannot be attached to anything.
df = df.dropna(subset=['Base_Company'])

# Wide → long: one row per (company, column, value), empty cells discarded.
long = pd.melt(
    df,
    id_vars=['Base_Company'],
    value_vars=VALUE_COLS,
    var_name='col',
    value_name='value',
)
long = long.dropna(subset=['value'])
 
def norm(v):
    """Render a cell value as text.

    Floats holding a whole number are printed without the trailing ``.0``;
    anything else is converted with ``str`` and stripped of surrounding
    whitespace.
    """
    is_whole_float = isinstance(v, float) and v.is_integer()
    return str(int(v)) if is_whole_float else str(v).strip()
 
# Normalise every cell to a trimmed string before it becomes a vertex key.
long['value'] = long['value'].map(norm)
# Whitespace-only cells normalise to '' — drop them so unrelated companies
# are not all linked through one shared empty AttributeValue vertex.
long = long[long['value'] != '']

# ─── 3) UPSERT STARS (Company → has_<col> → Value) ───────────────────────────────


def _gremlin_str(value) -> str:
    """Return *value* as a single-quoted Gremlin string literal.

    Backslashes, single quotes and line breaks are escaped so that data taken
    from the spreadsheet cannot terminate the literal early or otherwise
    change the structure of the query it is spliced into.
    """
    text = (
        str(value)
        .replace('\\', '\\\\')   # must come first so later escapes survive
        .replace("'", "\\'")
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )
    return f"'{text}'"


print("🔨 Upserting Company→Value edges…")
failed_queries = 0
for company, group in tqdm(long.groupby('Base_Company'), desc="Companies"):
    company_lit = _gremlin_str(company)

    # upsert Company: reuse the vertex if present, otherwise create it
    if not send_gremlin(f"""
      g.V().has('Company','name',{company_lit}).fold()
        .coalesce(unfold(),
                  addV('Company').property('name',{company_lit})
        ).iterate()
    """):
        failed_queries += 1

    # for each unique value
    for val, sub in group.groupby('value'):
        val_lit = _gremlin_str(val)

        # upsert AttributeValue
        if not send_gremlin(f"""
          g.V().has('AttributeValue','value',{val_lit}).fold()
            .coalesce(unfold(),
                      addV('AttributeValue').property('value',{val_lit})
            ).iterate()
        """):
            failed_queries += 1

        # one has_<col> edge per column; the label only contains [a-z0-9_]
        # after sanitize_edge, so it needs no escaping.
        for col in sub['col'].unique():
            edge = sanitize_edge(col)
            if not send_gremlin(f"""
              g.V().has('Company','name',{company_lit}).as('c')
                .V().has('AttributeValue','value',{val_lit})
                .coalesce(
                  __.inE('{edge}').where(outV().as('c')),
                  addE('{edge}').from('c')
                ).iterate()
            """):
                failed_queries += 1

# Only claim success when every query was accepted by the server.
if failed_queries:
    print(f"⚠️ Finished with {failed_queries} failed Gremlin queries; see the errors above.")
else:
    print("✅ Done!  All Company→has_<col>→AttributeValue edges are loaded.")