# Embedding estructural (Node2Vec) — j11a
Genera embeddings de nodos para la instancia sintética más simple (`hptoptw-j11a`) y valida con un planificador por similitud coseno.

In [None]:
from pathlib import Path
import sys, numpy as np

# Node2Vec hyperparameters (passed to the Node2Vec constructor further down)
dims, walk_length, num_walks = 64, 20, 10
p = q = 1.0
seed = 42

# Locate the repository root: the first directory among the current working
# directory and its three nearest ancestors that contains baseline_greedy.py.
# If none of them does, the current working directory is kept.
repo_root = Path.cwd()
search_order = [repo_root]
while len(search_order) < 4:
    search_order.append(search_order[-1].parent)
for base in search_order:
    if (base / 'baseline_greedy.py').exists():
        repo_root = base
        break

experiments_dir = repo_root / 'experiments'
graphs_dir = experiments_dir / 'graphs'
emb_dir = experiments_dir / 'embeddings'
data_syn = repo_root / 'data' / 'synthetic'
emb_dir.mkdir(parents=True, exist_ok=True)

# Pick the j11a GraphML file, preferring an "*_undir" variant when one exists;
# graph_path stays None when nothing matches.
g_candidates = sorted(graphs_dir.glob('hptoptw-j11a*_undir.graphml'))
if not g_candidates:
    g_candidates = sorted(graphs_dir.glob('hptoptw-j11a*.graphml'))
graph_path = next(iter(g_candidates), None)

print('repo_root:', repo_root.as_posix())
print('graphs_dir:', graphs_dir.as_posix(), 'exists?', graphs_dir.exists())
print('graph_path:', graph_path.as_posix() if graph_path else None)
print('emb_dir:', emb_dir.as_posix())


## Generar embeddings Node2Vec

In [None]:
# Instalar si falta
try:
    from node2vec import Node2Vec
    HAVE_N2V = True
except Exception:
    HAVE_N2V = False

import networkx as nx, pandas as pd
if not HAVE_N2V:
    print('Instalando node2vec ...')
    !{sys.executable} -m pip install -q node2vec
    from node2vec import Node2Vec

assert graph_path is not None, 'No se encontró GraphML de j11a en experiments/graphs. Ejecuta 02_build_graphs primero.'
G = nx.read_graphml(graph_path)

# Generar Node2Vec
n2v = Node2Vec(G, dimensions=dims, walk_length=walk_length, num_walks=num_walks,
               p=p, q=q, weight_key='weight', workers=1, seed=seed)
model = n2v.fit(window=10, min_count=1, batch_words=64)

# Exportar embeddings
rows = []
for node in G.nodes():
    key = str(node)
    vec = model.wv[key] if key in model.wv else model.wv.get_vector(key)
    rows.append({'id': int(node), **{f'f{k}': float(vec[k]) for k in range(dims)}})
df_emb = pd.DataFrame(rows).sort_values('id')
out_emb = emb_dir / f"{graph_path.stem}_node2vec_d{dims}_p{p}_q{q}_wl{walk_length}_nw{num_walks}.csv"
df_emb.to_csv(out_emb, index=False)
print('Saved ->', out_emb.as_posix(), 'shape=', df_emb.shape)


## Validación con planificador por similitud coseno

In [None]:
from numpy.linalg import norm
sys.path.append(str(repo_root))
from baseline_greedy import load_instance, evaluate_route

def cosine(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b`` as a Python float.

    Each norm is padded with 1e-9 before the division, so an all-zero input
    yields 0.0 instead of a division by zero.
    """
    eps = 1e-9
    denominator = (norm(a) + eps) * (norm(b) + eps)
    return float(np.dot(a, b) / denominator)

def load_embeddings(path: Path):
    """Load an embeddings CSV into a ``{node_id: vector}`` dictionary.

    The file must have an ``id`` column; every column whose name starts with
    ``'f'`` is treated as one embedding component, in file order. Keys are
    ints and values are float NumPy rows. If an id appears more than once,
    the last row wins.
    """
    table = pd.read_csv(path)
    node_ids = table['id'].astype(int).to_list()
    feature_cols = [name for name in table.columns if name.startswith('f')]
    matrix = table[feature_cols].to_numpy(dtype=float)
    return {int(node_id): row for node_id, row in zip(node_ids, matrix)}

def plan_structural(inst, emb_map, start_idx=0):
    """Greedily build a route by moving to the most embedding-similar feasible node.

    Starting at ``start_idx``, the planner repeatedly appends the unvisited
    node whose embedding has the highest cosine similarity to the embedding of
    the current last node. A candidate is only considered when
    ``evaluate_route`` reports the closed trial route
    ``route + [candidate, start_idx]`` as feasible. Planning stops when no
    unvisited candidate is feasible.

    Args:
        inst: Mapping with keys ``'nodes'`` and ``'dist'``. ``nodes`` must
            support ``len()`` and ``nodes.loc[i, 'is_depot']``; both values
            are passed to ``evaluate_route`` unchanged.
        emb_map: Mapping from node index to embedding vector. Nodes without an
            entry are never visited.
            NOTE(review): this assumes embedding ids coincide with the row
            indices of ``nodes`` - confirm against the graph builder.
        start_idx: Index where the route starts and ends.

    Returns:
        Tuple ``(feasible, profit, time, route)``. The first three values are
        whatever ``evaluate_route`` returns for the final route. With an empty
        ``emb_map`` the result is ``(False, 0.0, 0.0, [start_idx, start_idx])``
        and ``evaluate_route`` is not called.
    """
    nodes, dist = inst['nodes'], inst['dist']
    N = len(nodes)
    if not emb_map:
        return False, 0.0, 0.0, [start_idx, start_idx]

    # Fallback vector for a current node that has no embedding (its cosine
    # with anything is 0.0, so the first feasible candidate wins).
    zero_vec = np.zeros_like(next(iter(emb_map.values())))
    unvisited = [
        i for i in range(N)
        if i != start_idx and int(nodes.loc[i, 'is_depot']) == 0 and i in emb_map
    ]
    route = [start_idx]
    while unvisited:
        # The current node's vector does not change while scanning candidates.
        cur_vec = emb_map.get(route[-1], zero_vec)
        # -inf rather than -1: a cosine can evaluate to exactly -1.0, and such
        # a candidate must still be selectable when it is the only feasible one.
        best, best_sim = None, float('-inf')
        for v in unvisited:
            feas, _, _, _ = evaluate_route(route + [v, start_idx], nodes, dist)
            if not feas:
                continue
            s = cosine(cur_vec, emb_map[v])
            if s > best_sim:
                best_sim, best = s, v
        if best is None:
            break
        route.append(best)
        unvisited.remove(best)

    # Close the tour. When nothing was visited the route is left as
    # [start_idx], because its last element already equals start_idx.
    if route[-1] != start_idx:
        route.append(start_idx)
    feas, prof, T, _ = evaluate_route(route, nodes, dist)
    return feas, prof, T, route

# Instance j11a: load it, plan a route from the embeddings saved above,
# and print a one-line summary of the result.
csv_inst = data_syn / 'hptoptw-j11a.csv'
inst = load_instance(str(csv_inst), 'hptoptw-j11a')
emb_map = load_embeddings(out_emb)
feas, prof, T, route = plan_structural(inst, emb_map)
# Stops exclude the start and end entries; clamped at 0 for shorter routes.
n_stops = max(0, len(route) - 2)
summary = dict(feasible=bool(feas), profit=float(prof), time=float(T), stops=n_stops, route=route)
print(summary)
