## Environment setup, logging, and reproducibility

In [1]:
# Core
import os, math, random, time, json, logging
from dataclasses import dataclass, asdict
from typing import List, Dict, Any, Tuple

import numpy as np

# Torch / GNN
import torch
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx

# HuggingFace LLM stack
# pip install transformers datasets peft optuna
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    Trainer, TrainingArguments,
    DataCollatorForLanguageModeling
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model

# HPO
import optuna

# Logging
logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s - %(message)s")
LOGGER = logging.getLogger("hybrid_gnn_llm")

@dataclass
class Config:
    seed: int = 42
    gnn_hidden: int = 64
    gnn_out: int = 32
    n_nodes: int = 200
    graph_type: str = "barabasi_albert"
    graph_param: int = 3           # BA graph: m parameter
    node_feature_dim: int = 16
    n_classes: int = 3
    train_ratio: float = 0.8

    model_name: str = "sshleifer/tiny-gpt2"  # demo model
    block_size: int = 128
    default_lr: float = 2e-4
    default_batch_size: int = 16
    default_num_train_epochs: int = 3
    use_lora: bool = True
    lora_r: int = 8
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    mixed_precision: str = "fp16"
    output_dir: str = "./hybrid_artifacts"

CFG = Config()

def set_all_seeds(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

os.makedirs(CFG.output_dir, exist_ok=True)
set_all_seeds(CFG.seed)

## Synthetic graph generation and node features

We simulate a social-like graph (Barabási–Albert preferential attachment), assign random node features, and create labels (e.g., community or role classes).

In [2]:
def generate_graph(n_nodes: int, graph_type: str, param: int) -> nx.Graph:
    if graph_type == "barabasi_albert":
        G = nx.barabasi_albert_graph(n=n_nodes, m=param, seed=CFG.seed)
    else:
        G = nx.erdos_renyi_graph(n=n_nodes, p=0.05, seed=CFG.seed)
    return G

def make_node_features_labels(G: nx.Graph, feat_dim: int, n_classes: int):
    n = G.number_of_nodes()
    X = np.random.randn(n, feat_dim).astype(np.float32)
    # Labels derived from degree buckets (proxy for role classes)
    degrees = np.array([d for _, d in G.degree()], dtype=np.int32)
    bins = np.quantile(degrees, [0.33, 0.66])
    y = np.zeros(n, dtype=np.int64)
    y[degrees > bins[0]] = 1
    y[degrees > bins[1]] = 2
    return X, y

G = generate_graph(CFG.n_nodes, CFG.graph_type, CFG.graph_param)
X, y = make_node_features_labels(G, CFG.node_feature_dim, CFG.n_classes)
LOGGER.info(f"Graph nodes: {G.number_of_nodes()}, edges: {G.number_of_edges()}")

[2025-11-26 21:09:24,133] INFO - Graph nodes: 200, edges: 591


## Lightweight GNN: GraphSAGE-style aggregator (PyTorch)

We avoid external GNN libs to keep it portable. This GraphSAGE-like layer aggregates neighbor features by mean and applies linear transforms.

In [3]:
class MeanAggregator(nn.Module):
    def forward(self, x: torch.Tensor, adj_lists: List[List[int]]) -> torch.Tensor:
        # x: [N, D], adj_lists: list of neighbors per node
        N, D = x.shape
        out = torch.zeros_like(x)
        for i in range(N):
            nbrs = adj_lists[i]
            if len(nbrs) == 0:
                out[i] = x[i]
            else:
                out[i] = x[nbrs].mean(dim=0)
        return out

class GraphSAGE(nn.Module):
    def __init__(self, in_dim: int, hidden: int, out_dim: int, n_classes: int):
        super().__init__()
        self.agg1 = MeanAggregator()
        self.lin1 = nn.Linear(in_dim + in_dim, hidden)
        self.agg2 = MeanAggregator()
        self.lin2 = nn.Linear(hidden + hidden, out_dim)
        self.cls  = nn.Linear(out_dim, n_classes)

    def forward(self, x: torch.Tensor, adj_lists: List[List[int]]):
        # Layer 1
        nbr1 = self.agg1(x, adj_lists)
        h1 = torch.cat([x, nbr1], dim=1)
        h1 = F.relu(self.lin1(h1))

        # Layer 2
        nbr2 = self.agg2(h1, adj_lists)
        h2 = torch.cat([h1, nbr2], dim=1)
        h2 = F.relu(self.lin2(h2))

        logits = self.cls(h2)
        return h2, logits

def build_adj_lists(G: nx.Graph) -> List[List[int]]:
    return [list(G.neighbors(i)) for i in range(G.number_of_nodes())]

adj_lists = build_adj_lists(G)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_gnn = GraphSAGE(CFG.node_feature_dim, CFG.gnn_hidden, CFG.gnn_out, CFG.n_classes).to(device)
opt = torch.optim.Adam(model_gnn.parameters(), lr=1e-3)

X_t = torch.from_numpy(X).to(device)
y_t = torch.from_numpy(y).to(device)

# Train/eval split
idx = np.arange(CFG.n_nodes)
np.random.shuffle(idx)
split = int(CFG.train_ratio * CFG.n_nodes)
train_idx, eval_idx = idx[:split], idx[split:]

## Train GNN to produce embeddings and classify nodes

In [4]:
def train_gnn(epochs=50):
    model_gnn.train()
    for ep in range(epochs):
        opt.zero_grad()
        h, logits = model_gnn(X_t, adj_lists)
        loss = F.cross_entropy(logits[train_idx], y_t[train_idx])
        loss.backward()
        opt.step()
        if (ep+1) % 10 == 0:
            with torch.no_grad():
                _, logits_eval = model_gnn(X_t, adj_lists)
                pred = logits_eval.argmax(dim=1).cpu().numpy()
                acc = (pred[eval_idx] == y[eval_idx]).mean()
            LOGGER.info(f"[GNN] Epoch {ep+1:02d} | loss={loss.item():.4f} | eval_acc={acc:.3f}")

train_gnn(epochs=40)

model_gnn.eval()
with torch.no_grad():
    H, logits = model_gnn(X_t, adj_lists)  # H: node embeddings [N, out_dim]
H_np = H.detach().cpu().numpy()
LOGGER.info(f"GNN embeddings shape: {H_np.shape}")

[2025-11-26 21:11:00,576] INFO - [GNN] Epoch 10 | loss=1.0775 | eval_acc=0.325
[2025-11-26 21:11:01,097] INFO - [GNN] Epoch 20 | loss=1.0413 | eval_acc=0.300
[2025-11-26 21:11:01,577] INFO - [GNN] Epoch 30 | loss=0.9923 | eval_acc=0.350
[2025-11-26 21:11:02,037] INFO - [GNN] Epoch 40 | loss=0.9195 | eval_acc=0.325
[2025-11-26 21:11:02,056] INFO - GNN embeddings shape: (200, 32)


## Graph-to-text conversion: prompts from GNN embeddings and topology

We convert structural info + learned embeddings into natural language instructions for LLM fine-tuning

In [5]:
def describe_node(i: int, G: nx.Graph, emb: np.ndarray) -> str:
    deg = G.degree(i)
    nbrs = list(G.neighbors(i))
    emb_summary = ", ".join([f"{v:.2f}" for v in emb[i][:6]])  # small preview
    return (
        f"Node {i} has degree {deg} and neighbors {nbrs}. "
        f"Embedding summary: [{emb_summary}]"
    )

def instruction_from_graph(i: int, G: nx.Graph, emb: np.ndarray) -> str:
    deg = G.degree(i)
    if deg >= 5:
        task = "Summarize"
        guidance = "Provide a concise summary of this node's role and influence."
    elif deg >= 3:
        task = "Classify"
        guidance = "Classify the node into High/Medium/Low influence."
    else:
        task = "Extract"
        guidance = "Extract notable structural properties (degree, clustering)."
    desc = describe_node(i, G, emb)
    return f"{task}: {guidance} Context: {desc}"

def response_stub(instruction: str) -> str:
    # Placeholder expected output forms; in fine-tuning, model learns to produce these patterns
    if instruction.startswith("Summarize"):
        return "Summary: Node exhibits high connectivity and centrality."
    elif instruction.startswith("Classify"):
        return "Class: Medium influence."
    else:
        return "Properties: degree=low; clustering=moderate."

pairs = [{"instruction": instruction_from_graph(i, G, H_np), "response": response_stub(instruction_from_graph(i, G, H_np))}
         for i in range(CFG.n_nodes)]

train_pairs = pairs[:int(0.8*len(pairs))]
eval_pairs  = pairs[int(0.8*len(pairs)):]
train_ds = Dataset.from_list(train_pairs)
eval_ds  = Dataset.from_list(eval_pairs)
LOGGER.info(f"Graph-to-text pairs -> train={len(train_ds)}, eval={len(eval_ds)}")

[2025-11-26 21:12:44,921] INFO - Graph-to-text pairs -> train=160, eval=40


## Tokenization and LoRA PEFT setup

In [7]:
tokenizer = AutoTokenizer.from_pretrained(CFG.model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def fmt(ex):
    return {"text": f"Instruction: {ex['instruction']}\nResponse: {ex['response']}"}

train_fmt = train_ds.map(fmt)
eval_fmt  = eval_ds.map(fmt)

def tok(ex):
    return tokenizer(ex["text"], truncation=True, max_length=CFG.block_size, padding="max_length")

train_tok = train_fmt.map(tok, batched=True, remove_columns=train_fmt.column_names)
eval_tok  = eval_fmt.map(tok, batched=True, remove_columns=eval_fmt.column_names)
collator  = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

base_model = AutoModelForCausalLM.from_pretrained(CFG.model_name)
if CFG.use_lora:
    peft_cfg = LoraConfig(
        r=CFG.lora_r, lora_alpha=CFG.lora_alpha, lora_dropout=CFG.lora_dropout,
        bias="none", target_modules=["c_attn", "c_proj"]
    )
    model = get_peft_model(base_model, peft_cfg)
else:
    model = base_model

LOGGER.info("LLM ready with LoRA adapters" if CFG.use_lora else "LLM ready for full fine-tuning")

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

[2025-11-26 21:14:26,773] INFO - LLM ready with LoRA adapters


## Adaptive workload allocation and Trainer

In [13]:
from transformers import TrainingArguments, Trainer

dev_info = detect_devices()
LOGGER.info(f"Device info: {dev_info}")

effective_bs = adaptive_bs(CFG.default_batch_size, dev_info["n_gpu"])
LOGGER.info(f"Adaptive global batch size: {effective_bs}")

train_args = TrainingArguments(
    output_dir=CFG.output_dir,
    per_device_train_batch_size=max(1, effective_bs // max(1, dev_info["n_gpu"] or 1)),
    per_device_eval_batch_size=max(1, effective_bs // max(1, dev_info["n_gpu"] or 1)),
    num_train_epochs=CFG.default_num_train_epochs,
    learning_rate=CFG.default_lr,
    logging_steps=50,
    save_steps=200,
    report_to="none",              # disable reporting to WandB/Comet
    fp16=(CFG.mixed_precision == "fp16"),
    bf16=(CFG.mixed_precision == "bf16"),
    warmup_ratio=0.05
)

trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=train_tok,
    eval_dataset=eval_tok,
    data_collator=collator
)

# ✅ Manual evaluation call after training
trainer.train()
metrics = trainer.evaluate()
LOGGER.info(f"Evaluation metrics: {metrics}")

[2025-11-26 21:19:57,355] INFO - Device info: {'use_cuda': False, 'n_gpu': 0}
[2025-11-26 21:19:57,356] INFO - Adaptive global batch size: 4
`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
50,10.6911
100,10.6913




[2025-11-26 21:20:09,540] INFO - Evaluation metrics: {'eval_loss': 10.67523193359375, 'eval_runtime': 0.3852, 'eval_samples_per_second': 103.855, 'eval_steps_per_second': 25.964, 'epoch': 3.0}


## Hyperparameter optimization (Optuna)

We optimize learning rate, LoRA rank, and block size to demonstrate infra + efficiency mindset.

In [15]:
from peft import LoraConfig, get_peft_model, TaskType

def objective(trial: optuna.Trial):
    # Hyperparameters to tune
    lr = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
    lora_r = trial.suggest_int("lora_r", 4, 32, step=4)
    block_size = trial.suggest_int("block_size", 64, 256, step=32)

    # Retokenize with new block size
    def tok_bs(ex):
        return tokenizer(ex["text"], truncation=True, max_length=block_size, padding="max_length")
    train_tok_bs = train_fmt.map(tok_bs, batched=True, remove_columns=train_fmt.column_names)
    eval_tok_bs  = eval_fmt.map(tok_bs, batched=True, remove_columns=eval_fmt.column_names)

    # Rebuild model with supported LoRA targets for GPT-2 style models
    base_model_hp = AutoModelForCausalLM.from_pretrained(CFG.model_name)

    # IMPORTANT: Only target supported leaf modules (not the container "attn")
    peft_cfg = LoraConfig(
        r=lora_r,
        lora_alpha=CFG.lora_alpha,
        lora_dropout=CFG.lora_dropout,
        bias="none",
        task_type=TaskType.CAUSAL_LM,     # ensure correct PEFT task type
        target_modules=["c_attn", "c_proj"]  # ✅ supported for GPT-2 blocks
        # If you want final head adaptation too: add "lm_head" (if present in your model)
    )
    model_hp = get_peft_model(base_model_hp, peft_cfg)

    train_args_hp = TrainingArguments(
        output_dir=os.path.join(CFG.output_dir, f"optuna_trial_{trial.number}"),
        per_device_train_batch_size=max(1, effective_bs // max(1, dev_info["n_gpu"] or 1)),
        per_device_eval_batch_size=max(1, effective_bs // max(1, dev_info["n_gpu"] or 1)),
        num_train_epochs=1,
        learning_rate=lr,
        logging_steps=50,
        # For older Transformers versions, remove scheduling and evaluate manually:
        # evaluation_strategy="steps", eval_steps=200,
        report_to="none",
        fp16=(CFG.mixed_precision == "fp16"),
        bf16=(CFG.mixed_precision == "bf16")
    )

    trainer_hp = Trainer(
        model=model_hp,
        args=train_args_hp,
        train_dataset=train_tok_bs,
        eval_dataset=eval_tok_bs,
        data_collator=collator
    )

    trainer_hp.train()
    metrics = trainer_hp.evaluate()  # manual evaluation call (version-safe)
    trial.set_user_attr("metrics", metrics)
    return metrics.get("eval_loss", float("inf"))

In [16]:
study = optuna.create_study(direction="minimize")
LOGGER.info("Starting Optuna hyperparameter search (demo trials)")
study.optimize(objective, n_trials=5)
LOGGER.info(f"Best trial: {study.best_trial.number}, value={study.best_trial.value}, params={study.best_trial.params}")

[I 2025-11-26 21:24:31,023] A new study created in memory with name: no-name-48375583-77cc-4307-a21f-12af9dac6168
[2025-11-26 21:24:31,024] INFO - Starting Optuna hyperparameter search (demo trials)
  lr = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)


Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]



Step,Training Loss




[I 2025-11-26 21:24:36,829] Trial 0 finished with value: 10.675223350524902 and parameters: {'learning_rate': 0.0002984108051855515, 'lora_r': 4, 'block_size': 128}. Best is trial 0 with value: 10.675223350524902.
  lr = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)


Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]



Step,Training Loss




[I 2025-11-26 21:24:40,612] Trial 1 finished with value: 10.656453132629395 and parameters: {'learning_rate': 5.512699912537529e-05, 'lora_r': 28, 'block_size': 64}. Best is trial 1 with value: 10.656453132629395.
  lr = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)


Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]



Step,Training Loss




[I 2025-11-26 21:24:47,058] Trial 2 finished with value: 10.675196647644043 and parameters: {'learning_rate': 0.00047073541391392024, 'lora_r': 12, 'block_size': 160}. Best is trial 1 with value: 10.656453132629395.
  lr = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)


Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]



Step,Training Loss




[I 2025-11-26 21:24:52,674] Trial 3 finished with value: 10.67529582977295 and parameters: {'learning_rate': 2.4416759036612658e-05, 'lora_r': 12, 'block_size': 128}. Best is trial 1 with value: 10.656453132629395.
  lr = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)


Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]



Step,Training Loss




[I 2025-11-26 21:24:58,964] Trial 4 finished with value: 10.675294876098633 and parameters: {'learning_rate': 2.0087504355424445e-05, 'lora_r': 32, 'block_size': 160}. Best is trial 1 with value: 10.656453132629395.
[2025-11-26 21:24:58,965] INFO - Best trial: 1, value=10.656453132629395, params={'learning_rate': 5.512699912537529e-05, 'lora_r': 28, 'block_size': 64}


## Train final model with best hyperparameters

In [18]:
from peft import LoraConfig, get_peft_model, TaskType

# Inspect module names to confirm valid targets
def list_candidate_targets(model):
    candidates = []
    for name, mod in model.named_modules():
        if any(t in name for t in ["c_attn", "c_proj", "lm_head"]):
            candidates.append(name)
    return candidates

base_model_final = AutoModelForCausalLM.from_pretrained(CFG.model_name)

# Log candidates so you know what's available in your architecture
candidates = list_candidate_targets(base_model_final)
LOGGER.info(f"LoRA candidate targets found: {candidates[:10]}")

# Use supported leaf modules for GPT-2 style architectures
peft_cfg_final = LoraConfig(
    r=CFG.lora_r,
    lora_alpha=CFG.lora_alpha,
    lora_dropout=CFG.lora_dropout,
    bias="none",
    task_type=TaskType.CAUSAL_LM,            # ensure correct PEFT route
    target_modules=["c_attn", "c_proj"]      # ✅ do NOT use "attn"
    # Optionally include "lm_head" if present and you want to adapt the output head:
    # target_modules=["c_attn", "c_proj", "lm_head"]
)

model_final = get_peft_model(base_model_final, peft_cfg_final)

train_args_final = TrainingArguments(
    output_dir=os.path.join(CFG.output_dir, "final"),
    per_device_train_batch_size=max(1, effective_bs // max(1, dev_info["n_gpu"] or 1)),
    per_device_eval_batch_size=max(1, effective_bs // max(1, dev_info["n_gpu"] or 1)),
    num_train_epochs=CFG.default_num_train_epochs,
    learning_rate=CFG.default_lr,
    logging_steps=50,
    # If your Transformers version is old, omit evaluation scheduling and evaluate manually:
    # evaluation_strategy="steps", eval_steps=200,
    report_to="none",
    fp16=(CFG.mixed_precision == "fp16"),
    bf16=(CFG.mixed_precision == "bf16"),
    save_steps=200,
    warmup_ratio=0.05
)

trainer_final = Trainer(
    model=model_final,
    args=train_args_final,
    train_dataset=train_tok_final,
    eval_dataset=eval_tok_final,
    data_collator=collator
)

trainer_final.train()
final_metrics = trainer_final.evaluate()  # manual evaluation works across versions
LOGGER.info(f"Final eval metrics: {final_metrics}")

[2025-11-26 21:27:05,425] INFO - LoRA candidate targets found: ['transformer.h.0.attn.c_attn', 'transformer.h.0.attn.c_proj', 'transformer.h.0.mlp.c_proj', 'transformer.h.1.attn.c_attn', 'transformer.h.1.attn.c_proj', 'transformer.h.1.mlp.c_proj', 'lm_head']


Step,Training Loss
50,10.6562
100,10.6561




[2025-11-26 21:27:12,010] INFO - Final eval metrics: {'eval_loss': 10.656448364257812, 'eval_runtime': 0.2563, 'eval_samples_per_second': 156.053, 'eval_steps_per_second': 39.013, 'epoch': 3.0}


## Agentic hybrid reasoning: use GNN context to condition LLM outputs

We route tasks based on node structure and refine with the fine-tuned LLM.

In [20]:
def agent_route(inst: str) -> str:
    if inst.startswith("Summarize"): return "summarize"
    if inst.startswith("Classify"):  return "classify"
    return "extract"

TOOLS = {
    "summarize": lambda ctx: "Summary: " + ("High centrality" if "degree" in ctx else "OK"),
    "classify":  lambda ctx: "Class: High influence" if "degree" in ctx and "neighbors" in ctx else "Class: Low",
    "extract":   lambda ctx: "Properties: degree, clustering, egonet size."
}

def agentic_hybrid(G, H, n_samples=5):
    outputs = []
    for i in np.random.choice(range(G.number_of_nodes()), size=n_samples, replace=False):
        inst = instruction_from_graph(i, G, H)
        tool = agent_route(inst)
        tool_out = TOOLS[tool](inst)
        prompt = f"Instruction: {inst}\nToolOutput: {tool_out}\nResponse:"
        inputs = tokenizer(prompt, return_tensors="pt")
        try:
            gen = model_final.generate(**inputs, max_length=CFG.block_size)
            text = tokenizer.decode(gen[0], skip_special_tokens=True)
        except Exception:
            text = tool_out
        outputs.append({"node": i, "instruction": inst, "tool": tool, "llm_output": text[:300]})
    return outputs

hybrid_outputs = agentic_hybrid(G, H_np, n_samples=5)
for o in hybrid_outputs:
    print(f"Node {o['node']} | Tool={o['tool']}\nInstruction: {o['instruction']}\nLLM Output: {o['llm_output']}\n")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Node 95 | Tool=summarize
Instruction: Summarize: Provide a concise summary of this node's role and influence. Context: Node 95 has degree 7 and neighbors [0, 10, 69, 98, 117, 173, 185]. Embedding summary: [0.00, 0.00, 0.00, 0.01, 0.00, 0.90]
LLM Output: Summary: High centrality

Node 15 | Tool=summarize
Instruction: Summarize: Provide a concise summary of this node's role and influence. Context: Node 15 has degree 9 and neighbors [12, 5, 7, 89, 113, 114, 134, 177, 183]. Embedding summary: [0.00, 0.00, 0.00, 0.28, 0.21, 0.47]
LLM Output: Summary: High centrality

Node 30 | Tool=summarize
Instruction: Summarize: Provide a concise summary of this node's role and influence. Context: Node 30 has degree 7 and neighbors [4, 14, 7, 32, 54, 131, 143]. Embedding summary: [0.00, 0.00, 0.00, 0.21, 0.11, 0.00]
LLM Output: Summary: High centrality

Node 158 | Tool=classify
Instruction: Classify: Classify the node into High/Medium/Low influence. Context: Node 158 has degree 4 and neighbors [62, 50, 1

## RL-style reward shaping for structural fidelity
We define a simple reward that checks whether the LLM output mentions expected structural properties.

In [21]:
def reward_structural(text: str) -> float:
    reward = 0.0
    if "Summary:" in text or "Class:" in text or "Properties:" in text: reward += 0.5
    for kw in ["degree", "neighbors", "centrality", "clustering"]:
        if kw in text: reward += 0.25
    return reward

def evaluate_hybrid(outputs: List[Dict[str, Any]]):
    rewards = [reward_structural(o["llm_output"]) for o in outputs]
    return {
        "mean_reward": float(np.mean(rewards)),
        "min_reward": float(np.min(rewards)),
        "max_reward": float(np.max(rewards))
    }

hybrid_metrics = evaluate_hybrid(hybrid_outputs)
LOGGER.info(f"Hybrid RL-style metrics: {hybrid_metrics}")

[2025-11-26 21:29:10,059] INFO - Hybrid RL-style metrics: {'mean_reward': 0.7, 'min_reward': 0.5, 'max_reward': 0.75}


## Artifact serialization and report

In [27]:
import numpy as np
import json

def to_json_safe(obj):
    """Recursively convert NumPy types and other non-serializable objects to JSON-safe Python types."""
    if isinstance(obj, dict):
        return {k: to_json_safe(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [to_json_safe(v) for v in obj]
    elif isinstance(obj, tuple):
        return tuple(to_json_safe(v) for v in obj)
    elif isinstance(obj, (np.integer,)):
        return int(obj)
    elif isinstance(obj, (np.floating,)):
        return float(obj)
    elif isinstance(obj, (np.ndarray,)):
        return obj.tolist()
    else:
        return obj

# Apply conversion before dumping
report_safe = to_json_safe(report)

print(json.dumps(report_safe, indent=2)[:1200])

{
  "config": {
    "seed": 42,
    "gnn_hidden": 64,
    "gnn_out": 32,
    "n_nodes": 200,
    "graph_type": "barabasi_albert",
    "graph_param": 3,
    "node_feature_dim": 16,
    "n_classes": 3,
    "train_ratio": 0.8,
    "model_name": "sshleifer/tiny-gpt2",
    "block_size": 64,
    "default_lr": 5.512699912537529e-05,
    "default_batch_size": 16,
    "default_num_train_epochs": 3,
    "use_lora": true,
    "lora_r": 28,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "mixed_precision": "fp16",
    "output_dir": "./hybrid_artifacts"
  },
  "gnn_eval_preview": {
    "n_nodes": 200,
    "n_edges": 591
  },
  "final_eval_metrics": {
    "eval_loss": 10.656448364257812,
    "eval_runtime": 0.2563,
    "eval_samples_per_second": 156.053,
    "eval_steps_per_second": 39.013,
    "epoch": 3.0
  },
  "hpo_best": {
    "learning_rate": 5.512699912537529e-05,
    "lora_r": 28,
    "block_size": 64
  },
  "hybrid_outputs": [
    {
      "node": 95,
      "instruction": "Summarize: Pro