# ------------------------------------
# No Alpha
# ------------------------------------

In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# ------------------------------------
# Configuration & Hyperparameters
# ------------------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

GRAPH_SEED = 11

GRAPH_LEARN_MEAN = 0.5
GRAPH_LEARN_STD = 0.01
GRAPH_LEARN_EPOCHS = 5000
GRAPH_LEARN_BETA = 0.01
GRAPH_LEARN_ETA = 0.001

SAGE_HIDDEN_DIM = 128
SAGE_DROPOUT = 0.2
LR = 4e-3
WEIGHT_DECAY = 1e-4
EPOCHS = 15000
PATIENCE = 1000

# ------------------------------------
# Learn Graph Structure (L2 regularizer only)
# ------------------------------------
def learn_graph(X: torch.Tensor, seed_graph: int) -> np.ndarray:
    X_np = X.cpu().numpy()
    N = X_np.shape[0]

    rng = np.random.default_rng(seed_graph)
    W = rng.normal(GRAPH_LEARN_MEAN, GRAPH_LEARN_STD, size=(N, N))

    i_idx, j_idx = np.triu_indices(N, k=1)
    w = W[i_idx, j_idx].copy()
    delL = np.sum((X_np[i_idx] - X_np[j_idx]) ** 2, axis=1)

    best_w = w.copy()
    best_loss = np.inf

    for _ in range(GRAPH_LEARN_EPOCHS):
        grad = 2.0 * delL + GRAPH_LEARN_BETA * w
        w -= GRAPH_LEARN_ETA * grad
        np.clip(w, 0.0, None, out=w)

        loss = (delL * w).sum() + (GRAPH_LEARN_BETA / 2.0) * (w ** 2).sum()
        if loss < best_loss:
            best_loss = loss
            best_w = w.copy()

    adj = np.zeros((N, N), dtype=int)
    adj[i_idx[best_w > 0], j_idx[best_w > 0]] = 1
    adj[j_idx[best_w > 0], i_idx[best_w > 0]] = 1

    return adj

# ------------------------------------
# GNN Model
# ------------------------------------
class GraphSAGENet(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.out = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv3(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv4(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        return self.out(x, edge_index)

# ------------------------------------
# Train and Evaluate for One Seed
# ------------------------------------
def run_single_seed(X_raw, Y_raw, seed_model, seed_graph):
    np.random.seed(seed_model)
    torch.manual_seed(seed_model)
    if DEVICE.type == 'cuda':
        torch.cuda.manual_seed_all(seed_model)

    scaler = StandardScaler()
    X_np = scaler.fit_transform(np.abs(X_raw.cpu().numpy()))
    X = torch.tensor(X_np, dtype=torch.float32)

    adj = learn_graph(X, seed_graph)
    edge_index = torch.tensor(np.vstack(adj.nonzero()), dtype=torch.long)

    idx = np.arange(X.shape[0])
    train_idx, temp_idx = train_test_split(idx, test_size=0.2, random_state=seed_model)
    val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=seed_model)

    def get_mask(idxs):
        mask = torch.zeros(X.shape[0], dtype=torch.bool)
        mask[idxs] = True
        return mask

    data = Data(
        x=X.to(DEVICE),
        y=Y_raw.to(DEVICE),
        edge_index=edge_index.to(DEVICE)
    )
    masks = {k: get_mask(v).to(DEVICE) for k, v in zip(['train', 'val', 'test'], [train_idx, val_idx, test_idx])}

    model = GraphSAGENet(X.size(1), SAGE_HIDDEN_DIM, Y_raw.size(1)).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=200)
    criterion = torch.nn.MSELoss()

    best_val = np.inf
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[masks['train']], data.y[masks['train']])
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(data.x, data.edge_index)[masks['val']], data.y[masks['val']])
        scheduler.step(val_loss)

        if val_loss < best_val:
            best_val = val_loss
            best_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break

    model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        y_true = data.y

    r2 = r2_score(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())
    mse = mean_squared_error(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())

    return r2, mse

# ------------------------------------
# Main Execution
# ------------------------------------
if __name__ == "__main__":
    df = pd.read_csv("1000_cylindrical_anomalies.csv")
    X_raw = torch.tensor(df.iloc[:, :22].values, dtype=torch.float32)
    Y_raw = torch.tensor(df.iloc[:, 22:23].values, dtype=torch.float32)

    SEEDS = [6,40,43,118,146]  
    r2_scores = []
    mse_scores = []

    for seed in SEEDS:
        r2, mse = run_single_seed(X_raw, Y_raw, seed_model=seed, seed_graph=GRAPH_SEED)
        r2_scores.append(r2)
        mse_scores.append(mse)
        print(f"Seed {seed} -> R²: {r2:.4f}, MSE: {mse:.4f}")

    r2_scores = np.array(r2_scores)
    mse_scores = np.array(mse_scores)

    print("\n--- Final Summary (Mean ± Std) ---")
    print(f"R² Score: {r2_scores.mean():.4f} ± {r2_scores.std():.4f}")
    print(f"MSE     : {mse_scores.mean():.4f} ± {mse_scores.std():.4f}")


Seed 6 -> R²: 0.9127, MSE: 0.2506
Seed 40 -> R²: 0.9168, MSE: 0.2424
Seed 43 -> R²: 0.9247, MSE: 0.2668
Seed 118 -> R²: 0.9190, MSE: 0.2197
Seed 146 -> R²: 0.9253, MSE: 0.2082

--- Final Summary (Mean ± Std) ---
R² Score: 0.9197 ± 0.0048
MSE     : 0.2376 ± 0.0211


# ------------------------------------
# No Beta
# ------------------------------------

In [9]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from scipy.sparse import coo_matrix

# ------------------------------------
# Configuration & Hyperparameters
# ------------------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

SEEDS = [10,22,48,60,74]
GRAPH_SEED = 11

GRAPH_LEARN_MEAN = 0.5
GRAPH_LEARN_STD = 0.01
GRAPH_LEARN_EPOCHS = 5000
GRAPH_LEARN_ALPHA = 1.0
GRAPH_LEARN_ETA = 0.001

SAGE_HIDDEN_DIM = 128
SAGE_DROPOUT = 0.2
LR = 4e-3
WEIGHT_DECAY = 1e-4
EPOCHS = 15000
PATIENCE = 1000

# ------------------------------------
# Learn Graph Structure (All edges with positive weight)
# ------------------------------------
def learn_graph(X: torch.Tensor, seed_graph: int):
    X_np = X.cpu().numpy()
    N = X_np.shape[0]

    rng = np.random.default_rng(seed_graph)
    W = rng.normal(GRAPH_LEARN_MEAN, GRAPH_LEARN_STD, size=(N, N))

    i_idx, j_idx = np.triu_indices(N, k=1)
    w = W[i_idx, j_idx].copy()
    delL = np.sum((X_np[i_idx] - X_np[j_idx]) ** 2, axis=1)

    row = np.concatenate([i_idx, j_idx])
    col = np.concatenate([np.arange(len(i_idx)), np.arange(len(j_idx))])
    S = coo_matrix((np.ones(len(row)), (row, col)), shape=(N, len(i_idx))).tocsc()
    S_T = S.T

    best_w = w.copy()
    best_loss = np.inf

    for _ in range(GRAPH_LEARN_EPOCHS):
        Sw = S.dot(w)
        inv_Sw = np.zeros_like(Sw)
        nonzero = Sw > 0
        inv_Sw[nonzero] = 1.0 / Sw[nonzero]
        grad = 2.0 * delL - GRAPH_LEARN_ALPHA * S_T.dot(inv_Sw)
        w -= GRAPH_LEARN_ETA * grad
        np.clip(w, 0.0, None, out=w)

        loss = (delL * w).sum() - GRAPH_LEARN_ALPHA * np.sum(np.log(Sw[nonzero]))
        if loss < best_loss:
            best_loss = loss
            best_w = w.copy()

    # Use all edges with positive weight
    valid_mask = best_w > 0
    adj = np.zeros((N, N), dtype=int)
    adj[i_idx[valid_mask], j_idx[valid_mask]] = 1
    adj[j_idx[valid_mask], i_idx[valid_mask]] = 1

    return adj, best_w, i_idx, j_idx

# ------------------------------------
# GNN Model
# ------------------------------------
class GraphSAGENet(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.out = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv3(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv4(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        return self.out(x, edge_index)

# ------------------------------------
# Train and Evaluate for One Seed
# ------------------------------------
def run_single_seed(X_raw, Y_raw, seed_model, seed_graph):
    np.random.seed(seed_model)
    torch.manual_seed(seed_model)
    if DEVICE.type == 'cuda':
        torch.cuda.manual_seed_all(seed_model)

    scaler = StandardScaler()
    X_np = scaler.fit_transform(np.abs(X_raw.cpu().numpy()))
    X = torch.tensor(X_np, dtype=torch.float32)

    adj, weights, i_idx, j_idx = learn_graph(X, seed_graph)
    edge_index = torch.tensor(np.vstack(adj.nonzero()), dtype=torch.long)

    idx = np.arange(X.shape[0])
    train_idx, temp_idx = train_test_split(idx, test_size=0.2, random_state=seed_model)
    val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=seed_model)

    def get_mask(idxs):
        mask = torch.zeros(X.shape[0], dtype=torch.bool)
        mask[idxs] = True
        return mask

    data = Data(
        x=X.to(DEVICE),
        y=Y_raw.to(DEVICE),
        edge_index=edge_index.to(DEVICE)
    )
    masks = {k: get_mask(v).to(DEVICE) for k, v in zip(['train', 'val', 'test'], [train_idx, val_idx, test_idx])}

    model = GraphSAGENet(X.size(1), SAGE_HIDDEN_DIM, Y_raw.size(1)).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=20)
    criterion = torch.nn.MSELoss()

    best_val = np.inf
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[masks['train']], data.y[masks['train']])
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(data.x, data.edge_index)[masks['val']], data.y[masks['val']])
        scheduler.step(val_loss)

        if val_loss < best_val:
            best_val = val_loss
            best_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break

    model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        y_true = data.y

    r2 = r2_score(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())
    mse = mean_squared_error(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())

    return r2, mse, weights, i_idx, j_idx

# ------------------------------------
# Main Execution
# ------------------------------------
if __name__ == "__main__":
    df = pd.read_csv("1000_cylindrical_anomalies.csv")
    X_raw = torch.tensor(df.iloc[:, :22].values, dtype=torch.float32)
    Y_raw = torch.tensor(df.iloc[:, 22:23].values, dtype=torch.float32)

    r2_scores = []
    mse_scores = []
    all_weights = []
    all_edges = []

    for seed in SEEDS:
        r2, mse, weights, i_idx, j_idx = run_single_seed(X_raw, Y_raw, seed_model=seed, seed_graph=GRAPH_SEED)
        r2_scores.append(r2)
        mse_scores.append(mse)
        all_weights.append(weights)
        all_edges.append((i_idx, j_idx))
        print(f"Seed {seed} -> R²: {r2:.4f}, MSE: {mse:.4f}")

    r2_scores = np.array(r2_scores)
    mse_scores = np.array(mse_scores)

    print("\n--- Final Summary (Mean ± Std) ---")
    print(f"R² Score: {r2_scores.mean():.4f} ± {r2_scores.std():.4f}")
    print(f"MSE     : {mse_scores.mean():.4f} ± {mse_scores.std():.4f}")

    # Print top 10 weighted edges for each seed
    for seed, weights, (i_idx, j_idx) in zip(SEEDS, all_weights, all_edges):
        edge_list = list(zip(i_idx, j_idx))
        top_indices = np.argsort(weights)[-10:]
        print(f"\nTop 10 Learned Edges for Seed {seed}:")
        for idx in reversed(top_indices):
            print(f"Edge {edge_list[idx]} -> Weight: {weights[idx]:.4f}")


Seed 10 -> R²: 0.9232, MSE: 0.2845
Seed 22 -> R²: 0.9225, MSE: 0.3082
Seed 48 -> R²: 0.9121, MSE: 0.3820
Seed 60 -> R²: 0.9298, MSE: 0.2537
Seed 74 -> R²: 0.9109, MSE: 0.3766

--- Final Summary (Mean ± Std) ---
R² Score: 0.9197 ± 0.0072
MSE     : 0.3210 ± 0.0507

Top 10 Learned Edges for Seed 10:
Edge (574, 812) -> Weight: 2.9143
Edge (511, 990) -> Weight: 2.8546
Edge (575, 698) -> Weight: 2.3089
Edge (381, 819) -> Weight: 2.2682
Edge (248, 779) -> Weight: 2.2496
Edge (73, 186) -> Weight: 2.2031
Edge (350, 597) -> Weight: 2.1839
Edge (283, 325) -> Weight: 2.1314
Edge (498, 818) -> Weight: 2.1241
Edge (861, 956) -> Weight: 2.0665

Top 10 Learned Edges for Seed 22:
Edge (574, 812) -> Weight: 2.9143
Edge (511, 990) -> Weight: 2.8546
Edge (575, 698) -> Weight: 2.3089
Edge (381, 819) -> Weight: 2.2682
Edge (248, 779) -> Weight: 2.2496
Edge (73, 186) -> Weight: 2.2031
Edge (350, 597) -> Weight: 2.1839
Edge (283, 325) -> Weight: 2.1314
Edge (498, 818) -> Weight: 2.1241
Edge (861, 956) -> Weig

# ------------------------------------
# NO Alpha and No Beta
# ------------------------------------

In [10]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# ------------------------------------
# Configuration & Hyperparameters
# ------------------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

SEEDS = [11,18,36,40,67]
GRAPH_SEED = 11

GRAPH_LEARN_MEAN = 0.5
GRAPH_LEARN_STD = 0.01
GRAPH_LEARN_EPOCHS = 5000
GRAPH_LEARN_ETA = 0.001

SAGE_HIDDEN_DIM = 128
SAGE_DROPOUT = 0.2
LR = 4e-3
WEIGHT_DECAY = 1e-4
EPOCHS = 15000
PATIENCE = 1000

# ------------------------------------
# Learn Graph Structure (Only minimize squared distances)
# ------------------------------------
def learn_graph(X: torch.Tensor, seed_graph: int):
    X_np = X.cpu().numpy()
    N = X_np.shape[0]

    rng = np.random.default_rng(seed_graph)
    W = rng.normal(GRAPH_LEARN_MEAN, GRAPH_LEARN_STD, size=(N, N))

    i_idx, j_idx = np.triu_indices(N, k=1)
    w = W[i_idx, j_idx].copy()
    delL = np.sum((X_np[i_idx] - X_np[j_idx]) ** 2, axis=1)

    best_w = w.copy()
    best_loss = np.inf

    for _ in range(GRAPH_LEARN_EPOCHS):
        grad = 2.0 * delL  # 🔁 No regularization
        w -= GRAPH_LEARN_ETA * grad
        np.clip(w, 0.0, None, out=w)

        loss = (delL * w).sum()
        if loss < best_loss:
            best_loss = loss
            best_w = w.copy()

    valid_mask = best_w > 0
    adj = np.zeros((N, N), dtype=int)
    adj[i_idx[valid_mask], j_idx[valid_mask]] = 1
    adj[j_idx[valid_mask], i_idx[valid_mask]] = 1

    return adj, best_w, i_idx, j_idx

# ------------------------------------
# GNN Model
# ------------------------------------
class GraphSAGENet(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.out = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv3(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv4(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        return self.out(x, edge_index)

# ------------------------------------
# Train and Evaluate for One Seed
# ------------------------------------
def run_single_seed(X_raw, Y_raw, seed_model, seed_graph):
    np.random.seed(seed_model)
    torch.manual_seed(seed_model)
    if DEVICE.type == 'cuda':
        torch.cuda.manual_seed_all(seed_model)

    scaler = StandardScaler()
    X_np = scaler.fit_transform(np.abs(X_raw.cpu().numpy()))
    X = torch.tensor(X_np, dtype=torch.float32)

    adj, weights, i_idx, j_idx = learn_graph(X, seed_graph)
    edge_index = torch.tensor(np.vstack(adj.nonzero()), dtype=torch.long)

    idx = np.arange(X.shape[0])
    train_idx, temp_idx = train_test_split(idx, test_size=0.2, random_state=seed_model)
    val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=seed_model)

    def get_mask(idxs):
        mask = torch.zeros(X.shape[0], dtype=torch.bool)
        mask[idxs] = True
        return mask

    data = Data(
        x=X.to(DEVICE),
        y=Y_raw.to(DEVICE),
        edge_index=edge_index.to(DEVICE)
    )
    masks = {k: get_mask(v).to(DEVICE) for k, v in zip(['train', 'val', 'test'], [train_idx, val_idx, test_idx])}

    model = GraphSAGENet(X.size(1), SAGE_HIDDEN_DIM, Y_raw.size(1)).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=10)
    criterion = torch.nn.MSELoss()

    best_val = np.inf
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[masks['train']], data.y[masks['train']])
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(data.x, data.edge_index)[masks['val']], data.y[masks['val']])
        scheduler.step(val_loss)

        if val_loss < best_val:
            best_val = val_loss
            best_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break

    model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        y_true = data.y

    r2 = r2_score(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())
    mse = mean_squared_error(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())

    return r2, mse, weights, i_idx, j_idx

# ------------------------------------
# Main Execution
# ------------------------------------
if __name__ == "__main__":
    df = pd.read_csv("1000_cylindrical_anomalies.csv")
    X_raw = torch.tensor(df.iloc[:, :22].values, dtype=torch.float32)
    Y_raw = torch.tensor(df.iloc[:, 22:23].values, dtype=torch.float32)

    r2_scores = []
    mse_scores = []
    all_weights = []
    all_edges = []

    for seed in SEEDS:
        r2, mse, weights, i_idx, j_idx = run_single_seed(X_raw, Y_raw, seed_model=seed, seed_graph=GRAPH_SEED)
        r2_scores.append(r2)
        mse_scores.append(mse)
        all_weights.append(weights)
        all_edges.append((i_idx, j_idx))
        print(f"Seed {seed} -> R²: {r2:.4f}, MSE: {mse:.4f}")

    r2_scores = np.array(r2_scores)
    mse_scores = np.array(mse_scores)

    print("\n--- Final Summary (Mean ± Std) ---")
    print(f"R² Score: {r2_scores.mean():.4f} ± {r2_scores.std():.4f}")
    print(f"MSE     : {mse_scores.mean():.4f} ± {mse_scores.std():.4f}")

    # Print top 10 edges per seed
    for seed, weights, (i_idx, j_idx) in zip(SEEDS, all_weights, all_edges):
        edge_list = list(zip(i_idx, j_idx))
        top_indices = np.argsort(weights)[-10:]
        print(f"\nTop 10 Learned Edges for Seed {seed}:")
        for idx in reversed(top_indices):
            print(f"Edge {edge_list[idx]} -> Weight: {weights[idx]:.4f}")


Seed 11 -> R²: 0.9056, MSE: 0.3651
Seed 18 -> R²: 0.9060, MSE: 0.3309
Seed 36 -> R²: 0.9054, MSE: 0.3054
Seed 40 -> R²: 0.9035, MSE: 0.2809
Seed 67 -> R²: 0.9058, MSE: 0.3067

--- Final Summary (Mean ± Std) ---
R² Score: 0.9053 ± 0.0009
MSE     : 0.3178 ± 0.0284

Top 10 Learned Edges for Seed 11:
Edge (148, 168) -> Weight: 0.3658
Edge (168, 692) -> Weight: 0.2741
Edge (375, 909) -> Weight: 0.2681
Edge (233, 741) -> Weight: 0.2521
Edge (459, 528) -> Weight: 0.2518
Edge (538, 546) -> Weight: 0.2451
Edge (419, 736) -> Weight: 0.2325
Edge (141, 546) -> Weight: 0.2302
Edge (141, 538) -> Weight: 0.2283
Edge (283, 325) -> Weight: 0.2264

Top 10 Learned Edges for Seed 18:
Edge (148, 168) -> Weight: 0.3658
Edge (168, 692) -> Weight: 0.2741
Edge (375, 909) -> Weight: 0.2681
Edge (233, 741) -> Weight: 0.2521
Edge (459, 528) -> Weight: 0.2518
Edge (538, 546) -> Weight: 0.2451
Edge (419, 736) -> Weight: 0.2325
Edge (141, 546) -> Weight: 0.2302
Edge (141, 538) -> Weight: 0.2283
Edge (283, 325) -> We