# ------------------------------------
# No Alpha
# ------------------------------------

In [28]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# ------------------------------------
# Configuration & Hyperparameters
# ------------------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

GRAPH_SEED = 11

GRAPH_LEARN_MEAN = 0.5
GRAPH_LEARN_STD = 0.01
GRAPH_LEARN_EPOCHS = 5000
GRAPH_LEARN_BETA = 0.001
GRAPH_LEARN_ETA = 0.001

SAGE_HIDDEN_DIM = 128
SAGE_DROPOUT = 0.2
LR = 4e-3
WEIGHT_DECAY = 1e-4
EPOCHS = 15000
PATIENCE = 1000

# ------------------------------------
# Learn Graph Structure (L2 regularizer only)
# ------------------------------------
def learn_graph(X: torch.Tensor, seed_graph: int) -> np.ndarray:
    X_np = X.cpu().numpy()
    N = X_np.shape[0]

    rng = np.random.default_rng(seed_graph)
    W = rng.normal(GRAPH_LEARN_MEAN, GRAPH_LEARN_STD, size=(N, N))

    i_idx, j_idx = np.triu_indices(N, k=1)
    w = W[i_idx, j_idx].copy()
    delL = np.sum((X_np[i_idx] - X_np[j_idx]) ** 2, axis=1)

    best_w = w.copy()
    best_loss = np.inf

    for _ in range(GRAPH_LEARN_EPOCHS):
        grad = 2.0 * delL + GRAPH_LEARN_BETA * w
        w -= GRAPH_LEARN_ETA * grad
        np.clip(w, 0.0, None, out=w)

        loss = (delL * w).sum() + (GRAPH_LEARN_BETA / 2.0) * (w ** 2).sum()
        if loss < best_loss:
            best_loss = loss
            best_w = w.copy()

    adj = np.zeros((N, N), dtype=int)
    adj[i_idx[best_w > 0], j_idx[best_w > 0]] = 1
    adj[j_idx[best_w > 0], i_idx[best_w > 0]] = 1

    return adj

# ------------------------------------
# GNN Model
# ------------------------------------
class GraphSAGENet(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.out = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv3(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv4(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        return self.out(x, edge_index)

# ------------------------------------
# Train and Evaluate for One Seed
# ------------------------------------
def run_single_seed(X_raw, Y_raw, seed_model, seed_graph):
    np.random.seed(seed_model)
    torch.manual_seed(seed_model)
    if DEVICE.type == 'cuda':
        torch.cuda.manual_seed_all(seed_model)

    scaler = StandardScaler()
    X_np = scaler.fit_transform(np.abs(X_raw.cpu().numpy()))
    X = torch.tensor(X_np, dtype=torch.float32)

    adj = learn_graph(X, seed_graph)
    edge_index = torch.tensor(np.vstack(adj.nonzero()), dtype=torch.long)

    idx = np.arange(X.shape[0])
    train_idx, temp_idx = train_test_split(idx, test_size=0.2, random_state=seed_model)
    val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=seed_model)

    def get_mask(idxs):
        mask = torch.zeros(X.shape[0], dtype=torch.bool)
        mask[idxs] = True
        return mask

    data = Data(
        x=X.to(DEVICE),
        y=Y_raw.to(DEVICE),
        edge_index=edge_index.to(DEVICE)
    )
    masks = {k: get_mask(v).to(DEVICE) for k, v in zip(['train', 'val', 'test'], [train_idx, val_idx, test_idx])}

    model = GraphSAGENet(X.size(1), SAGE_HIDDEN_DIM, Y_raw.size(1)).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=10)
    criterion = torch.nn.MSELoss()

    best_val = np.inf
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[masks['train']], data.y[masks['train']])
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(data.x, data.edge_index)[masks['val']], data.y[masks['val']])
        scheduler.step(val_loss)

        if val_loss < best_val:
            best_val = val_loss
            best_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break

    model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        y_true = data.y

    r2 = r2_score(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())
    mse = mean_squared_error(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())

    return r2, mse

# ------------------------------------
# Main Execution
# ------------------------------------
if __name__ == "__main__":
    df = pd.read_csv("1000_spherical_anomalies.csv")
    X_raw = torch.tensor(df.iloc[:, :22].values, dtype=torch.float32)
    Y_raw = torch.tensor(df.iloc[:, 22:23].values, dtype=torch.float32)

    SEEDS = [16,56,61,78,81]  # 5 random seeds
    r2_scores = []
    mse_scores = []

    for seed in SEEDS:
        r2, mse = run_single_seed(X_raw, Y_raw, seed_model=seed, seed_graph=GRAPH_SEED)
        r2_scores.append(r2)
        mse_scores.append(mse)
        print(f"Seed {seed} -> R²: {r2:.4f}, MSE: {mse:.4f}")

    r2_scores = np.array(r2_scores)
    mse_scores = np.array(mse_scores)

    print("\n--- Final Summary (Mean ± Std) ---")
    print(f"R² Score: {r2_scores.mean():.4f} ± {r2_scores.std():.4f}")
    print(f"MSE     : {mse_scores.mean():.4f} ± {mse_scores.std():.4f}")


Seed 16 -> R²: 0.9008, MSE: 0.4006
Seed 56 -> R²: 0.9049, MSE: 0.4147
Seed 61 -> R²: 0.9035, MSE: 0.3888
Seed 78 -> R²: 0.9057, MSE: 0.3559
Seed 81 -> R²: 0.9049, MSE: 0.4046

--- Final Summary (Mean ± Std) ---
R² Score: 0.9040 ± 0.0017
MSE     : 0.3929 ± 0.0203


# ------------------------------------
# No Beta
# ------------------------------------

In [35]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from scipy.sparse import coo_matrix

# ------------------------------------
# Configuration & Hyperparameters
# ------------------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

SEEDS = [17,23,37,49,60]
GRAPH_SEED = 11

GRAPH_LEARN_MEAN = 0.5
GRAPH_LEARN_STD = 0.01
GRAPH_LEARN_EPOCHS = 5000
GRAPH_LEARN_ALPHA = 1.0
GRAPH_LEARN_ETA = 0.001

SAGE_HIDDEN_DIM = 128
SAGE_DROPOUT = 0.2
LR = 4e-3
WEIGHT_DECAY = 1e-4
EPOCHS = 15000
PATIENCE = 1000

# ------------------------------------
# Learn Graph Structure (All edges with positive weight)
# ------------------------------------
def learn_graph(X: torch.Tensor, seed_graph: int):
    X_np = X.cpu().numpy()
    N = X_np.shape[0]

    rng = np.random.default_rng(seed_graph)
    W = rng.normal(GRAPH_LEARN_MEAN, GRAPH_LEARN_STD, size=(N, N))

    i_idx, j_idx = np.triu_indices(N, k=1)
    w = W[i_idx, j_idx].copy()
    delL = np.sum((X_np[i_idx] - X_np[j_idx]) ** 2, axis=1)

    row = np.concatenate([i_idx, j_idx])
    col = np.concatenate([np.arange(len(i_idx)), np.arange(len(j_idx))])
    S = coo_matrix((np.ones(len(row)), (row, col)), shape=(N, len(i_idx))).tocsc()
    S_T = S.T

    best_w = w.copy()
    best_loss = np.inf

    for _ in range(GRAPH_LEARN_EPOCHS):
        Sw = S.dot(w)
        inv_Sw = np.zeros_like(Sw)
        nonzero = Sw > 0
        inv_Sw[nonzero] = 1.0 / Sw[nonzero]
        grad = 2.0 * delL - GRAPH_LEARN_ALPHA * S_T.dot(inv_Sw)
        w -= GRAPH_LEARN_ETA * grad
        np.clip(w, 0.0, None, out=w)

        loss = (delL * w).sum() - GRAPH_LEARN_ALPHA * np.sum(np.log(Sw[nonzero]))
        if loss < best_loss:
            best_loss = loss
            best_w = w.copy()

    # Use all edges with positive weight
    valid_mask = best_w > 0
    adj = np.zeros((N, N), dtype=int)
    adj[i_idx[valid_mask], j_idx[valid_mask]] = 1
    adj[j_idx[valid_mask], i_idx[valid_mask]] = 1

    return adj, best_w, i_idx, j_idx

# ------------------------------------
# GNN Model
# ------------------------------------
class GraphSAGENet(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.out = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv3(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv4(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        return self.out(x, edge_index)

# ------------------------------------
# Train and Evaluate for One Seed
# ------------------------------------
def run_single_seed(X_raw, Y_raw, seed_model, seed_graph):
    np.random.seed(seed_model)
    torch.manual_seed(seed_model)
    if DEVICE.type == 'cuda':
        torch.cuda.manual_seed_all(seed_model)

    scaler = StandardScaler()
    X_np = scaler.fit_transform(np.abs(X_raw.cpu().numpy()))
    X = torch.tensor(X_np, dtype=torch.float32)

    adj, weights, i_idx, j_idx = learn_graph(X, seed_graph)
    edge_index = torch.tensor(np.vstack(adj.nonzero()), dtype=torch.long)

    idx = np.arange(X.shape[0])
    train_idx, temp_idx = train_test_split(idx, test_size=0.2, random_state=seed_model)
    val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=seed_model)

    def get_mask(idxs):
        mask = torch.zeros(X.shape[0], dtype=torch.bool)
        mask[idxs] = True
        return mask

    data = Data(
        x=X.to(DEVICE),
        y=Y_raw.to(DEVICE),
        edge_index=edge_index.to(DEVICE)
    )
    masks = {k: get_mask(v).to(DEVICE) for k, v in zip(['train', 'val', 'test'], [train_idx, val_idx, test_idx])}

    model = GraphSAGENet(X.size(1), SAGE_HIDDEN_DIM, Y_raw.size(1)).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=50)
    criterion = torch.nn.MSELoss()

    best_val = np.inf
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[masks['train']], data.y[masks['train']])
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(data.x, data.edge_index)[masks['val']], data.y[masks['val']])
        scheduler.step(val_loss)

        if val_loss < best_val:
            best_val = val_loss
            best_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break

    model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        y_true = data.y

    r2 = r2_score(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())
    mse = mean_squared_error(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())

    return r2, mse, weights, i_idx, j_idx

# ------------------------------------
# Main Execution
# ------------------------------------
if __name__ == "__main__":
    df = pd.read_csv("1000_spherical_anomalies.csv")
    X_raw = torch.tensor(df.iloc[:, :22].values, dtype=torch.float32)
    Y_raw = torch.tensor(df.iloc[:, 22:23].values, dtype=torch.float32)

    r2_scores = []
    mse_scores = []
    all_weights = []
    all_edges = []

    for seed in SEEDS:
        r2, mse, weights, i_idx, j_idx = run_single_seed(X_raw, Y_raw, seed_model=seed, seed_graph=GRAPH_SEED)
        r2_scores.append(r2)
        mse_scores.append(mse)
        all_weights.append(weights)
        all_edges.append((i_idx, j_idx))
        print(f"Seed {seed} -> R²: {r2:.4f}, MSE: {mse:.4f}")

    r2_scores = np.array(r2_scores)
    mse_scores = np.array(mse_scores)

    print("\n--- Final Summary (Mean ± Std) ---")
    print(f"R² Score: {r2_scores.mean():.4f} ± {r2_scores.std():.4f}")
    print(f"MSE     : {mse_scores.mean():.4f} ± {mse_scores.std():.4f}")

    # Print top 10 weighted edges for each seed
    for seed, weights, (i_idx, j_idx) in zip(SEEDS, all_weights, all_edges):
        edge_list = list(zip(i_idx, j_idx))
        top_indices = np.argsort(weights)[-10:]
        print(f"\nTop 10 Learned Edges for Seed {seed}:")
        for idx in reversed(top_indices):
            print(f"Edge {edge_list[idx]} -> Weight: {weights[idx]:.4f}")


Seed 17 -> R²: 0.9373, MSE: 0.2418
Seed 23 -> R²: 0.9123, MSE: 0.3561
Seed 37 -> R²: 0.9530, MSE: 0.1571
Seed 49 -> R²: 0.9266, MSE: 0.2520
Seed 60 -> R²: 0.9151, MSE: 0.3125

--- Final Summary (Mean ± Std) ---
R² Score: 0.9289 ± 0.0150
MSE     : 0.2639 ± 0.0677

Top 10 Learned Edges for Seed 17:
Edge (135, 329) -> Weight: 2.8082
Edge (601, 699) -> Weight: 2.5339
Edge (407, 478) -> Weight: 2.4414
Edge (911, 988) -> Weight: 2.4018
Edge (185, 195) -> Weight: 2.3386
Edge (129, 664) -> Weight: 2.3372
Edge (541, 725) -> Weight: 2.3270
Edge (127, 759) -> Weight: 2.3145
Edge (260, 457) -> Weight: 2.3127
Edge (76, 78) -> Weight: 2.2654

Top 10 Learned Edges for Seed 23:
Edge (135, 329) -> Weight: 2.8082
Edge (601, 699) -> Weight: 2.5339
Edge (407, 478) -> Weight: 2.4414
Edge (911, 988) -> Weight: 2.4018
Edge (185, 195) -> Weight: 2.3386
Edge (129, 664) -> Weight: 2.3372
Edge (541, 725) -> Weight: 2.3270
Edge (127, 759) -> Weight: 2.3145
Edge (260, 457) -> Weight: 2.3127
Edge (76, 78) -> Weight

# ------------------------------------
# NO Alpha and No Beta
# ------------------------------------

In [34]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# ------------------------------------
# Configuration & Hyperparameters
# ------------------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

SEEDS = [10,25,40,59,76]
GRAPH_SEED = 11

GRAPH_LEARN_MEAN = 0.5
GRAPH_LEARN_STD = 0.01
GRAPH_LEARN_EPOCHS = 5000
GRAPH_LEARN_ETA = 0.001

SAGE_HIDDEN_DIM = 128
SAGE_DROPOUT = 0.2
LR = 4e-3
WEIGHT_DECAY = 1e-4
EPOCHS = 15000
PATIENCE = 1000

# ------------------------------------
# Learn Graph Structure (Only minimize squared distances)
# ------------------------------------
def learn_graph(X: torch.Tensor, seed_graph: int):
    X_np = X.cpu().numpy()
    N = X_np.shape[0]

    rng = np.random.default_rng(seed_graph)
    W = rng.normal(GRAPH_LEARN_MEAN, GRAPH_LEARN_STD, size=(N, N))

    i_idx, j_idx = np.triu_indices(N, k=1)
    w = W[i_idx, j_idx].copy()
    delL = np.sum((X_np[i_idx] - X_np[j_idx]) ** 2, axis=1)

    best_w = w.copy()
    best_loss = np.inf

    for _ in range(GRAPH_LEARN_EPOCHS):
        grad = 2.0 * delL  # 🔁 No regularization
        w -= GRAPH_LEARN_ETA * grad
        np.clip(w, 0.0, None, out=w)

        loss = (delL * w).sum()
        if loss < best_loss:
            best_loss = loss
            best_w = w.copy()

    valid_mask = best_w > 0
    adj = np.zeros((N, N), dtype=int)
    adj[i_idx[valid_mask], j_idx[valid_mask]] = 1
    adj[j_idx[valid_mask], i_idx[valid_mask]] = 1

    return adj, best_w, i_idx, j_idx

# ------------------------------------
# GNN Model
# ------------------------------------
class GraphSAGENet(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.out = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv3(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        x = F.relu(self.conv4(x, edge_index))
        x = F.dropout(x, p=SAGE_DROPOUT, training=self.training)
        return self.out(x, edge_index)

# ------------------------------------
# Train and Evaluate for One Seed
# ------------------------------------
def run_single_seed(X_raw, Y_raw, seed_model, seed_graph):
    np.random.seed(seed_model)
    torch.manual_seed(seed_model)
    if DEVICE.type == 'cuda':
        torch.cuda.manual_seed_all(seed_model)

    scaler = StandardScaler()
    X_np = scaler.fit_transform(np.abs(X_raw.cpu().numpy()))
    X = torch.tensor(X_np, dtype=torch.float32)

    adj, weights, i_idx, j_idx = learn_graph(X, seed_graph)
    edge_index = torch.tensor(np.vstack(adj.nonzero()), dtype=torch.long)

    idx = np.arange(X.shape[0])
    train_idx, temp_idx = train_test_split(idx, test_size=0.2, random_state=seed_model)
    val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=seed_model)

    def get_mask(idxs):
        mask = torch.zeros(X.shape[0], dtype=torch.bool)
        mask[idxs] = True
        return mask

    data = Data(
        x=X.to(DEVICE),
        y=Y_raw.to(DEVICE),
        edge_index=edge_index.to(DEVICE)
    )
    masks = {k: get_mask(v).to(DEVICE) for k, v in zip(['train', 'val', 'test'], [train_idx, val_idx, test_idx])}

    model = GraphSAGENet(X.size(1), SAGE_HIDDEN_DIM, Y_raw.size(1)).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=10)
    criterion = torch.nn.MSELoss()

    best_val = np.inf
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[masks['train']], data.y[masks['train']])
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(data.x, data.edge_index)[masks['val']], data.y[masks['val']])
        scheduler.step(val_loss)

        if val_loss < best_val:
            best_val = val_loss
            best_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break

    model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        y_true = data.y

    r2 = r2_score(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())
    mse = mean_squared_error(y_true[masks['test']].cpu().numpy(), out[masks['test']].cpu().numpy())

    return r2, mse, weights, i_idx, j_idx

# ------------------------------------
# Main Execution
# ------------------------------------
if __name__ == "__main__":
    df = pd.read_csv("1000_spherical_anomalies.csv")
    X_raw = torch.tensor(df.iloc[:, :22].values, dtype=torch.float32)
    Y_raw = torch.tensor(df.iloc[:, 22:23].values, dtype=torch.float32)

    r2_scores = []
    mse_scores = []
    all_weights = []
    all_edges = []

    for seed in SEEDS:
        r2, mse, weights, i_idx, j_idx = run_single_seed(X_raw, Y_raw, seed_model=seed, seed_graph=GRAPH_SEED)
        r2_scores.append(r2)
        mse_scores.append(mse)
        all_weights.append(weights)
        all_edges.append((i_idx, j_idx))
        print(f"Seed {seed} -> R²: {r2:.4f}, MSE: {mse:.4f}")

    r2_scores = np.array(r2_scores)
    mse_scores = np.array(mse_scores)

    print("\n--- Final Summary (Mean ± Std) ---")
    print(f"R² Score: {r2_scores.mean():.4f} ± {r2_scores.std():.4f}")
    print(f"MSE     : {mse_scores.mean():.4f} ± {mse_scores.std():.4f}")

    # Print top 10 edges per seed
    for seed, weights, (i_idx, j_idx) in zip(SEEDS, all_weights, all_edges):
        edge_list = list(zip(i_idx, j_idx))
        top_indices = np.argsort(weights)[-10:]
        print(f"\nTop 10 Learned Edges for Seed {seed}:")
        for idx in reversed(top_indices):
            print(f"Edge {edge_list[idx]} -> Weight: {weights[idx]:.4f}")


Seed 10 -> R²: 0.8864, MSE: 0.3439
Seed 25 -> R²: 0.8927, MSE: 0.3868
Seed 40 -> R²: 0.8889, MSE: 0.4007
Seed 59 -> R²: 0.8892, MSE: 0.3697
Seed 76 -> R²: 0.8880, MSE: 0.3953

--- Final Summary (Mean ± Std) ---
R² Score: 0.8890 ± 0.0021
MSE     : 0.3793 ± 0.0206

Top 10 Learned Edges for Seed 10:
Edge (289, 309) -> Weight: 0.4898
Edge (0, 332) -> Weight: 0.4743
Edge (24, 421) -> Weight: 0.4739
Edge (748, 790) -> Weight: 0.4713
Edge (519, 920) -> Weight: 0.4680
Edge (9, 289) -> Weight: 0.4638
Edge (0, 597) -> Weight: 0.4633
Edge (9, 887) -> Weight: 0.4550
Edge (438, 576) -> Weight: 0.4550
Edge (530, 691) -> Weight: 0.4548

Top 10 Learned Edges for Seed 25:
Edge (289, 309) -> Weight: 0.4898
Edge (0, 332) -> Weight: 0.4743
Edge (24, 421) -> Weight: 0.4739
Edge (748, 790) -> Weight: 0.4713
Edge (519, 920) -> Weight: 0.4680
Edge (9, 289) -> Weight: 0.4638
Edge (0, 597) -> Weight: 0.4633
Edge (9, 887) -> Weight: 0.4550
Edge (438, 576) -> Weight: 0.4550
Edge (530, 691) -> Weight: 0.4548

Top 