In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install codecarbon
!pip install pandas
!pip install scikit-learn
!pip install tqdm
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-2.6.0+cu124.html
!pip install torch-geometric
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import pandas as pd
from torch_geometric.datasets import Amazon
from torch_geometric.nn import GCNConv, GATConv, SAGEConv, BatchNorm, LayerNorm
from codecarbon import EmissionsTracker

def set_seed(seed=42):
    """Seed the torch, ``random`` and NumPy generators with the same value.

    When CUDA is available, the CUDA generator is seeded as well and cuDNN is
    switched to deterministic mode with auto-tuning (benchmark) disabled.

    Args:
        seed: Integer seed applied to every generator.
    """
    for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# --- Optimized Mask Split ---
def create_masks(data, num_train=8000, num_val=4000):
    """Attach random, disjoint train/val/test node masks to ``data``.

    A single random permutation of the node indices is drawn from torch's
    global generator. The first ``num_train`` indices become the training
    nodes, the next ``num_val`` the validation nodes, and every remaining
    node is a test node.

    Args:
        data: Object exposing ``num_nodes``; it receives the boolean tensors
            ``train_mask``, ``val_mask`` and ``test_mask`` as attributes.
        num_train: Number of training nodes.
        num_val: Number of validation nodes.

    Returns:
        The same ``data`` object, mutated in place.

    Raises:
        ValueError: If a split size is negative or the requested train and
            validation sizes together exceed ``data.num_nodes``. Without this
            check the slices silently truncate, leaving empty masks whose
            mean accuracy is NaN.
    """
    num_nodes = data.num_nodes
    if num_train < 0 or num_val < 0:
        raise ValueError(
            f"Split sizes must be non-negative (num_train={num_train}, num_val={num_val})."
        )
    if num_train + num_val > num_nodes:
        raise ValueError(
            f"num_train + num_val = {num_train + num_val} exceeds the "
            f"{num_nodes} nodes available."
        )

    perm = torch.randperm(num_nodes)
    # Consecutive slices of the permutation define the three splits.
    boundaries = (0, num_train, num_train + num_val, num_nodes)
    mask_names = ('train_mask', 'val_mask', 'test_mask')
    for name, start, stop in zip(mask_names, boundaries, boundaries[1:]):
        mask = torch.zeros(num_nodes, dtype=torch.bool)
        mask[perm[start:stop]] = True
        setattr(data, name, mask)
    return data

class GNNSearchSpace:
    """Discrete hyperparameter choices for the node-classification GNN search."""

    def __init__(self):
        # One list of candidate values per tunable hyperparameter.
        self.conv_types = ['GCN', 'GAT', 'SAGE']
        self.norm_types = ['batch', 'layer', 'none']
        self.activation_types = ['relu', 'elu']
        self.hidden_dims = [64, 128, 256]
        self.num_layers = [2, 3]
        self.dropout_rates = [0.5]

    def sample_architecture(self):
        """Draw one value per hyperparameter, uniformly and independently.

        Returns:
            dict with keys ``conv_type``, ``activation``, ``norm_type``,
            ``hidden_dim``, ``num_layers`` and ``dropout``.
        """
        # The pair order fixes both the key order and the order in which the
        # ``random`` module is consumed.
        options_by_key = (
            ('conv_type', self.conv_types),
            ('activation', self.activation_types),
            ('norm_type', self.norm_types),
            ('hidden_dim', self.hidden_dims),
            ('num_layers', self.num_layers),
            ('dropout', self.dropout_rates),
        )
        return {key: random.choice(options) for key, options in options_by_key}

class GNNModel(nn.Module):
    """Configurable stack of graph convolutions for node classification.

    ``config`` supplies ``conv_type`` ('GCN', 'GAT' or 'SAGE'), ``num_layers``,
    ``hidden_dim``, ``norm_type`` ('batch', 'layer', anything else means no
    normalisation), ``activation`` ('relu' or 'elu') and ``dropout``.

    Every layer except the last is followed by normalisation, activation and
    dropout; the last layer maps straight to ``output_dim`` and its raw output
    is returned.
    """

    # Activation name -> function. An unrecognised name applies no activation,
    # which is what the original if/elif chain did.
    _ACTIVATIONS = {'relu': F.relu, 'elu': F.elu}

    def __init__(self, config, input_dim, output_dim):
        """Build the convolution and normalisation layers.

        Args:
            config: Architecture dictionary (see the class docstring).
            input_dim: Size of the input node features.
            output_dim: Size of the output of the final layer.

        Raises:
            ValueError: If ``config['conv_type']`` is not a supported type.
                This previously surfaced as an ``UnboundLocalError``.
        """
        super().__init__()
        self.num_layers = config['num_layers']
        self.config = config
        self.convs = nn.ModuleList()
        self.norms = nn.ModuleList()

        conv_builders = {
            'GCN': GCNConv,
            'GAT': lambda in_dim, out_dim: GATConv(in_dim, out_dim, heads=1),
            'SAGE': SAGEConv,
        }
        norm_builders = {'batch': BatchNorm, 'layer': LayerNorm}

        conv_type = config['conv_type']
        if conv_type not in conv_builders:
            raise ValueError(
                f"Unknown conv_type {conv_type!r}; expected one of {sorted(conv_builders)}."
            )
        build_conv = conv_builders[conv_type]

        hidden_dim = config['hidden_dim']
        last = self.num_layers - 1
        in_dim = input_dim
        for i in range(self.num_layers):
            out_dim = hidden_dim if i < last else output_dim
            self.convs.append(build_conv(in_dim, out_dim))
            if i < last:
                # Only hidden layers get a normalisation module.
                norm_cls = norm_builders.get(config['norm_type'])
                self.norms.append(norm_cls(out_dim) if norm_cls is not None else nn.Identity())
            in_dim = out_dim

    def forward(self, x, edge_index):
        """Run the layer stack.

        Args:
            x: Node feature tensor passed to the first convolution.
            edge_index: Graph connectivity passed to every convolution.

        Returns:
            Output of the final convolution (no activation or dropout applied).
        """
        activation = self._ACTIVATIONS.get(self.config.get('activation'))
        dropout_p = self.config['dropout'] if self.num_layers > 1 else 0.0
        last = self.num_layers - 1
        for i in range(self.num_layers):
            x = self.convs[i](x, edge_index)
            if i < last:
                x = self.norms[i](x)
                if activation is not None:
                    x = activation(x)
                x = F.dropout(x, p=dropout_p, training=self.training)
        return x

class CarbonAwareGNNNAS:
    """Random-search NAS for node classification on an Amazon graph.

    Each generation draws fresh random architectures from ``GNNSearchSpace``,
    keeps those whose short "proxy" training reaches ``proxy_acc_threshold``
    validation accuracy, fully trains the survivors and remembers the best
    validation accuracy seen so far. Emissions of every tracked training run
    are measured with CodeCarbon and printed; they are not used for selecting
    architectures.
    """

    def __init__(self, dataset_name='computers', population_size=12, generations=5, seed=42,
                 max_attempts=100, proxy_acc_threshold=0.75, max_proxy_epochs_first_gen=30):
        """Seed the RNGs, store the search settings and load the dataset.

        Args:
            dataset_name: Name passed to the PyG ``Amazon`` dataset.
            population_size: Candidates to collect per generation.
            generations: Number of sample/filter/train rounds.
            seed: Seed passed to ``set_seed``.
            max_attempts: Maximum architectures sampled per generation.
            proxy_acc_threshold: Validation accuracy a proxy run must reach
                for its architecture to be kept.
            max_proxy_epochs_first_gen: Default epoch budget of a proxy run.
        """
        set_seed(seed)
        self.dataset_name = dataset_name
        self.population_size = population_size
        self.generations = generations
        self.search_space = GNNSearchSpace()
        self.output_file = f"emissions_{self.dataset_name.lower()}.csv"
        self.max_attempts = max_attempts
        self.proxy_acc_threshold = proxy_acc_threshold
        self.max_proxy_epochs_first_gen = max_proxy_epochs_first_gen
        self.load_dataset()
        self.results = []
        self.best_architecture = None
        self.best_score = 0.0

    def load_dataset(self):
        """Load the Amazon graph, attach random split masks and print a summary."""
        dataset = Amazon(root=f'data/{self.dataset_name}', name=self.dataset_name)
        data = dataset[0]
        self.data = create_masks(data, num_train=8000, num_val=4000)
        self.input_dim = dataset.num_node_features
        self.output_dim = dataset.num_classes
        print(f"Dataset: Amazon {self.dataset_name}, features: {self.input_dim}, classes: {self.output_dim}")
        # Print mask sizes to confirm
        print("Train:", self.data.train_mask.sum().item(),
              "Val:", self.data.val_mask.sum().item(),
              "Test:", self.data.test_mask.sum().item())

    def calculate_block_reuse(self, arch1, arch2):
        """Return the fraction of ``arch1`` settings that ``arch2`` shares.

        Args:
            arch1: Reference architecture dictionary.
            arch2: Architecture dictionary compared against ``arch1``.

        Returns:
            Float in ``[0, 1]``. An empty ``arch1`` gives ``0.0`` (previously a
            ``ZeroDivisionError``) and keys missing from ``arch2`` count as
            mismatches (previously a ``KeyError``).
        """
        if not arch1:
            return 0.0
        missing = object()  # Sentinel that never equals a real setting.
        shared = sum(1 for key, value in arch1.items() if arch2.get(key, missing) == value)
        return shared / len(arch1)

    @staticmethod
    def _device():
        """Return the CUDA device when available, otherwise the CPU."""
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _new_tracker(self, project_name):
        """Create an emissions tracker that writes to this search's CSV file."""
        return EmissionsTracker(
            project_name=project_name,
            measure_power_secs=15,
            output_file=self.output_file,
            log_level='error'
        )

    def _fit(self, model, data, epochs, acc_threshold=None, min_epochs=0):
        """Train ``model`` on the train mask and return the best validation accuracy.

        When ``acc_threshold`` is given, training stops early once the
        zero-based epoch index has reached ``min_epochs`` and the best
        validation accuracy is at least ``acc_threshold``.
        """
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
        criterion = nn.CrossEntropyLoss()
        best_val_acc = 0.0
        for epoch in range(epochs):
            model.train()
            out = model(data.x, data.edge_index)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            model.eval()
            with torch.no_grad():
                logits = model(data.x, data.edge_index)
                val_pred = logits[data.val_mask].argmax(dim=1)
                val_acc = (val_pred == data.y[data.val_mask]).float().mean().item()
            best_val_acc = max(best_val_acc, val_acc)

            if acc_threshold is not None and epoch >= min_epochs and best_val_acc >= acc_threshold:
                break
        return best_val_acc

    def proxy_train_and_eval(self, config, max_epochs=None, acc_threshold=None, check_every=5, min_epochs=5):
        """Briefly train ``config`` with early stopping to decide if it is worth keeping.

        Args:
            config: Architecture dictionary understood by ``GNNModel``.
            max_epochs: Epoch budget; defaults to ``max_proxy_epochs_first_gen``.
            acc_threshold: Early-stop accuracy; defaults to ``proxy_acc_threshold``.
            check_every: Accepted for backward compatibility; currently unused.
            min_epochs: Zero-based epoch index from which early stopping may trigger.

        Returns:
            dict with ``accuracy`` (best validation accuracy), ``carbon``
            (value returned by the tracker's ``stop()``) and ``model``.
        """
        if max_epochs is None:
            max_epochs = self.max_proxy_epochs_first_gen
        if acc_threshold is None:
            acc_threshold = self.proxy_acc_threshold
        device = self._device()
        model = GNNModel(config, self.input_dim, self.output_dim).to(device)
        tracker = self._new_tracker(f"gnn_nas_proxy_amazon_{self.dataset_name}")
        tracker.start()
        try:
            data = self.data.to(device)
            best_val_acc = self._fit(model, data, max_epochs,
                                     acc_threshold=acc_threshold, min_epochs=min_epochs)
        finally:
            # Always stop the tracker, even when training raises.
            emissions = tracker.stop()
        return {'accuracy': best_val_acc, 'carbon': emissions, 'model': model}

    def train_and_evaluate(self, config, epochs=100):
        """Train ``config`` for a fixed number of epochs without early stopping.

        Args:
            config: Architecture dictionary understood by ``GNNModel``.
            epochs: Number of training epochs.

        Returns:
            dict with ``accuracy`` (best validation accuracy over all epochs),
            ``carbon`` (value returned by the tracker's ``stop()``) and ``model``.
        """
        device = self._device()
        model = GNNModel(config, self.input_dim, self.output_dim).to(device)
        tracker = self._new_tracker(f"gnn_nas_amazon_{self.dataset_name}")
        tracker.start()
        try:
            data = self.data.to(device)
            best_val_acc = self._fit(model, data, epochs)
        finally:
            # Always stop the tracker, even when training raises.
            emissions = tracker.stop()
        return {'accuracy': best_val_acc, 'carbon': emissions, 'model': model}

    def run_search(self):
        """Run the baseline, the generational search, then retrain and test the winner.

        The best architecture (by validation accuracy, starting from the
        baseline) is retrained from scratch on train + validation nodes for
        100 epochs and evaluated once on the test nodes. Results are printed.
        """
        print(f"Starting Carbon-Aware GNN NAS on Amazon {self.dataset_name}...")
        baseline_config = {
            'conv_type': 'GCN', 'activation': 'relu', 'norm_type': 'batch',
            'hidden_dim': 128, 'num_layers': 2, 'dropout': 0.5
        }
        self.baseline_config = baseline_config
        print("Training baseline...")
        baseline_result = self.train_and_evaluate(baseline_config, epochs=100)
        print(f"Baseline - Val Accuracy: {baseline_result['accuracy']:.4f}, Carbon: {baseline_result['carbon']}")
        self.best_score = baseline_result['accuracy']
        self.best_architecture = baseline_config

        for generation in range(self.generations):
            print(f"\nGeneration {generation + 1}/{self.generations}")

            # Stage 1: sample architectures and keep those passing the proxy.
            # Trained models are not retained; only the configuration is
            # needed later.
            population = []
            attempts = 0
            while len(population) < self.population_size and attempts < self.max_attempts:
                config = self.search_space.sample_architecture()
                result = self.proxy_train_and_eval(config)
                if result['accuracy'] >= self.proxy_acc_threshold:
                    population.append({
                        'config': config,
                        'proxy_acc': result['accuracy'],
                        'carbon': result['carbon'],
                    })
                    print(f"  Found candidate {len(population)}: Proxy Acc={result['accuracy']:.4f}, Proxy Carbon={result['carbon']}")
                attempts += 1

            if not population:
                print("  No viable candidates in this generation. Skipping evolution.")
                continue

            # Stage 2: fully train every surviving candidate.
            final_population = []
            for idx, candidate in enumerate(population, 1):
                full_result = self.train_and_evaluate(candidate['config'], epochs=100)
                final_population.append({
                    'config': candidate['config'],
                    'accuracy': full_result['accuracy'],
                    'carbon': full_result['carbon'],
                })
                print(f"    Final candidate {idx}: Val Acc={full_result['accuracy']:.4f}, Carbon={full_result['carbon']}")

            # On ties max() returns the first maximum, as the stable
            # descending sort did.
            best_candidate = max(final_population, key=lambda entry: entry['accuracy'])
            if best_candidate['accuracy'] > self.best_score:
                self.best_score = best_candidate['accuracy']
                self.best_architecture = best_candidate['config']

        print("\nRetraining best architecture on train + val, and testing...")
        train_val_mask = self.data.train_mask | self.data.val_mask
        device = self._device()
        data = self.data.to(device)
        model = GNNModel(self.best_architecture, self.input_dim, self.output_dim).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
        criterion = nn.CrossEntropyLoss()
        model.train()
        for epoch in range(100):
            out = model(data.x, data.edge_index)
            loss = criterion(out[train_val_mask], data.y[train_val_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        model.eval()
        with torch.no_grad():
            test_pred = model(data.x, data.edge_index)[data.test_mask].argmax(dim=1)
            test_acc = (test_pred == data.y[data.test_mask]).float().mean().item()
        reuse = self.calculate_block_reuse(self.best_architecture, self.baseline_config)
        print("\n--- Final Results ---")
        print("Best Architecture:", self.best_architecture)
        print("Test Accuracy:", round(test_acc, 4))
        print("Block Reuse Score:", round(reuse, 3))

if __name__ == '__main__':
    # Settings for the Amazon Computers search, gathered in one place.
    search_settings = {
        'dataset_name': 'computers',
        'population_size': 12,
        'generations': 5,
        'max_attempts': 100,
        'proxy_acc_threshold': 0.75,
        'max_proxy_epochs_first_gen': 30,
    }
    nas = CarbonAwareGNNNAS(**search_settings)
    nas.run_search()


In [None]:
import os
import urllib.request

# Download and unpack the SNAP Epinions edge list unless the extracted file
# is already present in the working directory.
url = "https://snap.stanford.edu/data/soc-Epinions1.txt.gz"
local_gz = "soc-Epinions1.txt.gz"
local_txt = "soc-Epinions1.txt"

if os.path.exists(local_txt):
    print("File already exists:", local_txt)
else:
    print("Downloading soc-Epinions1.txt.gz...")
    urllib.request.urlretrieve(url, local_gz)
    print("Extracting...")
    import gzip, shutil
    # Extract under a temporary name and rename only once complete, so an
    # interrupted extraction never leaves a truncated file that later runs
    # would mistake for the finished dataset.
    partial_txt = local_txt + ".part"
    with gzip.open(local_gz, 'rb') as f_in, open(partial_txt, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    os.replace(partial_txt, local_txt)
    print("File ready:", local_txt)


In [None]:
import os
import urllib.request
import gzip
import shutil
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import pandas as pd
from sklearn.metrics import f1_score
import networkx as nx
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, SAGEConv, GATConv
from codecarbon import EmissionsTracker

def set_seed(seed=42):
    """Make runs repeatable by seeding ``random``, NumPy and torch.

    On a CUDA machine the CUDA generator is seeded too, and cuDNN is set to
    deterministic mode with benchmark auto-tuning turned off.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

# --- AUTOMATIC DOWNLOAD ---
# Fetch and unpack the SNAP Epinions edge list unless the extracted file is
# already present in the working directory.
url = "https://snap.stanford.edu/data/soc-Epinions1.txt.gz"
local_gz = "soc-Epinions1.txt.gz"
local_txt = "soc-Epinions1.txt"
if os.path.exists(local_txt):
    print("File already exists:", local_txt)
else:
    print("Downloading soc-Epinions1.txt.gz...")
    urllib.request.urlretrieve(url, local_gz)
    print("Extracting...")
    # Extract under a temporary name and rename only once complete, so an
    # interrupted extraction never leaves a truncated file that later runs
    # would mistake for the finished dataset.
    partial_txt = local_txt + ".part"
    with gzip.open(local_gz, 'rb') as f_in, open(partial_txt, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    os.replace(partial_txt, local_txt)
    print("File ready:", local_txt)

# --- Robust Loader: Ignore malformed lines and build complete node list ---
def load_epinions_graph(path):
    """Read a signed edge list into a directed graph plus per-edge labels.

    Lines starting with ``#`` and blank lines are skipped. Every other line
    must consist of exactly three integer tokens ``u v w``; anything else is
    ignored. An edge is labelled ``1`` when ``w > 0`` and ``0`` otherwise.

    NOTE(review): the SNAP ``soc-Epinions1`` file appears to be a two-column
    unsigned edge list. If so, every data line is skipped here and the graph
    comes back empty -- confirm that the intended dataset is a signed,
    three-column one.

    Args:
        path: Path of the text edge list.

    Returns:
        Tuple ``(G, labels)`` where ``G`` is a ``networkx.DiGraph`` and
        ``labels[i]`` is the label of the i-th edge yielded by ``G.edges()``.
    """
    edge_list = []
    sign_of = {}
    nodes = set()
    with open(path, 'r') as f:
        for line in f:
            if line.startswith('#') or not line.strip():
                continue
            tokens = line.split()
            if len(tokens) != 3:
                continue  # skip lines that do not have exactly 3 tokens
            try:
                u, v, w = map(int, tokens)
            except ValueError:
                continue  # three tokens, but not all integers: malformed too
            edge_list.append((u, v))
            # If an edge is repeated, its last occurrence decides the label.
            sign_of[(u, v)] = 1 if w > 0 else 0
            nodes.update((u, v))

    G = nx.DiGraph()
    G.add_nodes_from(sorted(nodes))  # ensure all nodes present
    G.add_edges_from(edge_list)

    # Emit labels in the graph's own edge order. That order is what
    # pyg_data_from_nx iterates; it need not match file order, and DiGraph
    # collapses duplicate edges, so file-order labels could be misaligned.
    labels = [sign_of[edge] for edge in G.edges()]
    return G, labels

def pyg_data_from_nx(G, labels):
    """Convert a NetworkX graph and its edge labels into a PyG ``Data`` object.

    Nodes are renumbered ``0..N-1`` following their sorted order. Every node
    receives the same 8-dimensional all-ones feature vector, because no real
    node features are available.

    Args:
        G: Graph whose ``G.edges()`` order defines the edge order.
        labels: One integer label per edge, in ``G.edges()`` order.

    Returns:
        ``Data`` with ``x`` of shape ``(N, 8)``, ``edge_index`` of shape
        ``(2, E)`` and ``edge_y`` of shape ``(E,)``.

    Raises:
        ValueError: If the number of labels differs from the number of edges.
    """
    node_mapping = {node: idx for idx, node in enumerate(sorted(G.nodes()))}
    pairs = [(node_mapping[u], node_mapping[v]) for u, v in G.edges()]
    if len(labels) != len(pairs):
        raise ValueError(
            f"Got {len(labels)} labels for {len(pairs)} edges; labels must align with G.edges()."
        )
    # reshape(-1, 2) keeps the (2, E) layout even for an empty edge list, and
    # contiguous() gives the transposed tensor its own memory layout.
    edge_index = torch.tensor(pairs, dtype=torch.long).reshape(-1, 2).t().contiguous()
    x = torch.ones((G.number_of_nodes(), 8))  # Dummy node features
    y = torch.tensor(labels, dtype=torch.long)
    return Data(x=x, edge_index=edge_index, edge_y=y)

def create_edge_splits(data, seed=42):
    """Attach random 70/10/20 train/val/test edge masks to ``data``.

    NumPy's global generator is re-seeded with ``seed`` and one permutation of
    the edge indices is cut into the three splits; the test split takes
    whatever remains after train and validation.

    Args:
        data: Object with ``edge_index`` (edges along dimension 1) and
            ``edge_y`` (one entry per edge).
        seed: Seed for ``np.random``.

    Returns:
        The same ``data`` object, now carrying the boolean tensors
        ``train_edge_mask``, ``val_edge_mask`` and ``test_edge_mask``.
    """
    np.random.seed(seed)
    edge_count = data.edge_index.size(1)
    assert edge_count == data.edge_y.size(0)

    order = np.random.permutation(edge_count)
    train_end = int(0.7 * edge_count)
    val_end = train_end + int(0.1 * edge_count)
    partitions = (
        ('train_edge_mask', order[:train_end]),
        ('val_edge_mask', order[train_end:val_end]),
        ('test_edge_mask', order[val_end:]),
    )
    for attr_name, member_idx in partitions:
        mask = torch.zeros(edge_count, dtype=torch.bool)
        mask[member_idx] = True
        setattr(data, attr_name, mask)
    return data

class GNNSearchSpace:
    """Discrete hyperparameter choices for the edge-classification GNN search."""

    def __init__(self):
        # One list of candidate values per tunable hyperparameter.
        self.conv_types = ['GCN', 'SAGE', 'GAT']
        self.hidden_dims = [32, 64, 128]
        self.num_layers = [2, 3]
        self.activation_types = ['relu', 'elu']
        self.dropout_rates = [0.2, 0.5]

    def sample_architecture(self):
        """Draw one value per hyperparameter, uniformly and independently.

        Returns:
            dict with keys ``conv_type``, ``hidden_dim``, ``num_layers``,
            ``activation`` and ``dropout``.
        """
        pick = random.choice
        architecture = {}
        # Assignment order fixes the order in which ``random`` is consumed.
        architecture['conv_type'] = pick(self.conv_types)
        architecture['hidden_dim'] = pick(self.hidden_dims)
        architecture['num_layers'] = pick(self.num_layers)
        architecture['activation'] = pick(self.activation_types)
        architecture['dropout'] = pick(self.dropout_rates)
        return architecture

class EdgeScoreModel(nn.Module):
    """GNN encoder followed by a linear two-class classifier over edges.

    Node embeddings come from ``config['num_layers']`` convolutions of type
    ``config['conv_type']``, each ``config['hidden_dim']`` wide. An edge is
    scored from the concatenated embeddings of its source and destination.
    """

    def __init__(self, config, input_dim):
        """Build the convolution stack and the edge classifier.

        Args:
            config: Architecture dictionary with ``conv_type``, ``hidden_dim``,
                ``num_layers``, ``activation`` and ``dropout``.
            input_dim: Size of the input node features.
        """
        super().__init__()
        layer_types = {'GCN': GCNConv, 'SAGE': SAGEConv, 'GAT': GATConv}
        self.convs = nn.ModuleList()
        width = input_dim
        for _ in range(config['num_layers']):
            next_width = config['hidden_dim']
            layer_cls = layer_types[config['conv_type']]
            self.convs.append(layer_cls(width, next_width))
            width = next_width
        # Anything other than 'relu' selects ELU.
        if config['activation'] == 'relu':
            self.activation = F.relu
        else:
            self.activation = F.elu
        self.dropout = config['dropout']
        # Source and destination embeddings are concatenated, hence 2 * width.
        self.classifier = nn.Linear(2 * width, 2)

    def forward(self, x, edge_index, target_edges=None):
        """Return two logits for every target edge.

        Args:
            x: Node feature tensor.
            edge_index: Edges used for message passing.
            target_edges: Edges to score, source indices in row 0 and
                destination indices in row 1; defaults to ``edge_index``.
        """
        hidden = x
        for layer in self.convs:
            hidden = layer(hidden, edge_index)
            hidden = self.activation(hidden)
            hidden = F.dropout(hidden, self.dropout, training=self.training)
        scored_edges = edge_index if target_edges is None else target_edges
        endpoints = [hidden[scored_edges[0]], hidden[scored_edges[1]]]
        return self.classifier(torch.cat(endpoints, dim=-1))

class CarbonAwareEdgeNAS:
    """Random-search NAS for edge classification on a pre-split graph.

    Each generation draws fresh random architectures from ``GNNSearchSpace``,
    keeps those whose short proxy training reaches ``proxy_acc_threshold``
    validation accuracy, fully trains the survivors and remembers the best
    validation accuracy seen. Proxy runs are measured with CodeCarbon; the
    measured emissions are not used for selecting architectures.

    Every forward pass uses all of ``data.edge_index`` for message passing;
    the split masks only choose which edges are scored.
    """

    def __init__(self, data, population_size=8, generations=3, seed=42, max_attempts=80,
                 proxy_acc_threshold=0.7, proxy_epochs=20):
        """Seed the RNGs and store the data and search settings.

        Args:
            data: Graph object with ``x``, ``edge_index``, ``edge_y`` and the
                ``train_edge_mask`` / ``val_edge_mask`` / ``test_edge_mask`` masks.
            population_size: Candidates to collect per generation.
            generations: Number of sample/filter/train rounds.
            seed: Seed passed to ``set_seed``.
            max_attempts: Maximum architectures sampled per generation.
            proxy_acc_threshold: Validation accuracy a proxy run must reach.
            proxy_epochs: Default epoch budget of a proxy run.
        """
        set_seed(seed)
        self.data = data
        self.population_size = population_size
        self.generations = generations
        self.search_space = GNNSearchSpace()
        self.max_attempts = max_attempts
        self.proxy_acc_threshold = proxy_acc_threshold
        self.proxy_epochs = proxy_epochs
        self.results = []

    @staticmethod
    def _device():
        """Return the CUDA device when available, otherwise the CPU."""
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _tensors_on(self, device):
        """Return node features, edge index and edge labels moved to ``device``."""
        return (self.data.x.to(device),
                self.data.edge_index.to(device),
                self.data.edge_y.to(device))

    @staticmethod
    def _train_epoch(model, optimizer, criterion, x, edge_index, edge_y, mask):
        """Run one optimisation step on the edges selected by ``mask``."""
        model.train()
        optimizer.zero_grad()
        out = model(x, edge_index, edge_index[:, mask])
        loss = criterion(out, edge_y[mask])
        loss.backward()
        optimizer.step()

    @staticmethod
    def _predict(model, x, edge_index, mask):
        """Return the predicted class of every edge selected by ``mask``."""
        model.eval()
        with torch.no_grad():
            return model(x, edge_index, edge_index[:, mask]).argmax(dim=1)

    def proxy_train_and_eval(self, config, max_epochs=None, acc_threshold=None):
        """Briefly train ``config``, stopping once validation accuracy hits the threshold.

        Args:
            config: Architecture dictionary understood by ``EdgeScoreModel``.
            max_epochs: Epoch budget; defaults to ``proxy_epochs``.
            acc_threshold: Early-stop accuracy; defaults to ``proxy_acc_threshold``.

        Returns:
            dict with ``val_acc`` (best validation accuracy), ``carbon``
            (value returned by the tracker's ``stop()``) and ``model``.
        """
        if max_epochs is None:
            max_epochs = self.proxy_epochs
        if acc_threshold is None:
            acc_threshold = self.proxy_acc_threshold
        device = self._device()
        model = EdgeScoreModel(config, self.data.x.size(1)).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        tracker = EmissionsTracker(
            project_name="epinions_edge_nas_proxy",
            measure_power_secs=10,
            output_file="emissions_epinions.csv",
            log_level='error'
        )
        tracker.start()
        try:
            x, edge_index, edge_y = self._tensors_on(device)
            # Move the masks once instead of indexing device tensors with
            # CPU masks on every epoch.
            train_mask = self.data.train_edge_mask.to(device)
            val_mask = self.data.val_edge_mask.to(device)
            best_val_acc = 0.0
            for _ in range(max_epochs):
                self._train_epoch(model, optimizer, criterion, x, edge_index, edge_y, train_mask)
                pred = self._predict(model, x, edge_index, val_mask)
                acc = (pred == edge_y[val_mask]).float().mean().item()
                best_val_acc = max(best_val_acc, acc)
                if best_val_acc >= acc_threshold:
                    break
        finally:
            # Always stop the tracker, even when training raises.
            emissions = tracker.stop()
        return {'val_acc': best_val_acc, 'carbon': emissions, 'model': model}

    def train_and_evaluate(self, config, epochs=40):
        """Train ``config`` for ``epochs`` epochs and score it once on validation edges.

        Args:
            config: Architecture dictionary understood by ``EdgeScoreModel``.
            epochs: Number of training epochs.

        Returns:
            dict with ``val_acc`` (accuracy after the final epoch) and ``model``.
        """
        device = self._device()
        model = EdgeScoreModel(config, self.data.x.size(1)).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        x, edge_index, edge_y = self._tensors_on(device)
        train_mask = self.data.train_edge_mask.to(device)
        val_mask = self.data.val_edge_mask.to(device)
        for _ in range(epochs):
            self._train_epoch(model, optimizer, criterion, x, edge_index, edge_y, train_mask)
        pred = self._predict(model, x, edge_index, val_mask)
        acc = (pred == edge_y[val_mask]).float().mean().item()
        return {'val_acc': acc, 'model': model}

    def run_search(self):
        """Run the baseline, the generational search, then retrain and test the winner.

        The best architecture (by validation accuracy, starting from the
        baseline) is retrained from scratch on train + validation edges for
        40 epochs and evaluated on the test edges. Accuracy and macro-F1 are
        printed.
        """
        print("Starting Carbon-Aware Edge NAS on Epinions...")
        baseline_config = {'conv_type': 'GCN', 'hidden_dim': 64, 'num_layers': 2,
                           'activation': 'relu', 'dropout': 0.5}
        baseline_result = self.train_and_evaluate(baseline_config, epochs=40)
        self.best_score = baseline_result['val_acc']
        self.best_architecture = baseline_config
        print("Baseline - Val Accuracy:", round(self.best_score, 4))

        for generation in range(self.generations):
            # Stage 1: sample architectures and keep those passing the proxy.
            population = []
            attempts = 0
            while len(population) < self.population_size and attempts < self.max_attempts:
                config = self.search_space.sample_architecture()
                result = self.proxy_train_and_eval(config)
                if result['val_acc'] >= self.proxy_acc_threshold:
                    population.append({'config': config, 'val_acc': result['val_acc'], 'carbon': result['carbon']})
                    print(f"[Gen {generation+1}] Found: Proxy Val Acc={result['val_acc']:.4f}")
                attempts += 1
            if not population:
                print(f"[Gen {generation+1}] No good candidates.")
                continue

            # Stage 2: fully train the survivors and keep the best one.
            final_population = []
            for cand in population:
                full_res = self.train_and_evaluate(cand['config'], epochs=40)
                final_population.append({'config': cand['config'], 'val_acc': full_res['val_acc']})
            # On ties max() returns the first maximum, as the stable
            # descending sort did.
            best = max(final_population, key=lambda entry: entry['val_acc'])
            if best['val_acc'] > self.best_score:
                self.best_score = best['val_acc']
                self.best_architecture = best['config']

        print("Retraining best NAS architecture on all edge splits for test evaluation...")
        # Train on train+val, test on test split
        device = self._device()
        model = EdgeScoreModel(self.best_architecture, self.data.x.size(1)).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        x, edge_index, edge_y = self._tensors_on(device)
        full_mask = (self.data.train_edge_mask | self.data.val_edge_mask).to(device)
        test_mask = self.data.test_edge_mask.to(device)
        for _ in range(40):
            self._train_epoch(model, optimizer, criterion, x, edge_index, edge_y, full_mask)
        pred = self._predict(model, x, edge_index, test_mask).cpu().numpy()
        test_labels = edge_y[test_mask].cpu().numpy()
        test_acc = (pred == test_labels).mean()
        test_f1 = f1_score(test_labels, pred, average='macro')
        print("--- Final Results ---")
        print("Best Architecture:", self.best_architecture)
        print("Test Accuracy:", round(test_acc, 4))
        print("Test Macro-F1:", round(test_f1, 4))

# ---- Usage ----
if __name__ == '__main__':
    set_seed(42)
    # Load the edge list, convert it to a PyG object and attach the splits.
    G, labels = load_epinions_graph('soc-Epinions1.txt')
    data = create_edge_splits(pyg_data_from_nx(G, labels), seed=42)
    search_options = {
        'population_size': 8,
        'generations': 3,
        'max_attempts': 80,
        'proxy_acc_threshold': 0.7,
        'proxy_epochs': 20,
    }
    nas = CarbonAwareEdgeNAS(data, **search_options)
    nas.run_search()


In [None]:
import os
import urllib.request
import gzip
import shutil
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import pandas as pd
from sklearn.metrics import roc_auc_score, f1_score
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, SAGEConv, GATConv
from codecarbon import EmissionsTracker

def set_seed(seed=42):
    """Seed NumPy, ``random`` and torch so repeated runs draw the same numbers.

    If CUDA is available, its generator is seeded as well and cuDNN is put
    into deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed used for every generator.
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

# --- AUTOMATIC DOWNLOAD for WikiVote ---
# Fetch and unpack the SNAP wiki-Vote edge list unless the extracted file is
# already present in the working directory.
url = "https://snap.stanford.edu/data/wiki-Vote.txt.gz"
local_gz = "wiki-Vote.txt.gz"
local_txt = "wiki-Vote.txt"
if os.path.exists(local_txt):
    print("File already exists:", local_txt)
else:
    print("Downloading wiki-Vote.txt.gz...")
    urllib.request.urlretrieve(url, local_gz)
    print("Extracting...")
    # Extract under a temporary name and rename only once complete, so an
    # interrupted extraction never leaves a truncated file that later runs
    # would mistake for the finished dataset.
    partial_txt = local_txt + ".part"
    with gzip.open(local_gz, 'rb') as f_in, open(partial_txt, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    os.replace(partial_txt, local_txt)
    print("File ready:", local_txt)

def load_wikivote_edges(path):
    """Parse a two-column edge list and renumber its nodes densely.

    Lines starting with ``#``, blank lines and lines that do not have exactly
    two tokens are skipped. Node ids are remapped to ``0..N-1`` following
    their sorted order.

    Args:
        path: Path of the text edge list.

    Returns:
        Tuple ``(pos_edges, nodes, node_map)``: the edges as remapped
        ``(source, target)`` pairs in file order, the sorted original node
        ids, and the mapping from original id to new index.
    """
    raw_edges = []
    with open(path, 'r') as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if raw_line.startswith('#') or not stripped:
                continue
            fields = stripped.split()
            if len(fields) != 2:
                continue
            src, dst = map(int, fields)
            raw_edges.append((src, dst))

    nodes = sorted({endpoint for edge in raw_edges for endpoint in edge})
    node_map = {node: idx for idx, node in enumerate(nodes)}
    pos_edges = [(node_map[src], node_map[dst]) for src, dst in raw_edges]
    return pos_edges, nodes, node_map

def sample_negative_edges(pos_edges, num_nodes, num_negs, exclude_set=None, seed=42):
    """Sample distinct directed node pairs that are not excluded edges.

    Pairs are drawn uniformly by rejection sampling from a private
    ``RandomState(seed)``; self-loops, excluded pairs and repeats are
    rejected.

    Args:
        pos_edges: Iterable of ``(source, target)`` positive edges. Only used
            to build the exclusion set when ``exclude_set`` is ``None``.
        num_nodes: Number of nodes; indices are drawn from ``[0, num_nodes)``.
        num_negs: Number of negative pairs to return.
        exclude_set: Collection of pairs that must not be sampled. Defaults
            to the set of ``pos_edges``.
        seed: Seed of the private random generator.

    Returns:
        List of ``num_negs`` unique ``(source, target)`` tuples.

    Raises:
        ValueError: If fewer than ``num_negs`` admissible pairs exist. The
            rejection loop would otherwise never terminate.
    """
    rng = np.random.RandomState(seed)
    if exclude_set is None:
        exclude_set = set(pos_edges)

    # Count the excluded pairs that the sampler could actually draw, so that
    # an impossible request is detected up front.
    blocked = {
        (s, t) for s, t in exclude_set
        if s != t and 0 <= s < num_nodes and 0 <= t < num_nodes
    }
    available = num_nodes * (num_nodes - 1) - len(blocked)
    if num_negs > available:
        raise ValueError(
            f"Requested {num_negs} negative edges but only {available} candidate pairs exist."
        )

    neg_edges = set()
    while len(neg_edges) < num_negs:
        idx_s = rng.randint(0, num_nodes)
        idx_t = rng.randint(0, num_nodes)
        if idx_s == idx_t:
            continue
        candidate = (idx_s, idx_t)
        if candidate in exclude_set or candidate in neg_edges:
            continue
        neg_edges.add(candidate)
    return list(neg_edges)

def build_link_data(pos_edges, neg_edges, num_nodes):
    """Bundle positive and negative edges into one labelled PyG ``Data`` object.

    Positive edges come first with label ``1.0``, negative edges follow with
    label ``0.0``. Every node receives the same 8-dimensional all-ones
    feature vector.

    NOTE(review): ``edge_index`` holds positives and negatives together, and
    the visible part of ``CarbonAwareEdgeNAS.proxy_train_and_eval`` passes it
    to the model as the message-passing graph -- confirm this is intended.

    Args:
        pos_edges: List of ``(source, target)`` pairs that exist.
        neg_edges: List of ``(source, target)`` pairs that do not exist.
        num_nodes: Number of nodes in the graph.

    Returns:
        ``Data`` with ``x``, ``edge_index`` and float ``edge_label``.
    """
    num_pos, num_neg = len(pos_edges), len(neg_edges)
    pair_tensor = torch.tensor(pos_edges + neg_edges, dtype=torch.long)
    targets = torch.tensor([1] * num_pos + [0] * num_neg, dtype=torch.float)
    features = torch.ones((num_nodes, 8))  # dummy node features
    return Data(x=features, edge_index=pair_tensor.t().contiguous(), edge_label=targets)

def create_edge_split(data, seed=42):
    """Attach random 70/10/20 train/val/test edge masks to ``data``.

    NumPy's global generator is re-seeded with ``seed``, one permutation of
    the edge indices is drawn, and consecutive slices of it become the three
    splits; the test split takes whatever remains after train and validation.

    Args:
        data: Object whose ``edge_index`` has its edges along dimension 1.
        seed: Seed for ``np.random``.

    Returns:
        The same ``data`` object, now carrying the boolean tensors
        ``train_edge_mask``, ``val_edge_mask`` and ``test_edge_mask``.
    """
    np.random.seed(seed)
    num_edges = data.edge_index.size(1)
    shuffled = np.random.permutation(num_edges)
    cut_train = int(0.7 * num_edges)
    cut_val = cut_train + int(0.1 * num_edges)

    def _mask_for(edge_ids):
        # Boolean mask over all edges, set only at the given positions.
        mask = torch.zeros(num_edges, dtype=torch.bool)
        mask[edge_ids] = True
        return mask

    data.train_edge_mask = _mask_for(shuffled[:cut_train])
    data.val_edge_mask = _mask_for(shuffled[cut_train:cut_val])
    data.test_edge_mask = _mask_for(shuffled[cut_val:])
    return data

class GNNSearchSpace:
    """Discrete hyperparameter choices for the link-prediction GNN search."""

    def __init__(self):
        # One list of candidate values per tunable hyperparameter.
        self.conv_types = ['GCN', 'SAGE', 'GAT']
        self.hidden_dims = [32, 64, 128]
        self.num_layers = [2, 3]
        self.activation_types = ['relu', 'elu']
        self.dropout_rates = [0.2, 0.5]

    def sample_architecture(self):
        """Draw one value per hyperparameter, uniformly and independently.

        Returns:
            dict with keys ``conv_type``, ``hidden_dim``, ``num_layers``,
            ``activation`` and ``dropout``.
        """
        # (config key, attribute holding its options). The order fixes both
        # the key order and the order in which ``random`` is consumed.
        key_to_attr = (
            ('conv_type', 'conv_types'),
            ('hidden_dim', 'hidden_dims'),
            ('num_layers', 'num_layers'),
            ('activation', 'activation_types'),
            ('dropout', 'dropout_rates'),
        )
        return {key: random.choice(getattr(self, attr)) for key, attr in key_to_attr}

class EdgeScoreModel(nn.Module):
    """GNN encoder followed by a linear scorer that emits one logit per edge.

    Node embeddings come from ``config['num_layers']`` convolutions of type
    ``config['conv_type']``, each ``config['hidden_dim']`` wide. An edge is
    scored from the concatenated embeddings of its source and destination.
    """

    def __init__(self, config, input_dim):
        """Build the convolution stack and the edge scorer.

        Args:
            config: Architecture dictionary with ``conv_type``, ``hidden_dim``,
                ``num_layers``, ``activation`` and ``dropout``.
            input_dim: Size of the input node features.
        """
        super().__init__()
        layer_types = {'GCN': GCNConv, 'SAGE': SAGEConv, 'GAT': GATConv}
        self.convs = nn.ModuleList()
        width = input_dim
        for _ in range(config['num_layers']):
            next_width = config['hidden_dim']
            layer_cls = layer_types[config['conv_type']]
            self.convs.append(layer_cls(width, next_width))
            width = next_width
        # Anything other than 'relu' selects ELU.
        if config['activation'] == 'relu':
            self.activation = F.relu
        else:
            self.activation = F.elu
        self.dropout = config['dropout']
        # Source and destination embeddings are concatenated, hence 2 * width.
        self.classifier = nn.Linear(2 * width, 1)

    def forward(self, x, edge_index, target_edges=None):
        """Return one logit for every target edge.

        Args:
            x: Node feature tensor.
            edge_index: Edges used for message passing.
            target_edges: Edges to score, source indices in row 0 and
                destination indices in row 1; defaults to ``edge_index``.
        """
        hidden = x
        for layer in self.convs:
            hidden = layer(hidden, edge_index)
            hidden = self.activation(hidden)
            hidden = F.dropout(hidden, self.dropout, training=self.training)
        scored_edges = edge_index if target_edges is None else target_edges
        endpoints = [hidden[scored_edges[0]], hidden[scored_edges[1]]]
        scores = self.classifier(torch.cat(endpoints, dim=-1))
        # Drop the trailing singleton dimension left by the 1-unit linear layer.
        return scores.squeeze(-1)

class CarbonAwareEdgeNAS:
    """Random-search NAS for edge classification with emissions tracking.

    Each generation samples configurations from ``GNNSearchSpace``, keeps
    those whose short proxy run reaches ``proxy_auc_threshold`` validation
    AUC, trains the survivors for 40 epochs, and remembers the
    configuration with the best validation AUC. ``run_search`` finally
    retrains the winner on the train+val edges and reports test metrics.

    ``data`` must expose ``x``, ``edge_index``, ``edge_label`` and the
    boolean ``train_edge_mask`` / ``val_edge_mask`` / ``test_edge_mask``
    attributes, which are the fields read by the methods below.
    """

    def __init__(self, data, population_size=8, generations=3, seed=42, max_attempts=80,
                 proxy_auc_threshold=0.7, proxy_epochs=20):
        """Seed the RNGs via ``set_seed`` and store the search settings.

        Args:
            data: Graph object holding features, edges, labels and masks.
            population_size: Candidates to collect per generation.
            generations: Number of sampling rounds.
            seed: Value forwarded to ``set_seed``.
            max_attempts: Cap on proxy runs per generation.
            proxy_auc_threshold: Validation AUC a proxy run must reach.
            proxy_epochs: Default epoch budget of a proxy run.
        """
        set_seed(seed)
        self.data = data
        self.population_size = population_size
        self.generations = generations
        self.search_space = GNNSearchSpace()
        self.max_attempts = max_attempts
        self.proxy_auc_threshold = proxy_auc_threshold
        self.proxy_epochs = proxy_epochs
        self.results = []
        # Defined here so the attributes exist even before run_search() runs.
        self.best_score = 0.0
        self.best_architecture = None

    @staticmethod
    def _device():
        """Return the CUDA device when available, otherwise the CPU."""
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _new_model(self, config, device):
        """Build a fresh ``EdgeScoreModel`` on ``device`` plus its Adam optimizer."""
        model = EdgeScoreModel(config, self.data.x.size(1)).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        return model, optimizer

    def _graph_tensors(self, device):
        """Return ``(x, edge_index, edge_label)`` moved to ``device``."""
        x = self.data.x.to(device)
        edge_index = self.data.edge_index.to(device)
        edge_label = self.data.edge_label.to(device)
        return x, edge_index, edge_label

    @staticmethod
    def _train_step(model, optimizer, criterion, x, edge_index, edge_label, mask):
        """Run one full-batch optimization step on the edges selected by ``mask``."""
        model.train()
        optimizer.zero_grad()
        out = model(x, edge_index, edge_index[:, mask])
        loss = criterion(out, edge_label[mask])
        loss.backward()
        optimizer.step()

    @staticmethod
    def _predict(model, x, edge_index, edge_label, mask):
        """Return ``(labels, probabilities)`` as NumPy arrays for the masked edges."""
        model.eval()
        with torch.no_grad():
            logits = model(x, edge_index, edge_index[:, mask])
            probs = torch.sigmoid(logits).cpu().numpy()
            gt = edge_label[mask].cpu().numpy()
        return gt, probs

    def proxy_train_and_eval(self, config, max_epochs=None, auc_threshold=None):
        """Train ``config`` briefly, stopping once validation AUC hits the threshold.

        Args:
            config: Architecture dict understood by ``EdgeScoreModel``.
            max_epochs: Epoch budget; defaults to ``self.proxy_epochs``.
            auc_threshold: Early-stop AUC; defaults to
                ``self.proxy_auc_threshold``.

        Returns:
            Dict with ``val_auc`` (best validation AUC seen), ``carbon``
            (whatever ``EmissionsTracker.stop()`` returned) and ``model``.
        """
        if max_epochs is None:
            max_epochs = self.proxy_epochs
        if auc_threshold is None:
            auc_threshold = self.proxy_auc_threshold
        device = self._device()
        model, optimizer = self._new_model(config, device)
        criterion = nn.BCEWithLogitsLoss()
        tracker = EmissionsTracker(
            project_name="wikivote_edge_nas_proxy",
            measure_power_secs=10,
            output_file="emissions_wikivote.csv",
            log_level='error'
        )
        tracker.start()
        best_val_auc = 0.0
        try:
            x, edge_index, edge_label = self._graph_tensors(device)
            train_mask = self.data.train_edge_mask
            val_mask = self.data.val_edge_mask
            for _ in range(max_epochs):
                self._train_step(model, optimizer, criterion, x, edge_index, edge_label, train_mask)
                gt, probs = self._predict(model, x, edge_index, edge_label, val_mask)
                best_val_auc = max(best_val_auc, roc_auc_score(gt, probs))
                if best_val_auc >= auc_threshold:
                    break
        finally:
            # Always stop the tracker, even if training or scoring raised,
            # so a failed candidate does not leave a tracker running.
            emissions = tracker.stop()
        return {'val_auc': best_val_auc, 'carbon': emissions, 'model': model}

    def train_and_evaluate(self, config, epochs=40):
        """Train ``config`` for ``epochs`` steps and score it on the validation edges.

        Returns:
            Dict with ``val_auc`` (AUC after the final epoch) and ``model``.
        """
        device = self._device()
        model, optimizer = self._new_model(config, device)
        criterion = nn.BCEWithLogitsLoss()
        x, edge_index, edge_label = self._graph_tensors(device)
        train_mask = self.data.train_edge_mask
        val_mask = self.data.val_edge_mask
        for _ in range(epochs):
            self._train_step(model, optimizer, criterion, x, edge_index, edge_label, train_mask)
        gt, probs = self._predict(model, x, edge_index, edge_label, val_mask)
        auc = roc_auc_score(gt, probs)
        return {'val_auc': auc, 'model': model}

    def run_search(self):
        """Run the full search and print test AUC / macro-F1 of the winner.

        A fixed GCN baseline seeds ``best_score`` / ``best_architecture``;
        a sampled candidate replaces it only with a strictly higher
        validation AUC after full training.
        """
        print("Starting Carbon-Aware Edge NAS on WikiVote...")
        baseline_config = {'conv_type': 'GCN', 'hidden_dim': 64, 'num_layers': 2,
                           'activation': 'relu', 'dropout': 0.5}
        baseline_result = self.train_and_evaluate(baseline_config, epochs=40)
        self.best_score = baseline_result['val_auc']
        self.best_architecture = baseline_config
        print("Baseline - Val AUC:", round(self.best_score, 4))
        for generation in range(self.generations):
            population = []
            attempts = 0
            # Proxy filter: keep sampling until enough candidates pass or
            # the attempt budget is exhausted.
            while len(population) < self.population_size and attempts < self.max_attempts:
                config = self.search_space.sample_architecture()
                result = self.proxy_train_and_eval(config)
                if result['val_auc'] >= self.proxy_auc_threshold:
                    population.append({'config': config, 'val_auc': result['val_auc'], 'carbon': result['carbon']})
                    print(f"[Gen {generation+1}] Found: Proxy Val AUC={result['val_auc']:.4f}")
                attempts += 1
            if not population:
                print(f"[Gen {generation+1}] No good candidates.")
                continue
            final_population = []
            for cand in population:
                full_res = self.train_and_evaluate(cand['config'], epochs=40)
                final_population.append({'config': cand['config'], 'val_auc': full_res['val_auc']})
            final_population.sort(key=lambda x: x['val_auc'], reverse=True)
            best = final_population[0]
            if best['val_auc'] > self.best_score:
                self.best_score = best['val_auc']
                self.best_architecture = best['config']

        print("Retraining best NAS architecture on all edge splits for test evaluation...")
        # Train on train+val, test on test split
        device = self._device()
        model, optimizer = self._new_model(self.best_architecture, device)
        criterion = nn.BCEWithLogitsLoss()
        x, edge_index, edge_label = self._graph_tensors(device)
        full_mask = self.data.train_edge_mask | self.data.val_edge_mask
        test_mask = self.data.test_edge_mask
        for _ in range(40):
            self._train_step(model, optimizer, criterion, x, edge_index, edge_label, full_mask)
        gt, probs = self._predict(model, x, edge_index, edge_label, test_mask)
        auc = roc_auc_score(gt, probs)
        # Hard labels at the 0.5 probability cut-off for the F1 computation.
        pred_label = (probs > 0.5).astype(np.int64)
        test_f1 = f1_score(gt, pred_label, average='macro')
        print("--- Final Results ---")
        print("Best Architecture:", self.best_architecture)
        print("Test AUC:", round(auc, 4))
        print("Test Macro-F1:", round(test_f1, 4))

# ---- Usage ----
if __name__ == '__main__':
    # Driver for the edge-NAS experiment: build the labelled edge data set,
    # split it, then run the search. The loader, sampler, builder and split
    # helpers are defined elsewhere in this file.
    set_seed(42)
    # Presumably parses the edge list stored in 'wiki-Vote.txt' (confirm against the helper).
    pos_edges, nodes, node_map = load_wikivote_edges('wiki-Vote.txt')
    num_pos = len(pos_edges)
    num_nodes = len(nodes)
    # num_pos is passed as the third argument, presumably the number of
    # negatives to draw (one per positive); existing positives are excluded.
    neg_edges = sample_negative_edges(pos_edges, num_nodes, num_pos, exclude_set=set(pos_edges), seed=42)
    data = build_link_data(pos_edges, neg_edges, num_nodes)
    data = create_edge_split(data, seed=42)
    # These keyword values equal the constructor defaults; they are spelled
    # out here so the experiment settings are visible at the call site.
    nas = CarbonAwareEdgeNAS(
        data, 
        population_size=8, 
        generations=3,
        max_attempts=80,
        proxy_auc_threshold=0.7,
        proxy_epochs=20
    )
    nas.run_search()


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import copy
import pandas as pd
from torch_geometric.datasets import Amazon
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, GATConv, SAGEConv
from torch_geometric.nn import BatchNorm, LayerNorm
from codecarbon import EmissionsTracker

def set_seed(seed=42):
    """Seed torch, ``random`` and NumPy, and force deterministic cuDNN.

    CUDA generators are seeded only when a CUDA device is available; the
    cuDNN flags are set unconditionally.
    """
    for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seed_fn(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

class GNNSearchSpace:
    """Discrete grid of architecture choices for the node-classification GNN.

    Every attribute set in ``__init__`` lists the allowed values for one
    architecture knob. ``sample_architecture`` picks one value per knob
    with ``random.choice``, so draws depend on the state of the
    module-level ``random`` generator.
    """

    # (config key, attribute holding its options) in the order values are drawn.
    _SAMPLED_FIELDS = (
        ('conv_type', 'conv_types'),
        ('activation', 'activation_types'),
        ('norm_type', 'norm_types'),
        ('hidden_dim', 'hidden_dims'),
        ('num_layers', 'num_layers'),
        ('dropout', 'dropout_rates'),
    )

    def __init__(self):
        self.conv_types = ['GCN', 'GAT', 'SAGE']
        self.norm_types = ['batch', 'layer', 'none']
        self.activation_types = ['relu', 'elu']
        self.hidden_dims = [64, 128, 256]
        self.num_layers = [2, 3]
        self.dropout_rates = [0.5]

    def sample_architecture(self):
        """Return a config dict with one randomly chosen value per knob."""
        return {
            key: random.choice(getattr(self, options_attr))
            for key, options_attr in self._SAMPLED_FIELDS
        }

class GNNModel(nn.Module):
    """Configurable GNN for node classification.

    ``config`` supplies ``conv_type`` ('GCN', 'GAT' or 'SAGE'),
    ``norm_type`` ('batch', 'layer', anything else means no norm),
    ``activation`` ('relu' or 'elu'), ``hidden_dim``, ``num_layers`` and
    ``dropout``. Hidden layers output ``hidden_dim`` features and are each
    followed by norm, activation and dropout; the last layer outputs
    ``output_dim`` raw logits.

    Raises:
        ValueError: If ``config['conv_type']`` is not a supported type.
    """

    _SUPPORTED_CONVS = ('GCN', 'GAT', 'SAGE')

    def __init__(self, config, input_dim, output_dim):
        super().__init__()
        conv_type = config['conv_type']
        # Fail early with a clear message; previously an unknown type fell
        # through the if/elif chain and surfaced as an UnboundLocalError.
        if conv_type not in self._SUPPORTED_CONVS:
            raise ValueError(
                f"Unsupported conv_type {conv_type!r}; expected one of {self._SUPPORTED_CONVS}"
            )
        self.num_layers = config['num_layers']
        self.config = config
        self.convs = nn.ModuleList()
        self.norms = nn.ModuleList()
        in_dim = input_dim
        hidden_dim = config['hidden_dim']
        for i in range(self.num_layers):
            is_last = i == self.num_layers - 1
            out_dim = output_dim if is_last else hidden_dim
            if conv_type == 'GCN':
                conv = GCNConv(in_dim, out_dim)
            elif conv_type == 'GAT':
                conv = GATConv(in_dim, out_dim, heads=1)
            else:  # 'SAGE' (validated above)
                conv = SAGEConv(in_dim, out_dim)
            self.convs.append(conv)
            # Only hidden layers get a normalization module, so
            # len(self.norms) == num_layers - 1.
            if not is_last:
                if config['norm_type'] == 'batch':
                    norm = BatchNorm(out_dim)
                elif config['norm_type'] == 'layer':
                    norm = LayerNorm(out_dim)
                else:
                    norm = nn.Identity()
                self.norms.append(norm)
            in_dim = out_dim

    def forward(self, x, edge_index):
        """Return the output of the final convolution for every node."""
        for i in range(self.num_layers):
            x = self.convs[i](x, edge_index)
            if i < self.num_layers - 1:
                x = self.norms[i](x)
                # An activation name other than 'relu'/'elu' leaves x unchanged.
                if self.config['activation'] == 'relu':
                    x = F.relu(x)
                elif self.config['activation'] == 'elu':
                    x = F.elu(x)
                x = F.dropout(x, p=self.config['dropout'], training=self.training)
        return x

class CarbonAwareGNNNAS:
    """Evolutionary NAS for node classification on an Amazon graph.

    Generation 0 samples random configurations; later generations mutate
    the best configuration found so far. Candidates must pass a short
    proxy run (validation accuracy >= ``proxy_acc_threshold``) before
    they are fully trained and ranked by a weighted score of accuracy,
    carbon cost and similarity to the baseline ("block reuse").
    """

    def __init__(self, dataset_name='Photo', population_size=12, generations=5, seed=42,
                 max_attempts=200, proxy_acc_threshold=0.90, max_proxy_epochs_first_gen=80):
        """Seed the RNGs, store the search settings and load the data set.

        Args:
            dataset_name: Name passed to ``torch_geometric.datasets.Amazon``.
            population_size: Candidates to collect per generation.
            generations: Number of search rounds.
            seed: Value forwarded to ``set_seed``.
            max_attempts: Cap on proxy runs per generation.
            proxy_acc_threshold: Validation accuracy a proxy run must reach.
            max_proxy_epochs_first_gen: Proxy epoch budget for generation 0.
        """
        set_seed(seed)
        self.dataset_name = dataset_name
        self.population_size = population_size
        self.generations = generations
        self.search_space = GNNSearchSpace()
        self.output_file = f"{self.dataset_name}_emissions.csv"
        self.max_attempts = max_attempts
        self.proxy_acc_threshold = proxy_acc_threshold
        self.max_proxy_epochs_first_gen = max_proxy_epochs_first_gen
        self.load_dataset()
        self.results = []
        self.best_architecture = None
        self.best_score = 0.0
        self.best_model = None

    def load_dataset(self):
        """Load the Amazon graph and attach a random 60/20/20 node split."""
        dataset = Amazon(root=f'data/{self.dataset_name}', name=self.dataset_name)
        data = dataset[0]
        self.input_dim = dataset.num_node_features
        self.output_dim = dataset.num_classes
        N = data.y.size(0)
        # Shuffled with NumPy's global RNG, which __init__ seeds via set_seed.
        indices = np.arange(N)
        np.random.shuffle(indices)
        train_size = int(0.6 * N)
        val_size = int(0.2 * N)
        train_idx = torch.tensor(indices[:train_size])
        val_idx = torch.tensor(indices[train_size:train_size + val_size])
        test_idx = torch.tensor(indices[train_size + val_size:])
        # Build masks
        data.train_mask = torch.zeros(N, dtype=torch.bool)
        data.val_mask = torch.zeros(N, dtype=torch.bool)
        data.test_mask = torch.zeros(N, dtype=torch.bool)
        data.train_mask[train_idx] = True
        data.val_mask[val_idx] = True
        data.test_mask[test_idx] = True
        self.data = data
        print(f"Dataset: Amazon {self.dataset_name} | Features: {self.input_dim} | Classes: {self.output_dim} | Nodes: {N}")

    def calculate_block_reuse(self, arch1, arch2):
        """Return the fraction of ``arch1`` entries that ``arch2`` matches.

        An empty ``arch1`` yields 0.0, and keys missing from ``arch2``
        count as mismatches instead of raising.
        """
        if not arch1:
            return 0.0
        matches = sum(
            1 for key, value in arch1.items()
            if key in arch2 and arch2[key] == value
        )
        return matches / len(arch1)

    def estimate_proxy_carbon(self, config):
        """Return a unitless heuristic cost from conv type, width, depth and norm."""
        layer_weights = {'GCN': 1.0, 'GAT': 1.5, 'SAGE': 1.2}
        norm_weights = {'batch': 0.2, 'layer': 0.1, 'none': 0.0}
        return (layer_weights[config['conv_type']] +
                config['hidden_dim'] / 128.0 +
                config['num_layers'] / 3.0 +
                norm_weights[config['norm_type']])

    def _resolve_carbon(self, emissions, config):
        """Turn the value returned by ``EmissionsTracker.stop()`` into a float.

        ``stop()`` reports emissions as a plain number, so a numeric value
        is used directly. A dict carrying an ``"emissions"`` key is still
        accepted. Anything else (e.g. ``None`` or NaN) falls back to the
        scaled heuristic estimate.
        """
        if isinstance(emissions, dict):
            if emissions and "emissions" in emissions:
                return emissions["emissions"]
        elif isinstance(emissions, (int, float)) and not isinstance(emissions, bool):
            if emissions == emissions:  # rejects NaN
                return float(emissions)
        return self.estimate_proxy_carbon(config) * 0.01

    def _new_tracker(self, project_name):
        """Create an emissions tracker writing to ``self.output_file``."""
        return EmissionsTracker(
            project_name=project_name,
            measure_power_secs=15,
            output_file=self.output_file
        )

    @staticmethod
    def _train_step(model, optimizer, criterion, data):
        """Run one full-batch optimization step on the training nodes."""
        model.train()
        out = model(data.x, data.edge_index)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    @staticmethod
    def _val_accuracy(model, data):
        """Return validation accuracy, computed in eval mode without gradients."""
        model.eval()
        with torch.no_grad():
            logits = model(data.x, data.edge_index)
            val_pred = logits[data.val_mask].argmax(dim=1)
            return (val_pred == data.y[data.val_mask]).float().mean().item()

    def proxy_train_and_eval(self, config, max_epochs=None, acc_threshold=None, check_every=5, min_epochs=10):
        """Train ``config`` briefly, stopping early once it looks viable.

        Args:
            config: Architecture dict understood by ``GNNModel``.
            max_epochs: Epoch budget; defaults to
                ``self.max_proxy_epochs_first_gen``.
            acc_threshold: Early-stop accuracy; defaults to
                ``self.proxy_acc_threshold``.
            check_every: Accepted for compatibility; currently unused.
            min_epochs: Early stopping is only considered from this epoch
                index on.

        Returns:
            Dict with ``accuracy`` (best validation accuracy seen),
            ``carbon`` and ``model``.
        """
        if max_epochs is None:
            max_epochs = self.max_proxy_epochs_first_gen
        if acc_threshold is None:
            acc_threshold = self.proxy_acc_threshold
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = GNNModel(config, self.input_dim, self.output_dim).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
        criterion = nn.CrossEntropyLoss()
        tracker = self._new_tracker(f"gnn_nas_proxy_amazon_{self.dataset_name}")
        tracker.start()
        best_val_acc = 0.0
        try:
            data = self.data.to(device)
            for epoch in range(max_epochs):
                self._train_step(model, optimizer, criterion, data)
                best_val_acc = max(best_val_acc, self._val_accuracy(model, data))
                if epoch >= min_epochs and (best_val_acc >= acc_threshold):
                    break
        finally:
            # Stop the tracker even when training raises.
            emissions = tracker.stop()
        carbon = self._resolve_carbon(emissions, config)
        return {'accuracy': best_val_acc, 'carbon': carbon, 'model': model}

    def train_and_evaluate(self, config, epochs=200):
        """Train ``config`` for ``epochs`` steps and score it on validation nodes.

        Returns:
            Dict with ``accuracy`` (validation accuracy after the final
            epoch), ``carbon`` and ``model`` (left in eval mode).
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = GNNModel(config, self.input_dim, self.output_dim).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
        criterion = nn.CrossEntropyLoss()
        tracker = self._new_tracker(f"gnn_nas_amazon_{self.dataset_name}")
        tracker.start()
        try:
            data = self.data.to(device)
            for _ in range(epochs):
                self._train_step(model, optimizer, criterion, data)
            val_acc = self._val_accuracy(model, data)
        finally:
            # Stop the tracker even when training raises.
            emissions = tracker.stop()
        carbon = self._resolve_carbon(emissions, config)
        return {'accuracy': val_acc, 'carbon': carbon, 'model': model}

    def mutate_architecture(self, base_config):
        """Return a copy of ``base_config`` with one randomly chosen knob changed.

        A ``None`` base yields a fresh random sample. If the chosen knob
        has no alternative value, the copy is returned unchanged.
        """
        # Key mapping to avoid KeyError
        component_map = {
            'conv_types': 'conv_type',
            'activation_types': 'activation',
            'norm_types': 'norm_type',
            'hidden_dims': 'hidden_dim',
            'num_layers': 'num_layers',
            'dropout_rates': 'dropout'
        }
        if base_config is None:
            return self.search_space.sample_architecture()
        new_config = copy.deepcopy(base_config)
        component_to_mutate = random.choice(list(component_map.keys()))
        config_key = component_map[component_to_mutate]
        current_value = new_config[config_key]
        options = getattr(self.search_space, component_to_mutate)
        possible_values = [v for v in options if v != current_value]
        if possible_values:
            new_config[config_key] = random.choice(possible_values)
        return new_config

    def run_search(self):
        """Run the search, evaluate the winner on the test nodes and save a CSV.

        Writes ``<dataset>_gnn_nas_results.csv`` in the working directory.
        Returns ``None``; returns early if no candidate was ever accepted.
        """
        print(f"Starting Carbon-Aware GNN NAS on Amazon {self.dataset_name}...")
        self.baseline_config = {
            'conv_type': 'GCN', 'activation': 'relu', 'norm_type': 'batch',
            'hidden_dim': 128, 'num_layers': 2, 'dropout': 0.5
        }
        print("Training baseline...")
        baseline_result = self.train_and_evaluate(self.baseline_config, epochs=200)
        print(f"Baseline - Val Accuracy: {baseline_result['accuracy']:.4f}, Carbon: {baseline_result['carbon']}")
        # Honour the constructor argument instead of a hard-coded 80.
        max_proxy_epochs_first_gen = self.max_proxy_epochs_first_gen
        max_proxy_epochs_others = 40
        proxy_acc_threshold = self.proxy_acc_threshold
        full_training_epochs = 200
        final_retrain_epochs = 300

        for generation in range(self.generations):
            print(f"\nGeneration {generation + 1}/{self.generations}")
            current_population = []
            attempts = 0
            if generation == 0:
                print("  Sampling initial viable population...")
                while len(current_population) < self.population_size and attempts < self.max_attempts:
                    config = self.search_space.sample_architecture()
                    result = self.proxy_train_and_eval(config, max_proxy_epochs_first_gen, proxy_acc_threshold)
                    if result['accuracy'] >= proxy_acc_threshold:
                        current_population.append({'config': config, 'proxy_acc': result['accuracy']})
                        print(f"    Found candidate {len(current_population)} with Proxy Acc={result['accuracy']:.4f}")
                    attempts += 1
                if len(current_population) < self.population_size:
                    print(f"  Warning: Only {len(current_population)} viable candidates found after {attempts} attempts!")
            else:
                print("  Mutating best architecture to create new population...")
                # Bounded like generation 0: without the cap this loop never
                # ends when no mutation reaches the proxy threshold.
                while len(current_population) < self.population_size and attempts < self.max_attempts:
                    config = self.mutate_architecture(self.best_architecture)
                    result = self.proxy_train_and_eval(config, max_proxy_epochs_others, proxy_acc_threshold)
                    if result['accuracy'] >= proxy_acc_threshold:
                        current_population.append({'config': config, 'proxy_acc': result['accuracy']})
                        print(f"    Found mutated candidate {len(current_population)} with Proxy Acc={result['accuracy']:.4f}")
                    attempts += 1
                if len(current_population) < self.population_size:
                    print(f"  Warning: Only {len(current_population)} viable mutated candidates found after {attempts} attempts!")

            evaluated_population = []
            for candidate in current_population:
                full_result = self.train_and_evaluate(candidate['config'], epochs=full_training_epochs)
                reuse_score = self.calculate_block_reuse(candidate['config'], self.baseline_config)
                evaluated_population.append({
                    'config': candidate['config'],
                    'accuracy': full_result['accuracy'],
                    'carbon': full_result['carbon'],
                    'reuse': reuse_score,
                    'model': full_result['model']
                })
                print(f"    Candidate: Acc={full_result['accuracy']:.4f}, Carbon={full_result['carbon']:.6f}, Reuse={reuse_score:.3f}")
            if evaluated_population:
                for cand in evaluated_population:
                    # 60% accuracy, 20% carbon (capped at 0.1, lower is
                    # better), 20% similarity to the baseline config.
                    cand['score'] = (cand['accuracy'] * 0.6
                                    + (1 - min(cand['carbon'], 0.1) / 0.1) * 0.2
                                    + cand['reuse'] * 0.2)
                evaluated_population.sort(key=lambda x: x['score'], reverse=True)
                best_of_gen = evaluated_population[0]
                if best_of_gen['score'] > self.best_score:
                    self.best_score = best_of_gen['score']
                    self.best_architecture = best_of_gen['config']
                    self.best_model = best_of_gen['model']
                    print(f"  New best architecture found! Score: {self.best_score:.4f}")
        if not self.best_architecture:
            print("No suitable architecture found. Exiting.")
            return
        print("\nRetraining best architecture on full data...")
        # NOTE: train_and_evaluate optimizes on data.train_mask only.
        final_result = self.train_and_evaluate(self.best_architecture, epochs=final_retrain_epochs)
        data = self.data
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Evaluate the freshly retrained model: put *that* model in eval
        # mode (not the earlier self.best_model) and skip autograd.
        final_model = final_result['model']
        final_model.eval()
        with torch.no_grad():
            logits = final_model(data.x.to(device), data.edge_index.to(device))
            test_pred = logits[data.test_mask].argmax(dim=1)
            test_acc = (test_pred == data.y[data.test_mask].to(device)).float().mean().item()
        final_reuse = self.calculate_block_reuse(self.best_architecture, self.baseline_config)
        print("\n--- Final Results ---")
        print(f"Best Architecture: {self.best_architecture}")
        print(f"Final Test Accuracy: {test_acc:.4f}")
        print(f"Final Carbon Footprint: {final_result['carbon']:.6f} kg CO2")
        print(f"Block Reuse Score: {final_reuse:.3f}")
        results_df = pd.DataFrame([{
            'dataset': self.dataset_name,
            'best_config': str(self.best_architecture),
            'test_accuracy': test_acc,
            'carbon_footprint': final_result['carbon'],
            'reuse_score': final_reuse
        }])
        results_df.to_csv(f'{self.dataset_name.lower()}_gnn_nas_results.csv', index=False)

if __name__ == "__main__":
    # Run the node-classification NAS on the Amazon 'Photo' graph.
    # Constructing the runner already seeds the RNGs and loads the data set;
    # proxy_acc_threshold=0.90 equals the constructor default and is spelled
    # out for visibility.
    nas_runner = CarbonAwareGNNNAS(dataset_name='Photo', proxy_acc_threshold=0.90)
    nas_runner.run_search()
