# Setup

In [None]:
!pip install torch
!pip install torch-geometric
!pip install captum # causes dependency issue with numpy as numpy requires a version <2; in colab simply hit restart runtime to use the older version without error
!pip install pandas
!pip install networkx
!pip install matplotlib
!pip install tqdm
!pip install numpy

In [None]:
from itertools import pairwise, product
import networkx as nx
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric as pyg
from tqdm.auto import tqdm
import pandas as pd
import random
import matplotlib.pyplot as plt

In [None]:
def set_seed(seed):
    """Seed the torch, NumPy and built-in ``random`` generators with ``seed``.

    Returns ``seed`` unchanged.
    """
    # Same order as before: torch, then NumPy, then the stdlib generator.
    for seeder in (torch.manual_seed, np.random.seed, random.seed):
        seeder(seed)
    return seed
# Put every generator into a known state as soon as this cell runs.
set_seed(0)

In [None]:
# In-memory experiment log: one dict per row, in insertion order.
# Rows are either training runs (added by add_experiment) or divider rows (added by insert_divider).
# Re-running this cell rebinds the name to a fresh, empty list.
experiments = []

def read_csv():
    """Read the experiment CSV located at ``folder_path + file_name``.

    The first CSV column becomes the DataFrame index.

    Returns:
        The parsed DataFrame, or ``None`` when the file does not exist.
    """
    csv_location = folder_path+file_name
    try:
        with open(csv_location, "r") as handle:
            frame = pd.read_csv(handle,index_col=0)
    except FileNotFoundError:
        return None
    return frame

def add_experiment(param):
    """Append one record (``param``) to the module-level ``experiments`` log."""
    experiments.append(param)

def get_dataframe():
    """Return the current ``experiments`` log as a new pandas DataFrame."""
    return pd.DataFrame(experiments)

def clear_experiments():
    """Empty the ``experiments`` log in place (the list object itself is kept)."""
    del experiments[:]

def load_experiments(path):
    """Append every row of the CSV at ``path`` to the in-memory experiment log.

    The file is expected to be one written by ``save_dataframe``, whose first
    column is the DataFrame index. That column is consumed as the index here
    (as ``read_csv`` already does); otherwise it comes back as an
    ``"Unnamed: 0"`` field on every row and each load/save round trip adds
    one more junk column to the CSV.

    Prints a notice and leaves the log untouched when ``path`` does not exist.
    """
    try:
        df = pd.read_csv(path, index_col=0)
    except FileNotFoundError:
        print(f"No CSV at {path}.")
        return
    # One dict per CSV row, in file order.
    experiments.extend(row.to_dict() for _, row in df.iterrows())

def save_dataframe(path):
    """Write the current experiment log to ``path`` as CSV (index column included)."""
    get_dataframe().to_csv(path)

def insert_divider():
    """Append a separator row to the experiment log.

    The row carries ``_type`` 1, ``"|"`` in the ``"_"`` column and ``"-"`` in
    every other column, in the same column order as a training-run row.
    """
    setting_columns = ("alpha","Training Data Nodes Max","Training Data Samples","Testing Data Nodes","Testing Data Samples","Seed","K","Large Number")
    result_columns = ("Hidden Channels","Epoch","Train_Loss","Test_Loss","L1 Norm","Unadjusted L1")

    divider = {"_type":1}
    divider.update(dict.fromkeys(setting_columns, "-"))
    divider["_"] = "|"
    divider.update(dict.fromkeys(result_columns, "-"))
    experiments.append(divider)

In [None]:
# Environment-specific setup: this cell only works on Google Colab as written and must be
# adapted for any other runtime. It mounts Google Drive and defines where the experiment CSV lives.
from google.colab import drive
drive.mount("/content/drive")
# folder_path and file_name are joined by plain string concatenation, so keep the trailing slash.
folder_path = "/content/drive/MyDrive/FinDSExperiments/"
# NOTE(review): "Hyperparamter" looks like a typo, but the string is the on-disk file name; rename the file together with this value if you fix it.
file_name = "GINE_Hyperparamter_Test.csv" # Should change by experiment

# Architecture

In [None]:
class ShortestPathGNN(torch.nn.Module):
    """Linear encoder, a stack of GINEConv layers, and a linear decoder.

    Node features and edge features are one-dimensional and the decoder maps
    each embedding to a single value. The conv layers are chained directly,
    with no activation applied between them in ``forward``.

    Args:
        hidden_channels: Width of the embedding used by the encoder output,
            every GINEConv MLP and the decoder input.
        num_layers: Number of GINEConv layers.
    """

    def __init__(
        self,
        hidden_channels: int,
        num_layers: int
    ):
        super().__init__()
        node_feature_dim = 1
        edge_feature_dim = 1
        prediction_dim = 1

        self.encoder = torch.nn.Linear(node_feature_dim, hidden_channels)

        # Modules are created in the same order as before (encoder, each conv's
        # MLP then the conv itself, decoder) so seeded initialisation is unchanged.
        self.layers = torch.nn.ModuleList([
            pyg.nn.GINEConv(
                nn=torch.nn.Sequential(
                    torch.nn.Linear(hidden_channels, hidden_channels),
                    torch.nn.ReLU(),
                    torch.nn.Linear(hidden_channels, hidden_channels),
                ),
                edge_dim=edge_feature_dim,
            )
            for _ in range(num_layers)
        ])

        self.decoder = torch.nn.Linear(hidden_channels, prediction_dim)

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor, edge_attr: torch.Tensor):
        """Encode ``x``, apply every conv layer in turn, and decode the result."""
        hidden = self.encoder(x)
        for conv in self.layers:
            hidden = conv(hidden, edge_index, edge_attr)
        return self.decoder(hidden)

# Generate Data

In [None]:
from collections.abc import KeysView
# Number of GINEConv layers: runTraining builds ShortestPathGNN with num_layers=K.
K = 4
# Placeholder distance: initial feature of every non-source node, and the label used for nodes with no path from the source.
large_number = 10000

def convert_networkx_to_pyg_shortest_path(graph: nx.Graph,large_number:int):
    """Turn ``graph`` into a PyG sample for single-source shortest paths from node 0.

    Every edge gets a random weight ``1.0 + 0.1 * randn()``, written onto
    ``graph`` itself under the ``edge_attr`` attribute. Node 0 starts with
    feature 0 and every other node with ``large_number``. The target ``y`` is
    each node's weighted shortest-path length from node 0, falling back to
    ``large_number`` for nodes missing from the result.

    Assumes nodes are labelled ``0 .. num_nodes-1`` (as ``erdos_renyi_graph``
    produces them) - TODO confirm for other graph sources.
    """
    # One randn() draw per edge, in graph.edges() order.
    edge_weights = {}
    for edge in graph.edges():
        edge_weights[edge] = 1.0 + 0.1*np.random.randn()
    nx.set_edge_attributes(graph, values=edge_weights, name='edge_attr')

    data = pyg.utils.convert.from_networkx(graph)
    node_count = data.num_nodes

    initial_features = [0] + [large_number] * (node_count-1)
    data.x = torch.Tensor(initial_features).unsqueeze(1)

    distances = nx.shortest_path_length(graph, source=0, weight="edge_attr")
    data.y = torch.Tensor([distances.get(node, large_number) for node in range(node_count)])

    # GINEConv is built with edge_dim=1, so give edge_attr a trailing feature axis.
    data.edge_attr = data.edge_attr.unsqueeze(1)
    return data

def get_connected_ER_graph(num_nodes: int, p: float):
    """Sample Erdos-Renyi graphs ``G(num_nodes, p)`` until one is connected and return it."""
    candidate = nx.erdos_renyi_graph(num_nodes, p)
    while not nx.is_connected(candidate):
        # Rejection sampling: discard disconnected draws and try again.
        candidate = nx.erdos_renyi_graph(num_nodes, p)
    return candidate

def create_pyg_dataset(num_nodes: int, num_samples: int, large_number:int):
    """Build a list of ``num_samples`` shortest-path samples on connected ER graphs.

    All generators are reseeded to 0 first, so every call starts from the same
    RNG state. Each graph has ``num_nodes`` nodes and edge probability 0.1.
    """
    set_seed(0)
    samples = []
    for _ in range(num_samples):
        er_graph = get_connected_ER_graph(num_nodes, p=0.1)
        samples.append(convert_networkx_to_pyg_shortest_path(er_graph,large_number))
    return samples

In [None]:
# Generate training data
# num_nodes / num_samples are also written into every experiment row by runTraining.
num_nodes = 20
num_samples = 500
train_dataset = create_pyg_dataset(num_nodes=num_nodes, num_samples=num_samples, large_number=large_number)
# Shuffled each epoch; runTraining calls set_seed before iterating this loader.
train_loader = pyg.loader.DataLoader(train_dataset, batch_size=64, shuffle=True)

# Generate testing data
# Test graphs have 10x as many nodes as the training graphs (200 vs 20).
num_test_nodes = 200
num_test_samples = 100
test_dataset = create_pyg_dataset(num_nodes=num_test_nodes, num_samples=num_test_samples, large_number=large_number)
test_loader = pyg.loader.DataLoader(test_dataset, batch_size=64, shuffle=False)

# Training/Testing

In [None]:
# Training basic setup
# Use the GPU when PyTorch reports one is available, otherwise the CPU; runTraining moves the model and batches here.
device = "cuda" if torch.cuda.is_available() else "cpu"

def _train_one_epoch(network, optimizer, loss_function, train_loader, alpha, device):
    """Run one optimisation pass over ``train_loader``.

    The objective per batch is ``loss_function(pred, y) + alpha * L1(parameters)``.

    Returns:
        ``(epoch_loss, alpha * l1, l1)`` where ``epoch_loss`` is the sum of the
        per-batch objectives divided by ``len(train_loader.dataset)`` and ``l1``
        is the parameter L1 norm computed for the last batch (before that
        batch's optimizer step). ``l1`` is 0.0 when the loader yields no batches.
    """
    network.train()
    epoch_loss = 0
    last_l1 = 0.0  # defined up front so an empty loader no longer ends in a NameError
    for batch in train_loader:
        network.zero_grad()
        batch = batch.to(device)
        pred = network(batch.x, batch.edge_index, batch.edge_attr)
        loss = loss_function(pred.flatten(), batch.y)

        # Add L1 regularization
        l1_norm = sum(p.abs().sum() for p in network.parameters())
        loss = loss + alpha * l1_norm

        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        last_l1 = l1_norm.item()

    # NOTE(review): each batch loss is already a mean (MSELoss default reduction), so dividing the
    # sum of batch losses by the number of graphs, not the number of batches, shrinks the reported
    # value. Left unchanged so new rows stay comparable with values already logged.
    epoch_loss /= len(train_loader.dataset)
    return epoch_loss, alpha * last_l1, last_l1


def _evaluate(network, loss_function, test_loader, device):
    """Return the summed per-batch loss over ``test_loader`` divided by ``len(test_loader.dataset)``.

    Runs in eval mode without gradients; no regularisation term is added.
    """
    network.eval()
    test_loss = 0.
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            pred = network(batch.x, batch.edge_index, batch.edge_attr)
            loss = loss_function(pred.flatten(), batch.y)
            test_loss += loss.item()
    # Same normalisation caveat as in _train_one_epoch.
    test_loss /= len(test_loader.dataset)
    return test_loss


def runTraining(hidden_channels_choice, seed, alpha, num_epochs=250, lr=0.0003):
    """Train a fresh ShortestPathGNN and log the final-epoch metrics.

    Uses the module-level ``train_loader``, ``test_loader``, ``device``, ``K``
    and the dataset-size globals; the latter are copied into the logged row.

    Args:
        hidden_channels_choice: Hidden width passed to ShortestPathGNN.
        seed: Value given to ``set_seed`` before the model is built.
        alpha: Weight of the L1 penalty on all model parameters.
        num_epochs: Number of training epochs (default 250, the former constant).
        lr: Adam learning rate (default 0.0003, the former constant).

    Returns:
        Dict with keys ``"Hidden Channels"``, ``"Epoch"``, ``"Train_Loss"``,
        ``"Test_Loss"``, ``"L1 Norm"`` and ``"Unadjusted L1"`` from the last
        epoch (empty when ``num_epochs`` is 0). The same values, together with
        the run settings, are appended to the experiment log via ``add_experiment``.
    """
    set_seed(seed)
    network = ShortestPathGNN(hidden_channels=hidden_channels_choice, num_layers=K)
    network.to(device)
    optimizer = torch.optim.Adam(network.parameters(), lr=lr)
    loss_function = torch.nn.MSELoss()

    pbar = tqdm(range(num_epochs))
    final_values = {}
    for epoch in pbar:
        train_loss, train_l1_norm, l1_norm = _train_one_epoch(network, optimizer, loss_function, train_loader, alpha, device)
        test_loss = _evaluate(network, loss_function, test_loader, device)
        pbar.set_description(f"Large Number: {large_number}, Alpha: {alpha}, Hidden Channels: {hidden_channels_choice}, Epoch {epoch}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, L1 Norm: {train_l1_norm:.4f}, Unadjusted L1: {l1_norm:.4f}")
        # Overwritten every epoch, so only the last epoch's metrics survive.
        final_values = {"Hidden Channels": hidden_channels_choice,"Epoch": epoch, "Train_Loss": train_loss, "Test_Loss": test_loss, "L1 Norm": train_l1_norm, "Unadjusted L1": l1_norm}

    # Settings first, then the "|" separator column, then the metrics.
    experiment = {"_type":0,"alpha":alpha,"Training Data Nodes Max":num_nodes,"Training Data Samples":num_samples,"Testing Data Nodes":num_test_nodes,"Testing Data Samples":num_test_samples,"Seed":seed,"K":K,"Large Number":large_number}
    experiment.update({"_":"|"})
    experiment.update(final_values)
    add_experiment(experiment)
    return final_values

# Custom Experiments

### General Search

In [None]:
# Grid for the general search. The run cell iterates alpha in the outer loop, then seed,
# then hidden channels in the inner loop; next_experiment() relies on that order to resume.
hidden_channels_choices = [2,4,8,16,32]
seed_choices = range(5)
alpha_choices = [0.001,0.005,0.01,0.05,0.1]

def last_completed_experiment():
    """Return the most recent training-run row of the experiment CSV as floats.

    Only rows with ``_type == 0`` are considered (divider rows are skipped).
    The ``"_"`` separator column is dropped and the remaining values are
    converted to ``float``.

    Returns:
        A float Series for the last training run, or ``None`` when the CSV
        does not exist or contains no training-run row yet (previously the
        latter case raised ``IndexError``).
    """
    experiment_csv = read_csv()
    if experiment_csv is None:
        return None
    completed_runs = experiment_csv[experiment_csv['_type'] == 0]
    if completed_runs.empty:
        # File exists but holds only dividers (or no rows): nothing to resume from.
        return None
    return completed_runs.iloc[-1].drop("_").astype(float)

def last_row():
    """Return the final row of the experiment CSV, whatever its ``_type``."""
    return read_csv().iloc[-1]

def next_experiment():
    """Work out where the grid search should resume.

    Returns ``[hidden_channels_index, seed_index, alpha_index]`` for the next
    run, or ``None`` once the final combination has been completed. Hidden
    channels advance first, then the seed, then alpha. When the seed or alpha
    advances and the CSV does not already end with a divider row, one (seed)
    or two (alpha) dividers are appended to the in-memory log.
    """
    previous = last_completed_experiment()
    if previous is None:
        # Nothing recorded yet: start at the first combination.
        return [0, 0, 0]

    if previous['Hidden Channels'] != hidden_channels_choices[-1]:
        # Same seed and alpha, next hidden-channel width.
        return [
            hidden_channels_choices.index(previous['Hidden Channels'])+1,
            seed_choices.index(previous['Seed']),
            alpha_choices.index(previous['alpha']),
        ]

    if previous['Seed'] != seed_choices[-1]:
        # Hidden-channel sweep finished: move on to the next seed.
        if last_row()['_type'] != 1:
            insert_divider()
        return [0, seed_choices.index(previous['Seed'])+1, alpha_choices.index(previous['alpha'])]

    if previous['alpha'] == alpha_choices[-1]:
        # Last width, last seed, last alpha: the grid is complete.
        return None

    # All seeds done for this alpha: move on to the next alpha.
    if last_row()['_type'] != 1:
        insert_divider()
        insert_divider()
    return [0, 0, alpha_choices.index(previous['alpha'])+1]

In [None]:
# Rebuild the in-memory log from the CSV so it matches the state next_experiment() resumes from.
# Without the reset, re-running this cell in the same kernel appended the CSV rows a second time
# and saved them back as duplicates. Rows that were never saved are dropped; the resume logic
# re-runs them anyway because it only looks at the CSV.
clear_experiments()
load_experiments(folder_path+file_name)

next_experiment_to_run = next_experiment()
if next_experiment_to_run is not None:
    # next_experiment_to_run is [hidden_channels_index, seed_index, alpha_index].
    for alpha_index in range(next_experiment_to_run[2], len(alpha_choices)):
        alpha_choice = alpha_choices[alpha_index]
        # The resume offsets apply only to the first (partially completed) pass; later passes start at 0.
        for seed_index in range(next_experiment_to_run[1] if alpha_index == next_experiment_to_run[2] else 0, len(seed_choices)):
            seed_choice = seed_choices[seed_index]
            for hidden_channels_index in range(next_experiment_to_run[0] if alpha_index == next_experiment_to_run[2] and seed_index == next_experiment_to_run[1] else 0, len(hidden_channels_choices)):
                hidden_channels_choice = hidden_channels_choices[hidden_channels_index]
                runTraining(hidden_channels_choice,seed_choice,alpha_choice)
            # One divider and a checkpoint after every hidden-channel sweep.
            insert_divider()
            save_dataframe(folder_path+file_name)
        # A second divider marks the end of an alpha value.
        insert_divider()
        save_dataframe(folder_path+file_name)
else:
    print("No more experiments to run")

### Pseudo-gradient Descent on Alpha

In [None]:
# State for the alpha search below. It lives in its own cell, so re-running only the loop cell
# continues from the current values; re-run this cell to start over.
alpha_history = []   # alpha used in each trial
train_history = []   # Train_Loss of the run at that alpha
test_history = []    # Test_Loss of the run at that alpha
current_alpha = 1.5  # starting point of the search
previous_alpha = None  # not read anywhere in the visible cells

In [None]:
hidden_channels_current_test_value = 2 # Can be changed

# Offset added to alpha for the second training run of each trial.
PSEUDO_DERIVATIVE_JUMP = 1e-5

def approximate_derivative(final_values_actual,final_values_jump):
    """Forward-difference estimate of the change in ``Test_Loss`` per unit of alpha.

    Args:
        final_values_actual: Result dict of the run at the current alpha.
        final_values_jump: Result dict of the run at alpha + PSEUDO_DERIVATIVE_JUMP.
    """
    test_loss_change = final_values_jump['Test_Loss']-final_values_actual['Test_Loss']
    return test_loss_change/PSEUDO_DERIVATIVE_JUMP

# Number of alpha updates, and the step size applied to the estimated derivative.
num_trials = 20
learning_rate = 25

# Each trial trains twice with seed 0: once at current_alpha and once at
# current_alpha + PSEUDO_DERIVATIVE_JUMP. Alpha is then moved against the finite-difference
# estimate of d(Test_Loss)/d(alpha). Both runs are appended to the experiment log by runTraining.
for _ in range(num_trials):
    experiment_values = runTraining(hidden_channels_current_test_value,0,current_alpha)
    current_train_loss = experiment_values['Train_Loss']

    derivative_values = runTraining(hidden_channels_current_test_value,0,current_alpha+PSEUDO_DERIVATIVE_JUMP)
    derivative_train_loss = derivative_values['Train_Loss']  # not used below
    # History records the alpha that was evaluated, before it is updated.
    alpha_history.append(current_alpha)
    train_history.append(current_train_loss)
    test_history.append(experiment_values['Test_Loss'])

    gradient_alpha = approximate_derivative(experiment_values,derivative_values)
    # NOTE(review): nothing keeps current_alpha non-negative after this step - confirm that is intended.
    current_alpha -= learning_rate * gradient_alpha

print("Arrived at ", current_alpha)
# The log is written to disk only once, after all trials have finished.
insert_divider()
save_dataframe(folder_path+file_name)

### Bayesian Algorithm

In [None]:
!pip install scikit-optimize
from skopt import gp_minimize
from skopt.space import Real

In [None]:
def objective_function(alpha_list):
    """Objective for ``gp_minimize``: final test loss of one training run.

    Args:
        alpha_list: Point proposed by the optimiser; its single entry is the
            L1 weight ``alpha``.

    Returns:
        The ``Test_Loss`` of a run with 2 hidden channels and seed 1, as a float.
    """
    alpha = alpha_list[0]

    hidden_channels_choice = 2
    seed_choice = 1

    # runTraining logs the run and returns its final metrics. The metric key is "Test_Loss";
    # the earlier lookup of 'Test Loss' in the logged DataFrame raised KeyError.
    final_values = runTraining(hidden_channels_choice, seed_choice, alpha)
    return float(final_values['Test_Loss'])

# Search space
# A single real-valued dimension: alpha between 0.0 and 10.0.
space = [Real(0.0, 10.0, name='alpha')]

# Run the Bayesian optimization
# Every evaluation of objective_function is a full runTraining call, so this cell performs
# n_calls training runs. x0 supplies alpha = 1.0 as an initial point.
result = gp_minimize(
    func=objective_function,
    dimensions=space,
    n_calls=30,
    random_state=1,
    verbose=True,
    x0 = [[1.0]]
)

print(f"Best alpha found: {result.x[0]}")
print(f"Best alpha test loss: {result.fun}")

# Every alpha that was evaluated and the objective value obtained for it.
print(result.x_iters)
print(result.func_vals)
# Persist the runs logged by runTraining during the search.
save_dataframe(folder_path+file_name)