In [11]:
import time

import torch
from tqdm import tqdm
import pandas as pd
from Read_Data import load_data_list
from metrics import compute_kendall_tau, plot_approximation_ratio
from model import GIN
import numpy as np

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def set_seed(seed: int = 42) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print(f"Random seed set as {seed}")

set_seed()

Random seed set as 42


In [17]:
df = pd.DataFrame(columns=['Network', 'Measure', 'Final Train Error', 'Min Test Error', 'Kendalls Tau',
                           'Best Config', 'Running Time Networkit', 'Running Time NN', 'Time Difference'])

configs = {'32_3': (32, 3), '32_4': (32, 4), '32_5': (32, 5),
           '64_3': (64, 3), '64_4': (64, 4), '64_5': (64, 5),
           '128_3': (128, 3), '128_4': (128, 4), '128_5': (128, 5),
           '256_3': (256, 3), '256_4': (256, 4), '256_5': (256, 5)}

data_sets = load_data_list()

# default value
train_mask = torch.zeros(1000, dtype=torch.bool)

In [19]:
for network in tqdm(data_sets):
    net = network
    network = data_sets[net][0]
    running_times_network_it = data_sets[net][1]
    running_times_nn = {}

    for dataset in tqdm(network):
        measure = dataset
        dataset = network[measure]
        opt_config_dict = {"min_test_error": 100000, "best_config": None, "min_time": 100000, "final_out": None,
                           "final_train_error": None}

        for config in configs:

            gin = GIN(*configs[config]).to(torch.float32)
            gin.to(device)
            '''for name, param in gin.named_parameters():
                print(f'{name}: {param.device}')'''

            data, train_mask = dataset[0].to(device), dataset[1].to(device)

            '''print(data.x.device)
            print(data.edge_index.device)
            print(data.y.device)'''

            criterion = torch.nn.L1Loss()

            lr = 0.001
            num_epochs = 1000
            temp_loss = 100000
            optimizer = torch.optim.Adam(gin.parameters(), lr=lr, weight_decay=5e-4)
            gin.train()

            for epoch in range(num_epochs):

                optimizer.zero_grad()
                out = gin(x=data.x, edge_index=data.edge_index)

                loss = criterion(out.view(-1)[train_mask], data.y[train_mask].view(-1))

                # early stopping if the loss does not decrease by more than 1% for 20 epochs, very simplified version of the early stopping criterion
                # introduced in https://arxiv.org/abs/1703.09580, by approximating the loss gradient, to implement early stopping without validation set
                if epoch % 20 == 0 and epoch > 50:
                    if abs(temp_loss - loss) > abs(temp_loss) / 100:
                        temp_loss = loss

                    else:
                        # print("\nEarly stopping at epoch: ", epoch)
                        break

                loss.backward()
                optimizer.step()

            gin.eval()

            with torch.no_grad():
                start = time.perf_counter()
                out = gin(x=data.x, edge_index=data.edge_index)
                end = time.perf_counter()

            train_error = criterion(out.view(-1)[train_mask], data.y[train_mask].view(-1))

            test_error = criterion(out.view(-1)[~train_mask], data.y.view(-1)[~train_mask])
            test_error = test_error.cpu()
            train_error = train_error.cpu()
            if test_error.detach().numpy() < opt_config_dict["min_test_error"]:
                opt_config_dict["min_test_error"] = test_error.detach().numpy()
                opt_config_dict["best_config"] = config
                opt_config_dict["final_out"] = out.view(-1).detach()
                opt_config_dict["final_train_error"] = train_error.detach().numpy()
                opt_config_dict["min_time"] = end - start

        plot_approximation_ratio(dataset[0], opt_config_dict["final_out"], net, measure)
        running_times_nn[measure] = opt_config_dict["min_time"]

        # update the dataframe
        df.loc[len(df)] = {'Network': net, 'Measure': measure,
                           'Final Train Error': opt_config_dict["final_train_error"],
                           'Min Test Error': opt_config_dict["min_test_error"],
                           'Kendalls Tau': compute_kendall_tau(dataset[0].cpu(), opt_config_dict["final_out"].cpu(),
                                                               ~train_mask.cpu()),
                           'Best Config': opt_config_dict["best_config"],
                           'Running Time Networkit': running_times_network_it[measure],
                           'Running Time NN': running_times_nn[measure],
                           'Time Difference': running_times_network_it[measure] - running_times_nn[measure]}


# rounding the values to improve readability
df[['Running Time Networkit', 'Running Time NN', 'Time Difference']] = \
    df[['Running Time Networkit', 'Running Time NN', 'Time Difference']].round(6)
df[['Final Train Error', 'Min Test Error', 'Kendalls Tau']] = \
    df[['Final Train Error', 'Min Test Error', 'Kendalls Tau']].round(4)

df.to_csv('results.csv', index=False)
print(df.to_string())


  0%|          | 0/2 [00:00<?, ?it/s]
  0%|          | 0/3 [00:00<?, ?it/s][A


Early stopping at epoch:  300

Early stopping at epoch:  460

Early stopping at epoch:  480

Early stopping at epoch:  740

Early stopping at epoch:  380

Early stopping at epoch:  320

Early stopping at epoch:  360

Early stopping at epoch:  480

Early stopping at epoch:  440

Early stopping at epoch:  300

Early stopping at epoch:  580



 33%|███▎      | 1/3 [03:53<07:47, 233.74s/it][A


Early stopping at epoch:  260

Early stopping at epoch:  140

Early stopping at epoch:  380

Early stopping at epoch:  300

Early stopping at epoch:  460

Early stopping at epoch:  420

Early stopping at epoch:  200

Early stopping at epoch:  680

Early stopping at epoch:  500

Early stopping at epoch:  100

Early stopping at epoch:  300

Early stopping at epoch:  240



 67%|██████▋   | 2/3 [06:20<03:02, 182.30s/it][A


Early stopping at epoch:  140

Early stopping at epoch:  100

Early stopping at epoch:  220

Early stopping at epoch:  200

Early stopping at epoch:  180

Early stopping at epoch:  200

Early stopping at epoch:  180

Early stopping at epoch:  100

Early stopping at epoch:  160

Early stopping at epoch:  80

Early stopping at epoch:  440

Early stopping at epoch:  240



100%|██████████| 3/3 [07:55<00:00, 158.62s/it][A
 50%|█████     | 1/2 [07:55<07:55, 475.87s/it]
  0%|          | 0/3 [00:00<?, ?it/s][A


Early stopping at epoch:  220

Early stopping at epoch:  340

Early stopping at epoch:  280

Early stopping at epoch:  180

Early stopping at epoch:  360

Early stopping at epoch:  220

Early stopping at epoch:  120

Early stopping at epoch:  780

Early stopping at epoch:  280

Early stopping at epoch:  240



 33%|███▎      | 1/3 [02:51<05:43, 171.67s/it][A


Early stopping at epoch:  480

Early stopping at epoch:  320

Early stopping at epoch:  200

Early stopping at epoch:  440

Early stopping at epoch:  920

Early stopping at epoch:  720

Early stopping at epoch:  480

Early stopping at epoch:  640



 67%|██████▋   | 2/3 [07:03<03:39, 219.10s/it][A


Early stopping at epoch:  140

Early stopping at epoch:  280

Early stopping at epoch:  500

Early stopping at epoch:  180

Early stopping at epoch:  220

Early stopping at epoch:  220

Early stopping at epoch:  940

Early stopping at epoch:  180

Early stopping at epoch:  180

Early stopping at epoch:  380



100%|██████████| 3/3 [09:33<00:00, 191.04s/it][A
100%|██████████| 2/2 [17:28<00:00, 524.49s/it]

                     Network                 Measure Final Train Error Min Test Error  Kendalls Tau Best Config  Running Time Networkit  Running Time NN  Time Difference
0  Adolescent health network                  Degree       0.005906477    0.006955558        0.9480       256_5                0.000130         0.032892        -0.032762
1  Adolescent health network  Eigenvector Centrality       0.004505247   0.0037288377        0.2294        64_4                0.006794         0.008933        -0.002139
2  Adolescent health network                PageRank       0.033728242     0.03123384        0.7131        32_5                0.004139         0.009685        -0.005546
3              Yeast network                  Degree      0.0030861087   0.0030860181        0.7496       256_3                0.000354         0.012296        -0.011942
4              Yeast network  Eigenvector Centrality      0.0012718391   0.0021006938        0.5622        64_4                0.025737         0.0062


