## Get CUDA and processor information

In [36]:
import sys
sys.path.append('../Submodular')

import DeviceDir

# Ask the project-local DeviceDir helper for the data/results directories and
# for the compute device plus a processor count.
DIR, RESULTS_DIR = DeviceDir.get_directory()
device, NUM_PROCESSORS = DeviceDir.get_device()

# Bare expression: echoes the processor count as the notebook cell output.
NUM_PROCESSORS

40

In [37]:
import multiprocessing

# Replace the processor count returned by DeviceDir.get_device() with the
# CPU count reported by the standard library.
NUM_PROCESSORS=multiprocessing.cpu_count()
print("Cpu count: ",NUM_PROCESSORS)

Cpu count:  40


In [38]:
from ipynb.fs.full.Dataset import get_data,generate_synthetic
from ipynb.fs.full.Dataset import datasets as available_datasets
from ipynb.fs.full.Utils import save_plot

In [39]:
import argparse
from argparse import ArgumentParser

#set default arguments here
def get_configuration():
    """Parse the experiment configuration from the command line.

    Boolean options (``--log_info``, ``--pbar``, ``--recompute``) accept the
    usual spellings (``true``/``false``, ``yes``/``no``, ``1``/``0``, ...).
    They are parsed explicitly because ``type=bool`` would turn every
    non-empty string, including ``"False"``, into ``True``.

    Returns:
        tuple: ``(args, dict_args)`` where ``args`` is the parsed
        ``argparse.Namespace`` and ``dict_args`` is the same data as a dict
        (``vars(args)``, so both views share their contents).
    """
    def str2bool(value):
        """Convert a command-line token such as ``"false"`` or ``"1"`` to a bool."""
        if isinstance(value, bool):
            return value
        token = str(value).strip().lower()
        if token in ('true', 't', 'yes', 'y', '1'):
            return True
        if token in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = ArgumentParser()
    parser.add_argument('--epochs', type=int, default=1)
    parser.add_argument('--log_info', type=str2bool, default=True)
    parser.add_argument('--pbar', type=str2bool, default=False)
    parser.add_argument('--batch_size', type=int, default=2048)
    parser.add_argument('--learning_rate', type=float, default=0.01)
    parser.add_argument('--recompute', type=str2bool, default=False)
    parser.add_argument('--num_gpus', type=int, default=-1)
    parser.add_argument('--parallel_mode', type=str, default="dp", choices=['dp', 'ddp', 'ddp2'])
    parser.add_argument('--dataset', type=str, default="Cora", choices=available_datasets)
    # store_false: the option defaults to True and passing the flag turns it off.
    parser.add_argument('--use_normalization', action='store_false', default=True)
    parser.add_argument('--f') ##dummy for jupyternotebook
    args = parser.parse_args()

    dict_args = vars(args)

    return args, dict_args

# Parse the configuration once for the whole notebook; `args` is read as a
# module-level global by the functions defined in later cells.
args, dict_args = get_configuration()

## Libraries

In [40]:
import random
import numpy as np
import torch

# Module-level experiment switches.
SYNTHETIC = True
seed = 123

# Suffix appended to the dataset name when train() builds the sampler directory.
data_filename_extension = ""

# Seed the Python, NumPy and PyTorch random number generators with the same value.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# Trailing bare expression so the cell does not echo the previous call's return value.
None

In [41]:
import os
import math
import time
from tqdm import tqdm
import torch.nn as nn
from torch_geometric.data import Data, Dataset

## GNN model

In [42]:
import torch_geometric
from torch_geometric.nn import GCNConv, GATConv, SAGEConv, GINConv, ChebConv
from torch_geometric.nn import GraphConv, TransformerConv
from torch_geometric.utils import degree
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score
from ipynb.fs.full.SpatialConv import SpatialConv

### GNN option 1

In [43]:
# Convolution layer class that GNNHomophily instantiates for both of its layers.
GNNconv = SAGEConv

In [44]:
# from torch_geometric.nn import GCNConv, GATConv, GINConv, SAGEConv
# Convolution layer class that GNNother instantiates (GINConv is special-cased there).
GNNconv2 = GATConv

class GNNother(torch.nn.Module):
    """Two-layer GNN whose layer type is taken from the module-level ``GNNconv2``.

    When ``GNNconv2`` is ``GINConv`` each convolution wraps a linear layer
    (stored as ``MLP1`` / ``MLP2``); every other layer class is built directly
    from ``(in_channels, out_channels)``.

    Args:
        num_features: Size of the input node features.
        num_classes: Size of the output produced by the second layer.
        hidden_channels: Size of the intermediate representation.
    """

    def __init__(self, num_features,num_classes, hidden_channels=256):
        super().__init__()
        layer_cls = GNNconv2

        if layer_cls != GINConv:
            self.conv1 = layer_cls(num_features, hidden_channels)
            self.conv2 = layer_cls(hidden_channels, num_classes)
            return

        # GINConv is constructed from the network it applies, not from channel sizes.
        self.MLP1 = nn.Linear(num_features, hidden_channels)
        self.MLP2 = nn.Linear(hidden_channels, num_classes)
        self.conv1 = layer_cls(self.MLP1)
        self.conv2 = layer_cls(self.MLP2)

    def forward(self, x, edge_index, edge_weight=None):
        """Return the raw (un-normalised) output of the second layer.

        NOTE(review): ``edge_weight`` is forwarded as the third positional
        argument of the layer; confirm that the layer class selected through
        ``GNNconv2`` interprets that slot as an edge weight/attribute.
        """
        hidden = self.conv1(x, edge_index, edge_weight)
        hidden = F.relu(hidden)
        hidden = F.dropout(hidden, p=0.2, training=self.training)
        return self.conv2(hidden, edge_index, edge_weight)
    

class GNNGAT(torch.nn.Module):
    """Two-layer graph attention network with one-dimensional edge attributes.

    Args:
        num_features: Size of the input node features.
        num_classes: Size of the output produced by the second layer.
        hidden_channels: Per-head size of the first layer's output.
        heads: Number of attention heads in the first layer.
    """

    def __init__(self, num_features, num_classes, hidden_channels, heads):
        super().__init__()
        # The second layer's input width is hidden_channels * heads.
        first_out_width = hidden_channels * heads
        self.conv1 = GATConv(num_features, hidden_channels, heads, edge_dim=1)  # TODO
        self.conv2 = GATConv(first_out_width, num_classes, heads=1, concat=True, edge_dim=1)  # TODO

    def forward(self, x, edge_index, edge_attr=None):
        """Apply dropout -> conv1 -> ELU -> dropout -> conv2 and return the result."""
        drop = 0.2
        h = F.dropout(x, p=drop, training=self.training)
        h = F.elu(self.conv1(h, edge_index, edge_attr))
        h = F.dropout(h, p=drop, training=self.training)
        return self.conv2(h, edge_index, edge_attr)

In [45]:
class GNNHomophily(torch.nn.Module):
    """Two-layer GNN built from the module-level ``GNNconv`` layer class.

    The layers are stored as ``conv1`` and ``conv3``; there is no active
    ``conv2`` layer.

    Args:
        num_features: Size of the input node features.
        num_classes: Size of the output produced by the last layer.
        hidden_channels: Size of the intermediate representation.
    """

    def __init__(self, num_features,num_classes, hidden_channels=16):
        super().__init__()
        self.num_classes = num_classes

        self.conv1 = GNNconv(num_features, hidden_channels)
        self.conv3 = GNNconv(hidden_channels, num_classes)

    def forward(self, x, edge_index, edge_weight=None):
        """Return the raw output of conv1 -> ReLU -> dropout(0.2) -> conv3.

        NOTE(review): ``edge_weight`` is forwarded as the third positional
        argument; confirm the layer class bound to ``GNNconv`` treats that
        slot as an edge weight before passing a non-None value.
        """
        h = F.relu(self.conv1(x, edge_index, edge_weight))
        h = F.dropout(h, p=0.2, training=self.training)
        return self.conv3(h, edge_index, edge_weight)
    
class GNNHeterophily(torch.nn.Module):
    """Two-layer ``ChebConv`` network (K=2, symmetric normalization).

    The layers are stored as ``conv1`` and ``conv3``; there is no active
    ``conv2`` layer.

    Args:
        num_features: Size of the input node features.
        num_classes: Size of the output produced by the last layer.
        hidden_channels: Size of the intermediate representation.
    """

    def __init__(self, num_features,num_classes, hidden_channels=16):
        super().__init__()
        self.num_classes = num_classes

        cheb_kwargs = {'K': 2, 'normalization': 'sym'}
        self.conv1 = ChebConv(num_features, hidden_channels, **cheb_kwargs)
        self.conv3 = ChebConv(hidden_channels, num_classes, **cheb_kwargs)

    def forward(self, x, edge_index, edge_weight=None):
        """Return the raw output of conv1 -> ReLU -> dropout(0.2) -> conv3."""
        h = F.relu(self.conv1(x, edge_index, edge_weight))
        h = F.dropout(h, p=0.2, training=self.training)
        return self.conv3(h, edge_index, edge_weight)
    
class AGSGNN(torch.nn.Module):
    """Wrapper model that runs one or two GNN branches on sampled sub-graphs.

    As configured here only ``gnn1`` exists, so ``forward`` returns its output
    directly. If a second branch is attached as ``self.gnn2``, both branch
    outputs are passed through ReLU and dropout, truncated to the seed nodes,
    concatenated and mapped through ``com_lin``.

    Args:
        num_features: Size of the input node features.
        num_classes: Number of output classes.
        hidden_channels: Hidden width handed to the branch network(s).
        dropout: Dropout probability applied to branch outputs when two
            branches are combined.
    """

    def __init__(self, num_features,num_classes, hidden_channels=16, dropout=0.5):
        super().__init__()
        self.num_classes = num_classes

        # Per-branch output width expected by `com_lin` in two-branch mode.
        hidden = int(hidden_channels/2)

        # Two-branch alternatives (each branch must output `hidden` features so
        # that the concatenation matches `com_lin`'s input width):
        #     self.gnn1 = GNNHomophily(num_features, hidden, hidden_channels)
        #     self.gnn2 = GNNHeterophily(num_features, hidden, hidden_channels)
        # or
        #     self.gnn1 = GNNother(num_features, hidden, hidden_channels)
        #     self.gnn2 = GNNother(num_features, hidden, hidden_channels)
        # Single-branch alternative:
        #     self.gnn1 = GNNHeterophily(num_features, num_classes, hidden_channels)
        self.gnn1 = GNNHomophily(num_features, num_classes, hidden_channels)

        self.p = dropout
        # Only used when a second branch is present; kept unconditionally so
        # the module's parameter set stays the same as before.
        self.com_lin = nn.Linear(hidden*2, num_classes)

    def forward(self, batch_data):
        """Compute predictions for a batch of sampled sub-graphs.

        Args:
            batch_data: Indexable collection of sub-graph batches.
                ``batch_data[0]`` feeds ``gnn1``; ``batch_data[1]`` feeds
                ``gnn2`` when that branch exists. Each entry must expose
                ``x`` and ``edge_index``; ``batch_data[0].batch_size`` is read
                in two-branch mode.

        Returns:
            The output of ``gnn1`` for every node of ``batch_data[0]`` in
            single-branch mode, otherwise the combined prediction for the
            first ``batch_size`` nodes.
        """
        x1 = self.gnn1(batch_data[0].x, batch_data[0].edge_index)

        # The two-branch path used to sit unreachable behind an unconditional
        # `return x1` and referenced an undefined `self.gnn2`; take it only
        # when a second branch has actually been attached.
        second_branch = getattr(self, 'gnn2', None)
        if second_branch is None:
            return x1

        x2 = second_branch(batch_data[1].x, batch_data[1].edge_index)

        a1 = F.dropout(F.relu(x1), p=self.p, training=self.training)
        s1 = F.dropout(F.relu(x2), p=self.p, training=self.training)

        # Only the first `batch_size` rows are combined, so the two branches
        # may return different numbers of rows beyond that.
        used = batch_data[0].batch_size

        x = torch.cat([a1[:used,:], s1[:used,:]], dim=-1)
        return self.com_lin(x)

## GNN Training and Testing

In [46]:
from torch_geometric.loader import NeighborSampler, NeighborLoader
from ipynb.fs.full.AGSNodeSampler import WeightedNeighborLoader

In [47]:
def test(model, loader, mask, name='Train'):
    """Evaluate ``model`` on every batch of ``loader`` and return the accuracy.

    The model is switched to eval mode and is NOT switched back; callers that
    continue training must call ``model.train()`` themselves.

    Args:
        model: Module called as ``model(batch_data)``; its output is indexed
            as ``out[:used, :]``.
        loader: Iterable yielding collections of sub-graph batches. The first
            entry of each collection supplies ``batch_size`` and ``y``.
        mask: Node mask of the evaluated split; only its sum is used, as the
            progress-bar total.
        name: Split name shown in the progress-bar description.

    Returns:
        The value returned by ``accuracy_score`` over all evaluated nodes.
    """
    progress = None
    if args.log_info:
        # Tensor-level reduction instead of Python's builtin sum(), which
        # iterates the mask one element at a time.
        progress = tqdm(total=int(mask.sum()))
        progress.set_description(f'Evaluating {name}')

    model.eval()

    y_true = []
    y_pred = []

    try:
        with torch.no_grad():
            for batch_data in loader:
                batch_data = [b.to(device) for b in batch_data]
                # Only the first `batch_size` rows are scored.
                used = batch_data[0].batch_size

                out = model(batch_data)
                pred = out[:used, :].argmax(dim=1)

                y_true.append(batch_data[0].y[:used].detach().cpu().numpy())
                y_pred.append(pred.detach().cpu().numpy())

                if progress is not None:
                    progress.update(used)
    finally:
        # Close the bar even if a batch raises, so it does not linger in the output.
        if progress is not None:
            progress.close()

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    return accuracy_score(y_true, y_pred)

In [48]:
def train(DATASET_NAME, model, data, epochs=100, train_neighbors=None, test_neighbors=None):
    """Train ``model`` with weighted neighbor sampling and report test accuracy.

    Training uses Adam with its default hyper-parameters and a fixed batch
    size of 1024 (``args.learning_rate`` and ``args.batch_size`` are not read
    here). It stops early once, from epoch 5 on, the standard deviation of
    the last five epoch losses is at most 1e-3.

    The test split is evaluated every 10th epoch and also on the last epoch
    that runs (the final epoch or the epoch at which convergence is
    detected), so the returned accuracy is never just the 0 placeholder
    recorded for skipped epochs. Train and validation accuracy are only
    computed when ``args.log_info`` is set.

    Args:
        DATASET_NAME: Dataset name; used to build the sampler directory.
        model: Module called as ``model(batch_data)``.
        data: Graph object providing ``y``, ``num_nodes`` and the
            ``train_mask`` / ``val_mask`` / ``test_mask`` node masks.
        epochs: Maximum number of epochs.
        train_neighbors: ``num_neighbors`` for the training loaders;
            defaults to ``[-1, 10]``.
        test_neighbors: ``num_neighbors`` for the validation/test loaders;
            defaults to ``[-1, 10]``.

    Returns:
        tuple: ``(best_acc, num_iteration)`` - the highest recorded test
        accuracy and the number of epochs run until convergence (``epochs``
        if the convergence criterion never triggered).
    """
    # None sentinels instead of shared mutable list defaults.
    if train_neighbors is None:
        train_neighbors = [-1, 10]
    if test_neighbors is None:
        test_neighbors = [-1, 10]

    if args.log_info:
        print("Train neighbors: ", train_neighbors)
        print("Test neighbors: ", test_neighbors)

#     optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    optimizer = torch.optim.Adam(model.parameters())

    if data.y.ndim == 1:
        criterion = torch.nn.CrossEntropyLoss()
    else:
        criterion = torch.nn.BCEWithLogitsLoss()

    batch_size=1024
#     batch_size=512

    # Split sizes via tensor reductions (builtin sum() walks the mask element
    # by element in Python).
    num_train = int(data.train_mask.sum())
    num_val = int(data.val_mask.sum())
    num_test = int(data.test_mask.sum())

    # Small splits yield 0 workers, i.e. loading in the main process.
    if data.num_nodes>=50000:
        worker = 8
    else:
        worker = min(8, num_train // batch_size)

    if args.log_info:
        print("Worker: ", worker)

    weight_func=['knn','random'];
#     weight_func=['knn','submodular'];
#     weight_func=['random', 'random'];  worker = 0;
#     weight_func=['link-nn', 'link-sub'];  worker = 2;
    params={
        'knn':{'metric':'cosine'},
        'submodular':{'metric':'cosine'},
        'link-nn':{'value':'min'},
        'link-sub':{'value':'max'},
        'apricot':{'sub_func':'coverage','metric':'cosine'}
    }

#     sampler_dir = DIR+'AGSGNNstruc/'+DATASET_NAME+data_filename_extension
    sampler_dir = DIR+'AGSGNNstrucCorrect/'+DATASET_NAME+data_filename_extension

    if args.log_info:
        print(sampler_dir)

#     if not os.path.exists(sampler_dir):
#         os.makedirs(sampler_dir)

    def make_loader(mask, neighbors, shuffle, workers, log, recompute):
        """Build a WeightedNeighborLoader that shares this run's sampler settings."""
        return WeightedNeighborLoader(data, input_nodes=mask, num_neighbors=neighbors,
                                      batch_size=batch_size, shuffle=shuffle, num_workers=workers,
                                      drop_last=False, weight_func=weight_func, params=params,
                                      log=log, directed=True, replace=False,
                                      save_dir=sampler_dir, recompute=recompute)

    start = time.time()

    # Only the shuffled training loader honours args.recompute; the others are
    # always created with recompute=False.
    # NOTE(review): this loader logs unconditionally (log=True) while the other
    # three follow args.log_info - confirm that is intended.
    loader = make_loader(data.train_mask, train_neighbors, shuffle=True, workers=worker,
                         log=True, recompute=args.recompute)
    train_loader = make_loader(data.train_mask, train_neighbors, shuffle=False, workers=worker,
                               log=args.log_info, recompute=False)
    val_loader = make_loader(data.val_mask, test_neighbors, shuffle=False,
                             workers=min(8, num_val // batch_size),
                             log=args.log_info, recompute=False)
    test_loader = make_loader(data.test_mask, test_neighbors, shuffle=False,
                              workers=min(8, num_test // batch_size),
                              log=args.log_info, recompute=False)

    train_losses=[]
    val_accuracies=[]
    train_accuracies=[]
    test_accuracies=[]

    num_iteration = epochs

    end = time.time()
    if args.log_info:
        print("Total initialization time: ", end-start)

    start = time.time()

    for epoch in range(1,epochs+1):

        pbar = None
        if args.log_info:
            pbar = tqdm(total=num_train)
            pbar.set_description(f'Epoch {epoch:02d}')

        model.train()
        total_loss = total_examples = 0

        for batch_data in loader:
            batch_data = [b.to(device) for b in batch_data]
            # The loss is taken over the first `batch_size` rows of the output.
            used = batch_data[0].batch_size

            optimizer.zero_grad()
            out = model(batch_data)
            loss = criterion(out[:used], batch_data[0].y[:used])

            loss.backward()
            optimizer.step()

            total_loss += loss.item() * used
            total_examples += used

            if pbar is not None:
                pbar.update(used)
        if pbar is not None:
            pbar.close()

        loss=total_loss / total_examples
        train_losses.append(loss)

        # float() accepts both NumPy scalars and plain Python floats, whereas
        # .item() fails when accuracy_score returns a built-in float.
        if args.log_info:
            train_acc = float(test(model, train_loader,data.train_mask,'Train'))
            val_acc = float(test(model, val_loader,data.val_mask,'Validation'))
        else:
            train_acc = 0
            val_acc = 0
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)

        # Convergence depends only on the losses, so decide it before the test
        # evaluation and make sure the last epoch that runs is always scored.
        std_dev = np.std(train_losses[-5:])
        converged = epoch>=5 and std_dev<=1e-3

        if epoch%10==0 or epoch==epochs or converged:
            test_acc = float(test(model, test_loader,data.test_mask,'Test'))
        else:
            test_acc = 0
        test_accuracies.append(test_acc)

        if args.log_info:
            print(f'Epoch: {epoch:03d}, Train Loss: {loss:.4f}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}, Std dev: {std_dev:.4f}')

        if converged:
            num_iteration = epoch

            if args.log_info:
                print("Iteration for convergence: ", epoch)
            break

    if args.log_info:
        #save_plot([val_accuracies], labels=['Validation'], name='Plots/Validation', yname='Accuracy', xname='Epoch')
        save_plot([train_losses, train_accuracies, val_accuracies, test_accuracies], labels=['Loss','Train','Validation','Test'], name='Results/AGSNSVal', yname='Accuracy', xname='Epoch')

        print ("Best Validation Accuracy, ",max(val_accuracies, default=0))
        print ("Best Test Accuracy, ",max(test_accuracies, default=0))

    # default=0 keeps epochs=0 from raising on an empty history.
    best_acc = max(test_accuracies, default=0)

    end = time.time()
    if args.log_info:
        print("Total epoch time: ", end-start)

    return best_acc, num_iteration

In [49]:
def AGSNSperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=1, train_neighbors=[-1,-1], test_neighbors=[-1,-1]):
    """Build an AGSGNN for ``data`` and train it.

    Args:
        DATASET_NAME: Dataset name forwarded to ``train``.
        data: Graph object; ``data.x.shape[1]`` gives the input feature size.
        dataset: Not used by this function.
        num_classes: Number of output classes of the model.
        epochs: Maximum number of training epochs.
        train_neighbors: Neighbor counts forwarded to ``train``.
        test_neighbors: Neighbor counts forwarded to ``train``.

    Returns:
        tuple: ``(best_acc, num_iteration, model)`` - the two values returned
        by ``train`` plus the trained model.
    """
    num_features = data.x.shape[1]
    model = AGSGNN(num_features, num_classes, hidden_channels=256)
    model = model.to(device)

    if args.log_info:
        print(model)

    outcome = train(
        DATASET_NAME,
        model,
        data,
        epochs,
        train_neighbors=train_neighbors,
        test_neighbors=test_neighbors,
    )
    best_acc, num_iteration = outcome

    return best_acc, num_iteration, model

In [50]:
def adj_feature(data):
    """Return the dense adjacency matrix of ``data`` as a float tensor.

    Entry ``[i, j]`` is 1 when ``data.edge_index`` contains the edge
    ``i -> j`` and 0 otherwise; repeated edges still produce 1. The matrix is
    allocated on the same device as ``data.edge_index`` so that indexing works
    when the graph does not live on the CPU.

    Args:
        data: Object exposing ``num_nodes`` and an ``edge_index`` tensor of
            shape ``[2, num_edges]``.

    Returns:
        torch.Tensor: ``[num_nodes, num_nodes]`` tensor of the default float
        dtype. Memory grows quadratically with ``num_nodes``.
    """
    sources, targets = data.edge_index
    adj_mat = torch.zeros((data.num_nodes, data.num_nodes), device=data.edge_index.device)
    adj_mat[sources, targets] = 1
    return adj_mat

# adj_feature(data)
# data.x.shape

In [51]:
from torch_geometric.utils import add_self_loops

In [52]:
# Driver cell: load Cora, derive the number of classes and run one training job.

# Force verbose logging for this run, overriding the parsed configuration.
args.log_info = True
DATASET_NAME = 'Cora'
data, dataset = get_data(DATASET_NAME, DIR=None, log=False, h_score=True, split_no=1); print("")
print(data)

# (row, col) = data.edge_index
# data.edge_index = torch.stack((torch.cat((row, col),dim=0),torch.cat((col, row),dim=0)),dim=0)
# data.edge_index = torch_geometric.utils.coalesce(data.edge_index)
# print(data)

# Forwarded by train() as `recompute` to the shuffled training loader.
args.recompute = True


# Labels with more than one dimension are reduced to a class index per node
# (argmax over dim 1); the class count is then taken from the largest index.
if len(data.y.shape) > 1:
    data.y = data.y.argmax(dim=1)        
    num_classes = torch.max(data.y).item()+1
else:
    num_classes = dataset.num_classes

# If the class count disagrees with the largest label + 1, use the labels.
if num_classes!= torch.max(data.y)+1:
    num_classes = torch.max(data.y).item()+1
    
# data.edge_index, _ = add_self_loops(data.edge_index)            
# data.x = torch.cat((data.x, adj_feature(data)), dim=1)
# if args.log_info == True:
#     print(data.x.shape)

    
# if DATASET_NAME in ['Cornell', 'cornell5']:
#     data.edge_index, _ = add_self_loops(data.edge_index)            
    
# if DATASET_NAME in ['Squirrel', 'Chameleon', 'amherst41',
#                     'Cornell','cornell5', 'johnshopkins55']:
#     data.x = torch.cat((data.x, adj_feature(data)), dim=1)
#     if args.log_info == True:
#         print(data.x.shape)


# Train for at most 150 epochs, using neighbor counts [8, 4] for both the
# training and the evaluation loaders; the returned model is discarded.
best_acc, num_iteration, _ =  AGSNSperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=150, train_neighbors=[8,4], test_neighbors=[8,4])
print(best_acc, num_iteration)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


N  2708  E  10556  d  3.8980797636632203 0.825157880783081 0.8099659085273743 0.7657181620597839 -0.06587088108062744 
Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
AGSGNN(
  (gnn1): GNNHomophily(
    (conv1): SAGEConv(1433, 256)
    (conv3): SAGEConv(256, 7)
  )
  (com_lin): Linear(in_features=256, out_features=7, bias=True)
)
Train neighbors:  [8, 4]
Test neighbors:  [8, 4]
Worker:  0
./Dataset/AGSGNNstrucCorrect/Cora
Metric:  cosine


Nodes: 100%|██████████| 2708/2708 [00:03<00:00, 794.67it/s]


saving weights  knncosine
Total initialization time:  3.448054790496826


Epoch 01: 100%|██████████| 140/140 [00:00<00:00, 548.42it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 563.00it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 523.26it/s]


Epoch: 001, Train Loss: 1.9471, Train: 0.2071, Val: 0.1520, Test: 0.0000, Std dev: 0.0000


Epoch 02: 100%|██████████| 140/140 [00:00<00:00, 563.66it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 576.11it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 613.66it/s]


Epoch: 002, Train Loss: 1.9417, Train: 0.2714, Val: 0.2080, Test: 0.0000, Std dev: 0.0027


Epoch 03: 100%|██████████| 140/140 [00:00<00:00, 521.18it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 556.05it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 637.16it/s]


Epoch: 003, Train Loss: 1.9367, Train: 0.2929, Val: 0.2480, Test: 0.0000, Std dev: 0.0043


Epoch 04: 100%|██████████| 140/140 [00:00<00:00, 548.93it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 559.24it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 548.16it/s]


Epoch: 004, Train Loss: 1.9305, Train: 0.3214, Val: 0.2780, Test: 0.0000, Std dev: 0.0062


Epoch 05: 100%|██████████| 140/140 [00:00<00:00, 548.99it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 555.42it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 641.68it/s]


Epoch: 005, Train Loss: 1.9257, Train: 0.4357, Val: 0.3140, Test: 0.0000, Std dev: 0.0076


Epoch 06: 100%|██████████| 140/140 [00:00<00:00, 546.70it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 556.04it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 645.56it/s]


Epoch: 006, Train Loss: 1.9195, Train: 0.5857, Val: 0.3520, Test: 0.0000, Std dev: 0.0078


Epoch 07: 100%|██████████| 140/140 [00:00<00:00, 551.78it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 560.24it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 564.35it/s]


Epoch: 007, Train Loss: 1.9130, Train: 0.6571, Val: 0.3980, Test: 0.0000, Std dev: 0.0083


Epoch 08: 100%|██████████| 140/140 [00:00<00:00, 553.04it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.64it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 640.89it/s]


Epoch: 008, Train Loss: 1.9062, Train: 0.6786, Val: 0.4500, Test: 0.0000, Std dev: 0.0087


Epoch 09: 100%|██████████| 140/140 [00:00<00:00, 554.41it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.14it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 638.79it/s]


Epoch: 009, Train Loss: 1.8991, Train: 0.7143, Val: 0.5160, Test: 0.0000, Std dev: 0.0094


Epoch 10: 100%|██████████| 140/140 [00:00<00:00, 555.94it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 390.35it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 608.46it/s]
Evaluating Test: 100%|██████████| 1000/1000 [00:01<00:00, 649.02it/s]


Epoch: 010, Train Loss: 1.8921, Train: 0.7857, Val: 0.5420, Test: 0.5360, Std dev: 0.0097


Epoch 11: 100%|██████████| 140/140 [00:00<00:00, 549.28it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 551.73it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 559.72it/s]


Epoch: 011, Train Loss: 1.8829, Train: 0.8500, Val: 0.5640, Test: 0.0000, Std dev: 0.0105


Epoch 12: 100%|██████████| 140/140 [00:00<00:00, 553.33it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 561.41it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 641.85it/s]


Epoch: 012, Train Loss: 1.8761, Train: 0.8929, Val: 0.6040, Test: 0.0000, Std dev: 0.0108


Epoch 13: 100%|██████████| 140/140 [00:00<00:00, 548.24it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 556.80it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 640.01it/s]


Epoch: 013, Train Loss: 1.8651, Train: 0.9143, Val: 0.6320, Test: 0.0000, Std dev: 0.0119


Epoch 14: 100%|██████████| 140/140 [00:00<00:00, 549.27it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 561.17it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 561.66it/s]


Epoch: 014, Train Loss: 1.8557, Train: 0.9429, Val: 0.6640, Test: 0.0000, Std dev: 0.0129


Epoch 15: 100%|██████████| 140/140 [00:00<00:00, 546.86it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 560.57it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 634.03it/s]


Epoch: 015, Train Loss: 1.8462, Train: 0.9571, Val: 0.6700, Test: 0.0000, Std dev: 0.0133


Epoch 16: 100%|██████████| 140/140 [00:00<00:00, 548.72it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 557.53it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 637.72it/s]


Epoch: 016, Train Loss: 1.8351, Train: 0.9643, Val: 0.6780, Test: 0.0000, Std dev: 0.0143


Epoch 17: 100%|██████████| 140/140 [00:00<00:00, 550.80it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 560.27it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 558.40it/s]


Epoch: 017, Train Loss: 1.8239, Train: 0.9643, Val: 0.6880, Test: 0.0000, Std dev: 0.0146


Epoch 18: 100%|██████████| 140/140 [00:00<00:00, 552.14it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 564.35it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 643.00it/s]


Epoch: 018, Train Loss: 1.8135, Train: 0.9714, Val: 0.6920, Test: 0.0000, Std dev: 0.0151


Epoch 19: 100%|██████████| 140/140 [00:00<00:00, 551.54it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 564.95it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 627.96it/s]


Epoch: 019, Train Loss: 1.7993, Train: 0.9786, Val: 0.7000, Test: 0.0000, Std dev: 0.0164


Epoch 20: 100%|██████████| 140/140 [00:00<00:00, 545.01it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 563.88it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 562.45it/s]
Evaluating Test: 100%|██████████| 1000/1000 [00:01<00:00, 655.45it/s]


Epoch: 020, Train Loss: 1.7852, Train: 0.9786, Val: 0.6980, Test: 0.6730, Std dev: 0.0176


Epoch 21: 100%|██████████| 140/140 [00:00<00:00, 549.28it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 560.99it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 642.09it/s]


Epoch: 021, Train Loss: 1.7711, Train: 0.9714, Val: 0.7120, Test: 0.0000, Std dev: 0.0190


Epoch 22: 100%|██████████| 140/140 [00:00<00:00, 388.74it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 561.03it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 640.48it/s]


Epoch: 022, Train Loss: 1.7572, Train: 0.9786, Val: 0.7140, Test: 0.0000, Std dev: 0.0199


Epoch 23: 100%|██████████| 140/140 [00:00<00:00, 556.83it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.60it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 644.52it/s]


Epoch: 023, Train Loss: 1.7409, Train: 0.9857, Val: 0.7080, Test: 0.0000, Std dev: 0.0205


Epoch 24: 100%|██████████| 140/140 [00:00<00:00, 552.30it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.01it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 563.39it/s]


Epoch: 024, Train Loss: 1.7256, Train: 0.9786, Val: 0.7080, Test: 0.0000, Std dev: 0.0211


Epoch 25: 100%|██████████| 140/140 [00:00<00:00, 554.01it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.63it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 639.79it/s]


Epoch: 025, Train Loss: 1.7099, Train: 1.0000, Val: 0.7140, Test: 0.0000, Std dev: 0.0218


Epoch 26: 100%|██████████| 140/140 [00:00<00:00, 551.77it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 558.94it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 645.76it/s]


Epoch: 026, Train Loss: 1.6933, Train: 1.0000, Val: 0.7180, Test: 0.0000, Std dev: 0.0225


Epoch 27: 100%|██████████| 140/140 [00:00<00:00, 551.43it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 566.32it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 564.12it/s]


Epoch: 027, Train Loss: 1.6743, Train: 1.0000, Val: 0.7180, Test: 0.0000, Std dev: 0.0234


Epoch 28: 100%|██████████| 140/140 [00:00<00:00, 553.96it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.05it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 639.47it/s]


Epoch: 028, Train Loss: 1.6558, Train: 1.0000, Val: 0.7180, Test: 0.0000, Std dev: 0.0248


Epoch 29: 100%|██████████| 140/140 [00:00<00:00, 553.83it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 565.77it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 641.29it/s]


Epoch: 029, Train Loss: 1.6367, Train: 1.0000, Val: 0.7160, Test: 0.0000, Std dev: 0.0260


Epoch 30: 100%|██████████| 140/140 [00:00<00:00, 557.36it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 566.16it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 643.10it/s]
Evaluating Test: 100%|██████████| 1000/1000 [00:01<00:00, 657.44it/s]


Epoch: 030, Train Loss: 1.6189, Train: 1.0000, Val: 0.7220, Test: 0.6930, Std dev: 0.0263


Epoch 31: 100%|██████████| 140/140 [00:00<00:00, 547.92it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 565.55it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 642.56it/s]


Epoch: 031, Train Loss: 1.5964, Train: 1.0000, Val: 0.7180, Test: 0.0000, Std dev: 0.0273


Epoch 32: 100%|██████████| 140/140 [00:00<00:00, 390.51it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 564.84it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 643.92it/s]


Epoch: 032, Train Loss: 1.5790, Train: 1.0000, Val: 0.7320, Test: 0.0000, Std dev: 0.0275


Epoch 33: 100%|██████████| 140/140 [00:00<00:00, 552.60it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 564.72it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 640.48it/s]


Epoch: 033, Train Loss: 1.5544, Train: 1.0000, Val: 0.7380, Test: 0.0000, Std dev: 0.0290


Epoch 34: 100%|██████████| 140/140 [00:00<00:00, 541.81it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 558.84it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 555.26it/s]


Epoch: 034, Train Loss: 1.5315, Train: 1.0000, Val: 0.7340, Test: 0.0000, Std dev: 0.0307


Epoch 35: 100%|██████████| 140/140 [00:00<00:00, 553.12it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 560.45it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 635.76it/s]


Epoch: 035, Train Loss: 1.5099, Train: 1.0000, Val: 0.7340, Test: 0.0000, Std dev: 0.0312


Epoch 36: 100%|██████████| 140/140 [00:00<00:00, 549.96it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 557.41it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 643.09it/s]


Epoch: 036, Train Loss: 1.4871, Train: 1.0000, Val: 0.7400, Test: 0.0000, Std dev: 0.0323


Epoch 37: 100%|██████████| 140/140 [00:00<00:00, 553.76it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 545.67it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 641.09it/s]


Epoch: 037, Train Loss: 1.4663, Train: 1.0000, Val: 0.7400, Test: 0.0000, Std dev: 0.0312


Epoch 38: 100%|██████████| 140/140 [00:00<00:00, 553.66it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.81it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 643.12it/s]


Epoch: 038, Train Loss: 1.4355, Train: 1.0000, Val: 0.7380, Test: 0.0000, Std dev: 0.0334


Epoch 39: 100%|██████████| 140/140 [00:00<00:00, 549.44it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 563.27it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 637.83it/s]


Epoch: 039, Train Loss: 1.4131, Train: 1.0000, Val: 0.7420, Test: 0.0000, Std dev: 0.0347


Epoch 40: 100%|██████████| 140/140 [00:00<00:00, 549.46it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.57it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 560.74it/s]
Evaluating Test: 100%|██████████| 1000/1000 [00:01<00:00, 655.01it/s]


Epoch: 040, Train Loss: 1.3842, Train: 1.0000, Val: 0.7400, Test: 0.7250, Std dev: 0.0367


Epoch 41: 100%|██████████| 140/140 [00:00<00:00, 555.70it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 563.74it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 637.34it/s]


Epoch: 041, Train Loss: 1.3608, Train: 1.0000, Val: 0.7440, Test: 0.0000, Std dev: 0.0371


Epoch 42: 100%|██████████| 140/140 [00:00<00:00, 390.02it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 560.28it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 637.93it/s]


Epoch: 042, Train Loss: 1.3349, Train: 1.0000, Val: 0.7460, Test: 0.0000, Std dev: 0.0359


Epoch 43: 100%|██████████| 140/140 [00:00<00:00, 521.76it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 504.19it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 613.64it/s]


Epoch: 043, Train Loss: 1.3042, Train: 1.0000, Val: 0.7520, Test: 0.0000, Std dev: 0.0378


Epoch 44: 100%|██████████| 140/140 [00:00<00:00, 496.23it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 557.27it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 639.89it/s]


Epoch: 044, Train Loss: 1.2743, Train: 1.0000, Val: 0.7540, Test: 0.0000, Std dev: 0.0392


Epoch 45: 100%|██████████| 140/140 [00:00<00:00, 384.96it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 557.54it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 642.88it/s]


Epoch: 045, Train Loss: 1.2507, Train: 1.0000, Val: 0.7600, Test: 0.0000, Std dev: 0.0398


Epoch 46: 100%|██████████| 140/140 [00:00<00:00, 552.73it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 552.90it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 636.08it/s]


Epoch: 046, Train Loss: 1.2211, Train: 1.0000, Val: 0.7600, Test: 0.0000, Std dev: 0.0398


Epoch 47: 100%|██████████| 140/140 [00:00<00:00, 548.90it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 558.41it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 644.10it/s]


Epoch: 047, Train Loss: 1.1934, Train: 1.0000, Val: 0.7620, Test: 0.0000, Std dev: 0.0389


Epoch 48: 100%|██████████| 140/140 [00:00<00:00, 387.96it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 560.03it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 632.69it/s]


Epoch: 048, Train Loss: 1.1584, Train: 1.0000, Val: 0.7640, Test: 0.0000, Std dev: 0.0410


Epoch 49: 100%|██████████| 140/140 [00:00<00:00, 554.98it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 560.43it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 642.80it/s]


Epoch: 049, Train Loss: 1.1315, Train: 1.0000, Val: 0.7620, Test: 0.0000, Std dev: 0.0426


Epoch 50: 100%|██████████| 140/140 [00:00<00:00, 498.11it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 558.57it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 543.67it/s]
Evaluating Test: 100%|██████████| 1000/1000 [00:01<00:00, 643.66it/s]


Epoch: 050, Train Loss: 1.1052, Train: 1.0000, Val: 0.7620, Test: 0.7560, Std dev: 0.0416


Epoch 51: 100%|██████████| 140/140 [00:00<00:00, 550.44it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 552.28it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 637.71it/s]


Epoch: 051, Train Loss: 1.0749, Train: 1.0000, Val: 0.7680, Test: 0.0000, Std dev: 0.0411


Epoch 52: 100%|██████████| 140/140 [00:00<00:00, 545.11it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 386.50it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 638.98it/s]


Epoch: 052, Train Loss: 1.0452, Train: 1.0000, Val: 0.7660, Test: 0.0000, Std dev: 0.0400


Epoch 53: 100%|██████████| 140/140 [00:00<00:00, 553.99it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.29it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 636.01it/s]


Epoch: 053, Train Loss: 1.0170, Train: 1.0000, Val: 0.7720, Test: 0.0000, Std dev: 0.0409


Epoch 54: 100%|██████████| 140/140 [00:00<00:00, 551.16it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.43it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 589.33it/s]


Epoch: 054, Train Loss: 0.9847, Train: 1.0000, Val: 0.7700, Test: 0.0000, Std dev: 0.0423


Epoch 55: 100%|██████████| 140/140 [00:00<00:00, 548.90it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 389.70it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 641.33it/s]


Epoch: 055, Train Loss: 0.9555, Train: 1.0000, Val: 0.7720, Test: 0.0000, Std dev: 0.0423


Epoch 56: 100%|██████████| 140/140 [00:00<00:00, 553.29it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 563.94it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 638.06it/s]


Epoch: 056, Train Loss: 0.9313, Train: 1.0000, Val: 0.7680, Test: 0.0000, Std dev: 0.0410


Epoch 57: 100%|██████████| 140/140 [00:00<00:00, 549.61it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 551.36it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 641.14it/s]


Epoch: 057, Train Loss: 0.9010, Train: 1.0000, Val: 0.7740, Test: 0.0000, Std dev: 0.0404


Epoch 58: 100%|██████████| 140/140 [00:00<00:00, 390.48it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 561.13it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 642.88it/s]


Epoch: 058, Train Loss: 0.8719, Train: 1.0000, Val: 0.7720, Test: 0.0000, Std dev: 0.0397


Epoch 59: 100%|██████████| 140/140 [00:00<00:00, 551.24it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 562.34it/s]
Evaluating Validation: 100%|██████████| 500/500 [00:00<00:00, 632.04it/s]


Epoch: 059, Train Loss: 0.8440, Train: 1.0000, Val: 0.7820, Test: 0.0000, Std dev: 0.0400


Epoch 60: 100%|██████████| 140/140 [00:00<00:00, 551.62it/s]
Evaluating Train: 100%|██████████| 140/140 [00:00<00:00, 554.08it/s]
Nodes:   0%|          | 0/18660 [27:15<?, ?it/s]


KeyboardInterrupt: 

# Batch Experiments

In [53]:
def batch_experiments(num_run=1):
    """Run AGSNSperformanceSampler on each selected dataset and log summary stats.

    For every dataset, `num_run` splits are loaded via `get_data(..., split_no=i)`,
    the graph is symmetrised and coalesced, and the accuracy / iteration count
    returned by `AGSNSperformanceSampler` are collected. The mean and standard
    deviation over the runs are printed and appended to the results file.

    Args:
        num_run: number of splits (runs) evaluated per dataset.

    Side effects:
        Sets the global `args.log_info` to False and appends to
        "Results/AGSGNN-NS-2.txt".
    """

    # Full benchmark list, kept for reference; it is overridden right below.
    ALL_DATASETs= [
        "Cornell","Texas","Wisconsin",
        "reed98","amherst41",
        "penn94","Roman-empire","cornell5","Squirrel","johnshopkins55",
        "AmazonProducts",
        "Actor","Minesweeper","Questions","Chameleon",
        "Tolokers","Flickr",
        "Yelp","Amazon-ratings","genius","cora","CiteSeer",
        "dblp","Computers","PubMed","pubmed","Reddit",
        "cora_ml","Cora","Reddit2","CS","Photo","Physics","citeseer"
    ]

    ALL_DATASETs= [
        'cornell5',
    ]

    # Datasets whose node features are augmented with adjacency features.
    # The original list lacked a comma between the last two names, which
    # silently merged them into one string that matched neither dataset.
    ADJ_FEATURE_DATASETs = [
        'Squirrel', 'Chameleon', 'cornell5', 'penn94', 'johnshopkins55',
        'amherst41',
    ]

    args.log_info = False

    filename = "Results/AGSGNN-NS-2.txt"

    for DATASET_NAME in ALL_DATASETs:
        print(DATASET_NAME, end=' ')

        # Write the dataset name first so an interrupted run is still visible in the file.
        with open(filename, 'a+') as result_file:
            result_file.write(f'{DATASET_NAME} ')

        accs = []
        itrs = []

        for i in range(num_run):
            data, dataset = get_data(DATASET_NAME, DIR=None, log=False, h_score=False, split_no=i)

            #optional for making undirected graph
            (row, col) = data.edge_index
            data.edge_index = torch.stack((torch.cat((row, col),dim=0),torch.cat((col, row),dim=0)),dim=0)
            data.edge_index = torch_geometric.utils.coalesce(data.edge_index)

#             if data.num_nodes>100000:
#                 accs.append(-1)
#                 itrs.append(-1)
#                 break

            # Labels with more than one dimension are reduced to class indices via argmax.
            if len(data.y.shape) > 1:
                data.y = data.y.argmax(dim=1)
                num_classes = torch.max(data.y).item()+1
            else:
                num_classes = dataset.num_classes

            # Trust the labels actually present over the dataset metadata.
            if num_classes!= torch.max(data.y)+1:
                num_classes = torch.max(data.y).item()+1

            if data.num_nodes<100000:
                max_epochs = 150
            else:
                max_epochs = 20

            if DATASET_NAME in ADJ_FEATURE_DATASETs:
                data.x = torch.cat((data.x, adj_feature(data)), dim=1)
                if args.log_info == True:
                    print(data.x.shape)

            accuracy, itr, _ = AGSNSperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=max_epochs, train_neighbors=[8,4], test_neighbors=[8,4])

            accs.append(accuracy)
            itrs.append(itr)
            #print(itr, accuracy)

        #print(accs, itrs)
        summary = f'acc {np.mean(accs):0.4f} sd {np.std(accs):0.4f} itr {int(np.mean(itrs)):d} sd {int(np.std(itrs)):d}'
        print(summary)
        with open(filename, 'a+') as result_file:
            result_file.write(summary + '\n')
                
# batch_experiments(num_run=5)

## View Learned Representation

In [None]:
# if __name__ == '__main__':    
    
#     n=7
#     x = torch.Tensor([[1,0],[1,0],[1,0],[0,1],[0,1],[0,1],[0,1]])
#     y = torch.LongTensor([0,0,0, 1, 1, 1, 1])
#     edge_index = torch.LongTensor([[1,2],[1,4],[1,5],[2,1],[3,6],[3,7],[4,5],[4,1],[4,6],[4,7],[5,1],[5,4],[5,6],[6,3],[6,4],[6,5],[6,7],[7,3],[7,4],[7,6]]).T
#     edge_index = edge_index-1
    
#     mask = torch.zeros(n, dtype=torch.bool)
#     mask[[1,3]] = True
    
#     test_data = Data(x = x, y = y, edge_index = edge_index, train_mask = mask, test_mask = mask, val_mask = mask)    
#     print(test_data)
    
    
#     None

In [None]:
# from sklearn.manifold import TSNE
# import matplotlib.pyplot as plt

In [None]:
# model.eval()
# #X = model(data.x.to(device),data.edge_index.to(device), data.weight.to(device))
# X = model(data.x.to(device),data.edge_index.to(device))
# X = X.detach().to('cpu')
# y = data.y.to('cpu')
# X.shape

In [None]:
# plt.figure(figsize=(10, 10))

# # Create a t-SNE model with 2 components and a perplexity of 30
# tsne = TSNE(n_components=2, perplexity=30, random_state=42, learning_rate='auto', init='random')

# # Fit and transform the data to the 2D t-SNE space
# X_tsne = tsne.fit_transform(X)

# # Plot the data in the 2D t-SNE space, colored by class
# plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y)
# plt.show()

## Sparsify

In [54]:
from ipynb.fs.full.SubmodularWeights import SubModularWeightFacilityFaster
from ipynb.fs.full.KNNWeights import KNNWeight
# from ipynb.fs.full.PretrainedLink import LinkPred, LinkNN, LinkSub
from ipynb.fs.full.PretrainedLinkFast import get_link_weight, LinkNN, LinkSub
from ipynb.fs.full.RandomSparse import RandomSparse
import torch_geometric.utils.homophily as homophily
import copy
import networkx as nx
from torch_geometric.utils import to_networkx, from_networkx

In [56]:
def sparsify(data, log = True, method = 'NN', metric= None):
    """Weight the edges of `data`, drop low-weight edges and symmetrise the rest.

    Edge weights are computed with the strategy chosen by `method`; every edge
    whose weight is below 0.7 is removed, and the surviving edges are duplicated
    in the reverse direction together with their weights.

    Args:
        data: graph object exposing `to`, `edge_index` and `y`; it is modified
            in place and also returned.
        log: forwarded to the weight-computation classes.
        method: one of 'nn', 'submodular', 'link-nn', 'link-sub'. Matching is
            case-insensitive so that the default 'NN' selects the 'nn' branch.
        metric: similarity metric passed to the weight classes; 'cosine' when None.

    Returns:
        `data`, with `edge_index` replaced by the sparsified edges, the kept
        weights stored in `edge_weight`, and `weight` reset to None.

    Raises:
        NotImplementedError: if `method` is not one of the supported names.
    """
    data.to('cpu')

    if metric is None:
        metric = 'cosine'

    # The default value is upper-case while the branches are lower-case;
    # normalise so the default call does not end up in the error branch.
    method = method.lower()

    if method == 'nn':
        submodular_weight = KNNWeight(data, metric=metric, log=log)
        data.weight = submodular_weight.compute_weights()

    elif method == 'submodular':
        submodular_weight = SubModularWeightFacilityFaster(data, metric=metric, log=log)
        data.weight = submodular_weight.compute_weights()

    elif method == 'link-nn':
        # NOTE(review): `LinkPred` is not among the names imported next to
        # LinkNN/LinkSub in this notebook (its import line is commented out);
        # unless it is defined elsewhere this branch fails with NameError.
        submodular_weight = LinkPred(data, selfloop = True, log=log)
        data.weight = submodular_weight.compute_weights()
        nn_weight = LinkNN(data, value='min', log=log) #min favors similar ones, max dissimilar
        data.weight = nn_weight.compute_weights()
    elif method == 'link-sub':
        nn_weight = LinkSub(data, value='max', selfloop = True, log=log) #min favors similar ones, max dissimilar
        data.weight = nn_weight.compute_weights()
    else:
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise NotImplementedError(f"Unknown sparsification method: {method!r}")

    # Work on a copy so the networkx round trip cannot touch `data`.
    cp_data= copy.deepcopy(data)
    G = to_networkx(cp_data, to_undirected=False, edge_attrs=['weight'])
    to_remove = [(a,b) for a, b, attrs in G.edges(data=True) if attrs["weight"] < 0.7 ]
    G.remove_edges_from(to_remove)

    # A single conversion is enough; the weights come back grouped in `edge_attr`.
    updated_data = from_networkx(G, group_edge_attrs=['weight'])
    updated_data.weight = updated_data.edge_attr.view(-1)

    # Append the reversed edges (and matching weights) to make the graph symmetric.
    row, col = updated_data.edge_index
    updated_data.edge_index = torch.stack((torch.cat((row, col),dim=0), torch.cat((col, row),dim=0)),dim=0)
    updated_data.weight = torch.cat((updated_data.weight, updated_data.weight),dim=0)

    # Statistics are printed unconditionally, regardless of `log`.
    print(updated_data)
    print("Node Homophily:", homophily(updated_data.edge_index, data.y, method='node'))
    print("Edge Homophily:", homophily(updated_data.edge_index, data.y, method='edge'))
    print("Edge_insensitive Homophily:", homophily(updated_data.edge_index, data.y, method='edge_insensitive'))
    print("Degree: ", updated_data.num_edges / updated_data.num_nodes)

    data.edge_index = updated_data.edge_index
    data.edge_weight = updated_data.weight
    data.weight = None

    return data

# LOG_INFO = True
# data = sparsify(data, log = False)
# data

In [57]:
def random_sparsify(data, K, log = False):
    """Replace the edges of `data` with a RandomSparse selection, then symmetrise.

    Args:
        data: graph object; its `edge_index` is overwritten in place.
        K: forwarded to `RandomSparse` as its `K` argument.
        log: forwarded to `RandomSparse`; when true, homophily scores and the
            edges-per-node ratio of the new graph are printed.

    Returns:
        The same `data` object with the new `edge_index`.
    """
    sampler = RandomSparse(data, K = K, log = log)
    src, dst = sampler.sparse()

    # Each sampled edge is stored in both directions.
    forward = torch.cat((src, dst), dim=0)
    backward = torch.cat((dst, src), dim=0)
    data.edge_index = torch.stack((forward, backward), dim=0)

    if not log:
        return data

    print("Node Homophily:", homophily(data.edge_index, data.y, method='node'))
    print("Edge Homophily:", homophily(data.edge_index, data.y, method='edge'))
    print("Edge_insensitive Homophily:", homophily(data.edge_index, data.y, method='edge_insensitive'))    
    print("Degree: ", data.num_edges / data.num_nodes)
    return data

In [58]:
def modify_homophily(data, h = 0.1, d = 11, log = False):
    """Resample each node's edges towards a target homophily and degree.

    For every node, at most round(d*h) neighbours carrying the same label and at
    most round(d*(1-h)) neighbours carrying a different label are chosen
    uniformly at random (without replacement) from its existing neighbours.
    The kept edges are then stored in both directions.

    Args:
        data: graph object with `edge_index`, `y`, `num_nodes` and `num_edges`;
            modified in place.
        h: target fraction of same-label neighbours per node.
        d: target number of neighbours per node.
        log: when true, print homophily scores and the edges-per-node ratio.

    Returns:
        The same `data` object with the resampled `edge_index`.
    """
    data.to('cpu')
    N = data.num_nodes
    E = data.num_edges
    # The value stored for each adjacency entry is the position of that edge
    # in `data.edge_index`, so selected entries map back to edge columns.
    adj = SparseTensor(
        row=data.edge_index[0], col=data.edge_index[1],
        value=torch.arange(E, device=data.edge_index.device),
        sparse_sizes=(N, N))

    match = int(round(d*h))
    unmatch = int(round(d*(1-h)))

    def _pick(candidates, limit):
        # Draw up to `limit` entries of the 1-D tensor `candidates` without replacement.
        take = min(limit, len(candidates))
        if take == 0:
            return candidates[:0]
        chosen = np.random.choice(len(candidates), size=take, replace=False)
        return candidates[torch.as_tensor(chosen, dtype=torch.long)]

    kept = []

    for u in range(N):
        _, col, e_index = adj[u,:].coo()

        same_label = data.y[col] == data.y[u]

        # view(-1) keeps a 1-D tensor even for a single hit. The previous
        # squeeze() produced a 0-dim tensor in that case, which was then treated
        # as "no candidates", so a node's only matching (or only non-matching)
        # neighbour was always dropped.
        match_indexs = torch.nonzero(same_label).view(-1)
        other_indexs = torch.nonzero(~same_label).view(-1)

        m_sel = _pick(match_indexs, match)
        um_sel = _pick(other_indexs, unmatch)

        indexs = torch.cat((m_sel, um_sel), dim=0)
        kept.append(e_index[indexs])

    # Concatenate once instead of growing a Python list of scalar tensors.
    if kept:
        selected = torch.cat(kept, dim=0)
    else:
        selected = torch.empty(0, dtype=torch.long)

    edge_index = data.edge_index[:, selected]
    row, col = edge_index
    data.edge_index = torch.stack((torch.cat((row, col),dim=0), torch.cat((col, row),dim=0)),dim=0)

    if log:
        print("Node Homophily:", homophily(data.edge_index, data.y, method='node'))
        print("Edge Homophily:", homophily(data.edge_index, data.y, method='edge'))
        print("Edge_insensitive Homophily:", homophily(data.edge_index, data.y, method='edge_insensitive'))
        print("Degree: ", data.num_edges / data.num_nodes)

    return data

# data = modify_homophily(data, h=0.15, d=11, log = True)
# data

In [59]:
def test_hetero():
    """Print uniformity, entropy and homophily figures across a homophily sweep.

    For h = 0.00, 0.05, ..., 1.00 a synthetic graph is generated from the
    'squirrel' dataset with d = 42, and one output line is printed per h with
    the statistics before and after `sparsify`.
    """
    d = 42

    def _report(graph, n_classes):
        # Uniformity pair first, then the entropy pair, all on the current line.
        count, score = test_uniformity(graph, n_classes, log=False)
        print(count, score, end = ' ')
        total_en, en_score = total_entropy(graph, n_classes, log=False)
        print(total_en, en_score, end = ' ')

    for h in np.arange(21) / 20:
        DATASET_NAME = 'squirrel'
        data, dataset = get_data(DATASET_NAME, log=False)
        data = generate_synthetic(data, d=d, h = h, train=0.6, random_state=1, log=False, balance = False)
        num_classes = dataset.num_classes

        print('d ', d, ' h', h, end=' ')
        _report(data, num_classes)

        print('sparse', end = ' ')
        data = sparsify(data, log=False)
        _report(data, num_classes)

        for tag, mode in (("Nh ", 'node'), ("Eh ", 'edge'), ("EiH ", 'edge_insensitive')):
            print(tag, homophily(data.edge_index, data.y, method=mode), end = ' ')

#         print("Ha ", agg_homophily(data, 'affinity'), end = ' ')
#         print("Hl ", agg_homophily(data, 'laplacian'), end =' ')

        print("D ", data.num_edges / data.num_nodes, end = '\n')


# LOG_INFO = False
# test_hetero()

In [61]:
from ipynb.fs.full.Dataset import generate_synthetic2homophily

In [63]:
def ablation(num_run = 1):
    """Run the ablation configuration of AGSNSperformanceSampler per dataset.

    For each selected dataset, `num_run` splits are loaded, the graph is
    symmetrised and coalesced, and the accuracy / iteration count returned by
    `AGSNSperformanceSampler` are collected. The mean and standard deviation
    (accuracy as a percentage) are printed and appended to the results file.

    Args:
        num_run: number of splits (runs) evaluated per dataset.

    Side effects:
        Sets the global `args.log_info` to False and appends to
        "Results/AGSGNN-NS-2Ablation.txt".
    """

    #SYN_NAME = random.randint(0,1000)

#     ALL_DATASETs= [
#         'Wisconsin',
#         'reed98',        
#         'Roman-empire',
#         'Actor',
#         'Minesweeper',        
#         'Tolokers'
#     ]

    ALL_DATASETs= [
        "reed98",
        "amherst41",
#         "penn94",
        "cornell5",
        "Squirrel",
        "johnshopkins55",
        "Chameleon",
#         "Tolokers",
#         "Flickr",
        
#         "Computers",
#         "Photo",
#         "Physics",
        
#         "AmazonProducts",
#         "Yelp",
#         'pokec',
#         'twitch-gamer',
#         'wiki',        
        
#         "Reddit",
#         "Reddit2",
    ]

#     ALL_DATASETs= ["Cora"]

    args.log_info = False

    filename = "Results/AGSGNN-NS-2Ablation.txt"

    for DATASET_NAME in ALL_DATASETs:

        random_state = 10
        #args.recompute = True

        print(DATASET_NAME,"-",random_state, end=' ')

        # Write the dataset name first so an interrupted run is still visible in the file.
        with open(filename, 'a+') as result_file:
            result_file.write(f'{DATASET_NAME} ')

        accs = []
        itrs = []

        for i in range(num_run):
            data, dataset = get_data(DATASET_NAME, DIR=None, log=False, h_score=False, split_no=i)

            # Settings consumed only by the optional (commented-out)
            # synthetic-graph generation below.
            d = 100
            h =0.50
            train=0.1
            balance=True
            h2 = 0.25
            ratio = 0.50

#             global data_filename_extension
#             data_filename_extension = str(d)+str(h)+str(train)+str(random_state)+str(balance)+'.weight'            
#             data_filename = DIR+'AGSGNNstruc/'+DATASET_NAME+str(d)+str(h)+str(train)+str(random_state)+str(balance)
            
#             if os.path.exists(data_filename):
#                 data = torch.load(data_filename)                
#                 print("loaded "+data_filename)
#             else:
#                 data = generate_synthetic(data, d=d, h=h, train=train, random_state=random_state, log=False, balance=balance)
# #                 data = generate_synthetic(data, d=d, h=h, train=train, random_state=random_state, log=False)
#                 torch.save(data,data_filename)
#                 print("saved "+data_filename)
        
#             global data_filename_extension
#             data_filename_extension = str(d)+str(h)+str(h2)+str(ratio)+str(train)+str(random_state)+str(balance)+'.weight'            
#             data_filename = DIR+'AGSGNNstruc/'+DATASET_NAME+str(d)+str(h)+str(h2)+str(ratio)+str(train)+str(random_state)+str(balance)
            
#             if os.path.exists(data_filename):
#                 data = torch.load(data_filename)                
#                 print("loaded "+data_filename)
#             else:
#                 data = generate_synthetic2homophily(data, d=d, h1=h, h2=h2, ratio=ratio, train=train, random_state=random_state, log=False, balance=balance)                 
#                 torch.save(data,data_filename)
#                 print("saved "+data_filename)
    
            ##Sparsify
            #data = random_sparsify(data, 13, log = True)
#             data = sparsify(data, log = True, method = 'submodular', metric= 'cosine')
                        
#             data1 = sparsify(copy.deepcopy(data), log = True, method = 'submodular', metric= 'cosine')
#             data = sparsify(data, log = True, method = 'nn', metric= 'cosine')                         
#             data.edge_index = torch.cat((data.edge_index, data1.edge_index), dim=1)

            #optional for making undirected graph
            (row, col) = data.edge_index
            data.edge_index = torch.stack((torch.cat((row, col),dim=0),torch.cat((col, row),dim=0)),dim=0)
            data.edge_index = torch_geometric.utils.coalesce(data.edge_index)

            if args.log_info:
                print("Node Homophily:", homophily(data.edge_index, data.y, method='node'))
                print("Edge Homophily:", homophily(data.edge_index, data.y, method='edge'))
                print("Edge_insensitive Homophily:", homophily(data.edge_index, data.y, method='edge_insensitive'))
                print("Degree: ", data.num_edges / data.num_nodes)

#             if data.num_nodes>100000:
#                 accs.append(-1)
#                 itrs.append(-1)
#                 break

            # Labels with more than one dimension are reduced to class indices via argmax.
            if len(data.y.shape) > 1:
                data.y = data.y.argmax(dim=1)
                num_classes = torch.max(data.y).item()+1
            else:
                num_classes = dataset.num_classes

            # Trust the labels actually present over the dataset metadata.
            if num_classes!= torch.max(data.y)+1:
                num_classes = torch.max(data.y).item()+1

            if data.num_nodes<100000:
                max_epochs = 500
            else:
                max_epochs = 20

            if DATASET_NAME in ['Squirrel', 'Chameleon','cornell5','penn94','johnshopkins55','amherst41']:
                data.x = torch.cat((data.x, adj_feature(data)), dim=1)
                if args.log_info == True:
                    print(data.x.shape)

#             accuracy, itr = 0,0
            
#             accuracy, itr, mdl = AGSNSperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=max_epochs, train_neighbors=[25,25], test_neighbors=[25,25])        
            accuracy, itr, mdl = AGSNSperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=max_epochs, train_neighbors=[8,4], test_neighbors=[8,4])
#             accuracy, itr, mdl = AGSNSperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=max_epochs, train_neighbors=[4,4], test_neighbors=[4,4])            
#             accuracy, itr, mdl = AGSNSperformanceSampler(DATASET_NAME, data, dataset, num_classes, epochs=max_epochs, train_neighbors=[-1,-1], test_neighbors=[-1,-1])

            #print(mdl)
            #args.recompute = False

            accs.append(accuracy)
            itrs.append(itr)
            #print(itr, accuracy)

        print(accs, itrs)
        print(DATASET_NAME,"-",random_state, end=' ')

        # One summary string for both console and file. The file previously
        # scaled the accuracy std by 10 instead of 100, so the logged "sd" did
        # not match the percentage scale of the mean or the console output.
        summary = f'acc {np.mean(accs)*100:0.4f} sd {np.std(accs)*100:0.4f} itr {int(np.mean(itrs)):d} sd {int(np.std(itrs)):d}'
        print(summary)
        with open(filename, 'a+') as result_file:
            result_file.write(summary + '\n')

    return 

# st_time = time.time()
# ablation(num_run=3)
# en_time = time.time()

# print("Runtime: ", en_time-st_time)