In [3]:
 #conda install -c conda-forge pyodbc

In [4]:
#pip install 'pip<24.1'

In [5]:
#pip install transforms

In [1]:
#!pip show pyodbc

In [2]:
#!pip install --upgrade pyodbc==4.0.27

In [3]:
#pip install torchmetrics

In [2]:
#pip install torch_sparse

In [3]:
#pip install torch_scatter

In [4]:
import torch_geometric.datasets

# Download and load the MUTAG dataset (files are stored under ./data/TUDataset).
mutag = torch_geometric.datasets.TUDataset(root='./data/TUDataset', name='MUTAG')
# Disabled alternative dataset, kept for reference.
# NOTE(review): torch_geometric.datasets does not appear to expose an `OGBDataset`;
# ogbg-molhiv is normally loaded through the separate `ogb` package - confirm before enabling.
#ogb_molhiv = torch_geometric.datasets.OGBDataset(root='./data/OBG', name='ogbg-molhiv')
# Inspect the first graph: the printed Data object lists each attribute with its tensor shape.
print(mutag[0])
#print(ogb_molhiv[0])

Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])


## Actual Implementation

In [1]:
import torch
#print(torch.__version__)
import torch.nn.functional as F
from torch_geometric.nn.conv.gcn_conv import gcn_norm
#from torch_geometric.utils import accuracy as accuracy_1d
from torchmetrics import Accuracy as accuracy_1d
from torch.nn import Dropout, SELU
from torch_geometric.nn import MessagePassing, SAGEConv, GCNConv, GATConv
from torch_sparse import matmul
#import matmul

class KProp(MessagePassing):
    """Parameter-free K-step propagation of node values over a sparse adjacency.

    Args:
        steps: number of propagation rounds (``K``); values <= 0 disable propagation.
        aggregator: aggregation name forwarded to ``MessagePassing`` and used as the
            ``reduce`` argument of the sparse matmul.
        add_self_loops: if true, ``adj_t.set_diag()`` is applied before propagating.
        normalize: if true, ``adj_t`` is passed through ``gcn_norm`` (without self loops).
        cached: if true, the first computed result is stored and returned on every
            later call, regardless of the inputs given to those calls.
        transform: callable applied once to the propagated result (identity by default).
    """

    def __init__(self, steps, aggregator, add_self_loops, normalize, cached, transform=lambda x: x):
        super().__init__(aggr=aggregator)
        self.K = steps
        self.transform = transform
        self.normalize = normalize
        self.add_self_loops = add_self_loops
        self.cached = cached
        # Holds the most recent aggregation result; only reused when `cached` is set.
        self._cached_x = None

    def forward(self, x, adj_t):
        """Return the propagated ``x``, recomputing unless a cached result may be reused."""
        needs_compute = (not self.cached) or (self._cached_x is None)
        if needs_compute:
            self._cached_x = self.neighborhood_aggregation(x, adj_t)
        return self._cached_x

    def neighborhood_aggregation(self, x, adj_t):
        """Run ``K`` propagation rounds over ``adj_t`` and apply ``transform`` to the result."""
        # With no steps requested the input is handed back untouched (no transform either).
        if self.K <= 0:
            return x

        adjacency = adj_t
        if self.normalize:
            adjacency = gcn_norm(adjacency, add_self_loops=False)
        if self.add_self_loops:
            adjacency = adjacency.set_diag()

        propagated = x
        for _ in range(self.K):
            propagated = self.propagate(adjacency, x=propagated)

        return self.transform(propagated)

    def message_and_aggregate(self, adj_t, x):  # noqa
        """Fused message + aggregation step: sparse-dense product reduced with ``self.aggr``."""
        return matmul(adj_t, x, reduce=self.aggr)


class GNN(torch.nn.Module):
    """Skeleton of a two-layer graph network: conv1 -> SELU -> dropout -> conv2.

    Subclasses are expected to assign ``conv1`` and ``conv2``; here they are only
    initialised to ``None`` placeholders.
    """

    def __init__(self, dropout):
        super().__init__()
        self.dropout = Dropout(p=dropout)
        self.activation = SELU(inplace=True)
        # Placeholders for the two convolution layers supplied by subclasses.
        self.conv1 = None
        self.conv2 = None

    def forward(self, x, adj_t):
        """Apply both convolutions to ``x`` over ``adj_t`` and return the second layer's output."""
        hidden = self.conv1(x, adj_t)
        # The activation is in-place, so it overwrites the first layer's output tensor.
        hidden = self.dropout(self.activation(hidden))
        return self.conv2(hidden, adj_t)


class GCN(GNN):
    """GNN whose two layers are ``GCNConv``: input_dim -> hidden_dim -> output_dim."""

    def __init__(self, input_dim, output_dim, hidden_dim, dropout):
        super().__init__(dropout=dropout)
        self.conv1 = GCNConv(in_channels=input_dim, out_channels=hidden_dim)
        self.conv2 = GCNConv(in_channels=hidden_dim, out_channels=output_dim)


class GAT(GNN):
    """GNN whose two layers are ``GATConv``; the first uses 4 concatenated attention heads."""

    def __init__(self, input_dim, output_dim, hidden_dim, dropout):
        super().__init__(dropout=dropout)
        num_heads = 4
        # Concatenating the heads widens the hidden representation to num_heads * hidden_dim,
        # which is therefore the input width of the second layer.
        concat_width = num_heads * hidden_dim
        self.conv1 = GATConv(in_channels=input_dim, out_channels=hidden_dim, heads=num_heads, concat=True)
        self.conv2 = GATConv(in_channels=concat_width, out_channels=output_dim, heads=1, concat=False)


class GraphSAGE(GNN):
    """GNN whose two layers are ``SAGEConv`` (no output normalisation, root weight enabled)."""

    def __init__(self, input_dim, output_dim, hidden_dim, dropout):
        super().__init__(dropout=dropout)
        # Both layers share the same SAGEConv options; only the channel sizes differ.
        sage_options = dict(normalize=False, root_weight=True)
        self.conv1 = SAGEConv(input_dim, hidden_dim, **sage_options)
        self.conv2 = SAGEConv(hidden_dim, output_dim, **sage_options)


class NodeClassifier(torch.nn.Module):
    """Node classifier: feature propagation -> two-layer GNN -> label propagation.

    Args:
        input_dim: width of the node feature matrix fed to the GNN.
        num_classes: number of output classes (width of the GNN output).
        model: one of ``'gcn'``, ``'sage'`` or ``'gat'``; any other key raises ``KeyError``.
        hidden_dim: hidden width of the GNN.
        dropout: dropout probability used inside the GNN.
        x_steps: number of propagation steps applied to the features (result is cached).
        y_steps: number of propagation steps applied to predictions / labels.
        forward_correction: if true, predictions are multiplied by ``data.T`` in ``forward``.

    The ``data`` objects passed to the methods must provide ``x``, ``adj_t``, ``y``, ``T``
    and the ``train_mask`` / ``val_mask`` / ``test_mask`` attributes read below.
    NOTE(review): ``y`` is presumably one-hot (N, num_classes) - the loss calls
    ``y.argmax(dim=1)``; ``T`` is presumably a label transition matrix - confirm with caller.
    """

    def __init__(self,
                 input_dim,
                 num_classes,
                 model,
                 hidden_dim,
                 dropout,
                 x_steps,
                 y_steps,
                 forward_correction
                 ):
        super().__init__()

        self.x_prop = KProp(steps=x_steps, aggregator='add', add_self_loops=False, normalize=True, cached=True)
        self.y_prop = KProp(steps=y_steps, aggregator='add', add_self_loops=False, normalize=True, cached=False,
                            transform=torch.nn.Softmax(dim=1))

        self.gnn = {'gcn': GCN, 'sage': GraphSAGE, 'gat': GAT}[model](
            input_dim=input_dim,
            output_dim=num_classes,
            hidden_dim=hidden_dim,
            dropout=dropout
        )

        # Propagated training labels; computed once on the first training step.
        self.cached_yt = None
        self.forward_correction = forward_correction

    def forward(self, data):
        """Return the tuple ``(p_y_x, p_yp_x, p_yt_x)`` of class-probability tensors."""
        x, adj_t = data.x, data.adj_t
        x = self.x_prop(x, adj_t)
        x = self.gnn(x, adj_t)

        p_y_x = F.softmax(x, dim=1)                                                         # P(y|x')
        p_yp_x = torch.matmul(p_y_x, data.T) if self.forward_correction else p_y_x          # P(y'|x')
        p_yt_x = self.y_prop(p_yp_x, data.adj_t)                                            # P(y~|x')

        return p_y_x, p_yp_x, p_yt_x

    def training_step(self, data):
        """Run one forward pass and return ``(loss, metrics)`` computed on the training nodes."""
        p_y_x, p_yp_x, p_yt_x = self(data)

        if self.cached_yt is None:
            # clone() is required: Tensor.float() returns the *same* tensor when data.y is
            # already float32, and zeroing the test rows would then erase the test labels
            # inside `data` itself.
            yp = data.y.float().clone()
            yp[data.test_mask] = 0  # to avoid using test labels
            self.cached_yt = self.y_prop(yp, data.adj_t)  # y~

        loss = self.cross_entropy_loss(p_y=p_yt_x[data.train_mask], y=self.cached_yt[data.train_mask], weighted=False)

        metrics = {
            'train/loss': loss.item(),
            'train/acc': self.accuracy(pred=p_y_x[data.train_mask], target=data.y[data.train_mask]) * 100,
            'train/maxacc': data.T[0, 0].item() * 100,
        }

        return loss, metrics

    def validation_step(self, data):
        """Run one forward pass and return validation loss/accuracy plus test accuracy."""
        p_y_x, p_yp_x, p_yt_x = self(data)

        metrics = {
            'val/loss': self.cross_entropy_loss(p_yp_x[data.val_mask], data.y[data.val_mask]).item(),
            'val/acc': self.accuracy(pred=p_y_x[data.val_mask], target=data.y[data.val_mask]) * 100,
            'test/acc': self.accuracy(pred=p_y_x[data.test_mask], target=data.y[data.test_mask]) * 100,
        }

        return metrics

    @staticmethod
    def accuracy(pred, target):
        """Return the fraction of matching labels as a Python float in [0, 1].

        ``pred`` and ``target`` may each be 1-D class indices or 2-D score / one-hot
        matrices; 2-D inputs are reduced with ``argmax(dim=1)``. An empty ``target``
        yields ``0.0`` instead of dividing by zero.
        """
        pred = pred.argmax(dim=1) if pred.dim() > 1 else pred
        target = target.argmax(dim=1) if target.dim() > 1 else target

        total = target.numel()
        if total == 0:
            return 0.0
        # Computed directly with torch: the `accuracy_1d` name imported by this file is the
        # torchmetrics `Accuracy` metric *class*, so calling it with (pred, target) would
        # attempt to construct a metric object rather than compute an accuracy.
        return (pred == target).sum().item() / total

    @staticmethod
    def cross_entropy_loss(p_y, y, weighted=False):
        """Mean cross-entropy between probabilities ``p_y`` and the arg-max class of ``y``.

        Args:
            p_y: (N, C) tensor of predicted class probabilities.
            y: (N, C) tensor of (soft or one-hot) labels; its row-wise argmax is the target.
            weighted: if true, each target term is additionally scaled by ``y``.

        Returns:
            Scalar tensor with the mean loss over the N rows.
        """
        # num_classes must be given explicitly: otherwise one_hot infers the width from the
        # largest class present in this batch, which breaks the product with p_y whenever
        # the highest class is missing (and raises on an empty batch).
        y_onehot = F.one_hot(y.argmax(dim=1), num_classes=y.size(1))
        loss = -torch.log(p_y + 1e-20) * y_onehot
        if weighted:
            loss = loss * y
        return loss.sum(dim=1).mean()
        

  long_ = _make_signed(np.long)


ModuleNotFoundError: No module named 'torch_scatter'

In [None]:
import os
from functools import partial
import pandas as pd
import torch
from torch_geometric.data import Data, InMemoryDataset, download_url
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import ToSparseTensor, AddTrainValTestMask
from torch_geometric.utils import to_undirected

from transforms import Normalize, FilterTopClass

def _random_split_masks(num_nodes, val_ratio, test_ratio, generator=None):
    """Build random, disjoint boolean (train, val, test) masks over ``num_nodes`` nodes."""
    num_val = int(num_nodes * val_ratio)
    num_test = int(num_nodes * test_ratio)
    num_train = num_nodes - num_val - num_test

    # Random node order; a generator makes the split reproducible.
    if generator is None:
        order = torch.randperm(num_nodes)
    else:
        order = torch.randperm(num_nodes, generator=generator)

    train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    val_mask = torch.zeros(num_nodes, dtype=torch.bool)
    test_mask = torch.zeros(num_nodes, dtype=torch.bool)

    # Consecutive slices of the permutation: train first, then validation, the rest is test.
    train_mask[order[:num_train]] = True
    val_mask[order[num_train:num_train + num_val]] = True
    test_mask[order[num_train + num_val:]] = True
    return train_mask, val_mask, test_mask


def load_dataset(dataset, val_ratio=.25, test_ratio=.25, graph_index=5, seed=None):
    """Load one graph of a supported dataset and attach random node-level split masks.

    Args:
        dataset: dataset name; only ``'mutag'`` is supported.
        val_ratio: fraction of the graph's nodes placed in the validation mask.
        test_ratio: fraction of the graph's nodes placed in the test mask.
        graph_index: which graph of the dataset to return (default 5, the value that
            was previously hard-coded).
        seed: optional integer seed for a reproducible split; ``None`` uses torch's
            global random state.

    Returns:
        The selected graph with ``train_mask``, ``val_mask`` and ``test_mask`` attributes set.

    Raises:
        ValueError: if ``dataset`` is not supported or the ratios are negative / sum above 1.

    NOTE(review): the returned graph is given no ``adj_t`` or ``T`` attribute, both of which
    ``NodeClassifier`` reads, and the MUTAG sample printed earlier in this notebook has
    ``y=[1]`` (one label per graph, not per node) - further preprocessing is needed before
    it can be fed to the classifier.
    """
    if dataset != 'mutag':
        # Previously any other name silently fell through and returned None.
        raise ValueError(f"Unsupported dataset {dataset!r}; expected 'mutag'.")
    if val_ratio < 0 or test_ratio < 0 or val_ratio + test_ratio > 1:
        raise ValueError(
            f"val_ratio and test_ratio must be non-negative and sum to at most 1, "
            f"got val_ratio={val_ratio}, test_ratio={test_ratio}."
        )

    # Imported here so the function does not depend on an `import torch_geometric`
    # executed in some other notebook cell.
    from torch_geometric.datasets import TUDataset

    # Get the dataset of interest
    mutag = TUDataset(root='./data/TUDataset', name='MUTAG')

    # For the time being work with a single graph of the dataset
    mutag_graph = mutag[graph_index]

    generator = None
    if seed is not None:
        generator = torch.Generator().manual_seed(seed)

    train_mask, val_mask, test_mask = _random_split_masks(
        mutag_graph.num_nodes, val_ratio, test_ratio, generator=generator
    )

    # Set masks in data
    mutag_graph.train_mask = train_mask
    mutag_graph.test_mask = test_mask
    mutag_graph.val_mask = val_mask

    return mutag_graph

In [None]:
# import os
# import sys
# import traceback
# import uuid
# from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
# import random
# import numpy as np
# import pandas as pd
# import torch
# from tqdm.auto import tqdm
# from torch_geometric.transforms import Compose
# from datasets import load_dataset
# from models import NodeClassifier
# from models import GraphConvModel
# from trainer import Trainer
# from transforms import FeatureTransform, FeaturePerturbation, LabelPerturbation
# from utils import print_args, WandbLogger, add_parameters_as_argument, \
#     measure_runtime, from_args, str2bool, Enum, EnumAction, colored_text, bootstrap


# class LogMode(Enum):
#     INDIVIDUAL = 'individual'
#     COLLECTIVE = 'collective'


# def seed_everything(seed):
#     random.seed(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed_all(seed)


# def confidence_interval(data, func=np.mean, size=1000, ci=95, seed=12345):
#     bs_replicates = bootstrap(data, func=func, n_boot=size, seed=seed)
#     p = 50 - ci / 2, 50 + ci / 2
#     bounds = np.nanpercentile(bs_replicates, p)
#     return (bounds[1] - bounds[0]) / 2


# @measure_runtime
# def run(args):
#     dataset = from_args(load_dataset, args)

#     test_acc = []
#     run_metrics = {}
#     run_id = str(uuid.uuid1())

#     logger = None
#     if args.log and args.log_mode == LogMode.COLLECTIVE:
#         logger = WandbLogger(project=args.project_name, config=args, enabled=args.log, reinit=False, group=run_id)

#     progbar = tqdm(range(args.repeats), file=sys.stdout)
#     for version in progbar:

#         if args.log and args.log_mode == LogMode.INDIVIDUAL:
#             args.version = version
#             logger = WandbLogger(project=args.project_name, config=args, enabled=args.log, group=run_id)

#         try:
#             data = dataset.clone().to(args.device)

#             # preprocess data
#             data = Compose([
#                 from_args(FeatureTransform, args),
#                 from_args(FeaturePerturbation, args),
#                 from_args(LabelPerturbation, args)
#             ])(data)

#             # define model
#             model = from_args(GraphConvModel, args, input_dim=data.num_features, num_classes=data.num_classes)

#             # train the model
#             trainer = from_args(Trainer, args, logger=logger if args.log_mode == LogMode.INDIVIDUAL else None)
#             best_metrics = trainer.fit(model, data)

#             # process results
#             for metric, value in best_metrics.items():
#                 run_metrics[metric] = run_metrics.get(metric, []) + [value]

#             test_acc.append(best_metrics['test/acc'])
#             progbar.set_postfix({'last_test_acc': test_acc[-1], 'avg_test_acc': np.mean(test_acc)})

#         except Exception as e:
#             error = ''.join(traceback.format_exception(Exception, e, e.__traceback__))
#             logger.log_summary({'error': error})
#             raise e
#         finally:
#             if args.log and args.log_mode == LogMode.INDIVIDUAL:
#                 logger.finish()

#     if args.log and args.log_mode == LogMode.COLLECTIVE:
#         summary = {}
#         for metric, values in run_metrics.items():
#             summary[metric + '_mean'] = np.mean(values)
#             summary[metric + '_ci'] = confidence_interval(values, size=1000, ci=95, seed=args.seed)

#         logger.log_summary(summary)

#     if not args.log:
#         os.makedirs(args.output_dir, exist_ok=True)
#         df_results = pd.DataFrame(test_acc, columns=['test/acc']).rename_axis('version').reset_index()
#         df_results['Name'] = run_id
#         for arg_name, arg_val in vars(args).items():
#             df_results[arg_name] = [arg_val] * len(test_acc)
#         df_results.to_csv(os.path.join(args.output_dir, f'{run_id}.csv'), index=False)


# def main():
#     init_parser = ArgumentParser(add_help=False, conflict_handler='resolve')

#     # dataset args
#     group_dataset = init_parser.add_argument_group('dataset arguments')
#     add_parameters_as_argument(load_dataset, group_dataset)

#     # data transformation args
#     group_perturb = init_parser.add_argument_group(f'data transformation arguments')
#     add_parameters_as_argument(FeatureTransform, group_perturb)
#     add_parameters_as_argument(FeaturePerturbation, group_perturb)
#     add_parameters_as_argument(LabelPerturbation, group_perturb)

#     # model args
#     group_model = init_parser.add_argument_group(f'model arguments')
#     add_parameters_as_argument(GraphConvModel, group_model)

#     # trainer arguments (depends on perturbation)
#     group_trainer = init_parser.add_argument_group(f'trainer arguments')
#     add_parameters_as_argument(Trainer, group_trainer)
#     group_trainer.add_argument('--device', help='desired device for training', choices=['cpu', 'cuda'], default='cuda')

#     # experiment args
#     group_expr = init_parser.add_argument_group('experiment arguments')
#     group_expr.add_argument('-s', '--seed', type=int, default=None, help='initial random seed')
#     group_expr.add_argument('-r', '--repeats', type=int, default=1, help="number of times the experiment is repeated")
#     group_expr.add_argument('-o', '--output-dir', type=str, default='./output', help="directory to store the results")
#     group_expr.add_argument('--log', type=str2bool, nargs='?', const=True, default=False, help='enable wandb logging')
#     group_expr.add_argument('--log-mode', type=LogMode, action=EnumAction, default=LogMode.INDIVIDUAL,
#                             help='wandb logging mode')
#     group_expr.add_argument('--project-name', type=str, default='LPGNN', help='wandb project name')

#     parser = ArgumentParser(parents=[init_parser], formatter_class=ArgumentDefaultsHelpFormatter)
#     args = parser.parse_args()
#     print_args(args)
#     args.cmd = ' '.join(sys.argv)  # store calling command

#     if args.seed:
#         seed_everything(args.seed)

#     if args.device == 'cuda' and not torch.cuda.is_available():
#         print(colored_text('CUDA is not available, falling back to CPU', color='red'))
#         args.device = 'cpu'

#     try:
#         run(args)
#     except KeyboardInterrupt:
#         print('Graceful Shutdown...')


# if __name__ == '__main__':
#     main()
