In [9]:
import torch
import numpy as np
import pandas as pd
import numpy as np 
from itertools import cycle
from sklearn import metrics
from src.models import PyGod
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.transforms import RandomLinkSplit, RandomNodeSplit

from src.data_preparation import DataPreprocessor
from src.graph_data_loader import graph_loader,  heterogeneous_graph_loader

### Generating Train Test Split Excel Files

In [10]:
!python src/generate_splits.py

### Homogeneous Graph Loader Example

In [11]:
train_graph, test_graph = graph_loader(split_type='mutually_exclusive', swap_rate=0.1)

Reading train data from  data/splits/mutually_exclusive/train.xlsx
Reading test data from  data/splits/mutually_exclusive/test.xlsx


In [12]:
print("Train Details: \n")
print("Number of nodes: ", train_graph.num_nodes)
print("Number of features per node: ", train_graph.num_node_features)
print("Number of edges: ", train_graph.num_edges)
print("Number of edge attributes: ", train_graph.num_edge_features)
print("Undirected: ", train_graph.is_undirected())
print("Isolated Nodes: ", train_graph.has_isolated_nodes())
print("Self Loops: ", train_graph.has_self_loops())

Train Details: 

Number of nodes:  7085
Number of features per node:  20
Number of edges:  33474
Number of edge attributes:  0
Undirected:  True
Isolated Nodes:  True
Self Loops:  False


In [13]:
print("Test Details: \n")
print("Number of nodes: ", test_graph.num_nodes)
print("Number of features per node: ", test_graph.num_node_features)
print("Number of edges: ", test_graph.num_edges)
print("Number of edge attributes: ", test_graph.num_edge_features)
print("Undirected: ", test_graph.is_undirected())
print("Isolated Nodes: ", test_graph.has_isolated_nodes())
print("Self Loops: ", test_graph.has_self_loops())

Test Details: 

Number of nodes:  3037
Number of features per node:  20
Number of edges:  13258
Number of edge attributes:  0
Undirected:  True
Isolated Nodes:  True
Self Loops:  False


### Heterogeneous Graph Loader Example

In [14]:
het_train_graph, het_test_graph = heterogeneous_graph_loader(split_type='whole', swap_rate=0.1)
print(het_train_graph)

HeteroData(
  [1msample[0m={
    x=[10122, 20],
    y=[10122]
  },
  [1mathlete[0m={ x=[2840, 2] },
  [1m(athlete, knows, athlete)[0m={ edge_index=[2, 8062760] },
  [1m(sample, belongs_to, athlete)[0m={ edge_index=[2, 10122] }
)


In [15]:
het_transform = RandomLinkSplit(edge_types=('sample', 'belongs_to', 'athlete'))
het_data = het_transform(het_train_graph)
print(het_data)

(HeteroData(
  [1msample[0m={
    x=[10122, 20],
    y=[10122]
  },
  [1mathlete[0m={ x=[2840, 2] },
  [1m(athlete, knows, athlete)[0m={ edge_index=[2, 8062760] },
  [1m(sample, belongs_to, athlete)[0m={
    edge_index=[2, 7086],
    edge_label=[14172],
    edge_label_index=[2, 14172]
  }
), HeteroData(
  [1msample[0m={
    x=[10122, 20],
    y=[10122]
  },
  [1mathlete[0m={ x=[2840, 2] },
  [1m(athlete, knows, athlete)[0m={ edge_index=[2, 8062760] },
  [1m(sample, belongs_to, athlete)[0m={
    edge_index=[2, 7086],
    edge_label=[2024],
    edge_label_index=[2, 2024]
  }
), HeteroData(
  [1msample[0m={
    x=[10122, 20],
    y=[10122]
  },
  [1mathlete[0m={ x=[2840, 2] },
  [1m(athlete, knows, athlete)[0m={ edge_index=[2, 8062760] },
  [1m(sample, belongs_to, athlete)[0m={
    edge_index=[2, 8098],
    edge_label=[4048],
    edge_label_index=[2, 4048]
  }
))


In [16]:
het_transform = RandomNodeSplit(num_train_per_class=400)
het_data = het_transform(het_train_graph)
print(het_data)

HeteroData(
  [1msample[0m={
    x=[10122, 20],
    y=[10122],
    train_mask=[10122],
    val_mask=[10122],
    test_mask=[10122]
  },
  [1mathlete[0m={ x=[2840, 2] },
  [1m(athlete, knows, athlete)[0m={ edge_index=[2, 8062760] },
  [1m(sample, belongs_to, athlete)[0m={ edge_index=[2, 10122] }
)


In [21]:
print("Number of sample nodes: ", het_train_graph['sample'].num_nodes)
print("Number of athlete nodes: ", het_train_graph['athlete'].num_nodes)
print("Number of features per sample node: ", het_train_graph['sample'].num_node_features)
print("Number of features per athlete node: ", het_train_graph['athlete'].num_node_features)
print("Number of edges: ", het_train_graph.num_edges)
print("Number of edge attributes: ", train_graph.num_edge_features)
print("Number of training nodes: ", het_data['sample'].train_mask.sum().item())
print("Undirected: ", het_data.is_undirected())
print("Isolated Nodes: ", het_data.has_isolated_nodes())
print("Self Loops: ", het_data.has_self_loops())

Number of sample nodes:  10122
Number of athlete nodes:  2840
Number of features per sample node:  20
Number of features per athlete node:  2
Number of edges:  8072882
Number of edge attributes:  0
Number of training nodes:  8622
Undirected:  False
Isolated Nodes:  True
Self Loops:  False
