In [4]:
#import spektral
import numpy as np
import tensorflow as tf
from ogb.graphproppred import GraphPropPredDataset
from spektral.data import Dataset, Graph
#from spektral.datasets import TUDataset, QM9
import sys
import time
import shutil
import numpy as np
import tensorflow as tf
import pandas as pd
import spektral
from matplotlib import pyplot as plt
from scipy.stats import kendalltau
from tensorflow.keras.optimizers import Adam
from keras.losses import BinaryCrossentropy, MeanSquaredError
from keras.metrics import BinaryAccuracy

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
config = {
    'seed': 1,
    'epochs': 10,
    'batch_size': 32,
    'learning_rate': 0.001,
    'dataset': 'ogbg-molesol', #JA: QM9, ogbg-molesol, ogbg-molfreesolv, ogbg-mollipo, ZINC| NEIN: aspirin
    'train_test_split': 0.8
}

np.random.seed(config['seed'])
tf.random.set_seed(config['seed'])

In [6]:
class OGBDataset(Dataset):
    '''
    (spektral) Dataset class wrapper for Open Graph Benchmark datasets.
    '''
    def __init__(self, name, **kwargs):
        self.name = name
        super().__init__(**kwargs)

    def read(self):
        dataset = GraphPropPredDataset(name=self.name)
        graphs = []
        for data in dataset:
            edge_index = data[0]['edge_index']
            edge_feat = data[0]['edge_feat']
            node_feat = data[0]['node_feat']
            label = data[1]

            # Create adjacency matrix
            num_nodes = node_feat.shape[0]
            adj = np.zeros((num_nodes, num_nodes))
            for edge in edge_index.T:
                adj[edge[0], edge[1]] = 1

            # Create spektral Graph object
            graphs.append(Graph(x=node_feat, a=adj, e=edge_feat, y=label))
            
        self.size = len(graphs)

        return graphs

def ogb_available_datasets():
    #These regression datasets have size % 2 == 0 number of graphs
    return ['ogbg-molesol', 'ogbg-molfreesolv', 'ogbg-mollipo']

In [7]:
def _load_data(name: str):
    '''
    Loads a dataset from [TUDataset, OGB]
    '''
    if name in ogb_available_datasets():
        dataset= OGBDataset(name)
    else:
        raise ValueError(f'Dataset {name} unknown')

    return dataset, dataset.n_labels

In [8]:
def _split_data(data, train_test_split, seed):
    '''
    Split the data into train and test sets
    '''
    np.random.seed(seed)
    idxs = np.random.permutation(len(data))
    split = int(train_test_split * len(data))
    idx_train, idx_test = np.split(idxs, [split])
    train, test = data[idx_train], data[idx_test]
    train.size = len(train)
    test.size = len(test)
    return train, test

In [9]:
def get_data(config):
    seed = config['seed']
    train_test_split = config['train_test_split']
    name = config['dataset']

    # Load data
    data, config['n_out'] = _load_data(name)
    # Split data
    train_data, test_data = _split_data(data, train_test_split, seed)

    return train_data, test_data

In [10]:
dataset_train, dataset_test = get_data(config)

In [11]:
len(dataset_train), len(dataset_test)

(902, 226)

In [12]:
dataset_train

OGBDataset(n_graphs=902)

In [13]:
def iterate_train_random(elements):
    objects = elements
    sort_idx = np.argsort(objects)
    olen = objects.size
    seed = 0 + olen
    pair_count = (olen * (olen - 1)) // 2
    sample_size = min(int(20 * pair_count), pair_count)
    rng = np.random.default_rng(seed)

    sample = rng.choice(pair_count, sample_size, replace=False)
    sample_b = (np.sqrt(sample * 2 + 1/4) + 1/2).astype(np.int)
    sample_a = sample - (sample_b * (sample_b - 1)) // 2
    idx_a = sort_idx[sample_a]
    idx_b = sort_idx[sample_b]

    return idx_a, idx_b

In [14]:
index=5
indices=np.arange(len(dataset_test))
indices=np.array(indices[index*32:(index+1)*32])
i_a, i_b = iterate_train_random(indices)

In [15]:
len(i_a),len(i_b)

(496, 496)

In [16]:
def get_target(data, indices_a, indices_b):
    assert(len(indices_a)==len(indices_b))
    a = data[indices_a]
    b = data[indices_b]
    t = []
    for i in range(len(indices_a)):
        u_a = a[i].y
        u_b = b[i].y
        if u_a >= u_b:
            t.append(1)
        else:
            t.append(0)
    return np.array(t)

In [17]:
ta = get_target(dataset_test, i_a, i_b)

In [18]:
i_a[0], i_b[0]

(0, 16)

In [19]:
dataset_test[i_a[0]].y, dataset_test[i_b[0]].y

(array([-2.55]), array([-1.03]))

In [20]:
dataset_test[i_a[1]].y, dataset_test[i_b[1]].y

(array([-0.96]), array([-3.499]))

In [21]:
len(ta),ta

(496,
 array([0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
        1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1,
        1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
        1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1,
        0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1,
        1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1,
        0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
        1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
        1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 1, 1, 0, 0, 1, 1, 

In [280]:
import tensorflow as tf
from spektral.data.utils import (
    prepend_none,
    sp_matrices_to_sp_tensors,
    to_disjoint,
    collate_labels_disjoint
)

class CustomDataLoader(tf.keras.utils.Sequence):
    def __init__(self, data, batch_size=32, shuffle=True, seed=42, sampling_ratio=20):
        self.data = data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.seed = seed
        self.sampling_ratio = sampling_ratio
        self.indices = np.arange(len(self.data))
        self.node_level = False
        self.idx_a = []
        self.idx_b = []
        self.target = []
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.data) / self.batch_size))

    def __getitem__(self, index):
        indices = np.array(self.indices[index*self.batch_size:(index+1)*self.batch_size])
        self.idx_a, self.idx_b = self.iterate_train_random(indices)
        batch_data = self.data[indices]
        #disjointloader content start
        packed = self.pack(batch_data)
        y = packed.pop("y_list", None)
        if y is not None:
            y = collate_labels_disjoint(y, node_level=self.node_level)
        output = to_disjoint(**packed)
        output = sp_matrices_to_sp_tensors(output)
        #disjointloader content end
        
        # target berechnen für die pairs start
        self.target = self.get_target(idx_a, idx_b)
        # target berechnen für die pairs ende
        
        return output + (self.idx_a, self.idx_b), self.target #batch_data, batch_labels

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)
    
    def iterate_train_random(self, elements):
        objects = elements
        sort_idx = np.argsort(objects)
        olen = objects.size
        seed = self.seed + olen
        pair_count = (olen * (olen - 1)) // 2
        sample_size = min(int(self.sampling_ratio * pair_count), pair_count)
        rng = np.random.default_rng(seed)

        sample = rng.choice(pair_count, sample_size, replace=False)
        sample_b = (np.sqrt(sample * 2 + 1/4) + 1/2).astype(np.int)
        sample_a = sample - (sample_b * (sample_b - 1)) // 2
        idx_a = sort_idx[sample_a]
        idx_b = sort_idx[sample_b]

        return idx_a, idx_b
    
    def pack(self, batch):
        """
        Given a batch of graphs, groups their attributes into separate lists and packs
        them in a dictionary.

        For instance, if a batch has three graphs g1, g2 and g3 with node
        features (x1, x2, x3) and adjacency matrices (a1, a2, a3), this method
        will return a dictionary:

        ```python
        >>> {'a_list': [a1, a2, a3], 'x_list': [x1, x2, x3]}
        ```

        :param batch: a list of `Graph` objects.
        """
        output = [list(elem) for elem in zip(*[g.numpy() for g in batch])]
        keys = [k + "_list" for k in self.data.signature.keys()]
        return dict(zip(keys, output))
    
    def get_target(self, indices_a, indices_b):
        assert(len(indices_a)==len(indices_b))
        a = self.data[indices_a.flatten()]
        b = self.data[indices_b.flatten()]
        t = []
        for i in range(len(indices_a)):
            util_a = a[i].y
            util_b = b[i].y
            if util_a >= util_b:
                t.append(1)
            else:
                t.append(0)
        return np.array(t)
    
    def info(self):
        print(f"idx_a:{self.idx_a}")
        print(f"idx_b:{self.idx_b}")
        print(f"target:{self.target}")

In [290]:
data = dataset_test
batch_size = 32
shuffle = True
sseed = 42123
sampling_ratio = 20
indices = np.arange(len(data))
node_level = False
index=1

In [369]:
len(data)

226

In [291]:
####### Aufbau A #######
# erst Indizes auswählen -> Paare anhand Indizes -> Daten+Target

In [292]:
def iterate_train_random(elements):
    objects = elements
    sort_idx = np.argsort(objects)
    olen = objects.size
    seed = sseed + olen
    pair_count = (olen * (olen - 1)) // 2
    sample_size = min(int(sampling_ratio * pair_count), pair_count)
    rng = np.random.default_rng(seed)

    sample = rng.choice(pair_count, sample_size, replace=False)
    sample_b = (np.sqrt(sample * 2 + 1/4) + 1/2).astype(np.int)
    sample_a = sample - (sample_b * (sample_b - 1)) // 2
    idx_a = sort_idx[sample_a]
    idx_b = sort_idx[sample_b]

    return idx_a, idx_b

In [293]:
indices = np.array(indices[index*batch_size:(index+1)*batch_size])
idx_a, idx_b = iterate_train_random(indices)
batch_data = data[indices]

In [295]:
indices

array([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
       49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63])

In [319]:
idx_a.shape

(496,)

In [294]:
idx_a, idx_b

(array([ 4, 20,  8,  4,  2, 24,  3, 14,  5,  8,  2, 17, 12, 13,  0,  5,  2,
        22, 15,  8,  0, 21,  5,  9,  7, 10, 23, 16, 21, 15,  9,  3,  1, 16,
         1, 21, 13, 21,  9, 18,  8, 12, 20,  9,  8,  9,  5, 14,  7, 19,  9,
         6,  7, 11, 27, 10, 10,  4, 16, 26, 19, 17,  2, 16,  6, 19,  0,  2,
         4,  3,  0, 10,  6, 15, 11, 22, 17,  6, 15, 14,  0,  7, 13, 11,  3,
         5,  3,  7,  0, 26, 15,  5,  6, 10,  0,  1,  1,  6,  5, 12, 17, 24,
         0,  8, 12, 14, 13,  0, 11,  4,  1, 25,  2,  0,  8,  8, 12,  0,  8,
         1,  1, 22,  1, 18,  7, 17,  3, 14,  3,  4, 12,  7,  1,  1,  0,  4,
        11,  1,  1, 24,  9, 10,  5, 23,  8, 13,  3,  7,  1,  8,  6, 13, 15,
         9,  1, 17,  0,  4, 11,  9,  1,  2, 15,  0, 16,  0,  3,  0, 21,  6,
         9, 12,  0, 16,  2, 10,  2,  9,  4,  1, 25,  3, 25,  2,  8, 27, 14,
         5, 12,  0,  0,  3, 15, 16, 18,  5, 22, 24, 17,  2,  0,  4,  4,  4,
        17,  6, 19, 11, 19,  7, 20, 12, 24,  2,  7,  9, 19,  4, 10,  6,  2,
        14, 

In [438]:
Counter(idx_a).most_common()#, Counter(idx_b).most_common()

[(0, 31),
 (1, 30),
 (2, 29),
 (3, 28),
 (4, 27),
 (5, 26),
 (6, 25),
 (7, 24),
 (8, 23),
 (9, 22),
 (10, 21),
 (11, 20),
 (12, 19),
 (13, 18),
 (14, 17),
 (15, 16),
 (16, 15),
 (17, 14),
 (18, 13),
 (19, 12),
 (20, 11),
 (21, 10),
 (22, 9),
 (23, 8),
 (24, 7),
 (25, 6),
 (26, 5),
 (27, 4),
 (28, 3),
 (29, 2),
 (30, 1)]

In [296]:
batch_data

OGBDataset(n_graphs=32)

In [301]:
def get_target(indices_a, indices_b):
    assert(len(indices_a)==len(indices_b))
    a = data[indices_a.flatten()]
    b = data[indices_b.flatten()]
    t = []
    for i in range(len(indices_a)):
        util_a = a[i].y
        util_b = b[i].y
        if util_a >= util_b:
            t.append(1)
        else:
            t.append(0)
    return np.array(t)

In [302]:
target = get_target(idx_a, idx_b)

In [336]:
list(target).count(1), list(target).count(0)

(300, 196)

In [337]:
target

array([1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,

In [470]:
Counter(target).most_common()

[(1, 300), (0, 196)]

In [None]:
####### Aufbau B #######
# Erst Paare über alle Daten -> Target

In [423]:
data = dataset_test
batch_size = 32
shuffle = True
sseed = 42
sampling_ratio = 2
indices = np.arange(len(data))
node_level = False
index=1

In [448]:
def iterate_train_randomB(elements):
    objects = elements
    utilities = [e.y for e in elements]#hier und
    sort_idx = np.argsort(utilities, axis=0)#hier angepasst
    olen = objects.size
    seed = sseed + olen
    pair_count = (olen * (olen - 1)) // 2
    print(f"paircount:{pair_count}")
    sample_size = min(int(sampling_ratio * pair_count), pair_count)
    print(f"sample_size:{sample_size}")
    rng = np.random.default_rng(seed)

    sample = rng.choice(pair_count, sample_size, replace=False)
    print(f"sample:{sample}")
    sample_b = (np.sqrt(sample * 2 + 1/4) + 1/2).astype(np.int)
    sample_a = sample - (sample_b * (sample_b - 1)) // 2
    idx_a = sort_idx[sample_a]
    idx_b = sort_idx[sample_b]

    return idx_a, idx_b

In [449]:
idx_a2, idx_b2 = iterate_train_randomB(data)

paircount:25425
sample_size:25425
sample:[ 1659 13983 13295 ...  1461 24475 17082]


In [450]:
idx_a2.flatten(), idx_b2.flatten()

(array([130,  48,  30, ..., 149, 112,  55]),
 array([189,  11, 128, ...,  96, 140, 167]))

In [451]:
Counter(idx_a2.flatten()).most_common()

[(157, 225),
 (187, 224),
 (39, 223),
 (170, 222),
 (21, 221),
 (209, 220),
 (130, 219),
 (201, 218),
 (103, 217),
 (85, 216),
 (198, 215),
 (56, 214),
 (28, 213),
 (179, 212),
 (81, 211),
 (141, 210),
 (71, 209),
 (90, 208),
 (46, 207),
 (211, 206),
 (86, 205),
 (45, 204),
 (34, 203),
 (132, 202),
 (160, 201),
 (110, 200),
 (15, 199),
 (153, 198),
 (41, 197),
 (188, 196),
 (149, 195),
 (109, 194),
 (10, 193),
 (51, 192),
 (106, 191),
 (60, 190),
 (80, 189),
 (62, 188),
 (22, 187),
 (186, 186),
 (183, 185),
 (208, 184),
 (13, 183),
 (50, 182),
 (83, 181),
 (151, 180),
 (91, 179),
 (64, 178),
 (192, 177),
 (161, 176),
 (176, 175),
 (122, 174),
 (33, 173),
 (82, 172),
 (96, 171),
 (131, 170),
 (12, 169),
 (172, 168),
 (189, 167),
 (75, 166),
 (217, 165),
 (99, 164),
 (55, 163),
 (20, 162),
 (44, 161),
 (42, 160),
 (204, 159),
 (219, 158),
 (54, 157),
 (184, 156),
 (108, 155),
 (138, 154),
 (66, 153),
 (84, 152),
 (171, 151),
 (113, 150),
 (47, 149),
 (88, 148),
 (214, 147),
 (222, 146),


In [443]:
len(idx_a2.flatten())

25425

In [444]:
def get_targetB(indices_a, indices_b):
    assert(len(indices_a)==len(indices_b))
    a = data[indices_a.flatten()]
    b = data[indices_b.flatten()]
    t = []
    for i in range(len(indices_a)):
        util_a = data[indices_a[i][0]].y #a[i].y
        util_b = data[indices_b[i][0]].y #b[i].y
        if util_a >= util_b:
            t.append(1)
        else:
            t.append(0)
    return np.array(t)

In [445]:
target2 = get_targetB(idx_a2, idx_b2)

In [446]:
list(target2).count(1), list(target2).count(0)

(20, 25405)

In [447]:
target2

array([0, 0, 0, ..., 0, 0, 0])

In [340]:
asdasd = data[idx_a]
asdasd

OGBDataset(n_graphs=496)

In [341]:
sdfasdf = data[idx_a2.flatten()]
sdfasdf

OGBDataset(n_graphs=25425)

In [366]:
from collections import Counter
y = [e.y for e in sdfasdf]
y =[x[0] for x in y]
Counter(y)

Counter({-4.735: 177,
         -3.924: 147,
         -7.15: 216,
         -6.291: 207,
         -3.796: 138,
         -4.047: 152,
         -2.11: 79,
         -3.73: 271,
         -4.799: 180,
         -5.72: 200,
         -4.678: 176,
         -1.74: 68,
         -5.46: 196,
         -6.49: 208,
         -0.22: 14,
         -3.9: 146,
         -2.78: 99,
         -4.53: 168,
         -2.21: 81,
         -7.68: 220,
         -5.26: 192,
         -8.4: 223,
         -0.742: 28,
         -4.44: 166,
         -2.63: 95,
         -5.19: 190,
         -4.19: 156,
         -1.4: 56,
         -2.74: 97,
         -3.81: 139,
         -7.21: 218,
         -7.92: 222,
         -6.09: 203,
         -4.42: 164,
         -3.571: 130,
         -3.12: 223,
         -0.45: 19,
         -1.24: 47,
         -3.14: 113,
         -3.7: 134,
         -1.34: 156,
         -5.3820000000000014: 195,
         -3.65: 133,
         -4.376: 161,
         -3.85: 429,
         -2.57: 93,
         -5.184: 189,
    

In [453]:
####### Aufbau C #######
# Erst Paare über alle Paare (ineffizient) -> Target

In [454]:
data = dataset_test
batch_size = 32
shuffle = True
sseed = 42
sampling_ratio = 2
indices = np.arange(len(data))
node_level = False
index=1

In [457]:
from itertools import combinations
def sample_preference_pairs(graphs):
    c = [(a, b, check_util(graphs, a,b)) for a, b in combinations(range(len(graphs)), 2)]
    idx_a = []
    idx_b = []
    target = []
    for id_a, id_b, t in c:
        idx_a.append(id_a)
        idx_b.append(id_b)
        target.append(t)
    return np.array(idx_a), np.array(idx_b), np.array(target).reshape(-1, 1)

def check_util(data, index_a, index_b):
        a = data[index_a]
        b = data[index_b]
        util_a = a.y
        util_b = b.y
        if util_a >= util_b:
            return 1
        else:
            return 0

In [458]:
idx_a3, idx_b3, t3 = sample_preference_pairs(data)

In [460]:
Counter(idx_a3).most_common()

[(0, 225),
 (1, 224),
 (2, 223),
 (3, 222),
 (4, 221),
 (5, 220),
 (6, 219),
 (7, 218),
 (8, 217),
 (9, 216),
 (10, 215),
 (11, 214),
 (12, 213),
 (13, 212),
 (14, 211),
 (15, 210),
 (16, 209),
 (17, 208),
 (18, 207),
 (19, 206),
 (20, 205),
 (21, 204),
 (22, 203),
 (23, 202),
 (24, 201),
 (25, 200),
 (26, 199),
 (27, 198),
 (28, 197),
 (29, 196),
 (30, 195),
 (31, 194),
 (32, 193),
 (33, 192),
 (34, 191),
 (35, 190),
 (36, 189),
 (37, 188),
 (38, 187),
 (39, 186),
 (40, 185),
 (41, 184),
 (42, 183),
 (43, 182),
 (44, 181),
 (45, 180),
 (46, 179),
 (47, 178),
 (48, 177),
 (49, 176),
 (50, 175),
 (51, 174),
 (52, 173),
 (53, 172),
 (54, 171),
 (55, 170),
 (56, 169),
 (57, 168),
 (58, 167),
 (59, 166),
 (60, 165),
 (61, 164),
 (62, 163),
 (63, 162),
 (64, 161),
 (65, 160),
 (66, 159),
 (67, 158),
 (68, 157),
 (69, 156),
 (70, 155),
 (71, 154),
 (72, 153),
 (73, 152),
 (74, 151),
 (75, 150),
 (76, 149),
 (77, 148),
 (78, 147),
 (79, 146),
 (80, 145),
 (81, 144),
 (82, 143),
 (83, 142),
 (

In [461]:
Counter(idx_b3).most_common()

[(225, 225),
 (224, 224),
 (223, 223),
 (222, 222),
 (221, 221),
 (220, 220),
 (219, 219),
 (218, 218),
 (217, 217),
 (216, 216),
 (215, 215),
 (214, 214),
 (213, 213),
 (212, 212),
 (211, 211),
 (210, 210),
 (209, 209),
 (208, 208),
 (207, 207),
 (206, 206),
 (205, 205),
 (204, 204),
 (203, 203),
 (202, 202),
 (201, 201),
 (200, 200),
 (199, 199),
 (198, 198),
 (197, 197),
 (196, 196),
 (195, 195),
 (194, 194),
 (193, 193),
 (192, 192),
 (191, 191),
 (190, 190),
 (189, 189),
 (188, 188),
 (187, 187),
 (186, 186),
 (185, 185),
 (184, 184),
 (183, 183),
 (182, 182),
 (181, 181),
 (180, 180),
 (179, 179),
 (178, 178),
 (177, 177),
 (176, 176),
 (175, 175),
 (174, 174),
 (173, 173),
 (172, 172),
 (171, 171),
 (170, 170),
 (169, 169),
 (168, 168),
 (167, 167),
 (166, 166),
 (165, 165),
 (164, 164),
 (163, 163),
 (162, 162),
 (161, 161),
 (160, 160),
 (159, 159),
 (158, 158),
 (157, 157),
 (156, 156),
 (155, 155),
 (154, 154),
 (153, 153),
 (152, 152),
 (151, 151),
 (150, 150),
 (149, 149),

In [468]:
t3_ = [t[0] for t in t3]

In [469]:
Counter(t3_)

Counter({0: 12684, 1: 12741})

In [223]:
def iterate_train_random(elements):
    objects = elements
    utility_scores = [e.y for e in elements]
    sort_idx = np.argsort(utility_scores, axis=0)
    olen = objects.size
    seed = sseed + olen
    pair_count = (olen * (olen - 1)) // 2
    sample_size = min(int(sampling_ratio * pair_count), pair_count)
    rng = np.random.default_rng(seed)

    sample = rng.choice(pair_count, sample_size, replace=False)
    sample_b = (np.sqrt(sample * 2 + 1/4) + 1/2).astype(np.int)
    sample_a = sample - (sample_b * (sample_b - 1)) // 2
    idx_a = sort_idx[sample_a]
    idx_b = sort_idx[sample_b]

    return np.array(idx_a), np.array(idx_b)

In [224]:
idx_a, idx_b = iterate_train_random(data)

In [225]:
idx_a, idx_b

(array([[192],
        [214],
        [ 85],
        ...,
        [ 32],
        [160],
        [ 87]]),
 array([[199],
        [142],
        [ 47],
        ...,
        [ 68],
        [ 53],
        [ 35]]))

In [226]:
def get_target(idx_a, idx_b, data):
    a = data[idx_a.flatten().tolist()]
    b = data[idx_b.flatten().tolist()]
    u_a = [e.y for e in a]
    t = []
    for i in range(len(idx_a)):
        util_a = a[i].y
        util_b = b[i].y
        if util_a >= util_b:
            t.append(1)
        else:
            t.append(0)
    return np.array(t)

In [227]:
target = get_target(idx_a, idx_b, data)

In [228]:
target

array([0, 0, 0, ..., 0, 0, 0])

In [229]:
list(target).count(0)

25405

In [139]:
dl = CustomDataLoader(dataset_test)

[[Graph(n_nodes=17, n_node_features=9, n_edge_features=3, n_labels=1)]
 [Graph(n_nodes=11, n_node_features=9, n_edge_features=3, n_labels=1)]
 [Graph(n_nodes=6, n_node_features=9, n_edge_features=3, n_labels=1)]
 ...
 [Graph(n_nodes=22, n_node_features=9, n_edge_features=3, n_labels=1)]
 [Graph(n_nodes=8, n_node_features=9, n_edge_features=3, n_labels=1)]
 [Graph(n_nodes=11, n_node_features=9, n_edge_features=3, n_labels=1)]]


IndexError: index 1 is out of bounds for axis 0 with size 1

In [38]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from spektral.layers import ECCConv


class PRGNN(tf.keras.Model):
    def __init__(self, config):
        super().__init__()
        self.conv1 = ECCConv(32, activation="relu")
        self.conv2 = ECCConv(32, activation="relu")
        self.dense = Dense(config['n_out'], activation=None)

    def call(self, inputs):#, training=False
        x, a, e, i, idx_a, idx_b = inputs
        #print(x)
        #print(a)
        #print(e)
        #print(idx_a)
        #print(idx_b)

        x = tf.cast(x, tf.float32)
        a = a.with_values(tf.cast(a.values, tf.float32))
        e = tf.cast(e, tf.float32)

        X = self.conv1([x, a, e])
        X = self.conv2([X, a, e])
        X_util = self.dense(X)
        X_a, X_b = self.pref_lookup(X_util, idx_a, idx_b)
        print(X_b - X_a)
        #assert False
        return X_b - X_a
        # if training:
        #     return X_b - X_a
        # else:
        #     return X_util

        # return X_b - X_a, X_util

    def pref_lookup(self, X, pref_a, pref_b):

        X_a = tf.gather(X, pref_a, axis=0)
        X_b = tf.gather(X, pref_b, axis=0)

        return X_a, X_b

In [39]:
# Create DataLoader
batch_size = 64
sampling_ratio=4
data_loader = CustomDataLoader(dataset_test, batch_size=batch_size, sampling_ratio=sampling_ratio)

# Example model
model = PRGNN(config)

model.compile(optimizer=Adam(config['learning_rate']),
              loss=BinaryCrossentropy(from_logits=True),
              metrics=[BinaryAccuracy(threshold=.5)])

# Fit model
model.fit(data_loader, epochs=50, verbose=1)

[ 9 54 18 ...  7 17 20]
tf.Tensor(
[[-0.8973114 ]
 [ 2.445302  ]
 [-0.06859034]
 ...
 [-2.1251192 ]
 [-0.30545223]
 [-0.11459565]], shape=(2016, 1), dtype=float32)
Epoch 1/2


AttributeError: in user code:

    /Users/sophia.schubert/miniconda3/envs/experiment/lib/python3.6/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    <ipython-input-38-7da32289c574>:22 call  *
        a = a.with_values(tf.cast(a.values, tf.float32))
    /Users/sophia.schubert/miniconda3/envs/experiment/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:401 __getattr__
        self.__getattribute__(name)

    AttributeError: 'Tensor' object has no attribute 'with_values'


In [40]:
a = zip([1,2,3],[4,5,6])

In [41]:
a

<zip at 0x7f9688908f08>

In [43]:
a = list(a)

In [45]:
a[1]

(2, 5)