<a href="https://colab.research.google.com/github/Sripathm2/UCLA_CS_245_Project5/blob/GNN/GNN/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install spektral

In [None]:
import tensorflow as tf
import numpy as np
from spektral.models import GeneralGNN

from spektral.data import DisjointLoader

from spektral.datasets import TUDataset
from tensorflow.keras.optimizers import Adam

# Enable on-demand memory growth for the first visible GPU (if there is one).
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Best config
batch_size = 32
learning_rate = 0.01
epochs = 1

# Read data
data = TUDataset('PROTEINS')

# Train/test split: shuffle in place, then use the first 80% for training
np.random.shuffle(data)
split = int(0.8 * len(data))
data_tr, data_te = data[:split], data[split:]

# Data loader
# Only the training loader is given an epoch limit; `evaluate` stops reading
# the test loader itself after `steps_per_epoch` batches.
loader_tr = DisjointLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_te = DisjointLoader(data_te, batch_size=batch_size)

# Create model
model = GeneralGNN(data.n_labels, activation='softmax')
optimizer = Adam(learning_rate)
# Pass the configured optimizer instance. The string 'adam' would make Keras
# build a new Adam with its default learning rate, silently ignoring
# `learning_rate` from the config above.
model.compile(optimizer, 'categorical_crossentropy', metrics=['categorical_accuracy'])


# Evaluation function
def evaluate(loader):
    step = 0
    results = []
    for batch in loader:
        step += 1
        loss, acc = model.test_on_batch(*batch)
        results.append((loss, acc))
        if step == loader.steps_per_epoch:
            return np.mean(results, 0)


# Training loop
# The training loader yields batches back-to-back across epochs, so an epoch
# boundary is detected by counting steps against `steps_per_epoch`.
epoch = step = 0
results = []
for batch in loader_tr:
    step += 1
    loss, acc = model.train_on_batch(*batch)
    results.append((loss, acc))
    if step == loader_tr.steps_per_epoch:
        step = 0
        epoch += 1
        results_te = evaluate(loader_te)
        print('Epoch {} - Train loss: {:.3f} - Train acc: {:.3f} - '
              'Test loss: {:.3f} - Test acc: {:.3f}'
              .format(epoch, *np.mean(results, 0), *results_te))
        # Start a fresh accumulator so the next epoch's train metrics are not
        # averaged together with batches from earlier epochs.
        results = []

# Final evaluation on the held-out split once training has finished.
results_te = evaluate(loader_te)
print('Final results - Loss: {:.3f} - Acc: {:.3f}'.format(*results_te))

Downloading PROTEINS dataset.
Successfully loaded PROTEINS.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1 - Train loss: 0.649 - Train acc: 0.636 - Test loss: 0.608 - Test acc: 0.695
Final results - Loss: 0.606 - Acc: 0.695


In [4]:
from tensorflow.keras import layers
from tensorflow import keras

class Sampling(layers.Layer):
    """Draws a latent vector z from the Gaussian given by (z_mean, z_log_var)."""

    def call(self, inputs):
        mean, log_var = inputs
        # Noise is drawn with the same dynamic (batch, dim) shape as the mean.
        mean_shape = tf.shape(mean)
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log_var) converts the log-variance to a standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise


class Encoder(layers.Layer):
    """Encodes an input batch into the triplet (z_mean, z_log_var, z)."""

    def __init__(self, latent_dim=32, intermediate_dim=64, name="encoder", **kwargs):
        super().__init__(name=name, **kwargs)
        # One shared hidden projection feeds two parallel linear heads.
        self.dense_proj = layers.Dense(intermediate_dim, activation="relu")
        self.dense_mean = layers.Dense(latent_dim)
        self.dense_log_var = layers.Dense(latent_dim)
        self.sampling = Sampling()

    def call(self, inputs):
        hidden = self.dense_proj(inputs)
        z_mean, z_log_var = self.dense_mean(hidden), self.dense_log_var(hidden)
        # z is sampled from the distribution described by the two heads.
        return z_mean, z_log_var, self.sampling((z_mean, z_log_var))


class Decoder(layers.Layer):
    """Converts z, the encoded digit vector, back into a readable digit.

    Args:
        original_dim: Width of the reconstructed output vector.
        intermediate_dim: Width of the hidden ReLU projection.
        name: Layer name forwarded to `layers.Layer`.
        **kwargs: Extra keyword arguments forwarded to `layers.Layer`.
    """

    def __init__(self, original_dim, intermediate_dim=64, name="decoder", **kwargs):
        super(Decoder, self).__init__(name=name, **kwargs)
        self.dense_proj = layers.Dense(intermediate_dim, activation="relu")
        # Sigmoid keeps every reconstructed value in [0, 1].
        self.dense_output = layers.Dense(original_dim, activation="sigmoid")

    def call(self, inputs):
        """Return the reconstruction for the latent batch `inputs`."""
        # The debugging prints that used to live here were removed: they fired
        # on every call/trace and flooded the training output.
        x = self.dense_proj(inputs)
        return self.dense_output(x)


class VariationalAutoEncoder(keras.Model):
    """End-to-end VAE: an Encoder followed by a Decoder, with a KL penalty."""

    def __init__(
        self,
        original_dim,
        intermediate_dim=64,
        latent_dim=32,
        name="autoencoder",
        **kwargs
    ):
        super().__init__(name=name, **kwargs)
        self.original_dim = original_dim
        self.encoder = Encoder(latent_dim=latent_dim, intermediate_dim=intermediate_dim)
        self.decoder = Decoder(original_dim, intermediate_dim=intermediate_dim)

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstructed = self.decoder(z)
        # KL divergence regularizer, registered as an extra model loss so the
        # training loop can pick it up from `self.losses`.
        kl_terms = z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1
        self.add_loss(-0.5 * tf.reduce_mean(kl_terms))
        return reconstructed

In [None]:
import tensorflow as tf
# Model: 784-dimensional inputs, 64 hidden units, 32 latent dimensions.
original_dim = 784
vae = VariationalAutoEncoder(original_dim, intermediate_dim=64, latent_dim=32)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
mse_loss_fn = tf.keras.losses.MeanSquaredError()

# Running mean of the per-step training loss (never reset between epochs).
loss_metric = tf.keras.metrics.Mean()

# Only the training images are used; labels and the test split are discarded.
(x_train, _), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype("float32") / 255

train_dataset = (
    tf.data.Dataset.from_tensor_slices(x_train)
    .shuffle(buffer_size=1024)
    .batch(64)
)

epochs = 2

for epoch in range(epochs):
    print("Start of epoch %d" % (epoch,))

    for step, x_batch_train in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            reconstructed = vae(x_batch_train)
            # Reconstruction error plus the KL term registered by the model.
            loss = mse_loss_fn(x_batch_train, reconstructed) + sum(vae.losses)

        weights = vae.trainable_weights
        grads = tape.gradient(loss, weights)
        optimizer.apply_gradients(zip(grads, weights))

        loss_metric(loss)

        # Report progress every 100 steps.
        if step % 100 == 0:
            print("step %d: mean loss = %.4f" % (step, loss_metric.result()))

In [None]:
%pip install spektral

In [38]:
"""
This example shows how to define your own dataset and use it to train a
non-trivial GNN with message-passing and pooling layers.
The script also shows how to implement fast training and evaluation functions
in disjoint mode, with early stopping and accuracy monitoring.
The dataset that we create is a simple synthetic task in which we have random
graphs with randomly-colored nodes. The goal is to classify each graph with the
color that occurs the most on its nodes. For example, given a graph with 2
colors and 3 nodes:
x = [[1, 0],
     [1, 0],
     [0, 1]],
the corresponding target will be [1, 0].
"""

import numpy as np
import scipy.sparse as sp
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

from spektral.data import Dataset, Graph, DisjointLoader, SingleLoader
from spektral.layers import GCSConv, GlobalAvgPool, GCNConv
from spektral.layers.pooling import TopKPool
from spektral.transforms.normalize_adj import NormalizeAdj

################################################################################
# PARAMETERS
################################################################################
# `epochs` and `batch_size` are passed to the DisjointLoaders built at the end
# of this cell; `learning_rate` and `es_patience` are not referenced again in
# this cell.
learning_rate = 1e-2       # Learning rate
epochs = 400               # Number of training epochs
es_patience = 10           # Patience for early stopping
batch_size = 32            # Batch size


################################################################################
# LOAD DATA
################################################################################
class MyDataset(Dataset):
    """
    A dataset of random colored graphs.
    The task is to classify each graph with the color which occurs the most in
    its nodes.
    The graphs have `n_colors` colors and a node count drawn uniformly from
    `n_min` (inclusive) to `n_max` (exclusive); each entry of the random
    adjacency mask is set with probability `p` before symmetrization.

    Args:
        n_samples: Number of graphs to generate.
        n_colors: Number of node colors (also the one-hot feature width).
        n_min: Smallest possible number of nodes.
        n_max: Exclusive upper bound on the number of nodes.
        p: Probability used when sampling the adjacency mask.
        **kwargs: Forwarded to the `Dataset` constructor.
    """
    def __init__(self, n_samples, n_colors=3, n_min=10, n_max=100, p=0.1, **kwargs):
        # Attributes are set before the base constructor runs so that they are
        # available to `read()`.
        self.n_samples = n_samples
        self.n_colors = n_colors
        self.n_min = n_min
        self.n_max = n_max
        self.p = p
        super().__init__(**kwargs)

    def read(self):
        """Generate and return `n_samples` random `Graph` objects."""
        def make_graph():
            n = np.random.randint(self.n_min, self.n_max)
            colors = np.random.randint(0, self.n_colors, size=n)

            # Node features: one-hot encoding of each node's color
            x = np.zeros((n, self.n_colors))
            x[np.arange(n), colors] = 1

            # Edges: random mask, symmetrized so the graph is undirected
            a = np.random.rand(n, n) <= self.p
            a = np.maximum(a, a.T).astype(int)
            a = sp.csr_matrix(a)

            # Labels: one-hot of the most frequent color (first one on ties)
            y = np.zeros((self.n_colors, ))
            color_counts = x.sum(0)
            y[np.argmax(color_counts)] = 1

            # The label must be attached to the graph; without it the dataset
            # has no targets and `n_labels` cannot be derived from it.
            return Graph(x=x, a=a, y=y)

        # We must return a list of Graph objects
        return [make_graph() for _ in range(self.n_samples)]


# 1000 random graphs, with NormalizeAdj passed as the dataset transform.
dataset = MyDataset(1000, transforms=NormalizeAdj())

# Parameters
F = dataset.n_node_features  # Dimension of node features
n_out = dataset.n_labels     # Dimension of the target

# Train/valid/test split: 80% / 10% / 10% of a random index permutation
n_graphs = len(dataset)
idxs = np.random.permutation(n_graphs)
split_va = int(0.8 * n_graphs)
split_te = int(0.9 * n_graphs)
idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
dataset_tr, dataset_va, dataset_te = (
    dataset[subset_idx] for subset_idx in (idx_tr, idx_va, idx_te)
)

# Only the training loader is limited to `epochs` passes.
loader_tr = DisjointLoader(dataset_tr, batch_size=batch_size, epochs=epochs)
loader_va, loader_te = (
    DisjointLoader(subset, batch_size=batch_size)
    for subset in (dataset_va, dataset_te)
)

In [14]:
# Parameters
# These module-level values are read by the GNN class defined below
# (`dropout`, `channels`, `l2_reg`, `F`) and by the compile call in the next
# cell (`learning_rate`).
channels = 16          # Number of channels in the first layer
dropout = 0.5          # Dropout rate for the features
l2_reg = 5e-4 / 2      # L2 regularization rate
learning_rate = 1e-2   # Learning rate
epochs = 200           # Number of training epochs
patience = 10          # Patience for early stopping
# Requires `dataset` to already exist in the kernel (created in an earlier cell).
a_dtype = dataset[0].a.dtype  # Only needed for TF 2.1

# NOTE(review): `F` overrides the value derived from
# `dataset.n_node_features` in the earlier cell and is used as the output
# width of the second GCNConv below - confirm that 5 is intended.
N = 51          # Number of nodes in the graph
F = 5           # Original size of node features
y = 5           # Label

# GNN Class Definition
class GNN(Model):
    """Two-layer GCN: dropout -> GCNConv(relu) -> dropout -> GCNConv(softmax).

    Layer sizes and rates come from the module-level `channels`, `dropout`,
    `l2_reg` and `F` values in effect when the model is instantiated.
    """

    def __init__(self):
        super(GNN, self).__init__()
        self.do_1 = Dropout(dropout)
        self.gc_1 = GCNConv(
            channels=channels,
            activation='relu',
            kernel_regularizer=l2(l2_reg),
            use_bias=False,
        )
        self.do_2 = Dropout(dropout)
        # The output layer has `F` channels.
        self.gc_2 = GCNConv(
            channels=F,
            activation='softmax',
            use_bias=False,
        )

    def call(self, inputs):
        """Apply the network to `inputs`.

        Args:
            inputs: Indexable whose element 0 is the node-feature tensor and
                element 1 is the adjacency passed to both convolutions.

        Returns:
            The output of the second GCNConv layer.
        """
        x_in = inputs[0]
        a_in = inputs[1]
        x = self.do_1(x_in)
        x = self.gc_1([x, a_in])
        x = self.do_2(x)
        return self.gc_2([x, a_in])

In [None]:
# Instantiate the GCN and compile it with Adam and a mean-squared-error loss.
gnn = GNN()
gnn.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['mse'],
)

# Train for 5 epochs on the first graph of `dataset` via a SingleLoader.
loader = SingleLoader([dataset[0]])
gnn.fit(loader.load(), steps_per_epoch=loader.steps_per_epoch, epochs=5)

In [47]:
"""
This example shows how to define your own dataset and use it to train a
non-trivial GNN with message-passing and pooling layers.
The script also shows how to implement fast training and evaluation functions
in disjoint mode, with early stopping and accuracy monitoring.
The dataset that we create is a simple synthetic task in which we have random
graphs with randomly-colored nodes. The goal is to classify each graph with the
color that occurs the most on its nodes. For example, given a graph with 2
colors and 3 nodes:
x = [[1, 0],
     [1, 0],
     [0, 1]],
the corresponding target will be [1, 0].
"""

import numpy as np
import scipy.sparse as sp
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from spektral.data import Dataset, Graph, SingleLoader
from spektral.layers import GCSConv, GlobalAvgPool
from spektral.layers.pooling import TopKPool
from spektral.transforms.normalize_adj import NormalizeAdj

################################################################################
# PARAMETERS
################################################################################
# `epochs` is passed to the training SingleLoader and `learning_rate` to the
# Adam optimizer later in this cell; `es_patience` and `batch_size` are not
# referenced again in this cell.
learning_rate = 1e-2       # Learning rate
epochs = 400               # Number of training epochs
es_patience = 10           # Patience for early stopping
batch_size = 32            # Batch size


################################################################################
# LOAD DATA
################################################################################
class MyDataset(Dataset):
    """
    Synthetic dataset of random graphs with randomly colored nodes.
    Every graph is labelled with the color that appears on most of its nodes.
    Node counts are drawn from `n_min` (inclusive) up to `n_max` (exclusive),
    node colors from `n_colors` choices, and the adjacency mask is sampled
    with probability `p` per entry before being symmetrized.
    """
    def __init__(self, n_samples, n_colors=3, n_min=10, n_max=100, p=0.1, **kwargs):
        self.n_samples, self.n_colors = n_samples, n_colors
        self.n_min, self.n_max = n_min, n_max
        self.p = p
        super().__init__(**kwargs)

    def _make_graph(self):
        """Build one random graph with one-hot node colors and its label."""
        n_nodes = np.random.randint(self.n_min, self.n_max)
        node_colors = np.random.randint(0, self.n_colors, size=n_nodes)

        # Node features: row i is the one-hot encoding of node i's color.
        x = np.eye(self.n_colors)[node_colors]

        # Edges: random boolean mask OR-ed with its transpose (undirected).
        edge_mask = np.random.rand(n_nodes, n_nodes) <= self.p
        a = sp.csr_matrix((edge_mask | edge_mask.T).astype(int))

        # Label: one-hot of the most frequent color (first index on ties).
        majority = np.argmax(x.sum(0))
        y = (np.arange(self.n_colors) == majority).astype(float)

        return Graph(x=x, a=a, y=y)

    def read(self):
        # The base class expects a list of Graph objects.
        return [self._make_graph() for _ in range(self.n_samples)]


# 1000 random graphs, with NormalizeAdj passed as the dataset transform.
dataset = MyDataset(1000, transforms=NormalizeAdj())

# Parameters
F = dataset.n_node_features  # Dimension of node features
n_out = dataset.n_labels     # Dimension of the target

# Train/valid/test split: 80% / 10% / 10% of a random index permutation
n_graphs = len(dataset)
idxs = np.random.permutation(n_graphs)
split_va = int(0.8 * n_graphs)
split_te = int(0.9 * n_graphs)
idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
dataset_tr, dataset_va, dataset_te = (
    dataset[subset_idx] for subset_idx in (idx_tr, idx_va, idx_te)
)

# Each loader wraps only the first graph of its split.
loader_tr = SingleLoader([dataset_tr[0]], epochs=epochs)
loader_va, loader_te = (
    SingleLoader([subset[0]]) for subset in (dataset_va, dataset_te)
)

################################################################################
# BUILD (unnecessarily big) MODEL
################################################################################
# Symbolic inputs: node features, sparse adjacency and integer segment ids.
X_in = Input(shape=(F, ), name='X_in')
A_in = Input(shape=(None,), sparse=True)
I_in = Input(shape=(), name='segment_ids_in', dtype=tf.int32)

# Three GCSConv blocks. Each of the first two is followed by a TopKPool whose
# three outputs (features, adjacency, segment ids) feed the next block.
X_1 = GCSConv(32, activation='relu')([X_in, A_in])
X_1, A_1, I_1 = TopKPool(ratio=0.5)([X_1, A_in, I_in])
X_2 = GCSConv(32, activation='relu')([X_1, A_1])
X_2, A_2, I_2 = TopKPool(ratio=0.5)([X_2, A_1, I_1])
X_3 = GCSConv(32, activation='relu')([X_2, A_2])
# Global average pooling using the segment ids from the last pooling step.
X_3 = GlobalAvgPool()([X_3, I_2])
output = Dense(n_out, activation='softmax')(X_3)

# Build model
model = Model(inputs=[X_in, A_in, I_in], outputs=output)
# Use the `learning_rate` keyword: `lr` is a deprecated alias, and the other
# cells in this notebook already construct Adam with `learning_rate=`.
opt = Adam(learning_rate=learning_rate)
loss_fn = CategoricalCrossentropy()
acc_fn = CategoricalAccuracy()


In [None]:
# Keras documents `metrics` as a list, so the single metric object is wrapped
# in one rather than passed bare.
model.compile(optimizer=opt, loss=loss_fn, metrics=[acc_fn])
# One Keras epoch consumes `steps_per_epoch` batches from the loader's dataset.
model.fit(loader_tr.load(), steps_per_epoch=loader_tr.steps_per_epoch, epochs=3)

In [71]:
import spektral
# Scratch experiment: inspect how a small adjacency matrix is stored as CSC.
MPNN = spektral.layers.MessagePassing(aggregate='sum')
adjacency_dense = np.array([
    [0, 1, 0, 1, 1],
    [1, 0, 1, 0, 0],
    [0, 1, 0, 0, 0],
    [1, 0, 0, 0, 0],
    [1, 0, 0, 0, 0],
])
print(adjacency_dense)
x = np.array([[1, 1], [1, 2], [2, 4], [3, 2], [3, 1]])
a = sp.csc_matrix(adjacency_dense)
# Disabled experiment: t = MPNN.propagate(x, a)
print(a.indices)
print(a)

[[0 1 0 1 1]
 [1 0 1 0 0]
 [0 1 0 0 0]
 [1 0 0 0 0]
 [1 0 0 0 0]]
[1 3 4 0 2 1 0 0]
  (1, 0)	1
  (3, 0)	1
  (4, 0)	1
  (0, 1)	1
  (2, 1)	1
  (1, 2)	1
  (0, 3)	1
  (0, 4)	1


In [73]:
# Sample a random symmetric 0/1 adjacency for 5 nodes and inspect its CSR
# column indices.
n = 5
edge_mask = np.random.rand(n, n) <= 0.6
print(edge_mask.shape)
# OR-ing the mask with its transpose makes the matrix symmetric.
a = sp.csr_matrix((edge_mask | edge_mask.T).astype(int))
print(a.indices)

(5, 5)
[0 1 3 4 0 4 0 3 4 0 1 3]


In [74]:
# Print the sparse matrix `a` as one "(row, col)  value" line per stored
# entry. Relies on `a` still being bound from the previous cell.
print(a)

  (0, 0)	1
  (0, 1)	1
  (0, 3)	1
  (0, 4)	1
  (1, 0)	1
  (1, 4)	1
  (3, 0)	1
  (3, 3)	1
  (3, 4)	1
  (4, 0)	1
  (4, 1)	1
  (4, 3)	1
