In [1]:
import pandas as pd
import torch
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dropout, Input
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from spektral.layers import GATConv
from spektral.transforms import AdjToSpTensor, LayerPreprocess
from spektral.utils import tic, toc
from spektral.data import Dataset , Graph
import os
import networkx as nx
from spektral.data import DisjointLoader
from spektral.data import SingleLoader

from scipy.sparse import coo_matrix
tf.random.set_seed(0)
l2_reg = 2.5e-4

In [2]:
class MyDataset(Dataset):
    """
    A dataset of five random graphs.
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
    def read(self):
        # We must return a list of Graph objects
        df = pd.read_csv('all.csv').dropna(axis=0,how='any')
        ac = df.groupby(df['standwords_id'])
        output = []
        for i in ac:
            #print(i[1]['standwords_vector'].values[0])
            data_x = i[1]['vector'].apply(lambda x :eval(str(x))).tolist()
            data_x.append(eval(str(i[1]['standwords_vector'].values[0])))
            data_x = np.array(data_x,dtype='float32')
            #print(type(data_x))
            len_1 = len(data_x)
            #data_a = [0]*(len_1-1)*len_1+[1]*(len_1-1)+[0]
            data_a = ([0]*(len_1-1)+[1])*len_1
            data_a = np.array(data_a,dtype='float32').reshape(len_1,-1)
            # print(data_a)
            data_y = [eval(i[1]['label'].tolist()[0])]*len_1
            data_y = np.array(data_y,dtype='float32')
            output.append(
                Graph(x=data_x, a=data_a, y=data_y)
            )
        return output

In [3]:
dataset = MyDataset()

In [4]:
len(dataset)

151

In [5]:
# Define model

x_in = Input(shape=(dataset.n_node_features,))
a_in = Input(shape=(None,), sparse=True)
#a2_in = Input(shape=(None,), sparse=True)

x_2 = Dropout(0.6)(x_in)
x_2 = GATConv(
    dataset.n_labels,
    attn_heads=1,
    concat_heads=False,
    dropout_rate=0.6,
    activation="softmax",
    kernel_regularizer=l2(l2_reg),
    attn_kernel_regularizer=l2(l2_reg),
    bias_regularizer=l2(l2_reg),
)([x_2, a_in])

# Build model
model = Model(inputs=[x_in, a_in], outputs=x_2)
optimizer = Adam(lr=5e-3)
loss_fn = CategoricalCrossentropy()
acc_fn = CategoricalAccuracy()



In [6]:
def getnum(x):
    if int(x)>63:
        return int(x)-2
    else:
        return int(x)-1

class MyDataset_2(Dataset):
    """
    A dataset of five random graphs.
    """
    def __init__(self, nodes , y , **kwargs):
        self.nodes = nodes
        self.y = y
        super().__init__(**kwargs)
    def read(self):
        # We must return a list of Graph objects
        edge = pd.read_csv('event_edge.csv',names=['id','from','to'])
        edge['row'] = edge['from'].apply(lambda x:getnum(x))
        edge['col'] = edge['to'].apply(lambda x:getnum(x))
        len_1 = len(edge)
        output = []
        data_x = np.array(self.nodes,dtype='float32')
        row  = np.array(list(edge['row']))
        col  = np.array(list(edge['col']))
        data = np.array([1]*len_1)
        A = coo_matrix((data, (row, col)), shape=(151,151)).tocsr()
        data_a = A.toarray().astype('float32')
        data_y = np.array(self.y,dtype='float32')
        output.append(
            Graph(x=data_x, a=data_a, y=data_y)
        )
        return output

In [7]:
# Define model_1

x_in_2 = Input(shape=(151,))
a_in_2 = Input(shape=(None,), sparse=True)
#a2_in = Input(shape=(None,), sparse=True)
x2_1 = Dropout(0.6)(x_in_2)
x2_1 = GATConv(
    8,
    attn_heads=1,
    concat_heads=True,
    dropout_rate=0.6,
    activation="elu",
    kernel_regularizer=l2(l2_reg),
    attn_kernel_regularizer=l2(l2_reg),
    bias_regularizer=l2(l2_reg),
)([x2_1, a_in_2])
x2_2 = Dropout(0.6)(x2_1)
x2_2 = GATConv(
    dataset.n_labels,
    attn_heads=1,
    concat_heads=False,
    dropout_rate=0.6,
    activation="softmax",
    kernel_regularizer=l2(l2_reg),
    attn_kernel_regularizer=l2(l2_reg),
    bias_regularizer=l2(l2_reg),
)([x2_2, a_in_2])

# Build model
model_2 = Model(inputs=[x_in_2, a_in_2], outputs=x2_2)
optimizer_2 = Adam(lr=5e-3)
loss_fn_2 = CategoricalCrossentropy()
acc_fn_2 = CategoricalAccuracy()

In [8]:
# @tf.function(input_signature=loader.tf_signature())
# def train_step(inputs, target):
#     with tf.GradientTape() as tape:
#         predictions = model(inputs, training=True)
#         loss = loss_fn(target, predictions) + sum(model.losses)
#     gradients = tape.gradient(loss, model.trainable_variables)
#     optimizer.apply_gradients(zip(gradients, model.trainable_variables))

In [9]:
# for epoch in range(100):
#     data_a2 = []
#     for batch in enumerate(loader):
#         inputs, target = batch
        
#         with tf.GradientTape() as tape:
#             predictions = model(inputs)
#             print(torch.from_numpy(target).squeeze(0).size())
#             print(list(predictions[-1].numpy()))
#             loss = loss_fn(torch.from_numpy(target).squeeze(0), predictions)
#             print(loss)
#             loss += sum(model.losses)
#         gradients = tape.gradient(loss, model.trainable_variables)
#         optimizer.apply_gradients(zip(gradients, model.trainable_variables))
#         break
#     if epoch%2==1:
#         predictions = model(inputs, training=False)
#         loss = loss_fn(torch.from_numpy(target).squeeze(0), predictions)
#         print(loss)
#     break

In [10]:
loader = DisjointLoader(dataset, batch_size=1,epochs=1)
for epoch in range(100):
    data_a2 = []
    data_y2 = []
    for batch in loader:
        inputs, target = batch
        with tf.GradientTape() as tape:
            predictions = model(inputs)
            #print(torch.from_numpy(target).squeeze(0).size())
            # print(predictions[-1].numpy())
            data_a2.append(list(predictions[-1].numpy()))
            data_y2.append(torch.from_numpy(target).squeeze(0)[-1].tolist())
            loss = loss_fn(torch.from_numpy(target).squeeze(0), predictions)
            loss += sum(model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    dataset_2 = MyDataset_2(data_a2,data_y2)
    print(dataset_2)
    loader_2 = SingleLoader(dataset_2,epochs=1)
    print(loader_2.load())
    for batch_2 in loader_2:
        inputs_2, target_2 = batch_2
        with tf.GradientTape() as tape_2:
            predictions_2 = model_2(inputs_2,training=True)
            loss_2 = loss_fn_2(torch.from_numpy(target_2).squeeze(0), predictions_2)
            loss_2 += sum(model_2.losses)
        gradients_2 = tape_2.gradient(loss_2, model_2.trainable_variables)
        optimizer_2.apply_gradients(zip(gradients_2, model_2.trainable_variables))

MyDataset_2(n_graphs=1)
<RepeatDataset shapes: (((151, 151), (151, 151)), (151, 151)), types: ((tf.float32, tf.float32), tf.float32)>
MyDataset_2(n_graphs=1)
<RepeatDataset shapes: (((0,), (151, 151)), (0,)), types: ((tf.float32, tf.float32), tf.float32)>


ValueError: Unknown mode for inputs x, a with ranks 1 and 2respectively.

In [10]:
loader = DisjointLoader(dataset, batch_size=1,epochs=100)
loader_3 = DisjointLoader(dataset, batch_size=1,epochs=1)
for batch in loader:
    inputs, target = batch
    with tf.GradientTape() as tape:
        predictions = model(inputs)
        #print(torch.from_numpy(target).squeeze(0).size())
        # print(predictions[-1].numpy())
        loss = loss_fn(torch.from_numpy(target).squeeze(0), predictions)
        loss += sum(model.losses)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

In [11]:
data_a2 = []
data_y2 = []
for batch_3 in loader_3:
    inputs, target = batch_3
    predictions = model(inputs)
    data_a2.append(list(predictions[-1].numpy()))
    data_y2.append(torch.from_numpy(target).squeeze(0)[-1].tolist())

In [15]:
dataset_2 = MyDataset_2(data_a2,data_y2)
print(dataset_2)

loader_2 = SingleLoader(dataset_2,epochs=1000)
print(loader_2.load())
for batch_2 in loader_2:
    inputs_2, target_2 = batch_2
    with tf.GradientTape() as tape_2:
        predictions_2 = model_2(inputs_2,training=True)
        loss_2 = loss_fn_2(torch.from_numpy(target_2).squeeze(0), predictions_2)
        loss_2 += sum(model_2.losses)
    gradients_2 = tape_2.gradient(loss_2, model_2.trainable_variables)
    optimizer_2.apply_gradients(zip(gradients_2, model_2.trainable_variables))
    print(loss_2)

MyDataset_2(n_graphs=1)
<RepeatDataset shapes: (((151, 151), (151, 151)), (151, 151)), types: ((tf.float32, tf.float32), tf.float32)>
tf.Tensor(4.7835197, shape=(), dtype=float32)
tf.Tensor(4.935944, shape=(), dtype=float32)
tf.Tensor(4.8731656, shape=(), dtype=float32)
tf.Tensor(4.8442597, shape=(), dtype=float32)
tf.Tensor(4.8413787, shape=(), dtype=float32)
tf.Tensor(5.0061474, shape=(), dtype=float32)
tf.Tensor(4.87539, shape=(), dtype=float32)
tf.Tensor(4.906196, shape=(), dtype=float32)
tf.Tensor(4.8235936, shape=(), dtype=float32)
tf.Tensor(4.794607, shape=(), dtype=float32)
tf.Tensor(4.8746176, shape=(), dtype=float32)
tf.Tensor(4.777262, shape=(), dtype=float32)
tf.Tensor(4.922296, shape=(), dtype=float32)
tf.Tensor(4.915993, shape=(), dtype=float32)
tf.Tensor(4.7875113, shape=(), dtype=float32)
tf.Tensor(4.8926888, shape=(), dtype=float32)
tf.Tensor(4.764992, shape=(), dtype=float32)
tf.Tensor(4.918975, shape=(), dtype=float32)
tf.Tensor(4.8604474, shape=(), dtype=float32)
tf

tf.Tensor(4.875146, shape=(), dtype=float32)
tf.Tensor(4.9599123, shape=(), dtype=float32)
tf.Tensor(4.7935963, shape=(), dtype=float32)
tf.Tensor(4.8462954, shape=(), dtype=float32)
tf.Tensor(4.9455657, shape=(), dtype=float32)
tf.Tensor(4.8374753, shape=(), dtype=float32)
tf.Tensor(4.810832, shape=(), dtype=float32)
tf.Tensor(4.902793, shape=(), dtype=float32)
tf.Tensor(4.945718, shape=(), dtype=float32)
tf.Tensor(4.865675, shape=(), dtype=float32)
tf.Tensor(4.836469, shape=(), dtype=float32)
tf.Tensor(4.929747, shape=(), dtype=float32)
tf.Tensor(4.867347, shape=(), dtype=float32)
tf.Tensor(4.8001566, shape=(), dtype=float32)
tf.Tensor(4.7955136, shape=(), dtype=float32)
tf.Tensor(4.8464236, shape=(), dtype=float32)
tf.Tensor(4.8274975, shape=(), dtype=float32)
tf.Tensor(4.8173947, shape=(), dtype=float32)
tf.Tensor(4.850119, shape=(), dtype=float32)
tf.Tensor(4.912563, shape=(), dtype=float32)
tf.Tensor(5.0298624, shape=(), dtype=float32)
tf.Tensor(4.846555, shape=(), dtype=float32)

tf.Tensor(4.817904, shape=(), dtype=float32)
tf.Tensor(4.8400884, shape=(), dtype=float32)
tf.Tensor(4.799411, shape=(), dtype=float32)
tf.Tensor(4.7978697, shape=(), dtype=float32)
tf.Tensor(4.8810477, shape=(), dtype=float32)
tf.Tensor(4.8965254, shape=(), dtype=float32)
tf.Tensor(4.8018126, shape=(), dtype=float32)
tf.Tensor(4.913488, shape=(), dtype=float32)
tf.Tensor(4.986847, shape=(), dtype=float32)
tf.Tensor(4.670608, shape=(), dtype=float32)
tf.Tensor(4.803956, shape=(), dtype=float32)
tf.Tensor(4.710813, shape=(), dtype=float32)
tf.Tensor(4.8539267, shape=(), dtype=float32)
tf.Tensor(4.8088775, shape=(), dtype=float32)
tf.Tensor(5.0432763, shape=(), dtype=float32)
tf.Tensor(4.8401933, shape=(), dtype=float32)
tf.Tensor(4.8686485, shape=(), dtype=float32)
tf.Tensor(4.833335, shape=(), dtype=float32)
tf.Tensor(4.948646, shape=(), dtype=float32)
tf.Tensor(4.986488, shape=(), dtype=float32)
tf.Tensor(4.803526, shape=(), dtype=float32)
tf.Tensor(4.80003, shape=(), dtype=float32)
t