<a href="https://colab.research.google.com/github/GeorgeM2000/DMTE/blob/master/code/DMTE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Libraries & Tools***

In [5]:
import argparse
import os
import random
import numpy as np
import tensorflow as tf
from math import pow
from datetime import datetime
from pathlib import Path
import shutil
from tqdm import tqdm
import zipfile
from collections import defaultdict
from sklearn.preprocessing import normalize

In [None]:
!pwd

# ***Global Variables***

In [76]:
MAX_LEN=300
neg_table_size=1000000
NEG_SAMPLE_POWER=0.75
batch_size=64
num_epoch=200 # Default: 200
embed_size=200
word_embed_size=200
lr=1e-3

In [103]:
datasetName = "cora"
dataTextFile = "data.txt"
ratio = 0.15
alpha = 0.3
beta = 0.1
gamma = 0.0
delta = 0.0

In [None]:
max_word_count = 0
min_word_count = float('inf')

with open(f'/content/datasets/{datasetName}/{dataTextFile}', 'r') as file:
    for line in file:
        word_count = len(line.split())

        if word_count > max_word_count:
            max_word_count = word_count

        if word_count < min_word_count:
            min_word_count = word_count

print("Max word count:", max_word_count)
print("Min word count:", min_word_count)


MAX_LEN = max_word_count + 1

Max word count: 473
Min word count: 10


In [None]:
zip_file_path = '/content/data.zip'
extract_to = f'/content/datasets/{datasetName}'

# Open and extract the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print("Extraction complete!")

Extraction complete!


In [68]:
def sub_Mat(P, node):

    sub_P = np.zeros((len(node),len(node)))
    for i in range(len(node)):
        for j in range(len(node)):
            sub_P[i,j] = P[node[i],node[j]]

    return sub_P

# ***DataSet***

In [69]:
class dataSet:
    def __init__(self, text_path, graph_path, labels_path=None):

        #text_file, graph_file, labels_file = self.load(text_path, graph_path, labels_path)
        text_file, graph_file = self.load(text_path, graph_path)

        self.edges = self.load_edges(graph_file)

        #self.labels = self.load_labels(labels_file)

        self.text, self.num_vocab, self.num_nodes = self.load_text(text_file)

        self.nodes = range(0, self.num_nodes)

        self.negative_table = InitNegTable(self.edges)

        self.P = self.P_matrix(self.edges, self.num_nodes)

    """
    def load(self, text_path, graph_path, labels_path):
        text_file = open(text_path, 'rb').readlines()
        graph_file = open(graph_path, 'rb').readlines()
        labels_file = open(labels_path, 'rb').readlines()

        return text_file, graph_file, label_file
    """

    def load(self, text_path, graph_path):
        text_file = open(text_path, 'rb').readlines()
        graph_file = open(graph_path, 'rb').readlines()

        return text_file, graph_file

    def load_edges(self, graph_file):
        edges = []
        for i in graph_file:
            if np.random.uniform(0.0, 1.0) <= ratio:
                edges.append(list(map(int, i.strip().decode().split('\t'))))

        return edges

    def load_labels(self, labels_file):
      labels = []
      for i in labels_file:
        labels.append(list(map(int, i.strip().decode().split(','))))

      return labels

    def adj_list(self, edges):
      node1, node2 = zip(*edges)

      # Create adjacency list
      adj_list = defaultdict(list)
      for n1, n2 in edges:
          adj_list[n1].append(n2)
          adj_list[n2].append(n1)

      return dict(adj_list)

    def load_text(self, text_file):
        """
        Adapting with adapt(text_data):

        vectorize_layer.adapt(text_data) analyzes text_data, builds a vocabulary, and assigns a unique integer ID to each word based on its frequency (most frequent words get lower IDs).
        Transforming with vectorize_layer(text_data):

        This maps each word in text_data to its corresponding integer token ID, producing a 2D array where each row represents a sequence of token IDs for a given input line, padded or truncated to max_len.
        """
        vectorize_layer = tf.keras.layers.TextVectorization(
            max_tokens=None,  # Set a limit if needed
            output_mode='int',
            output_sequence_length=MAX_LEN
        )

        text_data = [line.strip() for line in text_file]

        vectorize_layer.adapt(text_data)

        text = vectorize_layer(text_data).numpy()

        num_vocab = len(vectorize_layer.get_vocabulary())
        print(f'Vocabulary: {num_vocab}')
        num_nodes = len(text)

        return text, num_vocab, num_nodes

    def negative_sample(self, edges):
        # edges is the sample_edges in self.generate_batches()
        node1, node2 = zip(*edges)
        sample_edges = []

        # The negative table contains edges that don not exist
        func = lambda: self.negative_table[random.randint(0, neg_table_size - 1)] # Pick a random node from the negative table

        # For each edge...
        for i in range(len(edges)):
            neg_node = func() # Pick a negative node

            # If the negative node is identical to the first and second node in the current edge...
            while node1[i] == neg_node or node2[i] == neg_node:
                neg_node = func() # Pick another negative node until the neg node is different than the first and second node in the current edge

            # Create a new type of edge that has an additional node, the negative node
            sample_edges.append([node1[i], node2[i], neg_node])

        return sample_edges

    def generate_batches(self, mode=None):

        num_batch = len(self.edges) // batch_size
        edges = self.edges
        if mode == 'add':
            num_batch += 1
            edges.extend(edges[:(batch_size - len(self.edges) % batch_size)])
        if mode != 'add':
            random.shuffle(edges)
        sample_edges = edges[:num_batch * batch_size]

        # For each edge in "sample_edges", add a negative edge
        sample_edges = self.negative_sample(sample_edges)

        # Create batches of edges

        """
        The first batch range is 0 -- batch_size - 1
        The second batch range is batch_size -- 2 * batch_size - 1
        The third batch range is 2 * batch_size -- 3* batch_size - 1 and so on
        """
        batches = []
        for i in range(num_batch):
            batches.append(sample_edges[i * batch_size:(i + 1) * batch_size])

        return batches

    def nodes_batches(self, mode=None):

        num_batch = len(self.nodes) // batch_size
        nodes = self.nodes
        if mode == 'add':
            num_batch += 1
            nodes.extend(nodes[:(batch_size - len(self.nodes) % batch_size)])
            random.shuffle(nodes)
        if mode != 'add':
            random.shuffle(nodes)
        sample_nodes = nodes[:num_batch * batch_size]

        batches = []
        for i in range(num_batch):
            batches.append(sample_nodes[i * batch_size:(i + 1) * batch_size])
        return batches

    def P_matrix(self, edges, num_nodes):
        # Take all the edges
        a_list,b_list=zip(*edges)
        a_list=list(a_list) # This list contains the first nodes in all edges
        b_list=list(b_list) # This list contains the second nodes in all edges

        P = np.zeros((num_nodes,num_nodes))

        for i in range(len(a_list)):
            P[a_list[i],b_list[i]]=1 # The prob of transitioning from "a_list[i]" to "b_list[i]". Initially it's 1 for unweighted graphs
            P[b_list[i],a_list[i]]=1 # The prob of transitioning from "b_list[i]" to "a_list[i]". Initially it's 1 for unweighted graphs

        P = normalize(P, axis=1, norm='l1') # We normalize P

        return P

# ***DMTE***

In [100]:
class Model:
    def __init__(self, vocab_size, num_nodes, num_labels=None):
        # '''hyperparameter'''
        with tf.name_scope('read_inputs') as scope:
            self.Text_a = tf.compat.v1.placeholder(tf.int32, [batch_size, MAX_LEN], name='Ta')
            self.Text_b = tf.compat.v1.placeholder(tf.int32, [batch_size, MAX_LEN], name='Tb')
            self.Text_neg = tf.compat.v1.placeholder(tf.int32, [batch_size, MAX_LEN], name='Tneg')
            self.Node_a = tf.compat.v1.placeholder(tf.int32, [batch_size], name='n1')
            self.Node_b = tf.compat.v1.placeholder(tf.int32, [batch_size], name='n2')
            self.Node_neg = tf.compat.v1.placeholder(tf.int32, [batch_size], name='n3')
            self.P_a = tf.compat.v1.placeholder(tf.float32, [batch_size, batch_size], name='Pa')
            self.P_b = tf.compat.v1.placeholder(tf.float32, [batch_size, batch_size], name='Pb')
            self.P_neg = tf.compat.v1.placeholder(tf.float32, [batch_size, batch_size], name='Pneg')
            #self.labels = tf.compat.v1.placeholder(tf.int32, [batch_size, num_labels], name='L')

        with tf.name_scope('initialize_embedding') as scope:
            self.text_embed = tf.Variable(tf.random.truncated_normal([vocab_size, word_embed_size], stddev=0.3))
            self.node_embed = tf.Variable(tf.random.truncated_normal([num_nodes, embed_size // 2], stddev=0.3))
            self.node_embed = tf.clip_by_norm(self.node_embed, clip_norm=1, axes=1)

        with tf.name_scope('lookup_embeddings') as scope:
            self.TA = tf.nn.embedding_lookup(self.text_embed, self.Text_a)
            self.T_A = tf.expand_dims(self.TA, -1)

            self.TB = tf.nn.embedding_lookup(self.text_embed, self.Text_b)
            self.T_B = tf.expand_dims(self.TB, -1)

            self.TNEG = tf.nn.embedding_lookup(self.text_embed, self.Text_neg)
            self.T_NEG = tf.expand_dims(self.TNEG, -1)

            self.N_A = tf.nn.embedding_lookup(self.node_embed, self.Node_a)
            self.N_B = tf.nn.embedding_lookup(self.node_embed, self.Node_b)
            self.N_NEG = tf.nn.embedding_lookup(self.node_embed, self.Node_neg)

        self.convA, self.convB, self.convNeg = self.conv()
        self.loss = self.compute_loss()

    def conv(self):

        W0 = tf.Variable(tf.random.truncated_normal([word_embed_size, embed_size // 2], stddev=0.3))
        W1 = tf.Variable(tf.random.truncated_normal([word_embed_size, embed_size // 2], stddev=0.3))
        W2 = tf.Variable(tf.random.truncated_normal([word_embed_size, embed_size // 2], stddev=0.3))
        W3 = tf.Variable(tf.random.truncated_normal([word_embed_size, embed_size // 2], stddev=0.3))
        W4 = tf.Variable(tf.random.truncated_normal([word_embed_size, embed_size // 2], stddev=0.3))

        mA = tf.reduce_mean(self.T_A, axis=1, keepdims=True)
        mB = tf.reduce_mean(self.T_B, axis=1, keepdims=True)
        mNEG = tf.reduce_mean(self.T_NEG, axis=1, keepdims=True)

        convA = tf.tanh(tf.squeeze(mA))
        convB = tf.tanh(tf.squeeze(mB))
        convNEG = tf.tanh(tf.squeeze(mNEG))

        attA = tf.matmul(convA, W0) + alpha * tf.matmul(tf.matmul(self.P_a, convA), W1) + beta * tf.matmul(tf.matmul(tf.math.square(self.P_a), convA), W2) + gamma * tf.matmul(tf.matmul(tf.math.pow(self.P_a, 3), convA), W3) + delta * tf.matmul(tf.matmul(tf.math.pow(self.P_a, 4), convA), W4)
        attB = tf.matmul(convB, W0) + alpha * tf.matmul(tf.matmul(self.P_b, convB), W1) + beta * tf.matmul(tf.matmul(tf.math.square(self.P_b), convB), W2) + gamma * tf.matmul(tf.matmul(tf.math.pow(self.P_b, 3), convB), W3) + delta * tf.matmul(tf.matmul(tf.math.pow(self.P_b, 4), convB), W4)
        attNEG = tf.matmul(convNEG, W0) + alpha * tf.matmul(tf.matmul(self.P_a, convNEG), W1) + beta * tf.matmul(tf.matmul(tf.math.square(self.P_a), convNEG), W2) + gamma * tf.matmul(tf.matmul(tf.math.pow(self.P_a, 3), convNEG), W3) + delta * tf.matmul(tf.matmul(tf.math.pow(self.P_a, 4), convNEG), W4)

        return attA, attB, attNEG

    def compute_loss(self):

        # Loss functions for:


        # Text-Text
        p1 = tf.math.log(tf.nn.sigmoid(tf.reduce_sum(tf.multiply(self.convA, self.convB), 1)) + 0.001)

        p2 = tf.math.log(tf.nn.sigmoid(-tf.reduce_sum(tf.multiply(self.convA, self.convNeg), 1)) + 0.001)

        p11 = tf.math.log(tf.nn.sigmoid(tf.reduce_sum(tf.multiply(self.convB, self.convA), 1)) + 0.001)

        p12 = tf.math.log(tf.nn.sigmoid(-tf.reduce_sum(tf.multiply(self.convB, self.convNeg), 1)) + 0.001)



        # Node-Node
        p3 = tf.math.log(tf.nn.sigmoid(tf.reduce_sum(tf.multiply(self.N_A + alpha * tf.matmul(self.P_a, self.N_A) + beta * tf.matmul(tf.math.square(self.P_a), self.N_A), self.N_B), 1)) + 0.001)

        p4 = tf.math.log(tf.nn.sigmoid(-tf.reduce_sum(tf.multiply(self.N_A + alpha * tf.matmul(self.P_a, self.N_A) + beta * tf.matmul(tf.math.square(self.P_a), self.N_A), self.N_NEG), 1)) + 0.001)

        p13 = tf.math.log(tf.nn.sigmoid(tf.reduce_sum(tf.multiply(self.N_B + alpha * tf.matmul(self.P_b, self.N_B) + beta * tf.matmul(tf.math.square(self.P_b), self.N_B), self.N_A), 1)) + 0.001)

        p14 = tf.math.log(tf.nn.sigmoid(-tf.reduce_sum(tf.multiply(self.N_B + alpha * tf.matmul(self.P_b, self.N_B) + beta * tf.matmul(tf.math.square(self.P_b), self.N_B), self.N_NEG), 1)) + 0.001)




        # Node-Text
        p5 = tf.math.log(tf.nn.sigmoid(tf.reduce_sum(tf.multiply(self.N_A + alpha * tf.matmul(self.P_a, self.N_A) + beta * tf.matmul(tf.math.square(self.P_a), self.N_A), self.convB), 1)) + 0.001)

        p6 = tf.math.log(tf.nn.sigmoid(-tf.reduce_sum(tf.multiply(self.N_A + alpha * tf.matmul(self.P_a, self.N_A) + beta * tf.matmul(tf.math.square(self.P_a), self.N_A), self.convNeg), 1)) + 0.001)

        p15 = tf.math.log(tf.nn.sigmoid(tf.reduce_sum(tf.multiply(self.N_B + alpha * tf.matmul(self.P_b, self.N_B) + beta * tf.matmul(tf.math.square(self.P_b), self.N_B), self.convA), 1)) + 0.001)

        p16 = tf.math.log(tf.nn.sigmoid(-tf.reduce_sum(tf.multiply(self.N_B + alpha * tf.matmul(self.P_b, self.N_B) + beta * tf.matmul(tf.math.square(self.P_b), self.N_B), self.convNeg), 1)) + 0.001)



        # Text-Node
        p7 = tf.math.log(tf.nn.sigmoid(tf.reduce_sum(tf.multiply(self.convA, self.N_B), 1)) + 0.001)

        p8 = tf.math.log(tf.nn.sigmoid(-tf.reduce_sum(tf.multiply(self.convA, self.N_NEG), 1)) + 0.001)

        p17 = tf.math.log(tf.nn.sigmoid(tf.reduce_sum(tf.multiply(self.convB, self.N_A), 1)) + 0.001)

        p18 = tf.math.log(tf.nn.sigmoid(-tf.reduce_sum(tf.multiply(self.convB, self.N_NEG), 1)) + 0.001)

        rho1 = 1.0
        rho2 = 0.3
        rho3 = 0.3
        temp_loss = rho1 * (p1 + p2 + p11 + p12) + rho2 * (p3 + p4 + p13 + p14) + rho3 * (p5 + p6 + p15 + p16) + rho3 * (p7 + p8 + p17 + p18)
        loss = -tf.reduce_sum(temp_loss)
        return loss

# ***Negative Sample***

In [71]:
def InitNegTable(edges):
    a_list, b_list = zip(*edges)
    a_list = list(a_list)
    b_list = list(b_list)
    node = a_list
    node.extend(b_list)

    node_degree = {}
    for i in node:
        if i in node_degree:
            node_degree[i] += 1
        else:
            node_degree[i] = 1
    sum_degree = 0
    for i in node_degree.values():
        sum_degree += pow(i, 0.75)

    por = 0
    cur_sum = 0
    vid = -1
    neg_table = []
    degree_list = list(node_degree.values())
    node_id = list(node_degree.keys())
    for i in range(neg_table_size):
        if ((i + 1) / float(neg_table_size)) > por:
            cur_sum += pow(degree_list[vid + 1], NEG_SAMPLE_POWER)
            por = cur_sum / sum_degree
            vid += 1
        neg_table.append(node_id[vid])
    print(f'Neg. table size: {len(neg_table)}')
    return neg_table


# ***Run***

In [72]:
def prepareData(datasetName, ratio):
  f = open('/content/datasets/%s/graph.txt' % datasetName, 'rb')
  edges = [i for i in f]
  selected = int(len(edges) * float(ratio))
  selected = selected - selected % batch_size
  selected = random.sample(edges, selected)
  remain = [i for i in edges if i not in selected]
  try:
    temp_dir = Path('temp')

    # Check if the directory exists, if so, delete it
    if temp_dir.exists() and temp_dir.is_dir():
        shutil.rmtree(temp_dir)
        print("Existing directory deleted.")

    # Create the directory
    temp_dir.mkdir(parents=True, exist_ok=True)
    print("Directory created successfully.")

  except Exception as e:
      print(f"An error occurred: {e}")

  fw1 = open('temp/graph.txt', 'wb')
  fw2 = open('temp/test_graph.txt', 'wb')

  for i in selected:
      fw1.write(i)
  for i in remain:
      fw2.write(i)

In [93]:
prepareData(datasetName, ratio)

Existing directory deleted.
Directory created successfully.


In [94]:
# load data
dataset_name = datasetName
graph_path = os.path.join('/content/temp/graph.txt')
text_path = os.path.join("/content", "datasets", dataset_name, dataTextFile)

data = dataSet(text_path, graph_path)

Vocabulary: 14696
Neg. table size: 1000000


In [104]:
with tf.Graph().as_default():
    sess = tf.compat.v1.Session()
    with sess.as_default():
        model = Model(data.num_vocab, data.num_nodes)
        opt = tf.compat.v1.train.AdamOptimizer(lr)#tf.keras.optimizers.Adam(learning_rate=lr)
        train_op = opt.minimize(model.loss)#opt.minimize(model.loss, var_list=model.trainable_variables)
        sess.run(tf.compat.v1.global_variables_initializer())
        time = 0

        # training
        print('start training.......')


        for epoch in tqdm(range(num_epoch), desc="Epochs"):
            start = datetime.now()
            loss_epoch = 0
            batches = data.generate_batches()
            h1 = 0
            num_batch = len(batches)
            for i in range(num_batch):
                batch = batches[i]

                node1, node2, node3 = zip(*batch)
                node1, node2, node3 = np.array(node1), np.array(node2), np.array(node3)
                text1, text2, text3 = data.text[node1], data.text[node2], data.text[node3]
                #labels1, labels2 = data.labels[node1], data.labels[node2]
                #labels = [max(a, b) for a, b in zip(labels1, labels2)]
                P1, P2, P3 = sub_Mat(data.P, node1), sub_Mat(data.P, node2), sub_Mat(data.P, node3)


                feed_dict = {
                    model.Text_a: text1,
                    model.Text_b: text2,
                    model.Text_neg: text3,
                    model.Node_a: node1,
                    model.Node_b: node2,
                    model.Node_neg: node3,
                    model.P_a: P1,
                    model.P_b: P2,
                    model.P_neg: P3
                    #model.labels: labels
                }

                # run the graph
                _, loss_batch = sess.run([train_op, model.loss], feed_dict=feed_dict)

                loss_epoch += loss_batch

            end = datetime.now()
            time += (end - start).total_seconds()
            print('epoch: ', epoch + 1, ' loss: ', loss_epoch)

        print(f'Time: {time}')
        # Saving embeddings
        with open('temp/embed.txt', 'wb') as file:
            batches = data.generate_batches(mode='add')
            num_batch = len(batches)
            embed = [[] for _ in range(data.num_nodes)]

            for i in range(num_batch):
                batch = batches[i]
                node1, node2, node3 = zip(*batch)
                node1, node2, node3 = np.array(node1), np.array(node2), np.array(node3)
                text1, text2, text3 = data.text[node1], data.text[node2], data.text[node3]
                #labels1, labels2 = data.labels[node1], data.labels[node2]
                #labels = [max(a, b) for a, b in zip(labels1, labels2)]
                P1, P2, P3 = sub_Mat(data.P, node1), sub_Mat(data.P, node2), sub_Mat(data.P, node3)

                feed_dict = {
                    model.Text_a: text1,
                    model.Text_b: text2,
                    model.Text_neg: text3,
                    model.Node_a: node1,
                    model.Node_b: node2,
                    model.Node_neg: node3,
                    model.P_a: P1,
                    model.P_b: P2,
                    model.P_neg: P3
                    #model.labels: labels
                }

                # Fetch embeddings
                #convA, convB, TA, TB = sess.run([model.convA, model.convB, model.N_A, model.N_B])
                convA, convB, TA, TB = sess.run([model.convA, model.convB, model.N_A, model.N_B], feed_dict=feed_dict)

                # For each node in the batch
                for j in range(batch_size):
                    em = list(convA[j]) + list(TA[j]) # Create an embedding by concatenating the text (convA) and node (TA) embeddings
                    embed[node1[j]].append(em) # A node can appear many times in edges. Thus, each time that node will have a different embedding. Append the different embeddings for a particular node

                    em = list(convB[j]) + list(TB[j]) # Create an embedding by concatenating the text (convA) and node (TA) embeddings
                    embed[node2[j]].append(em)


            for i in range(data.num_nodes):
                if embed[i]:
                    tmp = np.mean(embed[i], axis=0) #/ len(embed[i]) # If a node has many different embeddings, take their mean.
                    file.write((' '.join(map(str, tmp)) + '\n').encode())
                else:
                    file.write('\n'.encode())


start training.......


Epochs:   2%|▏         | 3/200 [00:03<02:40,  1.23it/s]

epoch:  1  loss:  4119.76611328125
epoch:  2  loss:  4136.9903564453125
epoch:  3  loss:  4127.1761474609375


Epochs:   2%|▎         | 5/200 [00:03<01:24,  2.31it/s]

epoch:  4  loss:  4120.839050292969
epoch:  5  loss:  4038.7012329101562
epoch:  6  loss:  3971.1140747070312


Epochs:   4%|▍         | 9/200 [00:03<00:38,  4.97it/s]

epoch:  7  loss:  3614.0501098632812
epoch:  8  loss:  2603.5908203125
epoch:  9  loss:  1690.70849609375


Epochs:   6%|▌         | 11/200 [00:03<00:29,  6.43it/s]

epoch:  10  loss:  1436.6168212890625
epoch:  11  loss:  1368.0758056640625
epoch:  12  loss:  1443.6529235839844


Epochs:   6%|▋         | 13/200 [00:03<00:26,  6.93it/s]

epoch:  13  loss:  1372.9758911132812
epoch:  14  loss:  1356.190185546875


Epochs:   8%|▊         | 15/200 [00:04<00:26,  6.88it/s]

epoch:  15  loss:  1338.4217529296875
epoch:  16  loss:  1309.3661804199219


Epochs:   9%|▉         | 18/200 [00:04<00:26,  6.88it/s]

epoch:  17  loss:  1297.1427001953125
epoch:  18  loss:  1296.3958129882812


Epochs:  10%|█         | 21/200 [00:04<00:21,  8.20it/s]

epoch:  19  loss:  1284.4850463867188
epoch:  20  loss:  1275.9586791992188
epoch:  21  loss:  1267.2528686523438


Epochs:  12%|█▏        | 23/200 [00:05<00:20,  8.57it/s]

epoch:  22  loss:  1267.871337890625
epoch:  23  loss:  1258.3843688964844


Epochs:  13%|█▎        | 26/200 [00:05<00:18,  9.47it/s]

epoch:  24  loss:  1252.594482421875
epoch:  25  loss:  1245.1712036132812
epoch:  26  loss:  1240.7978820800781


Epochs:  14%|█▍        | 29/200 [00:05<00:17,  9.87it/s]

epoch:  27  loss:  1230.2939758300781
epoch:  28  loss:  1227.9689331054688
epoch:  29  loss:  1219.3521118164062


Epochs:  15%|█▌        | 30/200 [00:05<00:17,  9.89it/s]

epoch:  30  loss:  1208.3528137207031
epoch:  31  loss:  1211.3338317871094
epoch:  32  loss:  1198.8781127929688

Epochs:  17%|█▋        | 34/200 [00:06<00:16, 10.08it/s]


epoch:  33  loss:  1197.9750366210938
epoch:  34  loss:  1195.9290161132812


Epochs:  18%|█▊        | 36/200 [00:06<00:16,  9.78it/s]

epoch:  35  loss:  1184.889892578125
epoch:  36  loss:  1175.0703125
epoch: 

Epochs:  20%|█▉        | 39/200 [00:06<00:15, 10.11it/s]

 37  loss:  1154.471923828125
epoch:  38  loss:  1159.1113586425781
epoch:  39  loss:  1149.6515808105469


Epochs:  20%|██        | 41/200 [00:06<00:15, 10.15it/s]

epoch:  40  loss:  1135.9491271972656
epoch:  41  loss:  1138.1854858398438
epoch:  42  loss:  1141.8928527832031


Epochs:  22%|██▎       | 45/200 [00:07<00:15, 10.10it/s]

epoch:  43  loss:  1123.284423828125
epoch:  44  loss:  1118.8261108398438
epoch:  45  loss:  1108.9203186035156


Epochs:  24%|██▎       | 47/200 [00:07<00:15,  9.95it/s]

epoch:  46  loss:  1106.0561218261719
epoch:  47  loss:  1100.2786254882812


Epochs:  24%|██▍       | 49/200 [00:07<00:15,  9.71it/s]

epoch:  48  loss:  1108.6045227050781
epoch:  49  loss:  1084.8766479492188


Epochs:  26%|██▌       | 52/200 [00:08<00:14, 10.12it/s]

epoch:  50  loss:  1069.5071411132812
epoch:  51  loss:  1076.1007080078125
epoch:  52  loss:  1072.9762573242188


Epochs:  27%|██▋       | 54/200 [00:08<00:14, 10.34it/s]

epoch:  53  loss:  1072.01611328125
epoch:  54  loss:  1057.6018981933594
epoch:  55  loss:  1072.3761291503906


Epochs:  29%|██▉       | 58/200 [00:08<00:11, 12.45it/s]

epoch:  56  loss:  1040.3787231445312
epoch:  57  loss:  1043.8277893066406
epoch:  58  loss:  1051.181640625
epoch:  59  loss:  1060.5708923339844


Epochs:  31%|███       | 62/200 [00:08<00:10, 13.66it/s]

epoch:  60  loss:  1038.047119140625
epoch:  61  loss:  1028.4303131103516
epoch:  62  loss:  1003.03662109375
epoch:  63  loss:  1017.9043426513672


Epochs:  33%|███▎      | 66/200 [00:09<00:09, 14.51it/s]

epoch:  64  loss:  1003.6477508544922
epoch:  65  loss:  1026.6278991699219
epoch:  66  loss:  1025.3876953125
epoch:  67  loss:  1009.2419281005859


Epochs:  35%|███▌      | 70/200 [00:09<00:08, 15.23it/s]

epoch:  68  loss:  997.9412536621094
epoch:  69  loss:  1003.7948608398438
epoch:  70  loss:  978.5798492431641
epoch:  71  loss:  996.1793060302734


Epochs:  37%|███▋      | 74/200 [00:09<00:08, 15.30it/s]

epoch:  72  loss:  989.6399841308594
epoch:  73  loss:  1000.6149749755859
epoch:  74  loss:  1001.0713348388672
epoch:  75  loss:  962.5664367675781


Epochs:  39%|███▉      | 78/200 [00:09<00:07, 15.47it/s]

epoch:  76  loss:  965.3010711669922
epoch:  77  loss:  970.9251556396484
epoch:  78  loss:  980.6015014648438
epoch:  79  loss:  1004.8473510742188


Epochs:  41%|████      | 82/200 [00:10<00:07, 15.75it/s]

epoch:  80  loss:  965.3785552978516
epoch:  81  loss:  976.3072357177734
epoch:  82  loss:  962.5125732421875
epoch:  83  loss:  971.0359954833984


Epochs:  43%|████▎     | 86/200 [00:10<00:07, 16.03it/s]

epoch:  84  loss:  956.6593627929688
epoch:  85  loss:  953.92138671875
epoch:  86  loss:  965.8691711425781
epoch:  87  loss:  971.9289703369141


Epochs:  45%|████▌     | 90/200 [00:10<00:06, 15.86it/s]

epoch:  88  loss:  956.5662231445312
epoch:  89  loss:  944.5313720703125
epoch:  90  loss:  926.5739440917969
epoch:  91  loss:  930.0012054443359


Epochs:  47%|████▋     | 94/200 [00:10<00:06, 15.85it/s]

epoch:  92  loss:  942.0923156738281
epoch:  93  loss:  943.0169525146484
epoch:  94  loss:  925.4425964355469
epoch:  95  loss:  940.9466094970703


Epochs:  49%|████▉     | 98/200 [00:11<00:06, 16.20it/s]

epoch:  96  loss:  922.9235687255859
epoch:  97  loss:  927.4575958251953
epoch:  98  loss:  931.0446472167969
epoch:  99  loss:  917.2550506591797


Epochs:  51%|█████     | 102/200 [00:11<00:05, 16.41it/s]

epoch:  100  loss:  949.9461364746094
epoch:  101  loss:  929.4538269042969
epoch:  102  loss:  926.0948333740234
epoch:  103  loss:  901.2156524658203


Epochs:  53%|█████▎    | 106/200 [00:11<00:05, 16.22it/s]

epoch:  104  loss:  901.1638641357422
epoch:  105  loss:  915.818603515625
epoch:  106  loss:  940.4492645263672
epoch:  107  loss:  932.0317993164062


Epochs:  55%|█████▌    | 110/200 [00:11<00:05, 15.98it/s]

epoch:  108  loss:  900.9303436279297
epoch:  109  loss:  904.0175476074219
epoch:  110  loss:  901.6498413085938
epoch:  111  loss:  911.2317352294922


Epochs:  57%|█████▋    | 114/200 [00:12<00:05, 16.07it/s]

epoch:  112  loss:  925.4723663330078
epoch:  113  loss:  905.1594696044922
epoch:  114  loss:  920.6211853027344
epoch:  115  loss:  886.5268402099609


Epochs:  59%|█████▉    | 118/200 [00:12<00:05, 16.18it/s]

epoch:  116  loss:  887.5525665283203
epoch:  117  loss:  897.1529083251953
epoch:  118  loss:  912.9183959960938
epoch:  119  loss:  916.1410369873047


Epochs:  61%|██████    | 122/200 [00:12<00:04, 16.25it/s]

epoch:  120  loss:  888.8165893554688
epoch:  121  loss:  881.772705078125
epoch:  122  loss:  866.5989990234375
epoch:  123  loss:  871.5459594726562


Epochs:  63%|██████▎   | 126/200 [00:12<00:04, 16.07it/s]

epoch:  124  loss:  875.2909088134766
epoch:  125  loss:  935.5361785888672
epoch:  126  loss:  891.5309295654297
epoch:  127  loss:  872.5652008056641


Epochs:  65%|██████▌   | 130/200 [00:13<00:04, 16.35it/s]

epoch:  128  loss:  909.5804901123047
epoch:  129  loss:  885.7877807617188
epoch:  130  loss:  885.4795989990234
epoch:  131  loss:  873.7373809814453


Epochs:  67%|██████▋   | 134/200 [00:13<00:04, 16.06it/s]

epoch:  132  loss:  861.1360015869141
epoch:  133  loss:  884.6602478027344
epoch:  134  loss:  876.1775665283203
epoch:  135  loss:  870.1233062744141


Epochs:  69%|██████▉   | 138/200 [00:13<00:03, 15.75it/s]

epoch:  136  loss:  881.5296478271484
epoch:  137  loss:  907.93359375
epoch:  138  loss:  892.025146484375
epoch:  139  loss:  875.6724700927734


Epochs:  71%|███████   | 142/200 [00:13<00:03, 15.53it/s]

epoch:  140  loss:  880.5223846435547
epoch:  141  loss:  849.9345092773438
epoch:  142  loss:  902.4028167724609
epoch:  143  loss:  888.2499694824219


Epochs:  73%|███████▎  | 146/200 [00:14<00:03, 15.68it/s]

epoch:  144  loss:  867.1259918212891
epoch:  145  loss:  909.2706451416016
epoch:  146  loss:  851.2326507568359
epoch:  147  loss:  884.7439880371094


Epochs:  75%|███████▌  | 150/200 [00:14<00:03, 15.58it/s]

epoch:  148  loss:  880.3315734863281
epoch:  149  loss:  859.1177978515625
epoch:  150  loss:  877.3316040039062
epoch:  151  loss:  871.6018981933594


Epochs:  77%|███████▋  | 154/200 [00:14<00:02, 15.61it/s]

epoch:  152  loss:  902.4567108154297
epoch:  153  loss:  851.2731628417969
epoch:  154  loss:  845.0798492431641
epoch:  155  loss:  842.1654968261719


Epochs:  79%|███████▉  | 158/200 [00:14<00:02, 15.39it/s]

epoch:  156  loss:  881.9335021972656
epoch:  157  loss:  865.8279876708984
epoch:  158  loss:  846.5517425537109
epoch:  159  loss:  910.4742431640625


Epochs:  81%|████████  | 162/200 [00:15<00:02, 15.77it/s]

epoch:  160  loss:  880.7051391601562
epoch:  161  loss:  851.4857788085938
epoch:  162  loss:  838.5465087890625
epoch:  163  loss:  854.9694671630859


Epochs:  83%|████████▎ | 166/200 [00:15<00:02, 15.64it/s]

epoch:  164  loss:  871.688720703125
epoch:  165  loss:  855.6508483886719
epoch:  166  loss:  860.6631927490234
epoch: 

Epochs:  85%|████████▌ | 170/200 [00:15<00:01, 15.86it/s]

 167  loss:  859.8633728027344
epoch:  168  loss:  870.8346099853516
epoch:  169  loss:  882.7185974121094
epoch:  170  loss:  857.1836853027344


Epochs:  87%|████████▋ | 174/200 [00:15<00:01, 15.61it/s]

epoch:  171  loss:  853.0437164306641
epoch:  172  loss:  820.3439483642578
epoch:  173  loss:  870.7980346679688
epoch:  174  loss:  879.9705352783203


Epochs:  89%|████████▉ | 178/200 [00:16<00:01, 16.04it/s]

epoch:  175  loss:  893.5269775390625
epoch:  176  loss:  882.1417999267578
epoch:  177  loss:  877.3469085693359
epoch:  178  loss:  826.3318634033203


Epochs:  91%|█████████ | 182/200 [00:16<00:01, 16.12it/s]

epoch:  179  loss:  863.8855438232422
epoch:  180  loss:  853.0677795410156
epoch:  181  loss:  845.2187652587891
epoch:  182  loss:  866.5494995117188


Epochs:  93%|█████████▎| 186/200 [00:16<00:00, 16.30it/s]

epoch:  183  loss:  859.7247772216797
epoch:  184  loss:  843.0488586425781
epoch:  185  loss:  832.9476776123047
epoch:  186  loss:  836.9181518554688


Epochs:  95%|█████████▌| 190/200 [00:16<00:00, 16.44it/s]

epoch:  187  loss:  858.6759948730469
epoch:  188  loss:  853.1402282714844
epoch:  189  loss:  853.1087341308594
epoch:  190  loss:  848.4010314941406


Epochs:  97%|█████████▋| 194/200 [00:17<00:00, 15.80it/s]

epoch:  191  loss:  813.4095458984375
epoch:  192  loss:  844.9398345947266
epoch:  193  loss:  861.3139801025391
epoch:  194  loss:  867.9522247314453


Epochs:  99%|█████████▉| 198/200 [00:17<00:00, 15.88it/s]

epoch:  195  loss:  841.2416229248047
epoch:  196  loss:  848.6564178466797
epoch:  197  loss:  833.8301391601562
epoch:  198  loss:  826.2361602783203


Epochs: 100%|██████████| 200/200 [00:17<00:00, 11.46it/s]


epoch:  199  loss:  822.6763000488281
epoch:  200  loss:  858.8649139404297
Time: 17.06182399999999


In [105]:
node2vec = {}
f = open('temp/embed.txt', 'rb')
for i, j in enumerate(f):
    if j.decode() != '\n':
        node2vec[i] = list(map(float, j.strip().decode().split(' ')))
f1 = open(os.path.join('temp/test_graph.txt'), 'rb')
edges = [list(map(int, i.strip().decode().split('\t'))) for i in f1]
nodes = list(set([i for j in edges for i in j]))
a = 0
b = 0
result = []
for i, j in edges:
    if i in node2vec.keys() and j in node2vec.keys():
        dot1 = np.dot(node2vec[i], node2vec[j])
        random_node = random.sample(nodes, 1)[0]
        while random_node == j or random_node not in node2vec.keys():
            random_node = random.sample(nodes, 1)[0]
        dot2 = np.dot(node2vec[i], node2vec[random_node])
        result.append(np.asarray([dot1,dot2]))
        if dot1 > dot2:
            a += 1
        elif dot1 == dot2:
            a += 0.5
        b += 1

print("Auc value:", float(a) / b)

Auc value: 0.8387096774193549
