In [None]:
!pip install tensorflow-gpu==1.15.2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-gpu==1.15.2
  Downloading tensorflow_gpu-1.15.2-cp37-cp37m-manylinux2010_x86_64.whl (410.9 MB)
[K     |████████████████████████████████| 410.9 MB 32 kB/s 
Collecting keras-applications>=1.0.8
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 4.7 MB/s 
Collecting tensorboard<1.16.0,>=1.15.0
  Downloading tensorboard-1.15.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 37.9 MB/s 
Collecting tensorflow-estimator==1.15.1
  Downloading tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503 kB)
[K     |████████████████████████████████| 503 kB 27.8 MB/s 
Collecting gast==0.2.2
  Downloading gast-0.2.2.tar.gz (10 kB)
Building wheels for collected packages: gast
  Building wheel for gast (setup.py) ... [?25l[?25hdone
  Created wheel for gast: filename=gast-0.2.2-py3-none-any.whl si

In [None]:
!git clone https://github.com/dbusbridge/gcn_tutorial

Cloning into 'gcn_tutorial'...
remote: Enumerating objects: 69, done.[K
remote: Total 69 (delta 0), reused 0 (delta 0), pack-reused 69[K
Unpacking objects: 100% (69/69), done.


In [None]:
import networkx as nx
import numpy as np
import pandas as pd
import scipy.sparse
import tensorflow.compat.v1 as tf
import gcn_tutorial.layers.graph as lg
import gcn_tutorial.utils.sparse as us
from tqdm import tqdm


In [None]:

def set_label(G, emotion_label, df):
    """Attach an emotion label to every music node of ``G``.

    A node counts as a music node when its identifier is a string containing
    ``':music'``. The identifier with ``':music'`` removed is parsed as an
    integer and matched against ``df['musicId']``; the value found in column
    ``emotion_label`` is stored in ``G.nodes[node]['label']``. When several
    rows share the same ``musicId`` the first row wins.

    Args:
        G: graph object exposing ``G.nodes()`` for iteration and
            ``G.nodes[node]`` as a mutable attribute mapping.
        emotion_label: name of the ``df`` column holding the label to attach.
        df: DataFrame with a ``musicId`` column and an ``emotion_label`` column.

    Returns:
        The same ``G`` object, modified in place.

    Raises:
        IndexError: if a music node has no row with a matching ``musicId``.
        ValueError: if the identifier left after removing ``':music'`` is not
            an integer literal.
    """
    # Non-string identifiers cannot carry the ':music' marker, so skip them
    # instead of letting the substring test raise.
    music_nodes = [node for node in G.nodes()
                   if isinstance(node, str) and ':music' in node]
    if not music_nodes:
        return G

    # Build the musicId -> label lookup once instead of re-filtering the whole
    # frame for every node. setdefault keeps the first row of duplicated ids.
    label_by_id = {}
    for music_id, label in zip(df['musicId'].to_list(), df[emotion_label].to_list()):
        label_by_id.setdefault(music_id, label)

    for node in music_nodes:
        music_id = int(node.replace(':music', ''))
        if music_id not in label_by_id:
            raise IndexError(
                'no row with musicId == {} found for node {!r}'.format(music_id, node))
        G.nodes[node]['label'] = label_by_id[music_id]
    return G


def masked_softmax_cross_entropy(preds, labels, mask):
    """Mean softmax cross-entropy with each row weighted by ``mask``.

    Args:
        preds: logits tensor passed to the softmax cross-entropy op.
        labels: target distribution tensor matching ``preds``.
        mask: per-row mask; it is cast to float32 before use.

    Returns:
        Scalar tensor: the mean over all rows of ``loss * mask / mean(mask)``.
        For a 0/1 mask this equals the average loss of the selected rows.
        An all-zero mask makes the normaliser zero.
    """
    weights = tf.cast(mask, dtype=tf.float32)
    # Rescale so that the plain mean below is not diluted by masked-out rows.
    weights = weights / tf.reduce_mean(weights)
    per_row_loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=preds)
    return tf.reduce_mean(per_row_loss * weights)


def masked_accuracy(preds, labels, mask):
    """Classification accuracy with each row weighted by ``mask``.

    Args:
        preds: score tensor; the predicted class is the argmax along axis 1.
        labels: target tensor; the true class is the argmax along axis 1.
        mask: per-row mask; it is cast to float32 before use.

    Returns:
        Scalar tensor: the mean over all rows of ``hit * mask / mean(mask)``.
        For a 0/1 mask this equals the accuracy over the selected rows.
        An all-zero mask makes the normaliser zero.
    """
    predicted_class = tf.argmax(preds, 1)
    target_class = tf.argmax(labels, 1)
    hits = tf.cast(tf.equal(predicted_class, target_class), tf.float32)
    weights = tf.cast(mask, dtype=tf.float32)
    # Rescale so that the plain mean below is not diluted by masked-out rows.
    weights = weights / tf.reduce_mean(weights)
    return tf.reduce_mean(hits * weights)


In [None]:

def _encode_memberships(G, node_list):
    """Write an integer ``membership`` code on every node; return the label -> code map."""
    label_codes = {}
    for node in node_list:
        attrs = G.nodes[node]
        if 'label' in attrs:
            # Codes are handed out in order of first appearance: 0, 1, 2, ...
            attrs['membership'] = label_codes.setdefault(attrs['label'], len(label_codes))
        else:
            # Unlabelled nodes keep the -1 sentinel; it is never used as a class.
            attrs['membership'] = -1
    return label_codes


def _normalized_adjacency(adj):
    """Return the dense matrix D^-1/2 (A + I) D^-1/2 for adjacency matrix ``adj``."""
    # np.asarray matters: sparse-matrix + ndarray can produce an np.matrix,
    # whose `*` operator is a matrix product rather than element-wise.
    adj_tilde = np.asarray(adj + np.identity(n=adj.shape[0]))
    d_tilde_inv_sqrt = np.power(adj_tilde.sum(axis=1), -1 / 2)
    # Scaling row i by d_i and column j by d_j is the same as multiplying by
    # diag(d) on both sides, without two dense n x n matrix products.
    return d_tilde_inv_sqrt[:, np.newaxis] * adj_tilde * d_tilde_inv_sqrt[np.newaxis, :]


def _node_features(G, node_list):
    """Stack one row per node: its ``f_acoustic`` values followed by its ``f_bert`` values."""
    return np.array([
        list(G.nodes[node]['f_acoustic']) + list(G.nodes[node]['f_bert'])
        for node in node_list])


def _training_and_test_targets(G, node_list, label_codes):
    """Build one-hot training targets, the training mask and the held-out test labels."""
    n_nodes = len(node_list)
    y_train = np.zeros(shape=(n_nodes, len(label_codes)), dtype=np.float32)
    train_mask = np.zeros(shape=(n_nodes,), dtype=bool)
    y_true = []
    y_true_index = []
    for idx, node in enumerate(node_list):
        attrs = G.nodes[node]
        if attrs.get('split') == 'test':
            # Held-out node: remember its label for evaluation only. It must
            # not enter the training mask, otherwise the loss sees test labels.
            y_true.append(label_codes[attrs['label']])
            y_true_index.append(idx)
        elif 'label' in attrs:
            y_train[idx, attrs['membership']] = 1.0
            train_mask[idx] = True
    return y_train, train_mask, y_true, y_true_index


def build_gcn(G, run, epochs=100, learning_rate=1e-3):
    """Train a four-layer graph-convolutional classifier on ``G`` and score its test nodes.

    Every node must carry ``f_acoustic`` and ``f_bert`` feature sequences.
    Nodes with a ``label`` attribute are supervised, except those whose
    ``split`` attribute equals ``'test'``: these are excluded from the training
    mask and only used to produce ``y_true`` / ``y_pred``. As a side effect each
    node receives a ``membership`` attribute (class code, or -1 if unlabelled).

    Args:
        G: networkx graph with the node attributes described above.
        run: integer; ``str(run + 1)`` is appended to the layer names
            (presumably to keep names unique across repeated calls in the same
            default TF graph - confirm against gcn_tutorial).
        epochs: number of full-batch optimisation steps (default 100).
        learning_rate: Adam learning rate (default 1e-3).

    Returns:
        ``(y_true, y_pred, embeddings)``: class codes of the test nodes, the
        predicted class codes for those nodes, and the output of the third
        layer for every node. Outputs are taken from the most recent epoch
        whose training accuracy was at least the best seen so far, or from a
        final forward pass if no epoch qualified.

    Raises:
        ValueError: if no labelled node outside the test split exists.
        KeyError: if a node lacks a feature attribute or a test node lacks a label.
    """
    network = str(run + 1)
    node_list = list(G.nodes())

    label_codes = _encode_memberships(G, node_list)

    adj = nx.adjacency_matrix(G, nodelist=node_list)
    n_nodes = adj.shape[0]

    # GCN preprocessing
    adj_norm = _normalized_adjacency(adj)
    adj_norm_tuple = us.sparse_to_tuple(scipy.sparse.coo_matrix(adj_norm))

    # Features from two modalities
    feat_x = _node_features(G, node_list)
    feat_x_tuple = us.sparse_to_tuple(scipy.sparse.coo_matrix(feat_x))

    # Only real labels count as classes; the -1 sentinel of unlabelled nodes
    # must not add an extra output unit.
    nb_classes = len(label_codes)
    y_train, train_mask, y_true, y_true_index = _training_and_test_targets(
        G, node_list, label_codes)
    if not train_mask.any():
        raise ValueError('no labelled node outside the test split to train on')

    # TensorFlow placeholders
    ph = {
        'adj_norm': tf.sparse_placeholder(tf.float32, name="adj_mat"),
        'x': tf.sparse_placeholder(tf.float32, name="x"),
        'labels': tf.placeholder(tf.float32, shape=(n_nodes, nb_classes)),
        'mask': tf.placeholder(tf.int32)}

    l_sizes = [512, 256, 128, nb_classes]
    print(nb_classes)

    o_fc1 = lg.GraphConvLayer(
        input_dim=feat_x.shape[-1],
        output_dim=l_sizes[0],
        name='fc1_' + network,
        activation=tf.nn.tanh)(adj_norm=ph['adj_norm'], x=ph['x'], sparse=True)

    o_fc2 = lg.GraphConvLayer(
        input_dim=l_sizes[0],
        output_dim=l_sizes[1],
        name='fc2_' + network,
        activation=tf.nn.tanh)(adj_norm=ph['adj_norm'], x=o_fc1)

    o_fc3 = lg.GraphConvLayer(
        input_dim=l_sizes[1],
        output_dim=l_sizes[2],
        name='fc3_' + network,
        activation=tf.nn.tanh)(adj_norm=ph['adj_norm'], x=o_fc2)

    o_fc4 = lg.GraphConvLayer(
        input_dim=l_sizes[2],
        output_dim=l_sizes[3],
        name='fc4_' + network,
        activation=tf.identity)(adj_norm=ph['adj_norm'], x=o_fc3)

    with tf.name_scope('optimizer'):
        loss = masked_softmax_cross_entropy(preds=o_fc4, labels=ph['labels'], mask=ph['mask'])
        accuracy = masked_accuracy(preds=o_fc4, labels=ph['labels'], mask=ph['mask'])
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        opt_op = optimizer.minimize(loss)

    feed_dict_train = {ph['adj_norm']: adj_norm_tuple,
                       ph['x']: feat_x_tuple,
                       ph['labels']: y_train,
                       ph['mask']: train_mask}
    # The forward pass needs neither labels nor mask.
    feed_dict_output = {ph['adj_norm']: adj_norm_tuple, ph['x']: feat_x_tuple}

    embeddings = None
    preds = None
    best_train_acc = 0
    # The context manager releases the session's resources on exit, also on error.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for _epoch in tqdm(range(epochs), total=epochs):
            _, train_acc = sess.run((opt_op, accuracy), feed_dict=feed_dict_train)

            if train_acc >= best_train_acc:
                best_train_acc = train_acc
                # One forward pass yields both the hidden representation and the logits.
                embeddings, preds = sess.run((o_fc3, o_fc4), feed_dict=feed_dict_output)

        if preds is None:
            # No epoch qualified (epochs == 0 or NaN accuracy): fall back to
            # the current weights instead of failing on unbound names.
            embeddings, preds = sess.run((o_fc3, o_fc4), feed_dict=feed_dict_output)

    y_pred = [preds[i].argmax() for i in y_true_index]

    return y_true, y_pred, embeddings



In [None]:

if __name__ == '__main__':
    # Local import: only this driver cell needs pickle.
    import pickle

    # Tab-separated table; set_label reads its musicId column and the column
    # named by emotion_label.
    df = pd.read_csv("dataset_base.tsv", sep='\t')
    run = 1

    emotion_label = "arousal"
    grafo = "graph.nx"
    # nx.read_gpickle was deprecated in networkx 2.6 and removed in 3.0;
    # loading the pickle directly works on every version.
    # NOTE(review): unpickling can execute arbitrary code - only load graph
    # files from a trusted source.
    with open(grafo, 'rb') as graph_file:
        G = pickle.load(graph_file)

    G = set_label(G=G, emotion_label=emotion_label, df=df)
    y_true, y_pred, embeddings = build_gcn(G=G, run=run)
    

In [None]:
# Class codes of the test-split nodes, followed by the codes predicted for
# those same nodes (both lists are in the same node order).
print(y_true, y_pred)

[0, 2, 2, 2, 2, 2, 1, 2, 0, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 1, 0, 2, 2, 2, 2, 0, 2, 2, 1, 2, 1, 0, 1, 0, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 1] [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
