# Graph Neural Network

## Paper

[Kika, Alda, et al. "Imbalance Node Classification with Graph Neural Networks (GNN): A Study on a Twitter Dataset."](https://www.proquest.com/openview/707deabdf2dee201896409a9a4fccfb7/1?pq-origsite=gscholar&cbl=5444811)

In [53]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from spektral.layers import GCNConv
from spektral.data import BatchLoader, Graph, Dataset
import numpy as np

import os
from neo4j import GraphDatabase
import numpy as np
from spektral.data import Graph, Dataset
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# Neo4j credentials do not have to be hard-coded in the notebook.
load_dotenv()
# Neo4j username and password, used as the driver's auth pair in
# RedditDataset.read. os.getenv returns None when a variable is not set.
client_id = os.getenv("N4J_USER")
client_secret = os.getenv("N4J_PW")

# Prepare our Reddit Dataset

Using Spektral, we build our dataset from the graph stored in Neo4j.

In [54]:
class RedditDataset(Dataset):
    """Spektral dataset containing a single graph read from Neo4j.

    Every ``Mental_Health_Disorder``, ``Word`` and ``Verb`` node becomes one
    graph node. Node ids are contiguous and assigned in that order:
    disorders first, then words, then verbs.
    """

    @staticmethod
    def _index_names(records, offset):
        """Map each distinct ``name`` in *records* to a contiguous node id.

        Ids start at *offset* and follow first-seen order. Duplicate names are
        collapsed before numbering so the highest id is always
        ``offset + len(result) - 1``; numbering the raw records instead would
        leave gaps that spill into the id range of the next node type.
        """
        unique_names = dict.fromkeys(record["name"] for record in records)
        return {name: offset + idx for idx, name in enumerate(unique_names)}

    def read(self):
        """Query Neo4j and return the graph as a one-element list.

        Returns:
            A list holding one ``Graph`` with
            ``x`` - an identity matrix (one-hot feature per node),
            ``a`` - a dense ``(num_nodes, num_nodes)`` adjacency matrix,
            ``y`` - an all-zero ``(num_nodes, 1)`` label array (placeholder).

        Raises:
            KeyError: if an edge refers to a node name that was not returned
                by the corresponding node query.
        """
        # The driver and the session are both used as context managers so the
        # connection is released even when a query raises.
        with GraphDatabase.driver(
            "bolt://localhost:7687", auth=(client_id, client_secret)
        ) as driver:
            with driver.session() as session:
                disorders = self._index_names(
                    session.run(
                        "MATCH (n:Mental_Health_Disorder) RETURN n.name as name"
                    ),
                    0,
                )
                words = self._index_names(
                    session.run("MATCH (n:Word) RETURN n.name as name"),
                    len(disorders),
                )
                verbs = self._index_names(
                    session.run("MATCH (n:Verb) RETURN n.name as name"),
                    len(disorders) + len(words),
                )

                # Only Disorder -> Verb and Verb -> Word relationships are
                # loaded into the graph.
                disorder_verb_edges = [
                    (disorders[record["source"]], verbs[record["target"]])
                    for record in session.run(
                        "MATCH (s:Mental_Health_Disorder)-[r]->(v:Verb) RETURN s.name as source, v.name as target"
                    )
                ]
                verb_word_edges = [
                    (verbs[record["source"]], words[record["target"]])
                    for record in session.run(
                        "MATCH (v:Verb)-[r]->(w:Word) RETURN v.name as source, w.name as target"
                    )
                ]

        edges = disorder_verb_edges + verb_word_edges

        num_nodes = len(disorders) + len(words) + len(verbs)
        adj_matrix = np.zeros((num_nodes, num_nodes))
        # Edges are written in the source -> target direction only, so the
        # adjacency matrix is not symmetrised here.
        for src, dst in edges:
            adj_matrix[src, dst] = 1

        node_features = np.eye(num_nodes)
        labels = np.zeros((num_nodes, 1))

        return [Graph(x=node_features, a=adj_matrix, y=labels)]


# Build the dataset object used by the rest of the notebook.
# NOTE(review): Spektral's Dataset constructor is expected to call read(),
# which would make this line require a reachable Neo4j server - confirm.
dataset = RedditDataset()

# Graph Neural Network

This is similar to a recommendation problem.

See: https://blog.tensorflow.org/2021/11/introducing-tensorflow-gnn.html

In [55]:
class RedditGNN(Model):
    """Two graph-convolution layers followed by a softmax output layer.

    Args:
        num_classes: Number of units in the final softmax ``Dense`` layer.
        **kwargs: Forwarded unchanged to ``tensorflow.keras.Model``.
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        # Graph convolutions with 64 and then 32 output channels.
        self.conv1 = GCNConv(channels=64, activation="relu")
        self.conv2 = GCNConv(channels=32, activation="relu")
        # Output layer producing one probability per class.
        self.dense = Dense(units=num_classes, activation="softmax")

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: A pair ``(x, a)``; both elements are handed to each
                ``GCNConv`` layer, with ``a`` reused unchanged by both.

        Returns:
            The output of the softmax layer applied to the second
            convolution's result.
        """
        node_features, adjacency = inputs
        hidden = self.conv1([node_features, adjacency])
        hidden = self.conv2([hidden, adjacency])
        return self.dense(hidden)



num_classes = 6  # Number of mental-health subreddits, one output class each.
model = RedditGNN(num_classes=num_classes)
# NOTE(review): categorical_crossentropy expects one-hot targets with
# num_classes columns, whereas RedditDataset.read currently produces labels
# of shape (num_nodes, 1) - confirm before fitting.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Predict the Subreddit (and Therefore the Mental Health Issue)

In [59]:
from sklearn.metrics.pairwise import cosine_similarity
from keras.utils import to_categorical


def predict_subreddit(model, words, all_subreddits):
    """Pick, for every word, the subreddit with the most similar prediction.

    Args:
        model: Object exposing ``predict``; it is called once on ``words``
            and then once on ``all_subreddits``.
        words: Model input for the words (presumably one row per word -
            confirm against the caller).
        all_subreddits: Model input for the subreddits. It is also indexed
            by position to build the return value.

    Returns:
        A list with one entry of ``all_subreddits`` per row of the word
        predictions, chosen by highest cosine similarity.
    """
    word_vectors = model.predict(words)
    subreddit_vectors = model.predict(all_subreddits)

    # Rows correspond to words, columns to subreddits.
    scores = cosine_similarity(word_vectors, subreddit_vectors)

    matches = []
    for best_column in np.argmax(scores, axis=1):
        matches.append(all_subreddits[best_column])
    return matches


# Christian: the model has to be fitted before its predictions are meaningful.

# Adam: real inputs are still needed; the two values below are placeholders.
word_embeddings = None  # TODO: supply the word inputs for the model
subreddit_embeddings = None  # TODO: supply the subreddit inputs for the model
# NOTE(review): both arguments are still None here, so this call cannot
# produce real predictions until the TODOs above are resolved.
closest_subreddits = predict_subreddit(model, word_embeddings, subreddit_embeddings)

Epoch 1/10


InvalidArgumentError: Exception encountered when calling layer 'gcn_conv_22' (type GCNConv).

{{function_node __wrapped____MklMatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Matrix size-incompatible: In[0]: [1,13255], In[1]: [1,64] [Op:MatMul] name: 

Call arguments received by layer 'gcn_conv_22' (type GCNConv):
  • inputs=['tf.Tensor(shape=(1, 13255), dtype=float32)', 'tf.Tensor(shape=(1, 13255), dtype=float32)']
  • mask=None