# Graph Neural Network

Author: Adam Darmanin

## Paper

[Kika, Alda, et al. "Imbalance Node Classification with Graph Neural Networks (GNN): A Study on a Twitter Dataset."](https://www.proquest.com/openview/707deabdf2dee201896409a9a4fccfb7/1?pq-origsite=gscholar&cbl=5444811)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from spektral.layers import GCNConv
from spektral.data import BatchLoader, Graph, Dataset
import numpy as np

import os
from neo4j import GraphDatabase
import numpy as np
from spektral.data import Graph, Dataset
from dotenv import load_dotenv

# Let python-dotenv populate the process environment before the lookups below.
load_dotenv()

# Neo4j credentials read from the environment; each is None when its
# variable is not set.
client_id = os.environ.get("N4J_USER")
client_secret = os.environ.get("N4J_PW")

# Prepare our Reddit Dataset

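Using Spektral, we build a dataset containing a single graph read from Neo4j: `Subreddit` and `Word` nodes, linked by the relationships that go from a word to a subreddit.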

In [None]:
class RedditDataset(Dataset):
    """Spektral dataset holding one word/subreddit graph loaded from Neo4j.

    Node indices are assigned in query order: subreddits occupy
    ``0 .. S-1`` and words occupy ``S .. S+W-1``. Node features and labels
    are placeholders and must be replaced before any real training.
    """

    def read(self):
        """Query Neo4j and build the graph.

        Returns:
            list: a one-element list containing a ``Graph`` whose ``x`` is an
            identity matrix (one-hot node features), ``a`` is a dense,
            symmetric 0/1 adjacency matrix and ``y`` is an all-zero
            ``(num_nodes, 1)`` label placeholder.
        """
        # Connect to Neo4j
        driver = GraphDatabase.driver(
            "bolt://localhost:7687", auth=(client_id, client_secret)
        )

        try:
            with driver.session() as session:
                # Get Subreddits: name -> node index
                node_results = session.run(
                    "MATCH (n:Subreddit) RETURN n.name as name"
                )
                subreddits = {
                    record["name"]: idx for idx, record in enumerate(node_results)
                }

                # Get Words: indices continue after the subreddit block
                word_results = session.run("MATCH (n:Word) RETURN n.name as name")
                words = {
                    record["name"]: idx + len(subreddits)
                    for idx, record in enumerate(word_results)
                }

                # Get Edges as (word index, subreddit index) pairs
                edge_results = session.run(
                    "MATCH (n:Word)-[r]->(m:Subreddit) RETURN n.name as source, m.name as target"
                )
                edges = [
                    (words[record["source"]], subreddits[record["target"]])
                    for record in edge_results
                ]
        finally:
            # Release the connection pool even when a query fails.
            driver.close()

        # Create adjacency matrix
        num_nodes = len(subreddits) + len(words)
        adj_matrix = np.zeros((num_nodes, num_nodes))
        if edges:
            src, dst = np.asarray(edges).T
            # The graph is treated as undirected, so every edge is mirrored
            # to keep the adjacency matrix symmetric.
            adj_matrix[src, dst] = 1
            adj_matrix[dst, src] = 1

        # Placeholder for node features and labels
        node_features = np.eye(num_nodes)  # One-hot encoding for simplicity
        labels = np.zeros(
            (num_nodes, 1)
        )  # You will need to define labels appropriately

        return [Graph(x=node_features, a=adj_matrix, y=labels)]


dataset = RedditDataset()

# Graph Neural Network

This is similar to a recommendation problem: given a set of words, we want to recommend the subreddit they are most closely associated with.

In [None]:
class RedditGNN(Model):
    """Keras model: two stacked GCNConv layers followed by a softmax Dense layer."""

    def __init__(self, num_classes, **kwargs):
        """Create the layers.

        Args:
            num_classes: number of units in the final softmax layer.
            **kwargs: forwarded unchanged to the ``Model`` constructor.
        """
        super().__init__(**kwargs)
        # Graph convolutions with 64 then 32 output channels.
        self.conv1 = GCNConv(64, activation="relu")
        self.conv2 = GCNConv(32, activation="relu")
        # Classification head.
        self.dense = Dense(num_classes, activation="softmax")

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: a pair ``(x, a)``; both graph convolutions receive the
                same ``a``, while ``x`` is replaced by each layer's output.

        Returns:
            The output of the softmax Dense layer.
        """
        features, adjacency = inputs
        hidden = self.conv1([features, adjacency])
        hidden = self.conv2([hidden, adjacency])
        scores = self.dense(hidden)
        return scores


# Width of the softmax output, i.e. the number of unique labels for your nodes
num_classes = 2  # Replace with your actual number of classes
model = RedditGNN(num_classes=num_classes)

# Compile the model: Adam optimizer, categorical cross-entropy loss, accuracy metric.
# NOTE(review): categorical cross-entropy expects one-hot targets of width
# num_classes, while the dataset's placeholder labels have shape
# (num_nodes, 1) - confirm the label encoding (or switch to the sparse
# variant) before training.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Predict the Subreddit (and Therefore the Mental Health Issue)

In [None]:
def _unit_rows(matrix):
    """Scale each row of *matrix* to unit L2 norm, leaving all-zero rows as zeros."""
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # avoid 0/0; a zero row stays a zero row
    return matrix / norms


def predict_subreddit(
    model, word_embeddings, subreddit_embeddings, *, normalize=False
):
    """Return, for each word embedding, the index of the best-matching subreddit.

    Args:
        model: unused; kept so existing call sites keep working.
        word_embeddings: array of shape ``(num_words, dim)``, or a single
            1-D vector of length ``dim`` (treated as one word).
        subreddit_embeddings: array of shape ``(num_subreddits, dim)``.
        normalize: when True, rows are L2-normalised first so the score is
            the cosine similarity; when False (default) the raw dot product
            is used, which favours embeddings with a large norm.

    Returns:
        1-D integer array with one subreddit row index per word.

    Raises:
        ValueError: if the two inputs disagree on the embedding dimension.
    """
    # Promote a lone vector to a one-row matrix so argmax(axis=1) is valid.
    word_matrix = np.atleast_2d(np.asarray(word_embeddings))
    subreddit_matrix = np.atleast_2d(np.asarray(subreddit_embeddings))

    if word_matrix.shape[-1] != subreddit_matrix.shape[-1]:
        raise ValueError(
            "embedding dimensions differ: "
            f"{word_matrix.shape[-1]} != {subreddit_matrix.shape[-1]}"
        )

    if normalize:
        word_matrix = _unit_rows(word_matrix)
        subreddit_matrix = _unit_rows(subreddit_matrix)

    # similarity_matrix[i, j] scores word i against subreddit j.
    similarity_matrix = np.dot(word_matrix, subreddit_matrix.T)
    closest_subreddits = np.argmax(similarity_matrix, axis=1)
    return closest_subreddits


# Example usage (assuming you have word_embeddings and subreddit_embeddings prepared)
# word_embeddings = ...  # Obtain embeddings for input words
# subreddit_embeddings = ...  # Obtain embeddings for all subreddits
# closest_subreddits = predict_subreddit(model, word_embeddings, subreddit_embeddings)