In [1]:
import torch
import matplotlib.pyplot as plt
from openTSNE import TSNE
import pickle
from deepsnap.hetero_graph import HeteroGraph
import networkx as nx
from hetero_gnn import HeteroGNN
from train_gnn_llm import graph_tensors_to_device

  generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, first_layer=i is 0),
  from .autonotebook import tqdm as notebook_tqdm


# TSNE embedding analysis

In [2]:
train_args = {
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    "hidden_size": 81,
    "epochs": 233,
    "weight_decay": 0.00002203762357664057,
    "lr": 0.003873757421883433,
    "attn_size": 48,
    "num_layers": 6,
    "aggr": "attn",
}


tsne = TSNE(
    perplexity=30,
    metric="euclidean",
    n_jobs=8,
    random_state=42,
    verbose=True,
    n_iter=50,
)

In [3]:
def plot_tsne(embeddings_2d):
    plt.figure(figsize=(12, 8))
    plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], marker="o", cmap="Spectral")
    plt.title("t-SNE of Node Embeddings")
    plt.xlabel("Component 1")
    plt.ylabel("Component 2")
    plt.colorbar()
    plt.show()

In [4]:
with open("./1_concepts_similar_llm.pkl", "rb") as f:
    G = pickle.load(f)

# Create a HeteroGraph object from the networkx graph
hetero_graph = HeteroGraph(G, netlib=nx, directed=True)
graph_tensors_to_device(hetero_graph)

model = HeteroGNN(
    hetero_graph,
    train_args,
    num_layers=train_args["num_layers"],
    aggr=train_args["aggr"],
    return_embedding=True,
).to(train_args["device"])

model.load_state_dict(torch.load("./best_model.pkl"))

preds = model(hetero_graph.node_feature, hetero_graph.edge_index)

node_embeddings = preds['event']

# Train and evaluate the t-SNE model
embedding_train = tsne.fit(node_embeddings)
embeddings_2d = embedding_train.transform(node_embeddings)

TYPE ('event', 'similar', 'event')
	 Feature 769
	 Feature 769
TYPE ('event', 'related', 'concept')
	 Feature 769
	 Feature 1
TYPE ('concept', 'related', 'event')
	 Feature 1
	 Feature 769
KEY ('event', 'similar', 'event') <class 'tuple'>
KEY NUMS ('event', 'similar', 'event') 8487 8487
MAX EDGES tensor(8283) tensor(8283) 8487 8487
KEY ('event', 'related', 'concept') <class 'tuple'>
KEY NUMS ('event', 'related', 'concept') 8487 8729
MAX EDGES tensor(8265) tensor(8728) 8487 8729
KEY ('concept', 'related', 'event') <class 'tuple'>
KEY NUMS ('concept', 'related', 'event') 8729 8487
MAX EDGES tensor(8728) tensor(8265) 8729 8487
--------------------------------------------------------------------------------
TSNE(early_exaggeration=12, n_iter=50, n_jobs=8, random_state=42, verbose=True)
--------------------------------------------------------------------------------
===> Finding 90 nearest neighbors using Annoy approximate search using euclidean distance...
   --> Time elapsed: 36.29 second

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [None]:
# Call the function with your embeddings
plot_tsne(embeddings_2d)