# Visualisation

We will generate node embeddings, reduce their dimensionality, and finally visualise these embeddings: we expect to empirically observe that the embeddings become more and more similar as the number of layers increases.

We will use the **validation dataset** for the visualisation.

In [None]:
# Root folder for saved figures. The plotting code concatenates the dataset
# name directly onto this string, so the trailing slash is required.
OUTPUT_DIR = "/weighted-jk/visuals/"

In [1]:
import pandas as pd
import warnings
import matplotlib.pyplot as plt

from sklearn.manifold import TSNE
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
def dimension_reduction(model: nn.Module, dataset_name) -> pd.DataFrame:
    """
    Embeds the validation nodes of a dataset with `model`, performs T-SNE on
    those embeddings, and returns a [DataFrame] containing the reduced
    variables and the label of each validation node.

    The model is run on the full graph (all node features and all edges) and
    the validation rows are selected afterwards. Multiplying the feature
    matrix or the edge index by the node mask does not select validation
    nodes, and would leave the labels misaligned with the embeddings.

    Args:
    - model: model object for generating features; must provide
             `generate_node_embeddings(x, edge_index)`
    - dataset_name: name of the dataset, forwarded to `load_data`
    Returns:
    - pd.DataFrame: A data frame with one row per validation node that has
                    'dimension 1', 'dimension 2', and 'labels' as columns
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    data = load_data(device, dataset_name)

    model.eval()
    x = data.x.to(device)  # node features
    edge_index = data.edge_index.to(device)  # graph structure

    # get node embeddings for every node of the graph
    with torch.no_grad():  # disable gradient calculation
        node_embeddings = model.generate_node_embeddings(x, edge_index)  # (X, A)

    # use validation set for visualisation: keep only the validation rows
    # NOTE(review): assumes data.val_mask is a 1-D boolean mask with one entry
    # per node and that node_embeddings has one row per node -- confirm
    val_mask = data.val_mask.cpu().bool()
    # need to copy Tensors to host memory before converting to numpy
    val_embeddings = node_embeddings.cpu()[val_mask].numpy()
    val_labels = data.y.cpu()[val_mask].numpy()  # target labels

    # dim-reduction of node embeddings with T-SNE (to 2D);
    # fixed random_state keeps the layout reproducible between runs
    tsne = TSNE(n_components=2, random_state=123)
    reduced_embeddings = tsne.fit_transform(val_embeddings)

    # format result df: embeddings and labels share the same row order
    # because both were indexed with the same mask
    reduced_df = pd.DataFrame(reduced_embeddings,
                              columns=['dimension 1', 'dimension 2'])
    reduced_df['labels'] = val_labels

    return reduced_df

### Visualisation with scatter plots

In [2]:
def visualise_one(model_name, dataset_name, df) -> None:
    """
    Visualises node embeddings as a scatter plot for one model, saves the
    figure as a JPEG and then displays it.

    Args:
    - model_name: name used in the plot title and in the output file name
    - dataset_name: name of the sub-folder of OUTPUT_DIR the figure is saved to
    - df: data frame with 'dimension 1', 'dimension 2' and 'labels' columns
    """
    import os  # local import: only needed to prepare the output folder

    fig, ax = plt.subplots(figsize=(10, 8))
    scatter = ax.scatter(df['dimension 1'], df['dimension 2'], c=df['labels'],
                         cmap='tab10', s=10)
    plt.colorbar(scatter, ax=ax, label='class')  # color bar for classes

    ax.set_title(f"Visualization of node embeddings from {model_name}",
                 fontsize=16)
    ax.set_xlabel("dimension 1", fontsize=14)
    ax.set_ylabel("dimension 2", fontsize=14)

    # savefig does not create missing folders, so make sure the target exists
    out_path = f'{OUTPUT_DIR}{dataset_name}/{model_name}_vis.jpg'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # save before showing so the file is written regardless of what the
    # active backend does with the figure once it has been displayed
    fig.savefig(out_path, bbox_inches='tight')

    # show the plot
    plt.show()

In [3]:
def visualise_dimensions(models, params, layers=range(2,21,6)):
    """
    Visualises dimensionality-reduced node embeddings generated by the given models.
    - models: dictionary of (number of layers : trained model) pairs
    - params: training parameters; 'model_name' and 'dataset' are read from it
    - layers: layer counts to visualise; each must be a key of `models`
    """
    model_name = params['model_name']
    dataset_name = params['dataset']

    # dimension_reduction expects the dataset name, not the whole params dict
    feature_dict = {
        f"{n}_layer_{model_name}": dimension_reduction(models[n], dataset_name)
        for n in layers
    }

    # one scatter plot (and one saved figure) per layer count
    for model, df in feature_dict.items():
        visualise_one(model, dataset_name, df)