In [None]:
# Mount Google Drive at /content/gdrive so files stored there can be read by
# the cells below.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Install project requirements to Colab runtime
from google.colab import drive
drive.mount('/content/gdrive')
! pip3 install -r /content/gdrive/MyDrive/cdt-gnn-returns/asos-gnn-returns-requirements-colab.txt

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch@ https://download.pytorch.org/whl/cu113/torch-1.11.0%2Bcu113-cp37-cp37m-linux_x86_64.whl
  Downloading https://download.pytorch.org/whl/cu113/torch-1.11.0%2Bcu113-cp37-cp37m-linux_x86_64.whl (1637.0 MB)
[K     |████████████████▎               | 834.1 MB 1.2 MB/s eta 0:11:08tcmalloc: large alloc 1147494400 bytes == 0x2fda000 @  0x7f3e4b292615 0x592b76 0x4df71e 0x59afff 0x515655 0x549576 0x593fce 0x548ae9 0x51566f 0x549576 0x593fce 0x548ae9 0x5127f1 0x598e3b 0x511f68 0x598e3b 0x511f68 0x598e3b 0x511f68 0x4bc98a 0x532e76 0x594b72 0x515600 0x549576 0x593fce 0x548ae9 0x5127f1 0x549576 0x593fce 0x5118f8 0x593dd7
[K     |████████████████████▋           | 1055.7 MB 1.1 MB/s eta 0:08:31tcmalloc: large alloc 1434370048 bytes == 0x47630000 @  0x7f3e4b2

In [None]:
from typing import Callable, List, Optional
import os.path as osp
import torch
from torch_geometric.data import (HeteroData, InMemoryDataset, download_url,
                                  extract_zip)

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# To force CPU execution instead: device = "cpu"

class ASOSGnnDataNode2Vec(InMemoryDataset):
    r"""Heterogeneous customer/product purchase graph built from CSV node and
    event tables and stored as a single
    :obj:`torch_geometric.data.HeteroData` object.

    Node types are :obj:`"customer"` and :obj:`"product"`. Every event of the
    training table becomes a :obj:`("customer", "purchases", "product")` edge
    whose :obj:`edge_label` is the event's ``isReturned`` flag; events with a
    true flag are additionally stored as
    :obj:`("customer", "returns", "product")` edges. Both edge types get a
    flipped counterpart (``purchased_by`` / ``returned_by``). Events of the
    testing table are stored the same way under the ``*_test`` edge types.

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.HeteroData` object and returns a
            transformed version. The data object will be transformed before
            every access. (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.HeteroData` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)
        model_name (str, optional): Tag that is embedded in the name of the
            processed file. (default: :obj:`"asos-gnn-model"`)
    """

    # Set to 1 to read the filtered ``gnn_reduced_data`` CSVs instead of the
    # full ``gnn_data`` ones. Any other value selects the full data.
    reduced = 0

    def __init__(self, root, transform: Optional[Callable] = None,
                 pre_transform: Optional[Callable] = None,
                 model_name: Optional[str] = "asos-gnn-model"):
        # model_name must be set before super().__init__ because the base
        # class resolves `processed_file_names` during construction.
        self.model_name = model_name
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self) -> List[str]:
        """Relative paths of the customer, product, training-event and
        testing-event CSV files, in that order."""
        if self.reduced == 1:
            # reduced data
            return [
                osp.join('gnn_reduced_data', 'filterd_customer_nodes_training.csv'),
                osp.join('gnn_reduced_data', 'filterd_product_nodes_training.csv'),
                osp.join('gnn_reduced_data', 'filterd_event_table_training.csv'),
                osp.join('gnn_reduced_data', 'filterd_event_table_testing.csv'),
            ]
        # full data
        return [
            osp.join('gnn_data', 'customer_nodes_training.csv'),
            osp.join('gnn_data', 'product_nodes_training.csv'),
            osp.join('gnn_data', 'event_table_training.csv'),
            osp.join('gnn_data', 'event_table_testing.csv'),
        ]

    @property
    def processed_file_names(self) -> str:
        """Name of the cached processed file; differs between the reduced and
        the full data so the two caches never overwrite each other."""
        if self.reduced == 1:
            return f'data_node2vec_reduced_{self.model_name}.pt'
        return f'data_node2vec_{self.model_name}.pt'

    def download(self):
        """Nothing is downloaded; the raw CSV files must already exist."""
        pass

    @staticmethod
    def _load_event_edges(events_path, df_customers, df_products):
        """Read one event table and convert it to edges.

        Rows with missing values are dropped, and only events whose customer
        and product both appear in the given node tables are kept (inner
        joins on ``hash(customerId)`` and ``variantID``).

        Returns:
            tuple: ``(edge_index, returned)`` where ``edge_index`` stacks the
            ``customer_id`` row on top of the ``product_id`` row and
            ``returned`` is the boolean ``isReturned`` flag of each edge.
        """
        import pandas as pd

        df_events = pd.read_csv(events_path).dropna()
        df_events = df_events.merge(
            df_customers[["hash(customerId)", "customer_id"]],
            on="hash(customerId)", how="inner")
        df_valid_events = df_events.merge(
            df_products[["variantID", "product_id"]],
            on="variantID", how="inner")

        customer_src = torch.tensor(df_valid_events["customer_id"].to_numpy())
        product_dst = torch.tensor(df_valid_events["product_id"].to_numpy())
        edge_index = torch.stack([customer_src, product_dst])

        returned = torch.from_numpy(
            df_valid_events['isReturned'].values).to(torch.bool)
        return edge_index, returned

    def process(self):
        """Build the :obj:`HeteroData` graph from the raw CSV files and save
        it to ``self.processed_paths[0]``."""
        import pandas as pd

        data = HeteroData()

        # Node tables: drop one column and incomplete rows, then number the
        # remaining rows consecutively from 0 independently for each node type.
        df_customers = pd.read_csv(self.raw_paths[0]).drop(
            ["shippingCountry"], axis=1).dropna()
        df_customers.insert(0, "customer_id", range(0, len(df_customers)))

        df_products = pd.read_csv(self.raw_paths[1]).drop(
            ["brandDesc"], axis=1).dropna()
        df_products.insert(0, "product_id", range(0, len(df_products)))

        # Training events, then the events from the test set.
        edge_index, returned = self._load_event_edges(
            self.raw_paths[2], df_customers, df_products)
        edge_index_test, returned_test = self._load_event_edges(
            self.raw_paths[3], df_customers, df_products)

        return_edge_index = edge_index[:, returned]
        return_edge_index_test = edge_index_test[:, returned_test]

        # Node features: every remaining column after moving the numeric id
        # into the index.
        # NOTE(review): this keeps `hash(customerId)` / `variantID` among the
        # feature columns - confirm that this is intended.
        df_customers = df_customers.set_index("customer_id")
        df_products = df_products.set_index("product_id")
        df_products = df_products.drop('productType', axis=1)
        df_products = df_products.astype(float)

        data['customer'].x = torch.from_numpy(df_customers.to_numpy()).to(torch.float)
        data['product'].x = torch.from_numpy(df_products.to_numpy()).to(torch.float)

        # Purchase edges (labelled with the return flag) and their reverse.
        data['customer', 'purchases', 'product'].edge_index = edge_index.to(torch.long)
        data['customer', 'purchases', 'product'].edge_label = returned.to(torch.long)
        data['product', 'purchased_by', 'customer'].edge_index = torch.flip(edge_index.to(torch.long), [0])

        # create purchase edges for test events
        data['customer', 'purchases_test', 'product'].edge_index = edge_index_test.to(torch.long)
        data['customer', 'purchases_test', 'product'].edge_label = returned_test.to(torch.long)
        data['product', 'purchased_by_test', 'customer'].edge_index = torch.flip(edge_index_test.to(torch.long), [0])

        # Return edges: the subset of purchases that were returned.
        data['customer', 'returns', 'product'].edge_index = return_edge_index.to(torch.long)
        data['product', 'returned_by', 'customer'].edge_index = torch.flip(return_edge_index.to(torch.long), [0])

        # create return edges for test events
        data['customer', 'returns_test', 'product'].edge_index = return_edge_index_test.to(torch.long)
        data['product', 'returned_by_test', 'customer'].edge_index = torch.flip(return_edge_index_test.to(torch.long), [0])

        # Node counts are derived from the largest id that occurs in a
        # *training* purchase edge.
        # NOTE(review): this can be smaller than the number of feature rows in
        # `x` (and than ids that only occur in test edges); left unchanged
        # because downstream code indexes embeddings with `node_index`.
        customer_nodes = int(edge_index[0].max() + 1)
        product_nodes = int(edge_index[1].max() + 1)
        data['customer'].num_nodes = customer_nodes
        data['product'].num_nodes = product_nodes

        data['customer'].node_index = torch.arange(0, customer_nodes)
        data['product'].node_index = torch.arange(0, product_nodes)

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])

In [None]:
import os
# Work from the project's src directory so that the relative dataset root
# ("../") used when the dataset is constructed resolves to the project folder.
path = '/content/gdrive/MyDrive/cdt-gnn-returns/src'
os.chdir(path)
print(os.getcwd())

/content/gdrive/MyDrive/cdt-gnn-returns/src


In [None]:
# Constructing the dataset already runs process() when the processed file is
# missing and then loads it, so process() is not called again here: a second
# call would re-read every CSV and rewrite the cache without changing
# `gnnData.data`, which was loaded in the constructor.
gnnData = ASOSGnnDataNode2Vec(root="../")
dataset = gnnData.data
dataset

HeteroData(
  [1mcustomer[0m={
    x=[652518, 30],
    num_nodes=652518,
    node_index=[652518]
  },
  [1mproduct[0m={
    x=[431696, 41],
    num_nodes=431695,
    node_index=[431695]
  },
  [1m(customer, purchases, product)[0m={
    edge_index=[2, 1000000],
    edge_label=[1000000]
  },
  [1m(product, purchased_by, customer)[0m={ edge_index=[2, 1000000] },
  [1m(customer, purchases_test, product)[0m={
    edge_index=[2, 174865],
    edge_label=[174865]
  },
  [1m(product, purchased_by_test, customer)[0m={ edge_index=[2, 174865] },
  [1m(customer, returns, product)[0m={ edge_index=[2, 560813] },
  [1m(product, returned_by, customer)[0m={ edge_index=[2, 560813] },
  [1m(customer, returns_test, product)[0m={ edge_index=[2, 100195] },
  [1m(product, returned_by_test, customer)[0m={ edge_index=[2, 100195] }
)

In [None]:
from torch_geometric.nn import MetaPath2Vec

# Random-walk schema. Each edge type starts at the node type where the
# previous one ended, and the last one leads back to "customer", the node type
# the schema starts from. Only training edge types are listed; the *_test edge
# types are not part of the walks.
metapath = [
     ("customer", "purchases", "product"),
     ("product", "purchased_by", "customer"),
    ("customer", "returns", "product"),
    ("product", "returned_by", "customer")
]

# Embedding model over the dataset's edges; moved to `device` for training.
# sparse=True is paired with the SparseAdam optimizer created further below.
model = MetaPath2Vec(dataset.edge_index_dict,
                     embedding_dim=128,
                     metapath=metapath,
                     walk_length=5,
                     context_size=3,
                     walks_per_node=10,
                     num_negative_samples=1,
                     sparse=True).to(device)

In [None]:
# Data loader provided by the model; each batch is a pair of tensors that the
# training loop unpacks as (pos_rw, neg_rw).
loader = model.loader(batch_size=128, shuffle=True)

In [None]:
# Peek at the first batch produced by the loader.
next(iter(loader))

(tensor([[ 238161,  891341,  426765],
         [ 117735,  834539,  542066],
         [ 326635,  965400,   82611],
         ...,
         [1008420,  624781, 1008420],
         [1084213, 1084213, 1084213],
         [1084213, 1084213, 1084213]]), tensor([[ 238161, 1057643,  206586],
         [ 117735, 1060734,  333777],
         [ 326635,  959335,  542282],
         ...,
         [ 820336,  390538, 1039993],
         [ 934015,  455735, 1002350],
         [ 877296,  628058,  862318]]))

In [None]:
# Initialize the optimizer (SparseAdam, matching the model's sparse=True setting)
optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

In [None]:
def train(epoch, log_steps=500, eval_steps=1000):
    """Run one optimisation pass of the embedding model over ``loader``.

    Relies on the notebook globals ``model``, ``loader``, ``optimizer`` and
    ``device``.

    Args:
        epoch: Epoch number; only used in the progress messages.
        log_steps: Print the mean loss of the last ``log_steps`` batches every
            ``log_steps`` batches. A falsy value disables the messages.
        eval_steps: Accepted for backward compatibility; currently unused
            (evaluation is done once per epoch by the caller).

    Returns:
        float: Mean loss over all batches of the epoch (``0.0`` when the
        loader yields no batch).
    """
    model.train()

    epoch_loss = 0.0   # summed over the whole epoch, for the return value
    window_loss = 0.0  # summed since the last progress message
    num_batches = 0
    for step, (pos_rw, neg_rw) in enumerate(loader, start=1):
        optimizer.zero_grad()
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        epoch_loss += batch_loss
        window_loss += batch_loss
        num_batches = step

        if log_steps and step % log_steps == 0:
            print((f'Epoch: {epoch}, Step: {step:05d}/{len(loader)}, '
                   f'Loss: {window_loss / log_steps:.4f}'))
            window_loss = 0.0

    return epoch_loss / max(num_batches, 1)


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
import numpy as np

# 1. link embeddings
def link_examples_to_features(link_examples, transform_node, binary_operator):
    """Turn (source, destination) pairs into one feature vector per link.

    Args:
        link_examples: Iterable of ``(src, dst)`` pairs.
        transform_node: Indexable object mapping a node id to its embedding.
        binary_operator: Callable combining the two node embeddings.

    Returns:
        list: ``binary_operator(transform_node[src], transform_node[dst])``
        for every pair, in input order.
    """
    features = []
    for source, destination in link_examples:
        source_embedding = transform_node[source]
        destination_embedding = transform_node[destination]
        features.append(binary_operator(source_embedding, destination_embedding))
    return features

def link_prediction_classifier(max_iter = 2000):
    """Create the (unfitted) link classifier pipeline.

    The pipeline standardises the features (step ``"sc"``) and then fits a
    logistic regression (step ``"clf"``) whose regularisation strength is
    chosen by 10-fold cross-validation over 10 candidate values, scored by
    ROC AUC.

    Args:
        max_iter: Iteration limit passed to the lbfgs solver.
    """
    classifier = LogisticRegressionCV(
        Cs=10,
        cv=10,
        scoring="roc_auc",
        max_iter=max_iter,
        solver="lbfgs",
    )
    pipeline_steps = [("sc", StandardScaler()), ("clf", classifier)]
    return Pipeline(steps=pipeline_steps)

# 2. training classifier
def train_link_prediction_model(link_examples, link_labels, get_embedding, binary_operator):
    """Fit a fresh link classifier on the given labelled links.

    The links are first converted to feature vectors by combining the
    embeddings of their two end nodes with ``binary_operator``.

    Returns:
        The fitted classifier pipeline.
    """
    pipeline = link_prediction_classifier()
    training_features = link_examples_to_features(
        link_examples, get_embedding, binary_operator)
    pipeline.fit(training_features, link_labels)
    return pipeline

# 3. and 4. evaluate classifier
def evaluate_link_prediction_model(
    clf, link_examples_test, link_labels_test, get_embedding, binary_operator
):
    """Score a fitted classifier on held-out links.

    The test links are converted to features the same way as for training and
    handed to :func:`evaluate_scores`, whose result is returned.
    """
    test_features = link_examples_to_features(
        link_examples_test, get_embedding, binary_operator)
    return evaluate_scores(clf, test_features, link_labels_test)

def evaluate_scores(clf, link_features, link_labels):
    """Compute classification metrics of ``clf`` on the given link features.

    Returns:
        dict: ``"accuracy"``, ``"f1_score"``, ``"precision"`` and ``"recall"``
        of the predicted labels against ``link_labels``.
    """
    predictions = clf.predict(link_features)

    metric_functions = {
        "accuracy": accuracy_score,
        "f1_score": f1_score,
        "precision": precision_score,
        "recall": recall_score,
    }
    return {
        metric_name: metric(link_labels, predictions)
        for metric_name, metric in metric_functions.items()
    }

def evaluate_roc_auc(clf, link_features, link_labels):
    """Return the ROC AUC of ``clf`` on the given link features."""
    class_probabilities = clf.predict_proba(link_features)

    # The probability column of the positive class (label 1) is looked up in
    # clf.classes_ rather than assumed.
    positive_column = list(clf.classes_).index(1)
    positive_scores = class_probabilities[:, positive_column]
    return roc_auc_score(link_labels, positive_scores)

In [None]:
def operator_l1(u, v):
    """Element-wise absolute difference of two embeddings."""
    difference = u - v
    return np.absolute(difference)

def operator_l2(u, v):
    """Element-wise squared difference of two embeddings."""
    difference = u - v
    return difference ** 2

def run_link_prediction(binary_operator, examples, labels, embedding, examples_test, labels_test):
    """Train a link classifier with one binary operator and evaluate it.

    Args:
        binary_operator: Callable combining two node embeddings into link features.
        examples, labels: Training links and their labels.
        embedding: Node-id to embedding lookup.
        examples_test, labels_test: Held-out links and their labels.

    Returns:
        dict: the fitted ``"classifier"``, the ``"binary_operator"`` that was
        used and the test ``"score"``.
    """
    fitted_clf = train_link_prediction_model(
        examples, labels, embedding, binary_operator)
    test_score = evaluate_link_prediction_model(
        fitted_clf, examples_test, labels_test, embedding, binary_operator)

    result = {}
    result["classifier"] = fitted_clf
    result["binary_operator"] = binary_operator
    result["score"] = test_score
    return result


# Operators available for combining two node embeddings into link features.
binary_operators = [operator_l1, operator_l2]

In [None]:
@torch.no_grad()
def test(train_ratio=0.1):
    """Evaluate the current embeddings on return prediction.

    A link classifier is trained on the ``purchases`` edges (label: was the
    purchase returned) using the L1 difference of the customer and product
    embeddings, and scored on the ``purchases_test`` edges. Relies on the
    notebook globals ``model``, ``dataset`` and ``device``.

    Args:
        train_ratio: Accepted for backward compatibility; currently unused.

    Returns:
        tuple: ``(accuracy, f1_score, precision, recall)`` on the test edges.

    Raises:
        IndexError: if an edge references a node id for which no embedding
            row was extracted.
    """
    model.eval()

    z_customer = model('customer', batch=dataset.node_index_dict['customer'].to(device)).cpu().numpy()
    z_product = model('product', batch=dataset.node_index_dict['product'].to(device)).cpu().numpy()

    # Single lookup table: customer rows first, product rows after them.
    emb_128 = np.concatenate((z_customer, z_product), axis=0)

    # Edge indices number customers and products independently from 0, so the
    # product column has to be shifted past the customer rows; otherwise a
    # product id would select a customer embedding.
    product_row_offset = np.array([0, z_customer.shape[0]])

    def to_embedding_rows(edge_index):
        # The addition allocates a new array, leaving the dataset untouched.
        return edge_index.T.cpu().numpy() + product_row_offset

    examples = to_embedding_rows(dataset.edge_index_dict["customer", "purchases", "product"])
    labels = dataset.edge_label_dict["customer", "purchases", "product"].cpu().numpy()

    examples_test = to_embedding_rows(dataset.edge_index_dict["customer", "purchases_test", "product"])
    labels_test = dataset.edge_label_dict["customer", "purchases_test", "product"].cpu().numpy()

    scores = run_link_prediction(operator_l1, examples, labels, emb_128,
                                 examples_test, labels_test)["score"]

    return scores["accuracy"], scores["f1_score"], scores["precision"], scores["recall"]

In [None]:
# Train for a fixed number of epochs and evaluate after each one.
# Note: every test() call fits a new cross-validated logistic regression on
# all training edges, so evaluation is part of each epoch's cost.
num_epoch = 10
for epoch in range(1, num_epoch+1):
    train(epoch)
    acc, f1, precision, recall = test()
    print(f'Epoch: {epoch}, acc: {100*acc:.2f}%, F1 score: {f1:.3f}, Precision: {precision:.3f}, Recall: {recall:.3f}')

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Epoch: 1, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_L

Epoch: 2, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_L

Epoch: 3, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_L

Epoch: 4, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Epoch: 5, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000
Epoch: 6, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Epoch: 7, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000
Epoch: 8, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Epoch: 9, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000
Epoch: 10, acc: 57.30%, F1 score: 0.729, Precision: 0.573, Recall: 1.000


In [None]:
# Final node embeddings as numpy arrays. The index tensor is moved to the
# model's device before the lookup and the result is copied back to host
# memory before the numpy conversion (as test() does); without both steps this
# cell fails when the model lives on the GPU.
z_customer = model('customer', batch=dataset.node_index_dict['customer'].to(device)).detach().cpu().numpy()
z_product = model('product', batch=dataset.node_index_dict['product'].to(device)).detach().cpu().numpy()

RuntimeError: ignored

In [None]:
import numpy as np

# One embedding table for all nodes: customer rows first, product rows after
# them, so a product's row is its product id plus the number of customer rows.
emb_128 = np.concatenate((z_customer, z_product), axis=0)

In [None]:
examples = dataset.edge_index_dict["customer", "purchases", "product"].T
labels = dataset.edge_label_dict["customer", "purchases", "product"]

# emb_128 holds the customer rows first and the product rows after them, while
# the edge indices number each node type from 0. Shift the product column past
# the customer rows so a product id selects a product embedding, not a
# customer one. The addition creates a new array; `examples` is unchanged.
examples_global = examples.cpu().numpy() + np.array([0, len(z_customer)])

edge_embeddings = link_examples_to_features(examples_global, emb_128, operator_l2)

In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# fit and transform using PCA
pca = PCA(n_components=2)
emb2d = pca.fit_transform(emb_128)

# emb_128 stacks the customer embeddings first and the product embeddings
# after them, so the boundary between the two groups is the number of customer
# rows rather than a fixed constant.
num_customer_rows = len(z_customer)

fig = plt.figure(figsize=(10,8))
ax = fig.gca()
ax.scatter(emb2d[num_customer_rows:,0],emb2d[num_customer_rows:,1], marker="o", alpha=0.2, color="r", label="Product Nodes")
ax.scatter(emb2d[:num_customer_rows,0],emb2d[:num_customer_rows,1], marker="o", alpha=0.2, color="g", label="Customer Nodes")
ax.set_title("PCA Analysis of Node Embeddings")
ax.legend(fontsize=12, frameon=False)
ax.set_xlabel("Component A")
ax.set_ylabel("Component B")
ax.grid(True, alpha=0.1)
plt.show()

In [None]:
# Project the edge embeddings onto their first two principal components.
pca = PCA(n_components=2)
emb2d = pca.fit_transform(edge_embeddings)

# Colour and legend text per edge label value.
cdict = {0: "red", 1: "green"}
labeldict = {0: "Not returned", 1: "Returned"}

fig = plt.figure(figsize=(10,8))
ax = fig.gca()
# One scatter layer per label value so each class gets its own legend entry.
for i in (0, 1):
    ix = np.where(labels == i)
    ax.scatter(
        emb2d[ix,0],
        emb2d[ix,1],
        marker="o",
        alpha=0.2,
        color=cdict[i],
        label=labeldict[i],
    )
ax.set(
    title="PCA Analysis of Edge Embeddings",
    xlabel="Component A",
    ylabel="Component B",
)
ax.legend(fontsize=12, frameon=False)
ax.grid(True, alpha=0.1)
plt.show()

In [None]:
import numpy as np
import umap  # module shipped by the `umap-learn` package
import matplotlib.pyplot as plt

# Fit ONE UMAP model on customers and products together. Two independent fits
# produce unrelated 2-D coordinate systems, so overlaying them on the same
# axes would not show how the two node types relate to each other.
z_all_2d = umap.UMAP().fit_transform(np.concatenate((z_customer, z_product), axis=0))
z_customer_2d = z_all_2d[:len(z_customer)]
z_product_2d = z_all_2d[len(z_customer):]

plt.figure(figsize=(6,6))
plt.scatter(z_customer_2d[:,0],z_customer_2d[:,1],color="red",alpha=0.5,label="Customer Nodes")
plt.scatter(z_product_2d[:,0],z_product_2d[:,1],color="blue",alpha=0.5,label="Product Nodes")
plt.legend()
plt.title("2D embedding")
plt.show()

ModuleNotFoundError: No module named 'umap'