In [1]:
import sys

sys.path.append("..")
import argparse
import warnings
from logging import getLogger

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.tensorboard import SummaryWriter
from mpl_toolkits.mplot3d import Axes3D
from recbole.utils import init_seed, set_color
from sklearn.decomposition import PCA

from config.configuration import Config
from data.dataset import GeneralDataset, GeneralGraphDataset
from data.utils import data_reparation
from models.embedding import (EmbeddingHelper, EmbeddingModel, EmbeddingType,
                              TemplateType)
from trainer import Trainer
from utils.logger import init_logger
from utils.utils import get_flops, get_model


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# TensorBoard writer shared by the cells below; event files go to ./embeddings.
writer = SummaryWriter(log_dir="./embeddings")

In [3]:

# Run options are declared argparse-style so the defaults live in one place.
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", "-d", type=str, default="wsdream-tp", help="name of datasets")
parser.add_argument("--model", "-m", type=str, default="XXX", help="name of models")

# parse_known_args() returns (namespace, leftover_argv); unrecognised argv
# entries (e.g. anything the notebook kernel was launched with) are ignored
# instead of aborting the cell.
args, _ = parser.parse_known_args()

# Build the project configuration, then the dataset and its train/test split.
config = Config(dataset=args.dataset, model=args.model)
dataset = GeneralGraphDataset(config)
train_data, test_data = data_reparation(config, dataset)


In [4]:
# Inspect the per-user "country" feature. The displayed value is an integer
# tensor (presumably label-encoded country ids -- TODO confirm against the
# dataset loader).
dataset.user_feat["country"]

tensor([14, 14, 12, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 68, 30, 30, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 61, 61, 14, 19, 19, 14,
        14, 14, 14, 30, 30, 18, 19, 19, 18, 34, 16, 19, 19, 19, 19, 20, 20, 26,
        26, 18, 18, 14, 14, 14, 14, 19, 19, 19, 14, 14, 14, 14, 56, 56, 14, 14,
        14, 14, 58, 58, 12, 12, 68, 68, 19, 19, 19, 19, 24, 24, 74, 74, 74, 14,
        14, 14, 14, 37, 37, 37, 19, 19, 14, 34, 14, 14, 26, 26, 14, 14, 19, 19,
        68, 68, 56, 56, 19, 19, 19, 19, 19, 19, 10, 10, 46, 46, 46, 46, 46, 46,
        14, 14, 14, 14, 14, 19, 19, 14, 14, 14, 14, 14, 19, 19, 19, 19, 30, 30,
        30, 30, 26, 26, 47, 14, 10, 10,  1, 34, 34, 14, 10, 10, 44, 44, 12, 12,
        14, 14, 56, 56, 14, 14, 44, 44, 14, 14, 55, 14, 14, 34, 12, 12, 12, 12,
        49, 49, 14, 14, 14, 14, 14, 14, 

In [6]:
def get_pretrained_embedding(dataset, template_type: "TemplateType",
                             num_users=339, num_items=5825, embedding_dim=384):
    """Return randomly initialised user and item embedding tables.

    Despite its name, this variant does not run any text-embedding model: it
    only creates two fresh ``torch.nn.Embedding`` layers and returns their
    weight matrices, which makes it usable as an untrained baseline.

    Args:
        dataset: Accepted for call-compatibility; not read by this variant.
        template_type: Accepted for call-compatibility; not read by this
            variant.
        num_users: Number of rows in the user table (default 339).
        num_items: Number of rows in the item table (default 5825).
        embedding_dim: Width of every embedding vector (default 384).

    Returns:
        Tuple ``(user_embedding, item_embedding)`` of ``nn.Parameter`` objects
        with shapes ``(num_users, embedding_dim)`` and
        ``(num_items, embedding_dim)``.

    Note:
        Values come from torch's default ``nn.Embedding`` initialisation and
        therefore differ between runs unless the torch RNG is seeded first.
    """
    user_embedding = torch.nn.Embedding(
        num_embeddings=num_users, embedding_dim=embedding_dim).weight
    item_embedding = torch.nn.Embedding(
        num_embeddings=num_items, embedding_dim=embedding_dim).weight
    return user_embedding, item_embedding

u_embedding, i_embedding = get_pretrained_embedding(
    train_data.dataset, TemplateType.IMPROVED
)

# Label every point with its row index in the embedding table.
uids = [*range(len(u_embedding))]
iids = [*range(len(i_embedding))]

# Write both tables to the TensorBoard log under the "DEFAULT" tags.
writer.add_embedding(
    u_embedding, metadata=uids, tag="User Embeddings - DEFAULT"
)
writer.add_embedding(
    i_embedding, metadata=iids, tag="Item Embeddings - DEFAULT"
)


In [8]:
def get_pretrained_embedding(dataset, template_type:TemplateType):
    """Encode users and items with ``EmbeddingModel.INSTRUCTOR_BGE_SMALL``.

    For every id in ``dataset.uids_in_inter_feat`` / ``dataset.iids_in_inter_feat``
    the matching interaction data is fetched with ``dataset.inter_data_by_type``
    and handed to ``EmbeddingHelper.fit`` together with ``template_type``.
    Results are not persisted by the helper (``auto_save=False``).

    Args:
        dataset: Object exposing ``uids_in_inter_feat``, ``iids_in_inter_feat``
            and ``inter_data_by_type(kind, id)``.
        template_type: ``TemplateType`` forwarded unchanged to ``fit``.

    Returns:
        Tuple ``(user_embedding, item_embedding)`` of ``torch.Tensor`` built
        from whatever ``fit`` returns (presumably one row per id -- verify
        against ``EmbeddingHelper``).
    """
    helper = EmbeddingHelper()

    # Gather the raw interaction data keyed by id, users first, then items.
    invocations_by_user = {
        uid: dataset.inter_data_by_type("user", uid)
        for uid in dataset.uids_in_inter_feat
    }
    invocations_by_item = {
        iid: dataset.inter_data_by_type("item", iid)
        for iid in dataset.iids_in_inter_feat
    }

    def _encode(kind, invocations):
        # One fit() call per entity kind; output is wrapped in a torch.Tensor.
        vectors = helper.fit(kind, template_type,
                             EmbeddingModel.INSTRUCTOR_BGE_SMALL,
                             invocations=invocations, auto_save=False)
        return torch.Tensor(vectors)

    user_embedding = _encode(EmbeddingType.USER, invocations_by_user)
    item_embedding = _encode(EmbeddingType.ITEM, invocations_by_item)
    return user_embedding, item_embedding

u_embedding, i_embedding = get_pretrained_embedding(train_data.dataset, TemplateType.STATIC)

# Label each point with its "country" feature value so the projector can
# colour by country. `.tolist()` yields plain Python ints; `list(tensor)`
# would yield 0-dim tensors, and add_embedding stringifies every metadata
# entry, producing labels such as "tensor(14)" instead of "14".
# NOTE(review): labels come from `dataset` while the embeddings come from
# `train_data.dataset`; this assumes both use the same row order -- confirm.
uids = dataset.user_feat["country"].tolist()
iids = dataset.item_feat["country"].tolist()

writer.add_embedding(u_embedding, metadata=uids, tag="User Embeddings - STATIC_country")
writer.add_embedding(i_embedding, metadata=iids, tag="Item Embeddings - STATIC_country")


load INSTRUCTOR_Transformer
max_seq_length  512
339
load INSTRUCTOR_Transformer
max_seq_length  512
5825
