In [1]:
import pandas as pd
import torch
import numpy as np

import dgl
import dgl.function as fn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import model
from model import NGCF
from utility.parser import parse_args

import utility.metrics as metrics
from utility.load_data import *
from utility.batch_test import *
# Parse the run configuration (later cells read args.lr, args.batch_size and args.epoch).
args = parse_args()


# GPU selection is intentionally disabled below; the notebook is pinned to the CPU.
# Re-enable this block to honour args.gpu when CUDA is available.
# if args.gpu >= 0 and torch.cuda.is_available():
#     device = "cuda:{}".format(args.gpu)
# else:
#     device = "cpu"

# Device string passed to `.to(device)` when the models are built.
device = "cpu"

def add_new_user(model, g, data_generator, user_id, interacted_items):
    """Fine-tune ``model`` on a new user's interactions and return recommendations.

    The new user's positive items are paired with randomly drawn negative
    items (items the user has not interacted with) and the model is optimised
    with its BPR loss for ``args.epoch`` epochs.

    Args:
        model: Model called as ``model(g, "user", "item", users, pos, neg)``
            that also exposes ``create_bpr_loss`` and ``parameters``.
        g: Graph used for message passing and evaluation. It must already
            contain the node ``user_id`` and its user-item edges.
        data_generator: Object whose ``train_items`` and ``test_set`` mappings
            are updated in place for the new user.
        user_id: Index of the new user node.
        interacted_items: Item ids the new user interacted with.

    Returns:
        The ``"recommendations"`` entry of the result returned by ``test``.

    Raises:
        ValueError: If ``interacted_items`` is empty, or if it covers every
            item so that no negative item can be drawn.
    """
    interacted_items = list(interacted_items)
    if not interacted_items:
        raise ValueError("interacted_items must contain at least one item")

    # Use the caller-supplied id; a hard-coded index silently ignores the
    # argument and can point past the last user node of the graph.
    user_idx = user_id
    data_generator.train_items[user_idx] = interacted_items
    data_generator.test_set[user_idx] = []  # New user has no test interactions

    # BPR compares a positive against a negative item, so negatives are
    # required: draw them from the items the user has not interacted with.
    n_items = g.num_nodes("item")
    negative_pool = np.setdiff1d(np.arange(n_items), np.asarray(interacted_items))
    if negative_pool.size == 0:
        raise ValueError("no negative items available: the user interacted with every item")

    # Integer (long) tensors are required to index the embedding tables.
    users = torch.tensor([user_idx] * len(interacted_items), dtype=torch.long)
    pos_items = torch.tensor(interacted_items, dtype=torch.long)

    # NOTE(review): assumes the model's embedding tables already have a row
    # for user_idx — confirm the model was built from the extended graph.
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # Ceiling division avoids an empty trailing batch when the number of
    # interactions is an exact multiple of the batch size.
    n_batch = (len(interacted_items) + args.batch_size - 1) // args.batch_size

    was_training = model.training
    model.train()  # enable training-mode behaviour (e.g. dropout) while fine-tuning
    try:
        for _ in range(args.epoch):
            for idx in range(n_batch):
                batch_start = idx * args.batch_size
                batch_end = batch_start + args.batch_size
                batch_users = users[batch_start:batch_end]
                batch_pos_items = pos_items[batch_start:batch_end]
                batch_neg_items = torch.as_tensor(
                    np.random.choice(negative_pool, size=len(batch_pos_items)),
                    dtype=torch.long,
                )

                u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings = model(
                    g, "user", "item", batch_users, batch_pos_items, batch_neg_items
                )

                batch_loss, _, _ = model.create_bpr_loss(
                    u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings
                )
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()
    finally:
        # Leave the model in the mode the caller handed it over in.
        model.train(was_training)

    # Get recommendations for the new user
    users_to_test = [user_idx]
    ret = test(model, g, users_to_test)
    # NOTE(review): assumes `test` returns a mapping with a "recommendations"
    # key — confirm against utility.batch_test.
    recommendations = ret["recommendations"]

    return recommendations

# Rebuild the network with the same constructor arguments used elsewhere in
# this notebook, then restore the trained weights from disk.
# NOTE(review): the positional arguments are presumably embedding size, layer
# sizes, per-layer dropout and regularisation weight — confirm against model.NGCF.
ngcf_model = NGCF(
    data_generator.g, 64, [64,64,64], [0.1,0.1,0.1], [1e-5]
).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU still loads now that the notebook is pinned to the CPU.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
ngcf_model.load_state_dict(torch.load('NGCF.pkl', map_location=device))

# Switch to inference mode; as the last expression this also displays the model.
ngcf_model.eval()

# user_item_src=[]
# user_item_dst=[]

# for i in range(3):
#     user_item_src.append(6040)
#     user_item_dst.append(4)

# user_selfs = list(range(6041))
# item_selfs = list(range(3706))
# data_dict = {
#     ("user", "user_self", "user"): (user_selfs, user_selfs),
#     ("item", "item_self", "item"): (item_selfs, item_selfs),
#     ("user", "ui", "item"): (user_item_src, user_item_dst),
#     ("item", "iu", "user"): (user_item_dst, user_item_src),
# }
# # num_dict = {"user": self.n_users, "item": self.n_items}
# num_dict = {"user": 6041, "item": 3706}

# data_generator.g = dgl.heterograph(data_dict, num_nodes_dict=num_dict)




# new_user_id = 6040  # For example, define the ID of the new user.
# new_user_interactions = [3, 4, 10]

# recommendations = add_new_user(ngcf_model, data_generator.g, data_generator, new_user_id, new_user_interactions)
# print("Recommended items for the new user:")
# for item_id in recommendations:
#     print("Item ID:", item_id)


    

ModuleNotFoundError: No module named 'pytorch_models'

In [3]:
def sample(self):
    """Draw one batch of (user, positive item, negative item) triples.

    Reads ``batch_size``, ``n_users``, ``exist_users``, ``train_items`` and
    ``n_items`` from ``self``. One positive and one negative item are drawn
    for every sampled user.

    Returns:
        A tuple ``(users, pos_items, neg_items)`` of equally long lists.
    """
    # Use rd.sample when the batch fits within the user count; otherwise
    # pick users one at a time with rd.choice.
    users = (
        rd.sample(self.exist_users, self.batch_size)
        if self.batch_size <= self.n_users
        else [rd.choice(self.exist_users) for _ in range(self.batch_size)]
    )

    def sample_pos_items_for_u(u, num):
        # Pick `num` distinct items out of user u's training items.
        candidates = self.train_items[u]
        n_candidates = len(candidates)
        chosen = []
        while len(chosen) != num:
            position = np.random.randint(low=0, high=n_candidates, size=1)[0]
            drawn = candidates[position]
            if drawn in chosen:
                continue
            chosen.append(drawn)
        return chosen

    def sample_neg_items_for_u(u, num):
        # Pick `num` distinct item ids that are not among user u's training items.
        chosen = []
        while len(chosen) != num:
            drawn = np.random.randint(low=0, high=self.n_items, size=1)[0]
            if drawn in self.train_items[u] or drawn in chosen:
                continue
            chosen.append(drawn)
        return chosen

    pos_items, neg_items = [], []
    for user in users:
        pos_items.extend(sample_pos_items_for_u(user, 1))
        neg_items.extend(sample_neg_items_for_u(user, 1))

    return users, pos_items, neg_items




In [4]:
data_generator.g

Graph(num_nodes={'item': 3706, 'user': 6040},
      num_edges={('item', 'item_self', 'item'): 3706, ('item', 'iu', 'user'): 800167, ('user', 'ui', 'item'): 800167, ('user', 'user_self', 'user'): 6040},
      metagraph=[('item', 'item', 'item_self'), ('item', 'user', 'iu'), ('user', 'item', 'ui'), ('user', 'user', 'user_self')])

In [5]:
# Register items 4-7 as the training interactions of user index 6040.
# The graph displayed above reports 6040 user nodes, so 6040 is the first unused index.
data_generator.train_items[6040]=[4,5,6,7]

In [5]:
# Define the edges that connect the new user to the items they interacted with.
new_user_node = 6040
new_user_item_dst = [4, 5, 6, 7]  # ids of the 'item' nodes linked to the new user

# One source entry per destination, so both lists pair up as (user, item) edges.
new_user_item_src = [new_user_node for _ in new_user_item_dst]



# # Add the new connections to data_dict
# data_generator.data_dict[("user", "ui", "item")] = (data_generator.user_item_src + new_user_item_src, data_generator.user_item_dst + new_user_item_dst)
# data_generator.data_dict[("item", "iu", "user")] = (data_generator.user_item_dst + new_user_item_dst, data_generator.user_item_src + new_user_item_src)
# data_generator.data_dict[("user", "user_self", "user")] = 

#             ("item", "item_self", "item"): (item_selfs, item_selfs),

In [6]:
data_generator.g

Graph(num_nodes={'item': 3706, 'user': 6040},
      num_edges={('item', 'item_self', 'item'): 3706, ('item', 'iu', 'user'): 800167, ('user', 'ui', 'item'): 800167, ('user', 'user_self', 'user'): 6040},
      metagraph=[('item', 'item', 'item_self'), ('item', 'user', 'iu'), ('user', 'item', 'ui'), ('user', 'user', 'user_self')])

In [7]:
# Node ids used as both endpoints of the self-loop edges when the graph is
# rebuilt: 6041 users (the 6040 already in the graph plus the new one) and 3706 items.
user_selfs = [user_id for user_id in range(6041)]
item_selfs = [item_id for item_id in range(3706)]

In [8]:
# Edge lists for the rebuilt graph, keyed by (source type, relation, destination type).
data_dict = dict()

# Self-loops on every user and item node.
data_dict[("user", "user_self", "user")] = (user_selfs, user_selfs)
data_dict[("item", "item_self", "item")] = (item_selfs, item_selfs)

# Existing user->item edges followed by the new user's edges ...
data_dict[("user", "ui", "item")] = (
    data_generator.user_item_src + new_user_item_src,
    data_generator.user_item_dst + new_user_item_dst,
)
# ... and the same edges reversed for the item->user relation.
data_dict[("item", "iu", "user")] = (
    data_generator.user_item_dst + new_user_item_dst,
    data_generator.user_item_src + new_user_item_src,
)

In [9]:
# Node counts per type; the user count includes the newly added user.
num_dict = dict(user=6041, item=3706)

# Replace the data generator's graph with the one containing the new user.
data_generator.g = dgl.heterograph(data_dict, num_nodes_dict=num_dict)

In [10]:
data_generator.g

Graph(num_nodes={'item': 3706, 'user': 6041},
      num_edges={('item', 'item_self', 'item'): 3706, ('item', 'iu', 'user'): 800171, ('user', 'ui', 'item'): 800171, ('user', 'user_self', 'user'): 6041},
      metagraph=[('item', 'item', 'item_self'), ('item', 'user', 'iu'), ('user', 'item', 'ui'), ('user', 'user', 'user_self')])

In [11]:
def sample(users=None, pos_pool=None, n_items=3706):
    """Draw one (user, positive item, negative item) triple per new-user edge.

    Note: this notebook also defines ``sample(self)`` in an earlier cell;
    running this cell replaces that definition.

    Args:
        users: User ids to sample for. Defaults to the notebook-level
            ``new_user_item_src``.
        pos_pool: Items the new user interacted with. Defaults to the
            notebook-level ``new_user_item_dst``.
        n_items: Number of items; valid item ids are ``0 .. n_items - 1``.
            Defaults to 3706, the item count of the graph built above.

    Returns:
        A tuple ``(users, pos_items, neg_items)`` where ``users`` is the list
        that was passed in (or the default) and the other two are new lists
        of the same length.

    Raises:
        ValueError: If no positive item or no negative item can be drawn.
    """
    if users is None:
        users = new_user_item_src
    if pos_pool is None:
        pos_pool = new_user_item_dst

    interacted = set(pos_pool)
    # Count the ids in [0, n_items) that remain available as negatives.
    n_negative_candidates = n_items - sum(1 for item in interacted if 0 <= item < n_items)

    def sample_pos_items_for_u(u, num):
        # sample num distinct pos items for u-th user
        if num > len(interacted):
            raise ValueError("not enough positive items to sample from")
        n_pos_items = len(pos_pool)
        pos_batch = []
        while len(pos_batch) < num:
            pos_id = np.random.randint(low=0, high=n_pos_items, size=1)[0]
            pos_i_id = pos_pool[pos_id]
            if pos_i_id not in pos_batch:
                pos_batch.append(pos_i_id)
        return pos_batch

    def sample_neg_items_for_u(u, num):
        # sample num distinct neg items for u-th user
        if num > n_negative_candidates:
            # Without this guard the rejection loop below would never finish.
            raise ValueError("not enough negative items to sample from")
        neg_items = []
        while len(neg_items) < num:
            # `high` is exclusive, so ids stay within 0 .. n_items - 1.
            neg_id = np.random.randint(low=0, high=n_items, size=1)[0]
            if neg_id not in interacted and neg_id not in neg_items:
                neg_items.append(neg_id)
        return neg_items

    pos_items, neg_items = [], []
    for u in users:
        pos_items += sample_pos_items_for_u(u, 1)
        neg_items += sample_neg_items_for_u(u, 1)

    return users, pos_items, neg_items


In [13]:
# Build a new, untrained NGCF on the rebuilt graph (no checkpoint is loaded here).
# NOTE(review): the name `model` rebinds the `model` module imported in the
# first cell; later cells rely on this name, so it is kept.
model = NGCF(data_generator.g, 64, [64] * 3, [0.1] * 3, [1e-5])
model = model.to(device)

# Adam over all model parameters, with the learning rate taken from the parsed arguments.
optimizer = optim.Adam(model.parameters(), lr=args.lr)

In [30]:
# Single forward pass returning the user, positive-item and negative-item embeddings.
# NOTE(review): `users`, `pos_items` and `neg_items` are not defined in this
# cell; they must already exist in the kernel (e.g. from the training loop),
# so this cell does not survive Restart & Run All in its current position.
(u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings) = model(
    data_generator.g,
    "user",
    "item",
    users,
    pos_items,
    neg_items,
)

In [18]:
# Train for args.epoch epochs, one sampled batch per epoch.
# NOTE(review): the recorded run of this cell ended with
# "TypeError: only integer tensors of a single element can be converted to an
# index"; the cause is not visible from this cell — check the types of the ids
# returned by data_generator.sample().
for epoch in range(args.epoch):

    # Per-epoch running totals, kept as plain Python floats.
    loss, mf_loss, emb_loss = 0.0, 0.0, 0.0
    users, pos_items, neg_items = data_generator.sample()
    u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings = model(
        data_generator.g, "user", "item", users, pos_items, neg_items
    )
    batch_loss, batch_mf_loss, batch_emb_loss = model.create_bpr_loss(
        u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings
    )
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    # float() drops the autograd history so the totals do not keep the
    # computation graph alive; all three components are tracked, not just
    # the combined loss.
    loss += float(batch_loss)
    mf_loss += float(batch_mf_loss)
    emb_loss += float(batch_emb_loss)


TypeError: only integer tensors of a single element can be converted to an index

In [20]:
u_g_embeddings.shape[0]

1024

In [31]:
# Query the user/item counts tracked by data_generator.
# NOTE(review): the recorded output shows num_users == 6040 although the graph
# was rebuilt with 6041 user nodes, so these counts were not refreshed when
# data_generator.g was replaced — confirm whether they need updating too.
num_users, num_items = data_generator.get_num_users_items()

In [32]:
num_users

6040