In [3]:
import random

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch_geometric.data import Data
from torch_geometric.nn.models import LightGCN

In [35]:
# 1. CSV 파일 불러오기
mangoplate_df_cb = pd.read_csv('./sample_data/MangoPlate_CB.csv')
mangoplate_df_cf = pd.read_csv('./sample_data/MangoPlate_CF.csv')

In [36]:
# 2. User-Item Matrix 만들기
user_list = list(set(mangoplate_df_cf['user_name'].tolist()))
item_list = list(set(mangoplate_df_cf['res_name'].tolist()))

user2idx = {k: v for v, k in enumerate(user_list)}
item2idx = {k: v for v, k in enumerate(item_list)}

idx2user = {k: v for k, v in enumerate(user_list)}
idx2item = {k: v for k, v in enumerate(item_list)}

data = mangoplate_df_cf["rating"].tolist()
row = mangoplate_df_cf["user_name"].apply(lambda x: user2idx.get(x))
col = mangoplate_df_cf["res_name"].apply(lambda x: item2idx.get(x))

matrix = csr_matrix((data, (row, col)))
matrix

<603x100 sparse matrix of type '<class 'numpy.longlong'>'
	with 1588 stored elements in Compressed Sparse Row format>

In [37]:
# 3. Item 초기 임베딩 계산하기
mangoplate_df_cb.drop(columns=['name'], inplace=True)

address_one_hot = pd.get_dummies(mangoplate_df_cb['address'])
mangoplate_df_cb.drop(columns=['address'], inplace=True)
mangoplate_df_cb = pd.concat([mangoplate_df_cb, address_one_hot], axis=1)

# category, main_menu 정보는 어떻게 벡터화할 지 나중에 생각 - 일단 삭제 
mangoplate_df_cb.drop(columns=['category', 'main_mn'], inplace=True)

# 각 attribute별 scale 차이가 너무 심함 - normalize
scaler = MinMaxScaler()
mangoplate_cb_data = scaler.fit_transform(mangoplate_df_cb)

for i in range(10):
    print(f"Vector for Restaurant {idx2item.get(i)}:\t{mangoplate_cb_data[i]}")

Vector for Restaurant 갓포호산:	[0.11333333 0.28571429 0.04659498 0.         0.         0.
 1.        ]
Vector for Restaurant 솔밭묵집:	[0.02533333 0.28571429 0.20071685 0.         0.         1.
 0.        ]
Vector for Restaurant 우츠:	[0.02       0.57142857 0.25448029 0.         0.         0.
 1.        ]
Vector for Restaurant 성심당(대전역점):	[0.         0.14285714 0.13978495 0.         1.         0.
 0.        ]
Vector for Restaurant 동은성:	[0.06666667 0.14285714 0.2437276  0.         0.         1.
 0.        ]
Vector for Restaurant 소수:	[0.10666667 0.42857143 0.03584229 0.         0.         0.
 1.        ]
Vector for Restaurant 풍뉴가:	[0.        0.        0.1218638 0.        0.        1.        0.       ]
Vector for Restaurant 오리진:	[0.         0.         0.06810036 0.         0.         1.
 0.        ]
Vector for Restaurant 태평소국밥(본관):	[0.18666667 0.14285714 0.03584229 1.         0.         0.
 0.        ]
Vector for Restaurant 하프레스트:	[0.         0.         0.01433692 1.         0.         0.
 0.      

In [38]:
def negative_sampling (matrix, num_samples):
    matrix = matrix.todense()
    n_users = matrix.shape[0]
    n_items = matrix.shape[1]

    edge_index = []
    while (len(edge_index) != num_samples):
        user = random.randint(0, n_users-1)
        item = random.randint(0, n_items-1)
        if matrix[user, item] == 0:
            edge_index.append([user, item + n_users])

    return torch.LongTensor(edge_index).T

In [41]:
# 4. LightGCN 모델 학습

n_users = matrix.shape[0]
n_items = matrix.shape[1]
lightgcn = LightGCN(n_users + n_items, embedding_dim=mangoplate_cb_data.shape[1], num_layers=3)

# Graph 초기 임베딩으로 Content-Based Filtering에서 사용하던 Feature 사용
with torch.no_grad():
    lightgcn.embedding.weight[n_users:] = torch.FloatTensor(mangoplate_cb_data)

# User-Item Matrix를 가지고 User, Item 임베딩 학습
pos_edge_index = []
pos_edge_index.append(row)
pos_edge_index.append([i + n_users for i in col])
pos_edge_index = torch.LongTensor(pos_edge_index)

optimizer = optim.Adam(lightgcn.parameters(), lr=1e-3)
for _ in range(10):
    pos_rank = lightgcn(pos_edge_index)

    neg_edge_index = negative_sampling(matrix, pos_edge_index.shape[1])
    neg_rank = lightgcn(neg_edge_index)

    optimizer.zero_grad()
    loss = lightgcn.recommendation_loss(pos_rank, neg_rank)
    loss.backward()
    optimizer.step()

In [55]:
# 5. 추천 예시
src_index = torch.from_numpy(np.arange(n_users))
dst_index = torch.from_numpy(np.arange(n_users, n_users + n_items))
recommend_items_idx = lightgcn.recommend(pos_edge_index, src_index, dst_index, k=5)

for i in range(3):
    user_idx = random.randint(0, len(user2idx))
    print(f"Recommendation for User {idx2user[user_idx]}:")
    
    print(f"We can recommend following restaurants to user {idx2user[user_idx]}:")
    
    for item_idx in recommend_items_idx[user_idx]:
        print(f"\t{idx2item[item_idx.item() - n_users]}") 

Recommendation for User 별이:
We can recommend following restaurants to user 별이:
	가또블루
	궁손칼국수
	다알리
	한영식당
	성심당(대전역점)
Recommendation for User 좋아의로컬푸드:
We can recommend following restaurants to user 좋아의로컬푸드:
	한우곰탕
	대선칼국수
	김화식당
	태평소국밥(유성점별관)
	갓포호산
Recommendation for User Micky희섭:
We can recommend following restaurants to user Micky희섭:
	능두네
	순정닭발
	광세족발
	인트라던
	무이
