In [1]:
import sys
sys.path.append("..")

import common
from common import FILE_PATH
from common import MODEL_PATH

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from deepctr_torch.inputs import SparseFeat, VarLenSparseFeat, get_feature_names
from deepctr_torch.models import DeepFM
import pickle

%load_ext autoreload
%autoreload 2
import product_refine

st = sns.axes_style("whitegrid")
sns.set_style("ticks", {"xtick.major.size": 8, "ytick.major.size": 8})

plt.rcParams ['font.family'] = 'NanumGothic'

import warnings
warnings.filterwarnings("ignore")

### feather 파일로 저장되어있는 각각의 dataframe을 로드

In [2]:
%%time
# feather 파일로 저장되어있는 각각의 dataframe을 읽어옴

reviews = pd.read_feather (FILE_PATH + 'reviews.ftr', use_threads = True)
users = pd.read_feather (FILE_PATH + 'users.ftr', use_threads = True)
products = pd.read_feather (FILE_PATH + 'products.ftr', use_threads = True)
products_brand_rank = pd.read_feather (FILE_PATH + 'products_brand_rank.ftr', use_threads = True)
product_categories = pd.read_feather (FILE_PATH + 'product_categories.ftr', use_threads = True)

glowpick_before_labeling = pd.read_feather (FILE_PATH + 'glowpick_before_labeling.ftr', use_threads = True)
glowpick = pd.read_feather (FILE_PATH + 'glowpick.ftr', use_threads = True)

# products와 products_brand_rank를 merge한 dataframe
refined_products = pd.read_feather (FILE_PATH + 'refined_products.ftr', use_threads = True)

CPU times: user 8.51 s, sys: 2.43 s, total: 10.9 s
Wall time: 10.7 s


In [3]:
reviews.head ()

Unnamed: 0,contents,created_at,is_evaluation,like_count,rating,review_id,state,user_id,product_id
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,False,0,3,5416271,N,119763,100000
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,False,0,4,5340616,N,338669,100000
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,False,0,3,5228598,N,24862,100000
3,순하고 가격적으로 무난해서 쓰기 괜찮아요오오,2020-01-18T08:03:59Z,False,0,4,5225359,N,1331797,100000
4,성분이 착해서 샀고 타입이 폼이라 짜서 쓰는 젤보다 약간 귀찮지만 거품은 바로 많이...,2020-01-11T07:15:15Z,False,0,4,5211274,N,888968,100000


In [None]:
sparse_features = ["product_id", "user_id", "gender", "age", "skin_type", "price", "brandName", "idThirdCategory", ]
target = ['rating']

from sklearn.preprocessing import LabelEncoder

# 1.Label Encoding for sparse features,and process sequence features
for feat in sparse_features:
    lbe = LabelEncoder()
    glowpick [feat] = lbe.fit_transform (glowpick [feat])
    
glowpick.head (3)

In [4]:
sparse_features = ["product_id", "user_id", "gender", "age", "skin_type", "price", "brandName", "idThirdCategory", ]
target = ['rating']

# 2.count #unique features for each sparse field and generate feature config for sequence feature

fixlen_feature_columns = [SparseFeat (feat, glowpick [feat].nunique(), embedding_dim=4)
                          for feat in sparse_features]
fixlen_feature_columns

[SparseFeat(name='product_id', vocabulary_size=50761, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='product_id', group_name='default_group'),
 SparseFeat(name='user_id', vocabulary_size=74773, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='user_id', group_name='default_group'),
 SparseFeat(name='gender', vocabulary_size=2, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='gender', group_name='default_group'),
 SparseFeat(name='age', vocabulary_size=67, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='age', group_name='default_group'),
 SparseFeat(name='skin_type', vocabulary_size=5, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='skin_type', group_name='default_group'),
 SparseFeat(name='price', vocabulary_size=905, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='price', group_name='default_group'),
 SparseFeat(name='brandName', vocabulary_size=4458, embedding_dim=4, use_hash=False, dtype='i

In [4]:
# linear_feature_columns list 와 dnn_feature_columns list 를 load
with open (FILE_PATH + 'linear_feature_columns_list.pickle', 'rb') as fp:
    linear_feature_columns = pickle.load (fp)
    
with open (FILE_PATH + 'dnn_feature_columns_list.pickle', 'rb') as fp:
    dnn_feature_columns = pickle.load (fp)    

feature_names = get_feature_names (linear_feature_columns + dnn_feature_columns)    

In [5]:
# 3.generate input data for model
train, test = train_test_split (glowpick, test_size = 0.2)
train_model_input = {name: train [name] for name in feature_names}
test_model_input = {name: test [name] for name in feature_names}

### Load Model

In [6]:
device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

cuda ready...


In [7]:
model = DeepFM (linear_feature_columns, dnn_feature_columns, task = 'regression', device = device)
model.load_state_dict (torch.load (MODEL_PATH))
model.eval ()

DeepFM(
  (embedding_dict): ModuleDict(
    (age): Embedding(67, 4)
    (gender): Embedding(2, 4)
    (idThirdCategory): Embedding(286, 4)
    (product_id): Embedding(50761, 4)
    (skin_type): Embedding(5, 4)
    (user_id): Embedding(74773, 4)
  )
  (linear_model): Linear(
    (embedding_dict): ModuleDict(
      (age): Embedding(67, 1)
      (gender): Embedding(2, 1)
      (idThirdCategory): Embedding(286, 1)
      (product_id): Embedding(50761, 1)
      (skin_type): Embedding(5, 1)
      (user_id): Embedding(74773, 1)
    )
  )
  (out): PredictionLayer()
  (fm): FM()
  (dnn): DNN(
    (dropout): Dropout(p=0, inplace=False)
    (linears): ModuleList(
      (0): Linear(in_features=24, out_features=256, bias=True)
      (1): Linear(in_features=256, out_features=128, bias=True)
    )
    (activation_layers): ModuleList(
      (0): ReLU(inplace=True)
      (1): ReLU(inplace=True)
    )
  )
  (dnn_linear): Linear(in_features=128, out_features=1, bias=False)
)

### 평가

In [8]:
# epoch 6
from math import sqrt

pred_ans = model.predict (test_model_input, batch_size = 256)

print("test MSE", round (mean_squared_error (test [target].values, pred_ans), 4))

print ("\ntest RMSE", round (sqrt (mean_squared_error (test [target].values, pred_ans)), 4))

test MSE 0.8722

test RMSE 0.9339


### 실제 유저용 메서드

In [9]:
reviews.head (3)

Unnamed: 0,contents,created_at,is_evaluation,like_count,rating,review_id,state,user_id,product_id
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,False,0,3,5416271,N,119763,100000
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,False,0,4,5340616,N,338669,100000
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,False,0,3,5228598,N,24862,100000


In [10]:
show_cols = ['created_at', 'rating', 'user_id', 'product_id', 'age', 'gender', 'is_closed', 'skin_type', 'productTitle', 'volume', 'price', 'brandName', 'idThirdCategory']

real_method_df = glowpick_before_labeling [show_cols]
real_method_df.head (3)

Unnamed: 0,created_at,rating,user_id,product_id,age,gender,is_closed,skin_type,productTitle,volume,price,brandName,idThirdCategory
0,2020-04-30T02:12:36Z,3,119763,100000,36.0,f,False,복합성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
1,2020-03-15T09:08:20Z,4,338669,100000,38.0,f,False,중성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
2,2020-01-21T01:29:44Z,3,24862,100000,28.0,f,False,복합성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0


#### 특정 유저의 DataFrame 만들기

In [11]:
REAL_USER_ID = '338669'

real_user_df = real_method_df [real_method_df ['user_id'] == REAL_USER_ID]
real_user_df.head (3)

Unnamed: 0,created_at,rating,user_id,product_id,age,gender,is_closed,skin_type,productTitle,volume,price,brandName,idThirdCategory
1,2020-03-15T09:08:20Z,4,338669,100000,38.0,f,False,중성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
106,2020-01-11T03:03:05Z,5,338669,100020,38.0,f,False,중성,로시크숨마 엘릭서 크림 [SPF50+/PA+++],60ml,350000.0,숨37˚ (sum37˚),26.0
133,2019-06-23T07:42:07Z,5,338669,100023,38.0,f,False,중성,로시크숨마 엘릭서 아이크림,25ml,200000.0,숨37˚ (sum37˚),23.0


In [12]:
len (real_user_df)

151

In [13]:
len (refined_products)

86184

In [14]:
real_user_df.head ()

Unnamed: 0,created_at,rating,user_id,product_id,age,gender,is_closed,skin_type,productTitle,volume,price,brandName,idThirdCategory
1,2020-03-15T09:08:20Z,4,338669,100000,38.0,f,False,중성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
106,2020-01-11T03:03:05Z,5,338669,100020,38.0,f,False,중성,로시크숨마 엘릭서 크림 [SPF50+/PA+++],60ml,350000.0,숨37˚ (sum37˚),26.0
133,2019-06-23T07:42:07Z,5,338669,100023,38.0,f,False,중성,로시크숨마 엘릭서 아이크림,25ml,200000.0,숨37˚ (sum37˚),23.0
7653,2020-01-11T03:26:35Z,5,338669,100681,38.0,f,False,중성,자로우 도피러스 이피에스,69.6g,49500.0,자로우포뮬러스 (Jarrow Formulas),269.0
25326,2018-04-01T13:19:53Z,4,338669,15134,38.0,f,False,중성,동백오일 영양 마스크,1ea,1000.0,일리윤 (ILLIYOON),128.0


In [15]:
# 유저가 평가한 상품 id list
print ('평가 상품수: ', real_user_df ['product_id'].nunique ())

predicted_product_list = real_user_df ['product_id'].values.tolist ()
print ('\n', predicted_product_list)

평가 상품수:  151

 ['100000', '100020', '100023', '100681', '15134', '75699', '103981', '104680', '104943', '106324', '106528', '106568', '106890', '107086', '107622', '107921', '108679', '109444', '10970', '10995', '11051', '111259', '21813', '11422', '11430', '11433', '11434', '114779', '81170', '115285', '116527', '116665', '11754', '120167', '12050', '120774', '120980', '121264', '122722', '124321', '97543', '124756', '125801', '90875', '129986', '2496', '1315', '13205', '13904', '13906', '14321', '1495', '15133', '15980', '15996', '16977', '17742', '24728', '18261', '105840', '92530', '20132', '21500', '21557', '23112', '5866', '2359', '23973', '2447', '24555', '24774', '2493', '2494', '2614', '2616', '27695', '2775', '2855', '3143', '3179', '3195', '3270', '3279', '3333', '34075', '35031', '35610', '38508', '3907', '3975', '3984', '3985', '4136', '4195', '4211', '4226', '42468', '42807', '433', '91750', '104039', '48501', '5259', '5334', '540', '5744', '589', '65977', '6767', '6860',

In [16]:
refined_products [refined_products ['product_id'] == '100020']

Unnamed: 0,product_id,productTitle,price,volume,description,ratingAvg,wishCount,reviewCount,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,rank,brand,brandName
73412,100020,로시크숨마 엘릭서 크림 [SPF50+/PA+++],350000.0,60ml,- 피부를 다스리는 순화의 비방 펌 아리쉬타 골드™\r\n- 황제가 누리던 더욱 강...,4.4,6,15,스킨케어,1,4,26,크림,안티에이징크림,81,"{'idBrand': None, 'brandTitle': '숨37˚ (sum37˚)...",숨37˚ (sum37˚)


In [17]:
glowpick_before_labeling ['product_id'].nunique ()

50761

In [18]:
review_product_id_list = glowpick_before_labeling ['product_id'].unique()

In [19]:
len (review_product_id_list)

50761

In [20]:
len (refined_products [refined_products ['product_id'].isin (review_product_id_list)])

50761

In [36]:
# glowpick_before_labeling은 결측치를 삭제했으므로 refined_products 보다 상품수가 적음
# len (glow_product_id_list) : 50761
# len (refined_products) : 86184
# glowpick_before_labeling에 있는 상품만 가져옴
removed_products = refined_products [refined_products ['product_id'].isin (review_product_id_list)]

In [22]:
len (refined_products)

86184

In [27]:
len (removed_products)

50761

In [41]:
removed_products.sort_values ('product_id', axis = 0).head (3)

Unnamed: 0,product_id,productTitle,price,volume,description,ratingAvg,wishCount,reviewCount,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,rank,brand,brandName
76792,100000,티컬렉션 그린티 미셀라 클렌징폼,11000.0,175g,"연약한 피부, 클렌징부터 다르게!\r\n부드러운 거품의 마일드 솝-프리 폼\r\n\...",3.58,2,12,클렌징,7,32,112,페이셜클렌저,클렌징폼,386,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",해피바스 (HAPPY BATH)
10353,100002,모이스트 립 글로스,4900.0,3.6g,미니소의 모이스트 립 글로스 입니다.,4.0,1,1,립메이크업,3,17,54,립글로스,립글로스,79,"{'idBrand': None, 'brandTitle': '미니소 (MINISO)'...",미니소 (MINISO)
73683,100003,크리스탈 4색 아이섀도우 팔레트,4900.0,6g*4,"다양한 색, 다양한 펄감을 팔레트 하나에!",3.33,0,3,아이메이크업,4,22,75,아이섀도우,아이팔레트,608,"{'idBrand': None, 'brandTitle': '미니소 (MINISO)'...",미니소 (MINISO)


In [None]:
100000
99998

In [45]:
removed_products.sort_values ('product_id', ascending = False).head ()

Unnamed: 0,product_id,productTitle,price,volume,description,ratingAvg,wishCount,reviewCount,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,rank,brand,brandName
77611,99998,티컬렉션 그린티 미셀라 클렌징오일,17000.0,200ml,"연약한 피부, 클렌징부터 다르게!\r\n피부엔 가볍고 클렌징은 완벽한 라이트 오일\...",3.17,2,6,클렌징,7,33,118,메이크업클렌저,클렌징오일,358,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",해피바스 (HAPPY BATH)
41662,99997,이드라 24+ 밤 마스크,69000.0,50ml,"피부가 메마를 땐, 수분을 집중할 때!\r\n\r\n산뜻한 젤 타입의 크림 제형이 ...",4.0,0,1,마스크/팩,8,38,133,페이스마스크,슬리핑팩,271,"{'idBrand': None, 'brandTitle': '빠이요 (PAYOT)',...",빠이요 (PAYOT)
74513,99995,사파이어 콜라겐 임팩트 에센셜 마스크,3000.0,25ml,웰더마에서 직접 개발한 탄력부직포가 적용된 마스크\r\n\r\n- 0.1mm 피부가...,4.43,0,7,마스크/팩,8,37,132,시트마스크,안티에이징마스크,71,"{'idBrand': None, 'brandTitle': '웰더마 (WellDerm...",웰더마 (WellDerma)
48449,99994,젤리 페이스 마스크 - 포모,0.0,60g,칼라민 파우더가 피부를 진정시켜주고 로즈 추출물이 깨끗한 피부를 만들어줍니다.,3.78,0,9,마스크/팩,8,38,134,페이스마스크,워시오프팩,203,"{'idBrand': None, 'brandTitle': '러쉬 (LUSH)', '...",러쉬 (LUSH)
60801,99993,시크리테일 누드 일루미네이팅 파우더,15000.0,9g,미세하고 고운 펄의 은은한 광택이 입체적인 얼굴과 바디라인을 연출해주는 멀티 하이라...,5.0,6,1,컨투어링,5,25,81,하이라이터,파우더하이라이터,96,"{'idBrand': None, 'brandTitle': '벨 (BELL)', 'b...",벨 (BELL)


In [46]:
removed_products.sort_values ('product_id', ascending = True).head ()

Unnamed: 0,product_id,productTitle,price,volume,description,ratingAvg,wishCount,reviewCount,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,rank,brand,brandName
76792,100000,티컬렉션 그린티 미셀라 클렌징폼,11000.0,175g,"연약한 피부, 클렌징부터 다르게!\r\n부드러운 거품의 마일드 솝-프리 폼\r\n\...",3.58,2,12,클렌징,7,32,112,페이셜클렌저,클렌징폼,386,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",해피바스 (HAPPY BATH)
10353,100002,모이스트 립 글로스,4900.0,3.6g,미니소의 모이스트 립 글로스 입니다.,4.0,1,1,립메이크업,3,17,54,립글로스,립글로스,79,"{'idBrand': None, 'brandTitle': '미니소 (MINISO)'...",미니소 (MINISO)
73683,100003,크리스탈 4색 아이섀도우 팔레트,4900.0,6g*4,"다양한 색, 다양한 펄감을 팔레트 하나에!",3.33,0,3,아이메이크업,4,22,75,아이섀도우,아이팔레트,608,"{'idBrand': None, 'brandTitle': '미니소 (MINISO)'...",미니소 (MINISO)
64454,100006,퀸 컬렉션 프레스드 파우더,5900.0,12g,하루종일 보송보송 애기피부로 만들어주는 파우더\r\n\r\n* 전성분은 01 라이트...,4.0,1,2,페이스메이크업,2,8,37,피니시파우더,프레스드파우더,107,"{'idBrand': None, 'brandTitle': '미니소 (MINISO)'...",미니소 (MINISO)
7118,100007,올데이 워터프루프 롱래쉬 마스카라,4900.0,8.5g,워터프루프로 하루종일 속눈썹 강력고정!,3.0,1,2,아이메이크업,4,23,77,마스카라,롱래시마스카라,170,"{'idBrand': None, 'brandTitle': '미니소 (MINISO)'...",미니소 (MINISO)


In [30]:
removed_products.describe ()

Unnamed: 0,price,ratingAvg,wishCount,reviewCount,idFirstCategory,idSecondCategory,idThirdCategory,rank
count,50761.0,50761.0,50761.0,50761.0,50761.0,50761.0,50761.0,50761.0
mean,28245.64,3.766675,35.607573,55.540395,6.487953,33.503635,108.864463,184.833415
std,43432.87,0.771975,207.493263,309.493091,4.792143,26.948915,80.067048,247.597573
min,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
25%,9000.0,3.33,1.0,2.0,2.0,7.0,35.0,8.0
50%,18000.0,3.91,3.0,6.0,6.0,31.0,109.0,83.0
75%,33000.0,4.25,12.0,21.0,10.0,55.0,165.0,266.0
max,1747000.0,5.0,12109.0,17582.0,19.0,101.0,306.0,1397.0


In [60]:
reduce_columns = ['product_id', 'idThirdCategory']
reduce_products = removed_products [reduce_columns]
reduce_products.head()

Unnamed: 0,product_id,idThirdCategory
0,108711,255
1,118971,255
2,118819,255
3,90929,255
4,127206,255


In [80]:
len (reduce_products)

50761

In [81]:
reduce_products.reset_index (drop = True, inplace = True)
reduce_products

Unnamed: 0,product_id,idThirdCategory
0,108711,255
1,118971,255
2,118819,255
3,90929,255
4,127206,255
...,...,...
50756,81005,120
50757,24658,120
50758,6925,120
50759,80493,120


In [78]:
user_reduce_columns = ["user_id", "gender", "age", "skin_type",]
real_user_df = users [users ['user_id'] == '338669'] [user_reduce_columns]
real_user_df.reset_index (drop = True, inplace = True)
real_user_df

Unnamed: 0,user_id,gender,age,skin_type
0,338669,f,38.0,중성


In [83]:
reduce_glowpick = pd.concat ([reduce_products, real_user_df], axis = 1)
reduce_glowpick

Unnamed: 0,product_id,idThirdCategory,user_id,gender,age,skin_type
0,108711,255,338669,f,38.0,중성
1,118971,255,,,,
2,118819,255,,,,
3,90929,255,,,,
4,127206,255,,,,
...,...,...,...,...,...,...
50756,81005,120,,,,
50757,24658,120,,,,
50758,6925,120,,,,
50759,80493,120,,,,


In [98]:
print (dict (zip (real_user_df.columns.tolist(), real_user_df.values.squeeze().tolist ())))

{'user_id': '338669', 'gender': 'f', 'age': 38.0, 'skin_type': '중성'}


In [91]:
real_user_df.columns.tolist()

['user_id', 'gender', 'age', 'skin_type']

In [113]:
user_data_dict = dict (zip (real_user_df.columns.tolist(), real_user_df.values.squeeze().tolist ()))
u_cols = ['user_id', 'gender', 'age', 'skin_type']
reduce_glowpick.fillna (user_data_dict, inplace = True)

In [114]:
reduce_glowpick

Unnamed: 0,product_id,idThirdCategory,user_id,gender,age,skin_type
0,108711,255,338669,f,38.0,중성
1,118971,255,338669,f,38.0,중성
2,118819,255,338669,f,38.0,중성
3,90929,255,338669,f,38.0,중성
4,127206,255,338669,f,38.0,중성
...,...,...,...,...,...,...
50756,81005,120,338669,f,38.0,중성
50757,24658,120,338669,f,38.0,중성
50758,6925,120,338669,f,38.0,중성
50759,80493,120,338669,f,38.0,중성


In [102]:
reduce_products ['product_id'].nunique ()

50761

In [None]:
refined_products [refined_products]

In [None]:
refined_products ['product_id'].nunique ()

In [None]:
print ('실제 상품수: ', refined_products ['product_id'].nunique ())

refined_products_list = refined_products ['product_id'].values
refined_products_list

In [None]:
len (refined_products)

In [None]:
refined_products ['product_id'].isin (predicted_product_list)

In [None]:
removed_products = refined_products [~refined_products ['product_id'].isin (predicted_product_list)]

In [None]:
refined_products ['product_id'].nunique () - removed_products ['product_id'].nunique ()

In [115]:
real_fixlen_feature_columns = [SparseFeat (feat, reduce_glowpick [feat].nunique(), embedding_dim=4)
                          for feat in sparse_features]
real_fixlen_feature_columns

[SparseFeat(name='product_id', vocabulary_size=50761, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='product_id', group_name='default_group'),
 SparseFeat(name='user_id', vocabulary_size=1, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='user_id', group_name='default_group'),
 SparseFeat(name='gender', vocabulary_size=1, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='gender', group_name='default_group'),
 SparseFeat(name='age', vocabulary_size=1, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='age', group_name='default_group'),
 SparseFeat(name='skin_type', vocabulary_size=1, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='skin_type', group_name='default_group'),
 SparseFeat(name='idThirdCategory', vocabulary_size=286, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='idThirdCategory', group_name='default_group')]

In [107]:
real_fixlen_feature_columns [1].name

'user_id'

In [120]:
reduce_glowpick

Unnamed: 0,product_id,idThirdCategory,user_id,gender,age,skin_type
0,108711,255,338669,f,38.0,중성
1,118971,255,338669,f,38.0,중성
2,118819,255,338669,f,38.0,중성
3,90929,255,338669,f,38.0,중성
4,127206,255,338669,f,38.0,중성
...,...,...,...,...,...,...
50756,81005,120,338669,f,38.0,중성
50757,24658,120,338669,f,38.0,중성
50758,6925,120,338669,f,38.0,중성
50759,80493,120,338669,f,38.0,중성


In [121]:
from sklearn.preprocessing import LabelEncoder

# 1.Label Encoding for sparse features,and process sequence features
for feat in sparse_features:
    lbe = LabelEncoder()
    reduce_glowpick [feat] = lbe.fit_transform (reduce_glowpick [feat])
    
reduce_glowpick.head (3)

Unnamed: 0,product_id,idThirdCategory,user_id,gender,age,skin_type
0,5066,237,0,0,0,0
1,10272,237,0,0,0,0
2,10183,237,0,0,0,0


In [122]:
reduce_glowpick.info ()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50761 entries, 0 to 50760
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   product_id       50761 non-null  int64
 1   idThirdCategory  50761 non-null  int64
 2   user_id          50761 non-null  int64
 3   gender           50761 non-null  int64
 4   age              50761 non-null  int64
 5   skin_type        50761 non-null  int64
dtypes: int64(6)
memory usage: 2.3 MB


In [123]:
real_model_input = {name: reduce_glowpick [name] for name in feature_names}

In [124]:
real_model_input

{'product_id': 0         5066
 1        10272
 2        10183
 3        45445
 4        13837
          ...  
 50756    39645
 50757    21907
 50758    33194
 50759    39287
 50760    27573
 Name: product_id, Length: 50761, dtype: int64,
 'user_id': 0        0
 1        0
 2        0
 3        0
 4        0
         ..
 50756    0
 50757    0
 50758    0
 50759    0
 50760    0
 Name: user_id, Length: 50761, dtype: int64,
 'gender': 0        0
 1        0
 2        0
 3        0
 4        0
         ..
 50756    0
 50757    0
 50758    0
 50759    0
 50760    0
 Name: gender, Length: 50761, dtype: int64,
 'age': 0        0
 1        0
 2        0
 3        0
 4        0
         ..
 50756    0
 50757    0
 50758    0
 50759    0
 50760    0
 Name: age, Length: 50761, dtype: int64,
 'skin_type': 0        0
 1        0
 2        0
 3        0
 4        0
         ..
 50756    0
 50757    0
 50758    0
 50759    0
 50760    0
 Name: skin_type, Length: 50761, dtype: int64,
 'idThirdCategor

In [125]:
# epoch 10
from math import sqrt

pred_ans = model.predict (real_model_input, batch_size = 256)

In [128]:
pred_list = list (map (lambda x : round (x, 2), pred_ans.flatten ().tolist ()))   # 예측한 점수 리스트
pred_list

[4.04,
 3.37,
 3.52,
 3.52,
 3.52,
 3.6,
 3.89,
 3.08,
 3.68,
 4.02,
 4.09,
 4.04,
 4.26,
 4.03,
 4.05,
 3.73,
 4.16,
 3.99,
 4.21,
 4.09,
 4.34,
 3.38,
 4.12,
 3.9,
 4.09,
 3.84,
 4.0,
 3.48,
 2.96,
 2.27,
 2.3,
 4.02,
 4.11,
 4.39,
 3.89,
 3.43,
 4.17,
 3.03,
 3.05,
 4.09,
 4.14,
 3.95,
 4.0,
 4.13,
 3.48,
 3.84,
 3.55,
 3.42,
 3.87,
 3.9,
 3.81,
 4.05,
 3.85,
 3.91,
 3.99,
 3.11,
 4.11,
 3.3,
 3.7,
 4.03,
 4.23,
 3.97,
 4.1,
 3.62,
 3.91,
 3.85,
 3.93,
 4.4,
 3.94,
 3.86,
 3.87,
 3.79,
 3.59,
 3.5,
 3.95,
 4.0,
 3.91,
 3.46,
 3.9,
 3.67,
 4.2,
 3.36,
 4.49,
 3.54,
 4.16,
 4.27,
 3.73,
 4.26,
 3.89,
 3.82,
 4.46,
 3.41,
 4.17,
 3.19,
 3.5,
 3.63,
 3.23,
 4.07,
 4.08,
 4.27,
 4.02,
 4.22,
 4.14,
 4.34,
 3.41,
 3.81,
 4.06,
 4.43,
 3.6,
 3.66,
 4.38,
 4.38,
 4.12,
 4.3,
 4.15,
 3.37,
 4.16,
 4.33,
 4.24,
 4.58,
 3.23,
 3.64,
 4.18,
 3.2,
 3.61,
 4.43,
 3.61,
 4.44,
 3.99,
 3.88,
 3.79,
 3.89,
 4.05,
 3.39,
 4.44,
 4.56,
 4.09,
 4.22,
 3.86,
 3.69,
 4.08,
 4.17,
 3.96,
 3.55,
 4.33,
 4.

In [129]:
len (pred_list)

50761

In [127]:
reduce_products

Unnamed: 0,product_id,idThirdCategory
0,108711,255
1,118971,255
2,118819,255
3,90929,255
4,127206,255
...,...,...
50756,81005,120
50757,24658,120
50758,6925,120
50759,80493,120


In [131]:
user_pred_df = pd.DataFrame (pred_list, columns = ['rating'])
user_pred_df

Unnamed: 0,rating
0,4.04
1,3.37
2,3.52
3,3.52
4,3.52
...,...
50756,2.84
50757,2.67
50758,2.80
50759,2.66


In [133]:
user_product_pred_df = pd.concat ([reduce_products, user_pred_df], axis = 1)
user_product_pred_df

Unnamed: 0,product_id,idThirdCategory,rating
0,108711,255,4.04
1,118971,255,3.37
2,118819,255,3.52
3,90929,255,3.52
4,127206,255,3.52
...,...,...,...
50756,81005,120,2.84
50757,24658,120,2.67
50758,6925,120,2.80
50759,80493,120,2.66


In [135]:
user_product_pred_df.sort_values ('rating', ascending = False, inplace = True)

In [136]:
user_product_pred_df

Unnamed: 0,product_id,idThirdCategory,rating
1925,26622,247,4.94
26767,78768,246,4.94
26727,16702,246,4.93
11482,41406,153,4.91
1923,98519,247,4.91
...,...,...,...
40741,2606,300,1.24
37077,48937,65,1.20
31195,7952,179,1.20
49525,36000,139,1.16


In [137]:
refined_products.head ()

Unnamed: 0,product_id,productTitle,price,volume,description,ratingAvg,wishCount,reviewCount,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,rank,brand,brandName
0,108711,코스메티 LED 마스크,219000.0,1ea,"안면리프팅, 눈가탄력개선, 전피치밀도 수분개선 등에 도움을 주는 제품",3.75,2,4,디바이스,16,85,255,뷰티디바이스,LED마스크,5,"{'idBrand': None, 'brandTitle': '아름다운연구소', 'br...",아름다운연구소
1,118971,LED 마스크,149000.0,1ea,가장 적정거리의 96개의 LED로 피부 침투율을 높여주는 LED 마스크\n\n- 둥...,3.75,0,4,디바이스,16,85,255,뷰티디바이스,LED마스크,6,"{'idBrand': None, 'brandTitle': '에끌레어 (eclair)...",에끌레어 (eclair)
2,118819,디쎄 1.0 LED 마스크 [업무용],1320000.0,1ea,하루 한 번 자기전 15분 사용으로 전문적인 케어\r\n\r\n- 3가지 파장으로 ...,5.0,2,1,디바이스,16,85,255,뷰티디바이스,LED마스크,7,"{'idBrand': None, 'brandTitle': '닥터슈라클 (Dr.Ceu...",닥터슈라클 (Dr.Ceuracle)
3,90929,LED 리얼 마스크,198000.0,1ea,"값비싼 피부과 시술, 집에서 손쉽게 할 수 있는 LED 마스크\n\n- 눈가 탄력 ...",4.0,1,1,디바이스,16,85,255,뷰티디바이스,LED마스크,8,"{'idBrand': None, 'brandTitle': '솔루미에스테 (SOLLU...",솔루미에스테 (SOLLUME ESTHE)
4,127206,인텐시브 LED 마스크 프리미엄,1517100.0,1ea,총 480개 LED칩으로 두피부터 얼굴까지 케어해주는 프리미엄 마스크\r\n\r\n...,5.0,1,1,디바이스,16,85,255,뷰티디바이스,LED마스크,9,"{'idBrand': None, 'brandTitle': '엘리닉 (L Linic)...",엘리닉 (L Linic)


In [143]:
pd.merge (user_product_pred_df, refined_products, how = 'left', on = 'product_id') [['productTitle', 'rating']].head (10)

Unnamed: 0,productTitle,rating
0,인텐스 히 우드,4.94
1,오 도랑쥬 베르트 오 드 코롱,4.94
2,쥬 퍼퓸 티아라 느와 오 드 퍼퓸,4.93
3,아틀라스 마운틴 로즈 바디 버터_41406,4.91
4,토바코 바닐 EDP,4.91
5,어드밴스덤 데일리 수분 필링 크림,4.9
6,클래식 크림 얼티미트,4.89
7,슬림 헤드 눈썹칼,4.89
8,에스에너지 클렌징 젤,4.89
9,M5001A 플러스,4.88


In [None]:
real_user_df


In [None]:
user_review_indexs = list (real_method_df [real_method_df ['user_id'] == real_user_id].index)
print (user_review_indexs, '\n')
print (len (user_review_indexs))

In [None]:
glowpick.loc [5734]

In [None]:
glowpick.head ()

In [None]:
user_review_indexs

In [None]:
user_not_reviews_df = glowpick.loc [~glowpick.index.isin (user_review_indexs)]

user_not_reviews_df

In [None]:
len (glowpick) - len (user_review_indexs)

In [None]:
len (user_review_indexs)

In [None]:
all (user_index in user_not_reviews_list for user_index in user_review_indexs)    

In [None]:
user_not_reviews_list = list (user_not_reviews_df.index)
len (user_not_reviews_list)

In [None]:
user_not_reviews_list [: 10]

In [None]:
1 in user_not_reviews_list

In [None]:
user_review_indexs in user_not_reviews_list

In [None]:
len (real_user_df)

In [None]:
len (user_not_reviews_df ['product_id'])

In [None]:
print (real_user_df ['product_id'].unique ().tolist ())

In [None]:
unique_review_products = real_user_df ['product_id'].unique ().tolist ()
unique_review_products.sort ()
print (unique_review_products)

In [None]:
unique_not_review_products = user_not_reviews_df ['product_id'].unique ().tolist ()
unique_not_review_products.sort ()
print (unique_not_review_products)

In [None]:
print (user_not_reviews_df ['product_id'].unique ().tolist ())

In [None]:
len (glowpick.loc [~glowpick.index.isin (user_review_indexs)] ['product_id'].unique ())

In [None]:
user_not_reviews_df ['product_id'].value_counts ()

In [None]:
glowpick ['product_id'].nunique ()

In [None]:
len (real_method_df ['product_id'].unique ())

In [None]:

len (real_user_df ['product_id'].unique ())

In [None]:
real_method_df