In [1]:
import sys
sys.path.append("..")

import common
from common import FILE_PATH
from common import MODEL_PATH

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from deepctr_torch.inputs import SparseFeat, VarLenSparseFeat, get_feature_names
from deepctr_torch.models import DeepFM
import pickle

%load_ext autoreload
%autoreload 2
import product_refine

st = sns.axes_style("whitegrid")
sns.set_style("ticks", {"xtick.major.size": 8, "ytick.major.size": 8})

plt.rcParams ['font.family'] = 'NanumGothic'

import warnings
warnings.filterwarnings("ignore")

In [2]:
%load_ext autoreload
%autoreload 2
import product_refine

In [3]:
%%time

reviews = pd.read_feather (FILE_PATH + 'reviews.ftr', use_threads = True)
users = pd.read_feather (FILE_PATH + 'users.ftr', use_threads = True)
products = pd.read_feather (FILE_PATH + 'products.ftr', use_threads = True)
product_categories = pd.read_feather (FILE_PATH + 'product_categories.ftr', use_threads = True)

CPU times: user 3.12 s, sys: 840 ms, total: 3.95 s
Wall time: 3.72 s


In [4]:
%%time
df_products = product_refine.refine_products (products)

len del_id_list:  1355 

CPU times: user 5.03 s, sys: 126 ms, total: 5.16 s
Wall time: 5.06 s


In [5]:
users.head ()

Unnamed: 0,age,birth_year,email,gender,is_blinded,is_closed,is_inactivated,nickname,profile_image,rank,review_count,skin_type,user_id
0,31.0,1990.0,yhcu88@naver.com,f,0,False,False,희뷰리,https://d9vmi5fxk1gsw.cloudfront.net/prod/regi...,27347,44,복합성,1281918
1,33.0,1988.0,winwinanna@hanmail.net,f,0,False,False,소극적인뷰터,,41582,15,건성,1255686
2,37.0,1984.0,dmswjddlskfk@hanmail.net,f,0,False,False,greengables,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,946,375,지성,631689
3,32.0,1989.0,deer402@naver.com,f,0,False,True,백비송,https://d9vmi5fxk1gsw.cloudfront.net/prod/regi...,43404,29,건성,1188087
4,26.0,1995.0,kkr3348@naver.com,f,0,False,False,초코감귤,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,9386,97,복합성,632220


In [6]:
len (users ['age'].unique ())

79

In [7]:
# 100세 이상, 음수나이, 연도를 나이로 입력한듯한 나이(2016, 2018, 1995)등이 보임
users ['age'].value_counts ().tail (32)

 57.0      14
 121.0     11
 58.0      11
 7.0       11
 12.0      10
 3.0        9
 2.0        9
 60.0       8
 104.0      6
 62.0       5
 61.0       5
 11.0       5
 101.0      5
 59.0       4
 72.0       2
 103.0      2
 71.0       2
 102.0      2
 64.0       2
 8.0        2
 63.0       2
 105.0      2
-518.0      1
 67.0       1
 70.0       1
 9.0        1
 89.0       1
 98.0       1
 2016.0     1
 68.0       1
 2018.0     1
 1995.0     1
Name: age, dtype: int64

In [8]:
users.info ()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76490 entries, 0 to 76489
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   age             76489 non-null  float64
 1   birth_year      76489 non-null  float64
 2   email           76490 non-null  object 
 3   gender          76490 non-null  object 
 4   is_blinded      76490 non-null  int64  
 5   is_closed       76490 non-null  bool   
 6   is_inactivated  76490 non-null  bool   
 7   nickname        76489 non-null  object 
 8   profile_image   44893 non-null  object 
 9   rank            76490 non-null  int64  
 10  review_count    76490 non-null  int64  
 11  skin_type       76490 non-null  object 
 12  user_id         76490 non-null  object 
dtypes: bool(2), float64(2), int64(3), object(6)
memory usage: 6.6+ MB


In [9]:
# 0세 ~ 90세 회원 정보만 뽑아냄.  음수나이와 90세 이상은 제외
users = users [(users ['age'] < 90) & (users ['age'] > 0)]
users.info ()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 76456 entries, 0 to 76489
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   age             76456 non-null  float64
 1   birth_year      76456 non-null  float64
 2   email           76456 non-null  object 
 3   gender          76456 non-null  object 
 4   is_blinded      76456 non-null  int64  
 5   is_closed       76456 non-null  bool   
 6   is_inactivated  76456 non-null  bool   
 7   nickname        76455 non-null  object 
 8   profile_image   44871 non-null  object 
 9   rank            76456 non-null  int64  
 10  review_count    76456 non-null  int64  
 11  skin_type       76456 non-null  object 
 12  user_id         76456 non-null  object 
dtypes: bool(2), float64(2), int64(3), object(6)
memory usage: 7.1+ MB


In [10]:
users.describe ()

Unnamed: 0,age,birth_year,is_blinded,rank,review_count
count,76456.0,76456.0,76456.0,76456.0,76456.0
mean,25.250118,1995.749882,0.0,69090.6,41.476823
std,5.984109,5.984109,0.0,504406.0,56.878904
min,2.0,1932.0,0.0,1.0,-46.0
25%,21.0,1993.0,0.0,18536.0,10.0
50%,25.0,1996.0,0.0,38404.0,26.0
75%,28.0,2000.0,0.0,62796.0,51.0
max,89.0,2019.0,0.0,9999999.0,2003.0


In [11]:
review_user_df = pd.merge (reviews, users, how = 'left', on = 'user_id')
review_user_df.head ()

Unnamed: 0,contents,created_at,is_evaluation,like_count,rating,review_id,state,user_id,product_id,age,...,email,gender,is_blinded,is_closed,is_inactivated,nickname,profile_image,rank,review_count,skin_type
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,False,0,3,5416271,N,119763,100000,36.0,...,intears23@naver.com,f,0.0,False,False,kyo,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,2914.0,205.0,복합성
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,False,0,4,5340616,N,338669,100000,38.0,...,hihearyeong@naver.com,f,0.0,False,False,뭐라,https://d9vmi5fxk1gsw.cloudfront.net/prod/regi...,1023.0,285.0,중성
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,False,0,3,5228598,N,24862,100000,28.0,...,leemj1993@naver.com,f,0.0,False,False,민졍,,2292.0,230.0,복합성
3,순하고 가격적으로 무난해서 쓰기 괜찮아요오오,2020-01-18T08:03:59Z,False,0,4,5225359,N,1331797,100000,20.0,...,betcha2001@naver.com,f,0.0,False,False,Gravity,,36233.0,36.0,복합성
4,성분이 착해서 샀고 타입이 폼이라 짜서 쓰는 젤보다 약간 귀찮지만 거품은 바로 많이...,2020-01-11T07:15:15Z,False,0,4,5211274,N,888968,100000,33.0,...,milk_madam@hanmail.net,f,0.0,False,False,이게모람,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,6737.0,108.0,복합성


In [12]:
len (reviews)

1574817

In [13]:
len (review_user_df)

1574817

In [14]:
review_user_product_df = pd.merge (review_user_df, df_products, how = 'left', on = 'product_id')
review_user_product_df.head (3)

Unnamed: 0,contents,created_at,is_evaluation,like_count,rating,review_id,state,user_id,product_id,age,...,price,reviewCnt,ratingAvg,productImg,isDiscontinue,rankChange,rankChangeType,brand,goodsInfo,brandName
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,False,0,3,5416271,N,119763,100000,36.0,...,11000.0,11.0,3.64,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,False,0,show,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",{'goodsCount': 0},해피바스 (HAPPY BATH)
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,False,0,4,5340616,N,338669,100000,38.0,...,11000.0,11.0,3.64,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,False,0,show,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",{'goodsCount': 0},해피바스 (HAPPY BATH)
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,False,0,3,5228598,N,24862,100000,28.0,...,11000.0,11.0,3.64,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,False,0,show,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",{'goodsCount': 0},해피바스 (HAPPY BATH)


In [15]:
len (review_user_product_df)

1574817

### product_categoreis

In [16]:
product_categories.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88756 entries, 0 to 88755
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   firstCategoryText   88756 non-null  object
 1   idFirstCategory     88756 non-null  int64 
 2   idSecondCategory    88756 non-null  int64 
 3   idThirdCategory     88756 non-null  int64 
 4   secondCategoryText  88756 non-null  object
 5   thirdCategoryText   88704 non-null  object
 6   product_id          88756 non-null  object
dtypes: int64(3), object(4)
memory usage: 4.7+ MB


In [17]:
product_categories.isnull ().sum ()

firstCategoryText      0
idFirstCategory        0
idSecondCategory       0
idThirdCategory        0
secondCategoryText     0
thirdCategoryText     52
product_id             0
dtype: int64

In [18]:
product_categories ['idThirdCategory'].nunique ()

291

In [19]:
# 하나의 상품마다 2 이상의 카테고리를 가질수 있음
product_categories ['product_id'].value_counts ().head (50)

3754      38
1558      26
22223     23
90430     19
74532     19
51029     18
74539     17
9864      17
43452     16
114984    15
2766      15
28106     15
132371    15
55517     14
3245      13
2851      11
133407     7
4          7
85537      7
89452      6
74574      6
108331     6
90958      6
19695      6
433        5
86217      5
114608     5
64325      5
83589      5
97465      5
102742     5
42288      5
22476      5
112778     5
50745      5
6530       5
51869      5
105349     5
90840      5
91298      5
40668      5
15279      5
12221      5
76752      5
77355      5
100129     5
11734      5
35706      5
3159       4
34291      4
Name: product_id, dtype: int64

In [20]:
products [products ['product_id'] == '1030']

Unnamed: 0,product_id,rank,title,volume,price,reviewCnt,ratingAvg,productImg,isDiscontinue,rankChange,rankChangeType,brand,goodsInfo
38683,1030,10,알파인 베리 워터리 크림,50ml,37000,2222,4.09,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,False,-1,show,"{'idBrand': None, 'brandTitle': '프리메라 (primera...",{'goodsCount': 0}


In [21]:
product_categories [product_categories ['product_id'] == '1030']

Unnamed: 0,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,product_id
2,립메이크업,3,15,50,립틴트/라커,글로시/젤틴트,1030
10033,스킨케어,1,3,17,에센스/세럼,안티에이징에센스,1030
82433,헤어,11,65,198,헤어케어,헤어에센스/세럼,1030


In [22]:
product_categories [product_categories ['product_id'] == '1030']

Unnamed: 0,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,product_id
2,립메이크업,3,15,50,립틴트/라커,글로시/젤틴트,1030
10033,스킨케어,1,3,17,에센스/세럼,안티에이징에센스,1030
82433,헤어,11,65,198,헤어케어,헤어에센스/세럼,1030


In [23]:
# 상품별 중복 row를 삭제. 하나의 상품당 하나의 카테고리 (first > second > third)만
categories_drop_dupli = product_categories.drop_duplicates ('product_id')
categories_drop_dupli

Unnamed: 0,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,product_id
0,프래그런스,15,83,246,향수,여성향수,10000
1,클렌징,7,32,112,페이셜클렌저,클렌징폼,100000
2,립메이크업,3,15,50,립틴트/라커,글로시/젤틴트,1030
3,립메이크업,3,17,54,립글로스,립글로스,100002
4,아이메이크업,4,22,75,아이섀도우,아이팔레트,88268
...,...,...,...,...,...,...,...
88751,헤어,11,62,197,트리트먼트/팩,헤어마스크/팩,99977
88752,헤어,11,62,197,트리트먼트/팩,헤어마스크/팩,99979
88753,헤어,11,62,197,트리트먼트/팩,헤어마스크/팩,99980
88754,헤어,11,62,196,트리트먼트/팩,헤어트리트먼트,99982


In [24]:
categories_drop_dupli ['idFirstCategory'].nunique ()

19

In [25]:
categories_drop_dupli ['idSecondCategory'].nunique ()

90

In [26]:
categories_drop_dupli ['idThirdCategory'].nunique ()

291

In [27]:
categories_drop_dupli.isnull ().sum ()

firstCategoryText      0
idFirstCategory        0
idSecondCategory       0
idThirdCategory        0
secondCategoryText     0
thirdCategoryText     48
product_id             0
dtype: int64

In [28]:
categories_drop_dupli.info ()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 79227 entries, 0 to 88755
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   firstCategoryText   79227 non-null  object
 1   idFirstCategory     79227 non-null  int64 
 2   idSecondCategory    79227 non-null  int64 
 3   idThirdCategory     79227 non-null  int64 
 4   secondCategoryText  79227 non-null  object
 5   thirdCategoryText   79179 non-null  object
 6   product_id          79227 non-null  object
dtypes: int64(3), object(4)
memory usage: 4.8+ MB


In [29]:
categories_drop_dupli ['product_id'].value_counts ()

84484     1
102644    1
133228    1
18520     1
65245     1
         ..
12522     1
96515     1
15710     1
7138      1
116498    1
Name: product_id, Length: 79227, dtype: int64

In [30]:
categories_drop_dupli [categories_drop_dupli ['product_id'] == '2851']

Unnamed: 0,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText,product_id
1102,배쓰&바디,10,57,173,배쓰/샤워,바디워시,2851


### merge

review_user_product_df, categories_drop_dupli

In [31]:
total_merged_df = pd.merge (review_user_product_df, categories_drop_dupli, how = 'left', on = 'product_id')
total_merged_df.head (3)

Unnamed: 0,contents,created_at,is_evaluation,like_count,rating,review_id,state,user_id,product_id,age,...,rankChangeType,brand,goodsInfo,brandName,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,False,0,3,5416271,N,119763,100000,36.0,...,show,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",{'goodsCount': 0},해피바스 (HAPPY BATH),클렌징,7.0,32.0,112.0,페이셜클렌저,클렌징폼
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,False,0,4,5340616,N,338669,100000,38.0,...,show,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",{'goodsCount': 0},해피바스 (HAPPY BATH),클렌징,7.0,32.0,112.0,페이셜클렌저,클렌징폼
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,False,0,3,5228598,N,24862,100000,28.0,...,show,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",{'goodsCount': 0},해피바스 (HAPPY BATH),클렌징,7.0,32.0,112.0,페이셜클렌저,클렌징폼


In [32]:
len (review_user_product_df)

1574817

In [33]:
len (total_merged_df)

1574817

In [34]:
total_merged_df.columns

Index(['contents', 'created_at', 'is_evaluation', 'like_count', 'rating',
       'review_id', 'state', 'user_id', 'product_id', 'age', 'birth_year',
       'email', 'gender', 'is_blinded', 'is_closed', 'is_inactivated',
       'nickname', 'profile_image', 'rank_x', 'review_count', 'skin_type',
       'rank_y', 'title', 'volume', 'price', 'reviewCnt', 'ratingAvg',
       'productImg', 'isDiscontinue', 'rankChange', 'rankChangeType', 'brand',
       'goodsInfo', 'brandName', 'firstCategoryText', 'idFirstCategory',
       'idSecondCategory', 'idThirdCategory', 'secondCategoryText',
       'thirdCategoryText'],
      dtype='object')

In [35]:
total_merged_df.head (1)

Unnamed: 0,contents,created_at,is_evaluation,like_count,rating,review_id,state,user_id,product_id,age,...,rankChangeType,brand,goodsInfo,brandName,firstCategoryText,idFirstCategory,idSecondCategory,idThirdCategory,secondCategoryText,thirdCategoryText
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,False,0,3,5416271,N,119763,100000,36.0,...,show,"{'idBrand': None, 'brandTitle': '해피바스 (HAPPY B...",{'goodsCount': 0},해피바스 (HAPPY BATH),클렌징,7.0,32.0,112.0,페이셜클렌저,클렌징폼


In [36]:
get_cols = ['contents', 'created_at', 'rating', 'user_id', 'product_id', 'age', 'gender', 'is_blinded', 'is_closed', 'is_inactivated', 'skin_type', 'title', 'volume', 'price', 'brandName', 'idThirdCategory']

In [37]:
glowpick = total_merged_df [get_cols]
glowpick.head ()

Unnamed: 0,contents,created_at,rating,user_id,product_id,age,gender,is_blinded,is_closed,is_inactivated,skin_type,title,volume,price,brandName,idThirdCategory
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,3,119763,100000,36.0,f,0.0,False,False,복합성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,4,338669,100000,38.0,f,0.0,False,False,중성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,3,24862,100000,28.0,f,0.0,False,False,복합성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
3,순하고 가격적으로 무난해서 쓰기 괜찮아요오오,2020-01-18T08:03:59Z,4,1331797,100000,20.0,f,0.0,False,False,복합성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
4,성분이 착해서 샀고 타입이 폼이라 짜서 쓰는 젤보다 약간 귀찮지만 거품은 바로 많이...,2020-01-11T07:15:15Z,4,888968,100000,33.0,f,0.0,False,False,복합성,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0


In [38]:
total_merged_df.info ()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1574817 entries, 0 to 1574816
Data columns (total 40 columns):
 #   Column              Non-Null Count    Dtype  
---  ------              --------------    -----  
 0   contents            1574817 non-null  object 
 1   created_at          1574817 non-null  object 
 2   is_evaluation       1574817 non-null  bool   
 3   like_count          1574817 non-null  int64  
 4   rating              1574817 non-null  int64  
 5   review_id           1574817 non-null  object 
 6   state               1574817 non-null  object 
 7   user_id             1574817 non-null  object 
 8   product_id          1574817 non-null  object 
 9   age                 1472144 non-null  float64
 10  birth_year          1472144 non-null  float64
 11  email               1472144 non-null  object 
 12  gender              1472144 non-null  object 
 13  is_blinded          1472144 non-null  float64
 14  is_closed           1472144 non-null  object 
 15  is_inactivated 

In [39]:
glowpick.info ()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1574817 entries, 0 to 1574816
Data columns (total 16 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   contents         1574817 non-null  object 
 1   created_at       1574817 non-null  object 
 2   rating           1574817 non-null  int64  
 3   user_id          1574817 non-null  object 
 4   product_id       1574817 non-null  object 
 5   age              1472144 non-null  float64
 6   gender           1472144 non-null  object 
 7   is_blinded       1472144 non-null  float64
 8   is_closed        1472144 non-null  object 
 9   is_inactivated   1472144 non-null  object 
 10  skin_type        1472144 non-null  object 
 11  title            1564554 non-null  object 
 12  volume           1564554 non-null  object 
 13  price            1564257 non-null  float64
 14  brandName        1564554 non-null  object 
 15  idThirdCategory  1520133 non-null  float64
dtypes: float64(4), int

### 결측치 삭제

1. 단순 삭제 모드

In [40]:
glowpick.isnull ().sum ()

contents                0
created_at              0
rating                  0
user_id                 0
product_id              0
age                102673
gender             102673
is_blinded         102673
is_closed          102673
is_inactivated     102673
skin_type          102673
title               10263
volume              10263
price               10560
brandName           10263
idThirdCategory     54684
dtype: int64

In [41]:
glowpick ['gender'].value_counts (dropna = False)

f      1457621
NaN     102673
m        14523
Name: gender, dtype: int64

In [42]:
df_thresh = glowpick.dropna (axis = 1, thresh = 500)
df_thresh.isnull ().sum ()

contents                0
created_at              0
rating                  0
user_id                 0
product_id              0
age                102673
gender             102673
is_blinded         102673
is_closed          102673
is_inactivated     102673
skin_type          102673
title               10263
volume              10263
price               10560
brandName           10263
idThirdCategory     54684
dtype: int64

In [43]:
df_age = glowpick.dropna (subset = ['age'], how = 'any', axis = 0)
df_age.isnull ().sum ()

contents               0
created_at             0
rating                 0
user_id                0
product_id             0
age                    0
gender                 0
is_blinded             0
is_closed              0
is_inactivated         0
skin_type              0
title               9542
volume              9542
price               9823
brandName           9542
idThirdCategory    51026
dtype: int64

In [44]:
len (df_age)

1472144

In [45]:
df_glowpick = glowpick.dropna (subset = ['age', 'title', 'price', 'idThirdCategory'], how = 'any', axis = 0)
df_glowpick.info ()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1411866 entries, 0 to 1574816
Data columns (total 16 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   contents         1411866 non-null  object 
 1   created_at       1411866 non-null  object 
 2   rating           1411866 non-null  int64  
 3   user_id          1411866 non-null  object 
 4   product_id       1411866 non-null  object 
 5   age              1411866 non-null  float64
 6   gender           1411866 non-null  object 
 7   is_blinded       1411866 non-null  float64
 8   is_closed        1411866 non-null  object 
 9   is_inactivated   1411866 non-null  object 
 10  skin_type        1411866 non-null  object 
 11  title            1411866 non-null  object 
 12  volume           1411866 non-null  object 
 13  price            1411866 non-null  float64
 14  brandName        1411866 non-null  object 
 15  idThirdCategory  1411866 non-null  float64
dtypes: float64(4), int

In [46]:
df_glowpick.isnull ().sum ()

contents           0
created_at         0
rating             0
user_id            0
product_id         0
age                0
gender             0
is_blinded         0
is_closed          0
is_inactivated     0
skin_type          0
title              0
volume             0
price              0
brandName          0
idThirdCategory    0
dtype: int64

In [47]:
# 15만건 정도가 삭제됨
len (df_glowpick)

1411866

In [48]:
len (glowpick)

1574817

In [49]:
# sparse_features = ["product_id", "user_id", "gender", "age", "skin_type", "idThirdCategory", ]
sparse_features = ["product_id", "user_id", "gender", "age", "skin_type", ]
target = ['rating']

In [50]:
glowpick ['skin_type'].nunique ()

5

In [51]:
from sklearn.preprocessing import LabelEncoder

# 1.Label Encoding for sparse features,and process sequence features
for feat in sparse_features:
    lbe = LabelEncoder()
    df_glowpick [feat] = lbe.fit_transform (df_glowpick [feat])

In [52]:
df_glowpick.head ()

Unnamed: 0,contents,created_at,rating,user_id,product_id,age,gender,is_blinded,is_closed,is_inactivated,skin_type,title,volume,price,brandName,idThirdCategory
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,3,7216,0,32,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,4,22803,0,34,0,0.0,False,False,3,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,3,19263,0,24,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
3,순하고 가격적으로 무난해서 쓰기 괜찮아요오오,2020-01-18T08:03:59Z,4,13371,0,16,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
4,성분이 착해서 샀고 타입이 폼이라 짜서 쓰는 젤보다 약간 귀찮지만 거품은 바로 많이...,2020-01-11T07:15:15Z,4,68326,0,29,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0


In [53]:
df_glowpick.sort_values (['product_id']).head ()

Unnamed: 0,contents,created_at,rating,user_id,product_id,age,gender,is_blinded,is_closed,is_inactivated,skin_type,title,volume,price,brandName,idThirdCategory
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,3,7216,0,32,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,4,22803,0,34,0,0.0,False,False,3,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,3,19263,0,24,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
3,순하고 가격적으로 무난해서 쓰기 괜찮아요오오,2020-01-18T08:03:59Z,4,13371,0,16,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
4,성분이 착해서 샀고 타입이 폼이라 짜서 쓰는 젤보다 약간 귀찮지만 거품은 바로 많이...,2020-01-11T07:15:15Z,4,68326,0,29,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0


In [54]:
# 2.count #unique features for each sparse field and generate feature config for sequence feature

fixlen_feature_columns = [SparseFeat (feat, df_glowpick [feat].nunique(), embedding_dim=4)
                          for feat in sparse_features]
fixlen_feature_columns

[SparseFeat(name='product_id', vocabulary_size=49381, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='product_id', group_name='default_group'),
 SparseFeat(name='user_id', vocabulary_size=74563, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='user_id', group_name='default_group'),
 SparseFeat(name='gender', vocabulary_size=2, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='gender', group_name='default_group'),
 SparseFeat(name='age', vocabulary_size=67, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='age', group_name='default_group'),
 SparseFeat(name='skin_type', vocabulary_size=5, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='skin_type', group_name='default_group')]

In [55]:
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [56]:
feature_names

['product_id', 'user_id', 'gender', 'age', 'skin_type']

In [57]:
# 3.generate input data for model
train, test = train_test_split (df_glowpick, test_size = 0.2)
train_model_input = {name: train [name] for name in feature_names}
test_model_input = {name: test [name] for name in feature_names}

In [58]:
device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

cuda ready...


In [59]:
model = DeepFM (linear_feature_columns, dnn_feature_columns, task='regression', device=device)
model.compile ("adam", "mse", metrics=['mse'], )

In [60]:
# history = model.fit(train_model_input, train[target].values,
#                     batch_size=256, epochs = 10, verbose=2, validation_split=0.2, )

In [61]:
history = model.fit(train_model_input, train[target].values,
                    batch_size=256, epochs = 5, verbose=2, validation_split=0.2, )

cuda:0
Train on 903593 samples, validate on 225899 samples, 3530 steps per epoch
Epoch 1/5
32s - loss:  1.1101 - mse:  1.1101 - val_mse:  0.9982
Epoch 2/5
33s - loss:  0.9248 - mse:  0.9248 - val_mse:  0.9967
Epoch 3/5
30s - loss:  0.8964 - mse:  0.8964 - val_mse:  0.9827
Epoch 4/5
31s - loss:  0.8798 - mse:  0.8798 - val_mse:  0.9852
Epoch 5/5
32s - loss:  0.8668 - mse:  0.8668 - val_mse:  0.9876


In [62]:
# epoch 10
pred_ans = model.predict (test_model_input, batch_size = 256)
print("test MSE", round(mean_squared_error(
    test [target].values, pred_ans), 4))

test MSE 0.9817


In [63]:
from sklearn.metrics import mean_squared_error
from math import sqrt

rms = sqrt (mean_squared_error (test [target].values, pred_ans))
print (rms)

0.9908093043244933


In [64]:
# epoch 5

pred_ans = model.predict (test_model_input, batch_size = 256)
print("test MSE", round(mean_squared_error(
    test [target].values, pred_ans), 4))

test MSE 0.9817


In [65]:
print ("test RMSE", round (sqrt (mean_squared_error (test [target].values, pred_ans)), 4))

test RMSE 0.9908


In [66]:
test_model_input

{'product_id': 218533      9896
 1096595    34455
 963263     31023
 518916     21329
 922233     29872
            ...  
 1419606    45533
 1381016    44177
 1029263    32568
 748214     25711
 992949     31973
 Name: product_id, Length: 282374, dtype: int64,
 'user_id': 218533     65640
 1096595    32313
 963263     45066
 518916     49565
 922233     19787
            ...  
 1419606      819
 1381016    74136
 1029263    54732
 748214     37712
 992949     58810
 Name: user_id, Length: 282374, dtype: int64,
 'gender': 218533     0
 1096595    0
 963263     0
 518916     0
 922233     0
           ..
 1419606    0
 1381016    0
 1029263    0
 748214     0
 992949     0
 Name: gender, Length: 282374, dtype: int64,
 'age': 218533     17
 1096595    17
 963263     18
 518916     25
 922233     22
            ..
 1419606    17
 1381016    14
 1029263    18
 748214     25
 992949     17
 Name: age, Length: 282374, dtype: int64,
 'skin_type': 218533     0
 1096595    1
 963263     0
 51891

In [67]:
len (test_model_input)

5

In [68]:
users.head ()

Unnamed: 0,age,birth_year,email,gender,is_blinded,is_closed,is_inactivated,nickname,profile_image,rank,review_count,skin_type,user_id
0,31.0,1990.0,yhcu88@naver.com,f,0,False,False,희뷰리,https://d9vmi5fxk1gsw.cloudfront.net/prod/regi...,27347,44,복합성,1281918
1,33.0,1988.0,winwinanna@hanmail.net,f,0,False,False,소극적인뷰터,,41582,15,건성,1255686
2,37.0,1984.0,dmswjddlskfk@hanmail.net,f,0,False,False,greengables,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,946,375,지성,631689
3,32.0,1989.0,deer402@naver.com,f,0,False,True,백비송,https://d9vmi5fxk1gsw.cloudfront.net/prod/regi...,43404,29,건성,1188087
4,26.0,1995.0,kkr3348@naver.com,f,0,False,False,초코감귤,https://d9vmi5fxk1gsw.cloudfront.net/home/glow...,9386,97,복합성,632220


In [69]:
reviews.head ()

Unnamed: 0,contents,created_at,is_evaluation,like_count,rating,review_id,state,user_id,product_id
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,False,0,3,5416271,N,119763,100000
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,False,0,4,5340616,N,338669,100000
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,False,0,3,5228598,N,24862,100000
3,순하고 가격적으로 무난해서 쓰기 괜찮아요오오,2020-01-18T08:03:59Z,False,0,4,5225359,N,1331797,100000
4,성분이 착해서 샀고 타입이 폼이라 짜서 쓰는 젤보다 약간 귀찮지만 거품은 바로 많이...,2020-01-11T07:15:15Z,False,0,4,5211274,N,888968,100000


In [76]:
df_1281918 = df_glowpick [reviews ['user_id'] == '1281918']
df_1281918

Unnamed: 0,contents,created_at,rating,user_id,product_id,age,gender,is_blinded,is_closed,is_inactivated,skin_type,title,volume,price,brandName,idThirdCategory
85901,시카가 여드름에 효능이 좋다해서 써봤는데 세안하고 닦아내는 용도로 사용중인데 괜찮아...,2019-06-19T12:15:05Z,4,11163,3761,27,0,0.0,False,False,2,시카 리페어 필링패드,72p,15000.0,닥터에이지 (Dr.AG),126.0
93439,커버력도 좋고 촉촉함도 있어서 쓰기 좋아요.\r\n쿠션타입이만 퍼프해서 사용가능해서...,2019-06-19T00:00:59Z,4,11163,4010,27,0,0.0,False,False,2,씨앤티 마스터글램 (시즌2) [SPF50+/PA+++],12ml,45000.0,조성아™,31.0
112244,비싼 제품은 부담되어 저렴이버전 led마스크 찾던중 알게되어 구매했는데 선캡 쓴것처...,2019-06-23T01:36:50Z,4,11163,4911,27,0,0.0,False,False,2,코스메티 LED 마스크,1ea,219000.0,아름다운연구소,255.0
117820,펌핑해서 얼굴에 살살 바르면 거품이 올라오는데 톡톡톡 얼굴에 흡수시키고 나면 금방 ...,2019-06-19T12:48:16Z,4,11163,5204,27,0,0.0,False,False,2,볼륨 톡스 오리지널 펩타이드 에센스,100ml,99000.0,파이진 (PI.GENE),17.0
143698,벨벳형식이라 발색력도 좋지만 립글로즈처럼 끈적임이 없어서 좋아요.\r\n다만 건조함...,2019-07-05T21:17:29Z,4,11163,6203,27,0,0.0,False,False,2,타투 끌레르 벨벳 틴트,4g,19000.0,포렌코즈 (FORENCOS),49.0
146287,여드름이 약간 있어서 사용중인데 냄새도 좋고 제형도 좋아요! 패키지도 들고 다니기 ...,2019-06-18T12:55:59Z,4,11163,6311,27,0,0.0,False,False,2,끄렘 드 오로라,30ml,31800.0,샬랑드파리 (CHALLANS de PARIS),18.0
169516,냄새도 좋고 성분도 좋은데 시간 지나면 끈적거림이 있어요. 흡수가 되는건지 조금 긴...,2019-06-18T12:52:57Z,3,11163,7333,27,0,0.0,False,False,2,앰플 드 오로라_113166,30ml,44800.0,샬랑드파리 (CHALLANS de PARIS),15.0
169594,휴대용 티슈타입 사용 중인데 정말 좋아요.\r\n특히 그날에 냄새때문에 민감할수 있...,2019-07-07T12:16:25Z,5,11163,7338,27,0,0.0,False,False,2,페미닌 클렌징 와입스 노멀 스킨,16ea,8000.0,썸머스이브 (summer's eve),228.0
182459,각질제거용으로 구매했어요.\r\n제가 쓰려고 구매했는데 아빠피부가 각질에 많아서 아...,2019-06-19T00:26:45Z,4,11163,7964,27,0,0.0,False,False,2,지우개 패드,60ea,17000.0,셀리맥스 (celimax),126.0
187684,모공케어용으로 샀는데 모공케어 되는지는 잘모르겠어요.\r\n반이상 사용하였는데 아직...,2019-06-18T13:00:15Z,3,11163,8210,27,0,0.0,False,False,2,끄렘 드 스텔라,30ml,31800.0,샬랑드파리 (CHALLANS de PARIS),21.0


In [83]:
# user id가 1281918인 user 테스트
# train_1281918, test_1281918 = train_test_split (df_1281918, test_size = 0.8)


# train_model_input_1281918 = {name: train_1281918 [name] for name in feature_names}
test_model_input_1281918 = {name: df_1281918 [name] for name in feature_names}

pred_ans = model.predict (test_model_input_1281918, batch_size = 256)

print ("test RMSE", round (sqrt (mean_squared_error (df_1281918 [target].values, pred_ans)), 4))

test RMSE 0.5295


In [84]:
result_list = list (zip (pred_ans.tolist (), df_1281918 [target].values.tolist ()))
result_list

[([3.7571470737457275], [4]),
 ([4.238088130950928], [4]),
 ([4.079061031341553], [4]),
 ([4.310779094696045], [4]),
 ([3.6760456562042236], [4]),
 ([3.5312931537628174], [4]),
 ([3.5580692291259766], [3]),
 ([4.386271953582764], [5]),
 ([3.901275396347046], [4]),
 ([3.7287116050720215], [3]),
 ([4.327513217926025], [4]),
 ([3.994749069213867], [5]),
 ([4.380995273590088], [4]),
 ([4.267673015594482], [4]),
 ([4.1621246337890625], [4]),
 ([4.618373394012451], [5]),
 ([3.9829258918762207], [3]),
 ([4.137250900268555], [4]),
 ([3.918311595916748], [5]),
 ([4.34718132019043], [4]),
 ([4.234959125518799], [4]),
 ([1.983639121055603], [2]),
 ([4.431247234344482], [4]),
 ([4.554099082946777], [4]),
 ([4.065104007720947], [5]),
 ([4.283544063568115], [5]),
 ([4.280738830566406], [4])]

In [102]:
def get_real_user_score (user_id, feature_names = feature_names):
    user_df = df_glowpick [reviews ['user_id'] == str (user_id)]
    user_model_input = {name: user_df [name] for name in feature_names}

    pred_ans = model.predict (user_model_input, batch_size = 256)

    print ("test RMSE", round (sqrt (mean_squared_error (user_df [target].values, pred_ans)), 4))
    print ('\nReal Score', list (zip (pred_ans.tolist (), user_df [target].values.tolist ())))

In [89]:
reviews.head ()

Unnamed: 0,contents,created_at,is_evaluation,like_count,rating,review_id,state,user_id,product_id
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,False,0,3,5416271,N,119763,100000
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,False,0,4,5340616,N,338669,100000
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,False,0,3,5228598,N,24862,100000
3,순하고 가격적으로 무난해서 쓰기 괜찮아요오오,2020-01-18T08:03:59Z,False,0,4,5225359,N,1331797,100000
4,성분이 착해서 샀고 타입이 폼이라 짜서 쓰는 젤보다 약간 귀찮지만 거품은 바로 많이...,2020-01-11T07:15:15Z,False,0,4,5211274,N,888968,100000


In [103]:
get_real_user_score (119763, feature_names)

test RMSE 1.0041

Real Score [([3.3647308349609375], [3]), ([3.7861568927764893], [4]), ([3.0997161865234375], [2]), ([3.0175468921661377], [4]), ([3.5525147914886475], [3]), ([3.348318099975586], [4]), ([3.970305919647217], [3]), ([3.7553493976593018], [4]), ([3.9729838371276855], [3]), ([4.241269588470459], [5]), ([3.4355404376983643], [4]), ([3.0436720848083496], [3]), ([4.2480010986328125], [5]), ([4.303683757781982], [4]), ([3.777149200439453], [4]), ([4.150819778442383], [2]), ([3.5279078483581543], [4]), ([3.5358896255493164], [4]), ([4.005305290222168], [5]), ([3.414149761199951], [2]), ([3.7169651985168457], [3]), ([3.357083320617676], [1]), ([3.471755266189575], [4]), ([3.5525150299072266], [5]), ([3.657834529876709], [3]), ([3.0594420433044434], [2]), ([4.190270900726318], [4]), ([3.4755496978759766], [5]), ([3.9149296283721924], [5]), ([3.7697789669036865], [5]), ([3.1234798431396484], [3]), ([3.7231833934783936], [3]), ([3.498690128326416], [2]), ([4.1205949783325195], [4]

In [91]:
df_glowpick [reviews ['user_id'] == '119763']

Unnamed: 0,contents,created_at,rating,user_id,product_id,age,gender,is_blinded,is_closed,is_inactivated,skin_type,title,volume,price,brandName,idThirdCategory
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,3,7216,0,32,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
5734,살짝 무게감 있는 질감의 수분 에센스가 적셔져 있는 마스크팩입니다. 시트의 밀착력이...,2018-12-12T10:01:10Z,4,7216,191,32,0,0.0,False,False,2,스텝솔루션 샤이닝 이펙트 아쿠아토닝 진주 마스크,25ml,3000.0,리더스코스메틱 (LEADERS),131.0
5738,사용해 본 샤이닝 이펙트 마스크 3종 중 가장 별로. 일단 시트가 에센스를 잘 머금...,2019-03-11T16:34:51Z,2,7216,193,32,0,0.0,False,False,2,스텝솔루션 샤이닝 이펙트 안티 링클 진주 마스크,25ml,3000.0,리더스코스메틱 (LEADERS),132.0
5741,"에멀전 타입 에센스 마스크팩으로 수분 충전 보다는 영양감, 무게감 있는 보습감을 주...",2019-02-16T13:34:23Z,4,7216,195,32,0,0.0,False,False,2,스텝솔루션 샤이닝 이펙트 리프팅 진주 마스크,25ml,3000.0,리더스코스메틱 (LEADERS),132.0
5764,시술 후 모발을 위한 마카다미아 오일 라인의 컨디셔너. 무거운 제형이지 않을까 생각...,2020-02-25T17:45:48Z,3,7216,201,32,0,0.0,False,False,2,리페어링 마카다미아오일 컨디셔너,250ml,12600.0,마크앤써니 (MARC ANTHONY),189.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1550838,베이스 트리트먼트에 ppt를 섞어 쓰는 제품입니다. 모방 상태에 따라 ppt 양을 ...,2018-11-22T06:43:40Z,5,7216,311,32,0,0.0,False,False,2,커스텀 케어 01 헤어 트리트먼트,230ml,48000.0,아우라 아듀라 (Aura Adllura),196.0
1553885,코튼패드라는 이름이지만 사실 코튼은 아니고 레이온과 펄프 혼방 재질인 스펀지형 화장...,2020-05-02T14:36:22Z,3,7216,43738,32,0,0.0,False,False,2,1/2 코튼 화장솜,80ea,3000.0,아리따움 (ARITAUM),109.0
1561681,청포도와 키위향이 상큼하게 잘 어우러저 손을 씻을 때마다 기분이 좋아지는 핸드워시예...,2020-03-01T04:10:33Z,5,7216,42968,32,0,0.0,False,False,2,쥬스 스무디 버블 핸드 워시 옐로우 그린,250ml,6500.0,해피바스 (HAPPY BATH),166.0
1561809,같은 라인 옐로우 그린에 비해 만족도는 살짝 떨어져요. 풍성한 거품이나 뽀득한 세정...,2020-03-01T04:12:13Z,4,7216,42969,32,0,0.0,False,False,2,쥬스 스무디 버블 핸드 워시 레드,250ml,6500.0,해피바스 (HAPPY BATH),166.0


In [104]:
df_glowpick.head ()

Unnamed: 0,contents,created_at,rating,user_id,product_id,age,gender,is_blinded,is_closed,is_inactivated,skin_type,title,volume,price,brandName,idThirdCategory
0,"티 컬렉션으로 출시되었던 제품으로, 가벼운 녹차향이 납니다. 향 자체는 좀 날리는 ...",2020-04-30T02:12:36Z,3,7216,0,32,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
1,살짝 로션같이 짜지고 묽음.\r\n향은 독하지 않고 적절히 향긋함.\r\n거품잘남\...,2020-03-15T09:08:20Z,4,22803,0,34,0,0.0,False,False,3,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
2,"해피바스는 무난하고 순한 매력이 있음! 다른것들도 잘 썼지만 정말 무난함,, 그치만...",2020-01-21T01:29:44Z,3,19263,0,24,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
3,순하고 가격적으로 무난해서 쓰기 괜찮아요오오,2020-01-18T08:03:59Z,4,13371,0,16,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0
4,성분이 착해서 샀고 타입이 폼이라 짜서 쓰는 젤보다 약간 귀찮지만 거품은 바로 많이...,2020-01-11T07:15:15Z,4,68326,0,29,0,0.0,False,False,2,티컬렉션 그린티 미셀라 클렌징폼,175g,11000.0,해피바스 (HAPPY BATH),112.0


In [None]:
df_glowpick [reviews ['user_id'] == '119763']