In [84]:
import os
import sys

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.losses import binary_crossentropy

# 0. Data Load

In [45]:
# Read the train and test interaction tables from ../Data; each JSON file is
# parsed into its own DataFrame (train first, then test).
train, test = (
    pd.read_json(os.path.join('..', 'Data', file_name))
    for file_name in ('train.json', 'test.json')
)

In [46]:
# Peek at the first five rows to check which columns were loaded.
train.head(n = 5)

Unnamed: 0,index,user_note,rating_per_user,vintage_id,user_like_count,userID,wine_id,wine_name,url,like
0,0,Soooo good 💕,4.0,164942680,0,19484511,1141133,Prestige Rosé Brut ChampagnenN.V.,/taittinger-prestige-rose-brut-champagne/w/114...,1
1,1,"Belíssimo champanhe rose, bem seco mais com mu...",4.0,164942680,2,352674,1141133,Prestige Rosé Brut ChampagnenN.V.,/taittinger-prestige-rose-brut-champagne/w/114...,1
2,4,,4.0,164942680,0,17786617,1141133,Prestige Rosé Brut ChampagnenN.V.,/taittinger-prestige-rose-brut-champagne/w/114...,1
3,5,Perfekt med gratinerede østers.,4.5,164942680,0,8078038,1141133,Prestige Rosé Brut ChampagnenN.V.,/taittinger-prestige-rose-brut-champagne/w/114...,1
4,6,Delicious!,4.0,164942680,0,3014532,1141133,Prestige Rosé Brut ChampagnenN.V.,/taittinger-prestige-rose-brut-champagne/w/114...,0


# 1. Data Process

In [47]:
# Keep only the three columns used from here on: the user id, the wine id and
# the `like` label.
train, test = (
    frame[['userID', 'wine_id', 'like']]
    for frame in (train, test)
)

In [48]:
# Give every distinct user in `train` a contiguous integer index (in order of
# first appearance) and keep the inverse lookup alongside it.
user_ids = train['userID'].unique().tolist()
user2user_encoded = dict(zip(user_ids, range(len(user_ids))))
userencoded2user = dict(enumerate(user_ids))

# Same treatment for the wines.
wine_ids = train['wine_id'].unique().tolist()
wine2wine_encoded = dict(zip(wine_ids, range(len(wine_ids))))
wine_encoded2wine = dict(enumerate(wine_ids))

In [51]:
def _encode_interactions(frame):
    """Replace raw user / wine ids with the integer indices learned from `train`.

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain the columns 'userID', 'wine_id' and 'like'.

    Returns
    -------
    pd.DataFrame
        Columns 'userID', 'wine_id', 'like' on a fresh 0..n-1 index. The two id
        columns are int64 indices taken from `user2user_encoded` and
        `wine2wine_encoded`.

    Notes
    -----
    Ids that are missing from the encoders (users or wines that never occur in
    `train`) map to NaN. Such rows are dropped: they have no index to look up,
    and leaving the NaN in place would silently turn the id columns into floats.
    """
    encoded = pd.DataFrame({
        'userID': frame['userID'].map(user2user_encoded),
        'wine_id': frame['wine_id'].map(wine2wine_encoded),
        'like': frame['like'],
    })
    # Discard interactions whose user or wine was never seen in `train`.
    encoded = encoded.dropna(subset = ['userID', 'wine_id'])
    # With the NaN rows gone the ids can be stored as proper integers again.
    encoded = encoded.astype({'userID': 'int64', 'wine_id': 'int64'})
    return encoded.reset_index(drop = True)


# Every id in `train` is present in the encoders, so no training row is lost.
add_train = _encode_interactions(train)

# `test` may contain users / wines absent from `train`; those rows are removed.
add_test = _encode_interactions(test)

In [52]:
# Display the encoded training interactions (pandas elides the middle rows).
add_train

Unnamed: 0,userID,wine_id,like
0,0,0,1
1,1,0,1
2,2,0,1
3,3,0,1
4,4,0,0
...,...,...,...
763382,5226,24315,0
763383,5226,11292,1
763384,5226,336,1
763385,5226,37333,0


In [78]:
# Model inputs are the (userID, wine_id) index pairs; the target is `like`.
X = add_train.loc[:, ['userID', 'wine_id']].to_numpy()
y = add_train.loc[:, 'like'].to_numpy()

In [79]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the encoded training interactions for validation; the fixed
# random_state keeps the split identical between runs.
x_train, x_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state = 42,
    test_size = 0.2,
)

# 2. Model

In [68]:
# Make ../Models importable, then (re)load NeuralMF so that edits to the module
# are picked up without restarting the kernel.
import importlib  # replaces `imp`, which is deprecated and removed in Python 3.12

models_dir = os.path.join('..', 'Models')
if models_dir not in sys.path:  # do not stack duplicate entries on cell re-runs
    sys.path.append(models_dir)

import NeuralMF
importlib.reload(NeuralMF)

<module 'NeuralMF' from '../Models/NeuralMF.py'>

In [69]:
# Number of distinct users and wines seen in `train`; both are passed to
# NeuralMF.RecommenderNet when the model is built.
num_users = len(user2user_encoded)
num_items = len(wine2wine_encoded)
# Misspelled legacy name, kept as an alias because the model-construction cell
# still reads it.
num_itmes = num_items

# Third argument handed to NeuralMF.RecommenderNet.
EMBEDDING_SIZE = 50

In [85]:
# Instantiate the recommender defined in ../Models/NeuralMF.py.
model = NeuralMF.RecommenderNet(num_users, num_itmes, EMBEDDING_SIZE)

# Binary cross-entropy on the 0/1 `like` label, optimised with Adam.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated and
# rejected by current Keras releases.
model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer = keras.optimizers.Adam(learning_rate = 0.001),
    metrics = [
        # Reported as `binary_accuracy` (and `val_binary_accuracy` on the
        # validation data) in the training logs.
        tf.keras.metrics.BinaryAccuracy(
            name = 'binary_accuracy', dtype = None, threshold = 0.5)
    ])

# 3. Train

In [87]:
# Stop training once the validation accuracy has failed to improve for five
# consecutive epochs. The validation metric is monitored (not the training one,
# which keeps rising while the model overfits), and the weights from the best
# epoch are restored when training stops.
callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor = 'val_binary_accuracy',
        mode = 'max',
        patience = 5,
        restore_best_weights = True,
    )
]

In [88]:
# Train for at most 100 epochs, evaluating on the held-out validation split
# after every epoch. `callbacks_list` (defined in the previous cell) is passed
# in so that early stopping actually takes effect; without it the run always
# goes through all 100 epochs.
history = model.fit(
    x = x_train,
    y = y_train,
    batch_size = 64,
    epochs = 100,
    verbose = 1,
    validation_data = (x_val, y_val),
    callbacks = callbacks_list,
)

Epoch 1/100
 145/9543 [..............................] - ETA: 2:02 - loss: 0.6936 - binary_accuracy: 0.4945

KeyboardInterrupt: 