In [20]:
import os
import sys

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.losses import binary_crossentropy

# 0. Data Load

In [3]:
train = pd.read_json(os.path.join('..', 'Data', 'train.json'))
test = pd.read_json(os.path.join('..', 'Data', 'test.json'))

In [4]:
train.head()

Unnamed: 0,user_note,rating_per_user,vintage_id,user_like_count,userID,wine_id,wine_name,url,like
0,,4.0,142514980,0,19484511,2532733,Secateurs Red Blendn2016,/badenhorst-family-wines-secateurs-red-blend-s...,1
1,,3.5,48346487,0,19484511,1253802,Kloof Street Swartland Rougen2015,/mullineux-kloof-street-swartland-rouge/w/1253...,0
2,Beautiful golden nectar,3.5,156865950,1,19484511,1123441,Late Harvestn2018,/stables-late-harvest/w/1123441?year=2018,0
3,,3.5,150284640,0,19484511,1157656,Proseccon2017,/ruffino-prosecco/w/1157656?year=2017,0
4,Bright clean and refreshing,3.5,1471106,0,19484511,1134756,Prosecco TrevisonN.V.,/la-gioiosa-prosecco-treviso/w/1134756?year=N.V.,0


# 1. Data Process

In [5]:
train = train[['userID', 'wine_id', 'like']]
test = test[['userID', 'wine_id', 'like']]

In [6]:
user_ids = train['userID'].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}

wine_ids = train['wine_id'].unique().tolist()
wine2wine_encoded = {x: i for i, x in enumerate(wine_ids)}
wine_encoded2wine = {i: x for i, x in enumerate(wine_ids)}

In [7]:
add_train = pd.DataFrame(zip(train['userID'].map(user2user_encoded), train["wine_id"].map(wine2wine_encoded), train['like']),
             columns = ['userID', 'wine_id', 'like'])

add_test = pd.DataFrame(zip(test['userID'].map(user2user_encoded), test["wine_id"].map(wine2wine_encoded), test['like']),
             columns = ['userID', 'wine_id', 'like'])

In [8]:
add_train

Unnamed: 0,userID,wine_id,like
0,0,0,1
1,0,1,0
2,0,2,0
3,0,3,0
4,0,4,0
...,...,...,...
937751,6342,4672,0
937752,6342,1023,0
937753,6342,14075,1
937754,6342,3894,0


In [9]:
X = add_train[['userID', 'wine_id']].values
y = add_train['like'].values

In [10]:
from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(X, y, test_size = 0.2, random_state = 42)

# 2. Model

In [11]:
sys.path.append(os.path.join('..', 'Models'))
import NeuralMF

In [12]:
num_users = len(user2user_encoded)
num_itmes = len(wine_encoded2wine)

EMBEDDING_SIZE = 50

In [21]:
model = NeuralMF.RecommenderNet(num_users, num_itmes, EMBEDDING_SIZE)

model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(), 
    optimizer = keras.optimizers.Adam(lr = 0.001),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(
            name='binary_accuracy', dtype = None, threshold = 0.5)
    ])

# 3. Train

In [22]:
callbacks_list = [
                  keras.callbacks.EarlyStopping(
                      monitor = 'binary_accuracy',
                      patience = 5
                      )]

In [23]:
history = model.fit(
    x = x_train,
    y = y_train,
    batch_size = 128,
    epochs = 100,
    verbose = 1,
    validation_data = (x_val, y_val),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78