<font color="#CC3D3D"><p>
# Neural Collaborative Filtering (NCF): Basic Model

- 행렬분해(MF)의 MLP 변환    

<img src='http://drive.google.com/uc?export=view&id=1X83rkwM3B1u0UAGYMIrRQnbeSyb_65Ge' align="left" width=800>

- NCF 아키텍처   

<img src='http://drive.google.com/uc?export=view&id=1MXgP7nfGEzb_MpJ2DmU65KegOzI9hFCv' align="left" width=800>

### Import modules

In [None]:
import pandas as pd
import numpy as np
import pickle, random, os
import warnings; warnings.filterwarnings("ignore")
import matplotlib.pylab as plt
%matplotlib inline

from msr.python_splitters import (
    python_random_split, 
    python_chrono_split, 
    python_stratified_split
)

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow import keras
print(tf.__version__)

### Set random seeds to make your results reproducible

In [None]:
# Run this cell before every modeling run to get the same results each time.

def reset_seeds(reset_graph_with_backend=None):
    """Re-seed NumPy, Python's ``random`` module and TensorFlow with fixed seeds.

    Args:
        reset_graph_with_backend: Optional object exposing ``clear_session()``
            (presumably ``keras.backend`` -- confirm against callers). When it
            is not ``None``, its session is cleared and the TF1-compat default
            graph is reset before the seeds are applied.

    Returns:
        None. Progress messages are printed to stdout.
    """
    backend = reset_graph_with_backend
    if backend is not None:
        backend.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")

    # One fixed seed per random number generator, applied in this order.
    seeders = (
        (np.random.seed, 1),
        (random.seed, 2),
        (tf.compat.v1.set_random_seed, 3),
    )
    for apply_seed, seed_value in seeders:
        apply_seed(seed_value)
    # os.environ['CUDA_VISIBLE_DEVICES'] = ''  # for GPU
    print("RANDOM SEEDS RESET")


reset_seeds()

### Step 1: Load and process the data

##### Read data

In [None]:
# movies: movie metadata; ratings: the ratings users gave to movies.
movies, ratings = (
    pd.read_csv(csv_name) for csv_name in ('movies.csv', 'ratings.csv')
)

# Display the (rows, columns) shape of both tables as the cell output.
movies.shape, ratings.shape

##### Split data into train & test set 

In [None]:
# Alternative splitter (disabled): train, test = python_random_split(ratings, ratio=0.75)
# Hold out 25% of the ratings as the test set; the fixed random_state keeps the split repeatable.
train, test = train_test_split(
    ratings,
    test_size=0.25,
    random_state=0,
    shuffle=True,
)

### Step 2: Define the model architecture

##### Stack layers from input to output

In [None]:
# Initialise model hyper-parameters and dataset-derived sizes.
K = 200                                  # number of latent factors
mu = train.rating.mean()                 # global mean rating, computed on the training set

# The raw userId / movieId values are fed to the Embedding layers as row indices,
# so each table needs max(id) + 1 rows. Using nunique() + 1 is too small whenever
# the IDs are not contiguous, which makes lookups for large IDs fall out of range.
# For contiguous 1..n IDs both formulas give the same value.
M = int(ratings.userId.max()) + 1        # rows of the user embedding tables
N = int(ratings.movieId.max()) + 1       # rows of the movie embedding tables

M, N

In [None]:
# Inputs: a single ID per sample for the user and for the item (movie).
user = keras.Input(shape=(1, ))
item = keras.Input(shape=(1, ))

# Latent-factor lookups: the user table has M rows, the movie table has N rows,
# and each row holds K latent factors.
P_embedding = keras.layers.Embedding(input_dim=M, output_dim=K)(user)
Q_embedding = keras.layers.Embedding(input_dim=N, output_dim=K)(item)

# Scalar bias terms, looked up from the same IDs.
user_bias = keras.layers.Embedding(input_dim=M, output_dim=1)(user)
item_bias = keras.layers.Embedding(input_dim=N, output_dim=1)(item)

# Score = dot product of the two latent vectors plus both biases, flattened
# so the model emits one value per sample.
interaction = keras.layers.Dot(axes=2)([P_embedding, Q_embedding])
biased_score = keras.layers.Add()([interaction, user_bias, item_bias])
R = keras.layers.Flatten()(biased_score)

# Optional MLP head (disabled). Note: `Dense` is not imported in this notebook,
# so it would have to be referenced as keras.layers.Dense if re-enabled.
#R = Dense(32, activation='relu')(R)
#R = Dense(16, activation='relu')(R)
#R = Dense(1, activation='linear')(R)

model = keras.Model(inputs=[user, item], outputs=R)

##### Summarize & visualize the model

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Visualise the layer graph. Requires graphviz and pydot
# (download and install graphviz first, then `pip install pydot`).
keras.utils.plot_model(
    model,
    show_shapes=True,
    rankdir='BT',
)

### Step 3: Choose the optimizer and the cost function

In [None]:
# Configure training: MSE loss, Adam with its default settings, and RMSE
# reported under the name 'rmse' (so history keys are 'rmse' / 'val_rmse').
adam_optimizer = keras.optimizers.Adam()
rmse_metric = keras.metrics.RootMeanSquaredError(name='rmse')

model.compile(loss='mse', optimizer=adam_optimizer, metrics=[rmse_metric])

### Step 4: Train the model

##### Set learning conditions & fit the model 

In [None]:
# Train on mean-centred ratings (mu is added back at prediction time).
# Early stopping halts training after 5 epochs without val_loss improvement;
# restore_best_weights=True rolls the model back to the best epoch, otherwise
# the weights used for evaluation would be those from 5 epochs past the optimum.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

hist = model.fit(
    # Plain NumPy arrays avoid any dependence on how Keras handles pandas objects.
    x=[train.userId.to_numpy(), train.movieId.to_numpy()],
    y=(train.rating - mu).to_numpy(),
    validation_split=0.1,   # Keras takes the last 10% of the provided samples for validation
    batch_size=256,
    epochs=50,
    callbacks=[early_stopping],
    shuffle=False,          # no per-epoch reshuffling, as in the original setup
    verbose=2
)

##### Visualize training history 

In [None]:
# Learning curves: RMSE per epoch for the training and validation portions.

fig, ax = plt.subplots()
ax.plot(hist.history['rmse'], label="train")
ax.plot(hist.history['val_rmse'], label="valid")
ax.set_xlabel('epoch')
ax.set_ylabel('RMSE')
ax.legend()
plt.show()

### Step 5: Make predictions

##### Evaluate the model

In [None]:
# Measure RMSE on the held-out test data.

y_true = test.rating
# The model predicts mean-centred scores, so add mu back, then flatten to 1-D.
y_pred = (model.predict([test.userId, test.movieId]) + mu).flatten()

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print(f'RMSE = {rmse:.3f}')

##### Make recommendations

In [None]:
# Top-N movie recommendation for a single user.

# Deliberately NOT named `N`: `N` already holds the movie-embedding table size,
# and overwriting it here would break the model cell if it is re-run afterwards.
TOP_N = 10
targetUser = 276

# Exclude movies the target user has already rated.
seen_movies = ratings.query('userId == @targetUser').movieId.unique()
# setdiff1d returns the sorted unique candidates, giving a deterministic order.
movie_ids = np.setdiff1d(ratings.movieId.unique(), seen_movies)
# Repeat the user ID once per candidate movie.
user_ids = np.full(len(movie_ids), targetUser)
# Predict ratings (the model outputs mean-centred scores, so add mu back).
predictions = model.predict([user_ids, movie_ids]) + mu

# Recommend the TOP_N movies with the highest predicted rating.
(
    pd.DataFrame({'movieId': movie_ids, 'pred': predictions.flatten()})
    .sort_values(by='pred', ascending=False)
    .head(TOP_N)
)

###  Step 6: Save the model

In [None]:
# Save in pickle format (disabled)
# NOTE(review): whether a Keras model can be pickled depends on the installed
# TensorFlow/Keras version; model.save(...) with keras.models.load_model(...)
# is the documented persistence route -- confirm before enabling this cell.
# with open('ncf_model.pkl', 'wb') as f:
#    pickle.dump(model, f)

# To load the saved model later (only unpickle files from a trusted source):
# model = pd.read_pickle('ncf_model.pkl')

<font color="#CC3D3D"><p>
# End