# MLG HW3

In [11]:
import pandas as pd
import numpy as np
import os, glob, json
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.metrics import recall_score
from sklearn.metrics import ndcg_score


import torch
from deepctr_torch.inputs import SparseFeat, get_feature_names
import torchfm



config = {'test_size' : 0.2, 
          'n_epoch' : 20}
rmse = {}
ndcg_10 = {}
recall_10 = {}


with open("./metrics/rmse.txt", "r") as fp:
    rmse = json.load(fp)
with open("./metrics/ndcg.txt", "r") as fp:
    ndcg_10 = json.load(fp)
# with open("./metrics/recall.txt", "r") as fp:
#     recall_10 = json.load(fp)
print(rmse)
print(ndcg_10)

temp = pd.read_csv('results.csv', header = [0, 1], index_col = [0])

{'bookIPNN': 0.7630446736162781, 'bookOPNN': 0.7596840523834419, 'bookPNN': 0.7518341524053948, 'bookCCPM': 0.7424491460611815, 'bookWDL': 0.7093089364684898, 'bookDCN': 0.7302810220618285, 'bookNFM': 0.7568314381827344, 'bookDeepFM': 0.7386060319249964, 'bookAFM': 0.7353113174115252, 'bookxDeepFM': 0.7524432051769194, 'movieIPNN': 1.1257715623148956, 'movieOPNN': 1.0723128454116133, 'moviePNN': 1.1457441626815474, 'movieCCPM': 1.053634406962656, 'movieWDL': 1.0492728323684728, 'movieDCN': 1.0530302980259754, 'movieNFM': 1.0942265873231904, 'movieDeepFM': 1.0515179004479969, 'movieAFM': 1.0710224032118236, 'moviexDeepFM': 1.0744275480292602, 'businessIPNN': 1.2039563606524462, 'businessOPNN': 1.1574377340755466, 'businessPNN': 1.1446729801067166, 'businessCCPM': 1.0781101968351026, 'businessWDL': 1.0570848609478811, 'businessDCN': 1.0804563918023302, 'businessNFM': 1.293179055678124, 'businessDeepFM': 1.0519467034898193, 'businessAFM': 1.083461280218211, 'businessxDeepFM': 1.1471864908

## Read Data

In [12]:
def Data2List(path):
    data = {}
    os.chdir(path)
    files = glob.glob('*.dat')
    file_name = [x[:-4] for x in files]
    for i in range(len(file_name)):
        data[file_name[i]] = pd.read_csv(files[i], sep = '\t', header = None)
        # print(file_name[i])
        # print(data[file_name[i]].head(2))
    return data

# Data Filtering
# delete users who's interaction less than 3
def del_less_than_3(data, target = 'user') :
    temp = data[target].value_counts()
    temp = temp.index[temp.values > 3]
    data = data[data[target].isin(temp)]
    
    return data

DoubanBook = Data2List('/home/rita/111/111-2MLG/HW3/data/DoubanBook')
Movielens = Data2List('/home/rita/111/111-2MLG/HW3/data/Movielens')
Yelp = Data2List('/home/rita/111/111-2MLG/HW3/data/Yelp')

DoubanBook['user_book'].columns = ['user', 'book', 'rating']
Movielens['user_movie'].columns = ["user","movie","rating","time"]
Yelp['user_business'].columns = ['user', 'business', 'rating']

DoubanBook['user_book'] = del_less_than_3(DoubanBook['user_book'])
Movielens['user_movie'] = del_less_than_3(Movielens['user_movie'])
Yelp['user_business'] = del_less_than_3(Yelp['user_business'])

# Merge user's feature
# DoubanBook['user'] = pd.merge(left = DoubanBook['user_location'], right = DoubanBook['user_group'], on = 0, how = 'outer')
# DoubanBook['user'].columns = ['user', 'location', 'group']
# DoubanBook['user'] = pd.merge(left = DoubanBook['user_book'], right = DoubanBook['user'], on = 'user', how = 'left')
DoubanBook['user'] = pd.merge(left = DoubanBook['user_book'], right = DoubanBook['user_location'], left_on = 'user', right_on = 0, how = 'left')
DoubanBook['user'] = DoubanBook['user'].drop([0], axis = 1)
DoubanBook['user'].columns = ['user', 'book', 'rating', 'location']

Movielens['user'] = pd.merge(left = Movielens['user_age'], right = Movielens['user_occupation'], on = 0, how = 'outer')
Movielens['user'].columns = ['user', 'age', 'occupation']
Movielens['user'] = pd.merge(left = Movielens['user_movie'], right = Movielens['user'], on = 'user', how = 'left')

Yelp['user'] = Yelp['user_business']   

print(DoubanBook['user'].shape)
print(DoubanBook['user'].head(2))
print(Movielens['user'].shape)
print(Movielens['user'].head(2))
print(Yelp['user'].shape)
print(Yelp['user'].head(2))
path = '/home/rita/111/111-2MLG/HW3'
os.chdir(path)

(788898, 4)
    user  book  rating  location
0  10855   938       4      33.0
1  10027     3       3     394.0
(100000, 6)
   user  movie  rating       time  age  occupation
0   196    242       3  881250949    5           3
1   186    302       3  891717742    4           4
(184835, 3)
   user  business  rating
2     2       186       5
3     2       205       5


In [13]:
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4])
kf = KFold(n_splits=3)
kf.get_n_splits(X)
print(kf)
for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]


KFold(n_splits=3, random_state=None, shuffle=False)
TRAIN: [2 3] TEST: [0 1]
TRAIN: [0 1 3] TEST: [2]
TRAIN: [0 1 2] TEST: [3]


## 10 Typical RecSys Methods
**UCF-s UCF-p ICF-s ICF-p** **MF** **FM** BPR-MF BPR-FM GBDT+LR XGB+LR 

In [100]:
# CF = Collaborative Filtering
# UCF-s / UCF-p / ICF-s / ICF-p
# https://elevenzou.github.io/2019/02/16/Surprise%E5%BA%93%E5%9F%BA%E7%A1%80%E7%94%A8%E6%B3%95/
# 4 mins
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import KNNBasic
from surprise.accuracy import rmse
def Based_CF(data, feats, based = 'U', sim = 's') :
    print('Start Training {}CF-{} model'.format(based, sim))
    reader = Reader(rating_scale=(1,5))
    data = Dataset.load_from_df(data[feats], reader)
    based_dict = {'U' : True, 'I' : False}
    sim_dict = {'s' : "cosine", 'p' : 'pearson'}
    sim_options = {"name":sim_dict[sim],"user-based":based_dict[based]}
    algo = KNNBasic(sim_options=sim_options)
    a = cross_validate(algo, data, cv=5, verbose=True, n_jobs = 5)
    print()
    return a['test_rmse'].mean()

with open("./metrics/rmse.txt", "r") as fp:
    rmse = json.load(fp)
with open("./metrics/ndcg.txt", "r") as fp:
    ndcg_10 = json.load(fp)
with open("./metrics/recall.txt", "r") as fp:
    recall_10 = json.load(fp)

data_ls = [DoubanBook['user_book'], Movielens['user_movie'], Yelp['user_business']]
feat_ls = [['user', 'book', 'rating'], ["user", "movie", "rating"], ['user', 'business', 'rating']]
based = ['U', 'I']
sim = ['s', 'p']
for data, feats in zip(data_ls, feat_ls) :
    # print(feats)
    for b in based :
        for s in sim : 
            name = feats[1] + '{}CF-{}'.format(b, s)
            rmse_t = Based_CF(data, feats, b, s)
            rmse[name] = rmse_t

with open("./metrics/rmse.txt", "w") as fp:
    json.dump(rmse, fp)
with open("./metrics/ndcg.txt", "w") as fp:
    json.dump(ndcg_10, fp)
with open("./metrics/recall.txt", "w") as fp:
    json.dump(recall_10, fp)

Start Training UCF-s model
Computing the cosine similarity matrix...
Computing the cosine similarity matrix...
Computing the cosine similarity matrix...
Computing the cosine similarity matrix...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Done computing similarity matrix.
Done computing similarity matrix.
Done computing similarity matrix.
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.7529  0.7576  0.7573  0.7601  0.7544  0.7565  0.0025  
MAE (testset)     0.6053  0.6089  0.6085  0.6105  0.6061  0.6079  0.0019  
Fit time          14.10   13.05   13.31   12.92   13.18   13.31   0.41    
Test time         19.75   19.49   19.81   19.65   19.75   19.69   0.11    

Start Training UCF-p model
Computing the pearson similarity matrix...
Computing the pearson similarity matrix...
Computing the pearson similarity matrix...
Co

In [16]:
# MF
# https://albertauyeung.github.io/2017/04/23/python-matrix-factorization.html/
# 9 mins
class Matrix_Factorization():

    def __init__(self, R, K, alpha, beta, iterations):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.

        Arguments
        - R (ndarray)   : user-item rating matrix
        - K (int)       : number of latent dimensions
        - alpha (float) : learning rate
        - beta (float)  : regularization parameter
        """

        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        # Initialize user and item latent feature matrice
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))

        # Initialize the biases
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[np.where(self.R != 0)])

        # Create a list of training samples
        self.samples = [
            (i, j, self.R[i, j])
            for i in range(self.num_users)
            for j in range(self.num_items)
            if self.R[i, j] > 0
        ]

        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for i in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            # if (i+1) % 10 == 0:
            if (i+1) % 1 == 0:
                print("Iteration: %d ; error = %.4f" % (i+1, mse))

        return training_process

    def mse(self):
        """
        A function to compute the total mean square error
        """
        xs, ys = self.R.nonzero()
        predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x, y] - predicted[x, y], 2)
        return np.sqrt(error)

    def sgd(self):
        """
        Perform stochastic graident descent
        """
        for i, j, r in self.samples:
            # Computer prediction and error
            prediction = self.get_rating(i, j)
            e = (r - prediction)

            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Update user and item latent feature matrices
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])

    def get_rating(self, i, j):
        """
        Get the predicted rating of user i and item j
        """
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        return prediction

    def full_matrix(self):
        """
        Computer the full matrix using the resultant biases, P and Q
        """
        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)

def MF(data, feats, target, K = 2, alpha = 0.01, beta = 0.01) :
    print(feats)
    train, test = train_test_split(data, test_size = config['test_size'])
    a = data[feats[0]].max()
    b = data[feats[1]].max()
    R = np.zeros((a, b), dtype = np.float128)
    train_x = np.array(train[feats])
    train_y = np.array(train[target])
    test_x = np.array(test[feats])
    test_y = np.array(test[target])
    for i in range(train_x.shape[0]):
        # R[train.iat[i, 0] - 1, train.iat[i, 1] - 1] = train.iat[i, 2]
        # R[train.at[i, feats[0]] - 1, train.at[i, feats[1]] - 1] = train.at[i, target]
        R[train_x[i, 0] - 1, train_x[i, 1] - 1] = train_y[i]
    

    # mf = Matrix_Factorization(R, K=K, alpha=alpha, beta=beta, iterations=config['n_epoch'])
    mf = Matrix_Factorization(R, K=K, alpha=alpha, beta=beta, iterations=2)
    training_process = mf.train()

    temp = mf.full_matrix()

    t = []
    for i in range(test.shape[0]) :
        t.append(temp[test_x[i, 0] - 1, test_x[i, 1] - 1])
    t = np.array(t)
    
    test_y = np.where(test_y > 3, 1, 0)
    t = np.where(t > 3, 1, 0)
    
    
    rmse = sqrt(mean_squared_error(test_y, t))
    ndcg = ndcg_score(test_y.reshape(1, -1), t.reshape(1, -1), k=10)
    recall = recall_score(test_y.reshape(1, -1), t.reshape(1, -1))
    
    print("test MSE : {:.4f}".format(rmse, 4), '\n')    
    
    return  rmse, ndcg, recall
    # print("Global bias:", mf.b, "User bias:", mf.b_u, "Item bias:", mf.b_i, sep = '\n')
    # x = [x for x, y in training_process]
    # y = [y for x, y in training_process]
    # plt.figure(figsize=((16,4)))
    # plt.plot(x, y)
    # plt.xticks(x, x)
    # plt.xlabel("Iterations")
    # plt.ylabel("Mean Square Error")
    # plt.grid(axis="y")
with open("./metrics/rmse.txt", "r") as fp:
    rmse = json.load(fp)
with open("./metrics/ndcg.txt", "r") as fp:
    ndcg_10 = json.load(fp)
with open("./metrics/recall.txt", "r") as fp:
    recall_10 = json.load(fp)
   
data_ls = [DoubanBook['user_book'], Movielens['user_movie'], Yelp['user_business']]
feats_ls = [['user', 'book', 'rating'], ["user", "movie", "rating"], ['user', 'business', 'rating']]
for data, feats in zip(data_ls, feats_ls):
    name = feats[1] + 'MF'
    rmse_t, ndcg_t, recall_t = MF(data, feats[:-1], feats[-1], K = 5, alpha = 0.1, beta = 0.15)
    # rmse[name] = rmse_t
    # ndcg_10[name] = ndcg_t
    # recall_10[name] = recall_t
    
with open("./metrics/rmse.txt", "w") as fp:
    json.dump(rmse, fp)
with open("./metrics/ndcg.txt", "w") as fp:
    json.dump(ndcg_10, fp)
with open("./metrics/recall.txt", "w") as fp:
    json.dump(recall_10, fp)


['user', 'book']
Iteration: 1 ; error = 549.8098
Iteration: 2 ; error = 544.7074


ValueError: Target is multilabel-indicator but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted', 'samples'].

In [14]:
# FM
# https://github.com/coreylynch/pyFM
# 3 mins
from pyfm import pylibfm
from sklearn.feature_extraction import DictVectorizer
def FM(data, feats, target) :
    print(feats)
    data = data.astype(str)
    train, test = train_test_split(data, test_size = config['test_size'])
    train_y = pd.Series(train[target].astype(float)).tolist()
    train_x = train[feats].to_dict('records')
    test_y = np.array(test[target].astype(float)) # .tolist()
    test_x = test[feats].to_dict('records')

    v = DictVectorizer()
    X = v.fit_transform(train_x).astype('double')
    fm = pylibfm.FM(num_factors=10, num_iter=config['n_epoch'], verbose=True, task="regression", initial_learning_rate=0.001, learning_rate_schedule="optimal")
    fm.fit(X,train_y)
    temp = fm.predict(v.transform(test_x))

    rmse = sqrt(mean_squared_error(test_y, temp))
    test_y = np.where(test_y > 3, 1, 0)
    temp = np.where(temp > 3, 1, 0)
    ndcg = ndcg_score(test_y.reshape(1, -1), temp.reshape(1, -1), k=10)
    recall = recall_score(test_y, temp)
    print("test MSE : {:.4f}".format(rmse, 4), '\n')
    
    return rmse, ndcg, recall

with open("./metrics/rmse.txt", "r") as fp:
    rmse = json.load(fp)
with open("./metrics/ndcg.txt", "r") as fp:
    ndcg_10 = json.load(fp)
with open("./metrics/recall.txt", "r") as fp:
    recall_10 = json.load(fp)
    
data_ls = [DoubanBook['user_book'], Movielens['user_movie'], Yelp['user_business']]
feats_ls = [['user', 'book', 'rating'], ["user", "movie", "rating"], ['user', 'business', 'rating']]
for data, feats in zip(data_ls, feats_ls):
    name = feats[1] + 'FM'
    rmse_t, ndcg_t, recall_t = FM(data, feats[:-1], feats[-1])
    rmse[name] = rmse_t
    ndcg_10[name] = ndcg_t
    recall_10[name] = recall_t
    
with open("./metrics/rmse.txt", "w") as fp:
    json.dump(rmse, fp)
with open("./metrics/ndcg.txt", "w") as fp:
    json.dump(ndcg_10, fp)
with open("./metrics/recall.txt", "w") as fp:
    json.dump(recall_10, fp)

['user', 'book']
Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training MSE: 0.31979
-- Epoch 2
Training MSE: 0.29121
-- Epoch 3
Training MSE: 0.27841
-- Epoch 4
Training MSE: 0.27010
-- Epoch 5
Training MSE: 0.26407
-- Epoch 6
Training MSE: 0.25940
-- Epoch 7
Training MSE: 0.25563
-- Epoch 8
Training MSE: 0.25244
-- Epoch 9
Training MSE: 0.24976
-- Epoch 10
Training MSE: 0.24740
-- Epoch 11
Training MSE: 0.24533
-- Epoch 12
Training MSE: 0.24348
-- Epoch 13
Training MSE: 0.24178
-- Epoch 14
Training MSE: 0.24023
-- Epoch 15
Training MSE: 0.23884
-- Epoch 16
Training MSE: 0.23754
-- Epoch 17
Training MSE: 0.23632
-- Epoch 18
Training MSE: 0.23518
-- Epoch 19
Training MSE: 0.23410
-- Epoch 20
Training MSE: 0.23310
test MSE : 0.7036 

['user', 'movie']
Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training MSE: 0.60366
-- Epoch 2
Training MSE: 0.52725
-- Epoch 3
Training MSE: 0.49865
-- Epoch 4
Training MSE

In [45]:
#not yet
# BPR-FM
# https://github.com/etlundquist/rankfm/blob/master/examples/movielens.ipynb
import os, sys, git
import numba as nb
import warnings

sns.set_style('whitegrid')
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['axes.labelsize'] = 12
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
# plt.rcParams
# git_repo = git.Repo('.', search_parent_directories=True)
# git_root = git_repo.git.rev_parse('--show-toplevel')
# cython_path = os.path.join(git_root, 'rankfm')

# sys.path[0] = git_root
# sys.path[1] = cython_path
# sys.path[:2]
# !cd $git_root && python setup.py build_ext --inplace

# %load_ext autoreload
# %autoreload 2

import rankfm
from rankfm.rankfm import RankFM
from rankfm.evaluation import hit_rate, reciprocal_rank, discounted_cumulative_gain, precision, recall, diversity


# repo_root = "/Users/ericlundquist/Repos/rankfm"
# data_path = os.path.join(repo_root, "data/examples")
# print("\n".join([repo_root, data_path]))

interactions = Movielens['user_movie']
interactions_train, interactions_test = train_test_split(interactions, test_size = config['test_size'])

user_features = Movielens['user']
item_features = Movielens['movie_genre']

item_names = pd.read_csv(os.path.join(data_path, 'ML_1M_ITEM_NAMES.csv'))



unique_users = interactions.user_id.nunique()
unique_items = interactions.item_id.nunique()

print("interactions shape: {}".format(interactions.shape))
print("interactions unique users: {}".format(interactions.user_id.nunique()))
print("interactions unique items: {}".format(interactions.item_id.nunique()))

print("user features users:", interactions.user_id.nunique())
print("item features items:", interactions.item_id.nunique())



sparsity = 1 - (len(interactions) / (unique_users * unique_items))
print("interaction matrix sparsity: {}%".format(round(100 * sparsity, 1)))


np.random.seed(1492)
interactions['random'] = np.random.random(size=len(interactions))
test_pct = 0.25


train_mask = interactions['random'] <  (1 - test_pct)
valid_mask = interactions['random'] >= (1 - test_pct)

interactions_train = interactions[train_mask][['user_id', 'item_id']]
interactions_valid = interactions[valid_mask][['user_id', 'item_id']]

train_users = np.sort(interactions_train.user_id.unique())
valid_users = np.sort(interactions_valid.user_id.unique())
cold_start_users = set(valid_users) - set(train_users)

train_items = np.sort(interactions_train.item_id.unique())
valid_items = np.sort(interactions_valid.item_id.unique())
cold_start_items = set(valid_items) - set(train_items)

print("train shape: {}".format(interactions_train.shape))
print("valid shape: {}".format(interactions_valid.shape))

print("train users: {}".format(len(train_users)))
print("valid users: {}".format(len(valid_users)))
print("cold-start users: {}".format(cold_start_users))

print("train items: {}".format(len(train_items)))
print("valid items: {}".format(len(valid_items)))
print("cold-start items: {}".format(cold_start_items))

user_features = user_features[user_features.user_id.isin(train_users)]
item_features = item_features[item_features.item_id.isin(train_items)]
user_features.shape, item_features.shape

model = RankFM(factors=20, loss='warp', max_samples=20, alpha=0.01, sigma=0.1, learning_rate=0.10, learning_schedule='invscaling')
model


%%time
model.fit(interactions_train, epochs=20, verbose=True)

valid_scores = model.predict(interactions_valid, cold_start='nan') 
print(valid_scores.shape)
pd.Series(valid_scores).describe()

valid_recommendations = model.recommend(valid_users, n_items=10, filter_previous=True, cold_start='nan')
valid_recommendations.head()

k = 10

most_popular = interactions_train.groupby('item_id')['user_id'].count().sort_values(ascending=False)[:k]
most_popular

test_user_items = interactions_valid.groupby('user_id')['item_id'].apply(set).to_dict()
test_user_items = {key: val for key, val in test_user_items.items() if key in set(train_users)}

base_hrt = np.mean([int(len(set(most_popular.index) & set(val)) > 0)                       for key, val in test_user_items.items()])
base_pre = np.mean([len(set(most_popular.index) & set(val)) / len(set(most_popular.index)) for key, val in test_user_items.items()])
base_rec = np.mean([len(set(most_popular.index) & set(val)) / len(set(val))                for key, val in test_user_items.items()])

print("number of test users: {}".format(len(test_user_items)))
print("baseline hit rate: {:.3f}".format(base_hrt))
print("baseline precision: {:.3f}".format(base_pre))
print("baseline recall: {:.3f}".format(base_rec))




%%time
model_hit_rate = hit_rate(model, interactions_valid, k=k)
model_reciprocal_rank = reciprocal_rank(model, interactions_valid, k=k)
model_dcg = discounted_cumulative_gain(model, interactions_valid, k=k)
model_precision = precision(model, interactions_valid, k=k)
model_recall = recall(model, interactions_valid, k=k)


print("hit_rate: {:.3f}".format(model_hit_rate))
print("reciprocal_rank: {:.3f}".format(model_reciprocal_rank))
print("dcg: {:.3f}".format(model_dcg, 3))
print("precision: {:.3f}".format(model_precision))
print("recall: {:.3f}".format(model_recall))

recommendation_diversity = diversity(model, interactions_valid, k=k)
recommendation_diversity.head(10)

top_items = pd.merge(item_names, recommendation_diversity, on='item_id', how='inner')
top_items = top_items.set_index('item_id').loc[recommendation_diversity.item_id].reset_index()
top_items = top_items[['item_id', 'cnt_users', 'pct_users', 'title', 'genres']]
top_items.head(10)

coverage = np.mean(recommendation_diversity['cnt_users'] > 0)
print("percentage of items recommended to at least one user: {:.3f}".format(coverage))

nonzero_users = recommendation_diversity[recommendation_diversity.cnt_users > 0]
entropy = -np.sum(nonzero_users['pct_users'] * np.log2(nonzero_users['pct_users']))
print("entropy value of recommended items: {:.3f}".format(entropy))

N = 50
fig, axes = plt.subplots(1, 1, figsize=[16, 4])

topN = recommendation_diversity.iloc[:N, :]
axes.bar(topN.index.values + 1, topN.pct_users, width=1, edgecolor='black', alpha=0.75)
axes.set(xlabel='Item Rank', ylabel='Percentage of Users', title='Percentage of Users Recommended by Item Rank')
plt.show()


random_user = np.random.choice(interactions_valid.user_id.unique())
print("random user: {}".format(random_user))

random_user_recs = valid_recommendations.loc[random_user]
random_user_recs = item_names[item_names.item_id.isin(random_user_recs)].set_index('item_id').loc[random_user_recs]
random_user_recs

most_similar_items = model.similar_items(919)
most_similar_items = item_names.set_index('item_id').loc[most_similar_items]
most_similar_items

most_similar_items = model.similar_items(2028)
most_similar_items = item_names.set_index('item_id').loc[most_similar_items]
most_similar_items



ImportError: cannot import name 'RankFM' from 'rankfm.rankfm' (/home/rita/111/111-2MLG/HW3/rankfm/rankfm/__init__.py)

## 10 NN-based RecSys Methods
FNN **IPNN** **OPNN** **PIN** **CCPM** NeuMF **WD** **DeepCross** **NFM** **DeepFM** 

In [8]:
#NN-based model
from deepctr_torch.models import PNN # PIN
from deepctr_torch.models import CCPM # CCPM

from deepctr_torch.models import WDL # WD
from deepctr_torch.models import DCN # DeepCross
from deepctr_torch.models import NFM # NFM
from deepctr_torch.models import DeepFM # DeepFM

#recent nn-based approach
from deepctr_torch.models import AFM # AFM
from deepctr_torch.models import xDeepFM # xDeePFM

# new



In [39]:

class nn_based :
    def __init__(self, data, features, target, device = 'cuda') : 
        self.features = features
        self.target = target
        for feat in self.features :
            enc = LabelEncoder()
            data[feat] = enc.fit_transform(data[feat])
        # SparseFeat(name, vocabulary_size, embedding_dim, use_hash, dtype, embedding_name, group_name)
        # Dense : Numeric 、 Sparse : Category 、 Sequence : Time Series       
        fixed = [SparseFeat(feat, data[feat].nunique()) for feat in self.features]
        linear_feats = fixed
        dnn_feats = fixed
        feature_names = get_feature_names(linear_feats + dnn_feats)

        # train test split
        self.train, self.test = train_test_split(data, test_size = 0.2)
        self.train_data = {name: self.train[name] for name in feature_names}
        self.test_data = {name: self.test[name] for name in feature_names}

        self.model_dict = {"IPNN":PNN(dnn_feats,use_inner=True,use_outter=False,task='regression',device=device),
              "OPNN":PNN(dnn_feats,use_inner=False,use_outter=True,task='regression',device=device),
              "PNN":PNN(dnn_feats,use_inner=True,use_outter=True,task='regression',device=device),
              "CCPM":CCPM(linear_feats, dnn_feats, task='regression',device=device),
             "WDL":WDL(linear_feats, dnn_feats, task='regression',device=device),
             "DCN":DCN(linear_feats, dnn_feats, task='regression',device=device),
             "NFM":NFM(linear_feats, dnn_feats, task='regression',device=device),
             "DeepFM":DeepFM(linear_feats, dnn_feats, task='regression',device=device),
             "AFM":AFM(linear_feats, dnn_feats, task='regression',device=device),
             "xDeepFM":xDeepFM(linear_feats, dnn_feats, task='regression',device=device), 
            #  ""
             }
             
    def train_model(self, model, n_epochs = 10):
        print(self.features, model, '==================', sep = '\n')
        model = self.model_dict[model]

        model.compile('adam', 'mse', metrics = ['mse'])
        history = model.fit(self.train_data,self.train[self.target].values,batch_size=256, epochs=n_epochs, verbose=5, validation_split=0.1)
        pred_ans = model.predict(self.test_data, batch_size=256)
        
        rmse = sqrt(mean_squared_error(self.test[self.target].values, pred_ans))
        
        test_y = np.where(self.test[self.target].values > 3, 1, 0)
        pred_ans = np.where(pred_ans > 3, 1, 0)
        
        ndcg = ndcg_score(test_y.reshape(1, -1), pred_ans.T, k=10)
        recall10 = recall_score(test_y.reshape(1, -1), pred_ans)
        
        print("test MSE : {:.4f}".format(rmse, 4))
        print("test ndcg : {:.4f}".format(ndcg, 4))
        # print("test recall10 : {:.4f}".format(recall10, 4))
        
        return rmse, ndcg, recall10


In [41]:
# 73 min
data_ls = [DoubanBook['user'], Movielens['user'], Yelp['user']]
feats_ls = [['user', 'book', 'location'], ['user', 'movie', 'time', 'age', 'occupation'], ['user', 'business']]
model = ['IPNN', 'OPNN', 'PNN', 'CCPM', 'WDL', 'DCN', 'NFM', 'DeepFM', 'AFM', 'xDeepFM']

with open("./metrics/rmse.txt", "r") as fp:
    rmse = json.load(fp)
with open("./metrics/ndcg.txt", "r") as fp:
    ndcg_10 = json.load(fp)
with open("./metrics/recall.txt", "r") as fp:
    recall_10 = json.load(fp)    
    
for data, feats in zip(data_ls, feats_ls) :
    nn = nn_based(data, feats, 'rating')
    for m in model :
        rmse_t, ndcg_t, recall_t = nn.train_model(m, config['n_epoch'])
        name = feats[1] + m
        rmse[name] = rmse_t
        ndcg_10[name] = ndcg_t
        recall_10[name] = recall_t

with open("./metrics/rmse.txt", "w") as fp:
    json.dump(rmse, fp)
with open("./metrics/ndcg.txt", "w") as fp:
    json.dump(ndcg_10, fp)
with open("./metrics/recall.txt", "w") as fp:
    json.dump(recall_10, fp)

['user', 'book', 'location']
IPNN
cuda
Train on 568006 samples, validate on 63112 samples, 2219 steps per epoch


  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
12s - loss:  0.7740 - mse:  0.7740 - val_mse:  0.4930
Epoch 2/20
12s - loss:  0.4764 - mse:  0.4764 - val_mse:  0.4803
Epoch 3/20
12s - loss:  0.4566 - mse:  0.4565 - val_mse:  0.4797
Epoch 4/20
12s - loss:  0.4419 - mse:  0.4418 - val_mse:  0.4849
Epoch 5/20
12s - loss:  0.4301 - mse:  0.4301 - val_mse:  0.4948
Epoch 6/20
13s - loss:  0.4189 - mse:  0.4189 - val_mse:  0.4957
Epoch 7/20
13s - loss:  0.4077 - mse:  0.4076 - val_mse:  0.5013
Epoch 8/20
14s - loss:  0.3988 - mse:  0.3987 - val_mse:  0.5130
Epoch 9/20
12s - loss:  0.3913 - mse:  0.3912 - val_mse:  0.5169
Epoch 10/20
13s - loss:  0.3847 - mse:  0.3846 - val_mse:  0.5196
Epoch 11/20
12s - loss:  0.3792 - mse:  0.3790 - val_mse:  0.5322
Epoch 12/20
13s - loss:  0.3746 - mse:  0.3745 - val_mse:  0.5349
Epoch 13/20
12s - loss:  0.3702 - mse:  0.3700 - val_mse:  0.5467
Epoch 14/20
12s - loss:  0.3657 - mse:  0.3655 - val_mse:  0.5508
Epoch 15/20
12s - loss:  0.3622 - mse:  0.3620 - val_mse:  0.5484
Epoch 16/20
13s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
13s - loss:  0.7865 - mse:  0.7865 - val_mse:  0.4905
Epoch 2/20
12s - loss:  0.4757 - mse:  0.4757 - val_mse:  0.4769
Epoch 3/20
13s - loss:  0.4550 - mse:  0.4550 - val_mse:  0.4772
Epoch 4/20
13s - loss:  0.4420 - mse:  0.4419 - val_mse:  0.4844
Epoch 5/20
13s - loss:  0.4304 - mse:  0.4304 - val_mse:  0.4818
Epoch 6/20
13s - loss:  0.4187 - mse:  0.4186 - val_mse:  0.4922
Epoch 7/20
14s - loss:  0.4076 - mse:  0.4075 - val_mse:  0.4939
Epoch 8/20
13s - loss:  0.3985 - mse:  0.3984 - val_mse:  0.4998
Epoch 9/20
13s - loss:  0.3917 - mse:  0.3916 - val_mse:  0.5058
Epoch 10/20
13s - loss:  0.3859 - mse:  0.3858 - val_mse:  0.5113
Epoch 11/20
13s - loss:  0.3800 - mse:  0.3799 - val_mse:  0.5224
Epoch 12/20
14s - loss:  0.3742 - mse:  0.3740 - val_mse:  0.5205
Epoch 13/20
13s - loss:  0.3692 - mse:  0.3690 - val_mse:  0.5283
Epoch 14/20
13s - loss:  0.3646 - mse:  0.3644 - val_mse:  0.5296
Epoch 15/20
14s - loss:  0.3604 - mse:  0.3602 - val_mse:  0.5386
Epoch 16/20
13s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
17s - loss:  0.7923 - mse:  0.7923 - val_mse:  0.4976
Epoch 2/20
14s - loss:  0.4791 - mse:  0.4791 - val_mse:  0.4866
Epoch 3/20
14s - loss:  0.4621 - mse:  0.4621 - val_mse:  0.4791
Epoch 4/20
14s - loss:  0.4466 - mse:  0.4466 - val_mse:  0.4784
Epoch 5/20
14s - loss:  0.4356 - mse:  0.4356 - val_mse:  0.4835
Epoch 6/20
14s - loss:  0.4242 - mse:  0.4242 - val_mse:  0.4828
Epoch 7/20
14s - loss:  0.4112 - mse:  0.4111 - val_mse:  0.4879
Epoch 8/20
14s - loss:  0.3987 - mse:  0.3986 - val_mse:  0.5000
Epoch 9/20
14s - loss:  0.3886 - mse:  0.3885 - val_mse:  0.5088
Epoch 10/20
14s - loss:  0.3807 - mse:  0.3806 - val_mse:  0.5137
Epoch 11/20
14s - loss:  0.3746 - mse:  0.3745 - val_mse:  0.5162
Epoch 12/20
13s - loss:  0.3698 - mse:  0.3697 - val_mse:  0.5255
Epoch 13/20
14s - loss:  0.3655 - mse:  0.3653 - val_mse:  0.5285
Epoch 14/20
13s - loss:  0.3620 - mse:  0.3618 - val_mse:  0.5357
Epoch 15/20
14s - loss:  0.3587 - mse:  0.3586 - val_mse:  0.5423
Epoch 16/20
14s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
22s - loss:  0.7788 - mse:  0.7787 - val_mse:  0.4941
Epoch 2/20
17s - loss:  0.4772 - mse:  0.4771 - val_mse:  0.4860
Epoch 3/20
15s - loss:  0.4670 - mse:  0.4670 - val_mse:  0.4828
Epoch 4/20
15s - loss:  0.4615 - mse:  0.4614 - val_mse:  0.4811
Epoch 5/20
18s - loss:  0.4577 - mse:  0.4577 - val_mse:  0.4830
Epoch 6/20
18s - loss:  0.4537 - mse:  0.4536 - val_mse:  0.4813
Epoch 7/20
17s - loss:  0.4485 - mse:  0.4484 - val_mse:  0.4819
Epoch 8/20
17s - loss:  0.4399 - mse:  0.4398 - val_mse:  0.4809
Epoch 9/20
17s - loss:  0.4288 - mse:  0.4287 - val_mse:  0.4825
Epoch 10/20
17s - loss:  0.4165 - mse:  0.4163 - val_mse:  0.4860
Epoch 11/20
18s - loss:  0.4052 - mse:  0.4051 - val_mse:  0.4939
Epoch 12/20
17s - loss:  0.3943 - mse:  0.3941 - val_mse:  0.5017
Epoch 13/20
17s - loss:  0.3852 - mse:  0.3850 - val_mse:  0.5089
Epoch 14/20
17s - loss:  0.3769 - mse:  0.3767 - val_mse:  0.5130
Epoch 15/20
17s - loss:  0.3693 - mse:  0.3690 - val_mse:  0.5245
Epoch 16/20
17s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
14s - loss:  0.6830 - mse:  0.6830 - val_mse:  0.4936
Epoch 2/20
15s - loss:  0.4809 - mse:  0.4809 - val_mse:  0.4850
Epoch 3/20
14s - loss:  0.4719 - mse:  0.4719 - val_mse:  0.4862
Epoch 4/20
15s - loss:  0.4675 - mse:  0.4675 - val_mse:  0.4856
Epoch 5/20
15s - loss:  0.4645 - mse:  0.4644 - val_mse:  0.4825
Epoch 6/20
17s - loss:  0.4619 - mse:  0.4618 - val_mse:  0.4833
Epoch 7/20
15s - loss:  0.4592 - mse:  0.4591 - val_mse:  0.4844
Epoch 8/20
15s - loss:  0.4564 - mse:  0.4563 - val_mse:  0.4905
Epoch 9/20
15s - loss:  0.4529 - mse:  0.4528 - val_mse:  0.4823
Epoch 10/20
15s - loss:  0.4488 - mse:  0.4487 - val_mse:  0.4789
Epoch 11/20
14s - loss:  0.4424 - mse:  0.4423 - val_mse:  0.4770
Epoch 12/20
17s - loss:  0.4362 - mse:  0.4360 - val_mse:  0.4766
Epoch 13/20
15s - loss:  0.4316 - mse:  0.4314 - val_mse:  0.4780
Epoch 14/20
15s - loss:  0.4283 - mse:  0.4281 - val_mse:  0.4792
Epoch 15/20
14s - loss:  0.4245 - mse:  0.4243 - val_mse:  0.4799
Epoch 16/20
15s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
18s - loss:  0.7889 - mse:  0.7889 - val_mse:  0.4917
Epoch 2/20
18s - loss:  0.4803 - mse:  0.4803 - val_mse:  0.4847
Epoch 3/20
19s - loss:  0.4707 - mse:  0.4707 - val_mse:  0.4891
Epoch 4/20
18s - loss:  0.4644 - mse:  0.4644 - val_mse:  0.4837
Epoch 5/20
18s - loss:  0.4583 - mse:  0.4582 - val_mse:  0.4787
Epoch 6/20
18s - loss:  0.4503 - mse:  0.4503 - val_mse:  0.4774
Epoch 7/20
18s - loss:  0.4427 - mse:  0.4426 - val_mse:  0.4749
Epoch 8/20
18s - loss:  0.4362 - mse:  0.4361 - val_mse:  0.4775
Epoch 9/20
18s - loss:  0.4295 - mse:  0.4294 - val_mse:  0.4820
Epoch 10/20
18s - loss:  0.4240 - mse:  0.4238 - val_mse:  0.4832
Epoch 11/20
18s - loss:  0.4194 - mse:  0.4193 - val_mse:  0.4902
Epoch 12/20
20s - loss:  0.4151 - mse:  0.4149 - val_mse:  0.4931
Epoch 13/20
18s - loss:  0.4110 - mse:  0.4108 - val_mse:  0.4944
Epoch 14/20
19s - loss:  0.4062 - mse:  0.4060 - val_mse:  0.4969
Epoch 15/20
18s - loss:  0.4005 - mse:  0.4003 - val_mse:  0.5027
Epoch 16/20
18s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
15s - loss:  0.7203 - mse:  0.7203 - val_mse:  0.4893
Epoch 2/20
15s - loss:  0.4701 - mse:  0.4701 - val_mse:  0.4830
Epoch 3/20
17s - loss:  0.4492 - mse:  0.4492 - val_mse:  0.4862
Epoch 4/20
16s - loss:  0.4338 - mse:  0.4338 - val_mse:  0.4903
Epoch 5/20
15s - loss:  0.4218 - mse:  0.4218 - val_mse:  0.4950
Epoch 6/20
15s - loss:  0.4109 - mse:  0.4108 - val_mse:  0.5012
Epoch 7/20
16s - loss:  0.4015 - mse:  0.4014 - val_mse:  0.5123
Epoch 8/20
16s - loss:  0.3928 - mse:  0.3927 - val_mse:  0.5138
Epoch 9/20
16s - loss:  0.3855 - mse:  0.3853 - val_mse:  0.5208
Epoch 10/20
16s - loss:  0.3782 - mse:  0.3781 - val_mse:  0.5248
Epoch 11/20
16s - loss:  0.3724 - mse:  0.3722 - val_mse:  0.5279
Epoch 12/20
16s - loss:  0.3665 - mse:  0.3663 - val_mse:  0.5376
Epoch 13/20
16s - loss:  0.3616 - mse:  0.3614 - val_mse:  0.5400
Epoch 14/20
16s - loss:  0.3571 - mse:  0.3568 - val_mse:  0.5426
Epoch 15/20
15s - loss:  0.3530 - mse:  0.3528 - val_mse:  0.5478
Epoch 16/20
16s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
15s - loss:  0.6817 - mse:  0.6816 - val_mse:  0.4921
Epoch 2/20
16s - loss:  0.4809 - mse:  0.4809 - val_mse:  0.4859
Epoch 3/20
15s - loss:  0.4718 - mse:  0.4717 - val_mse:  0.4838
Epoch 4/20
15s - loss:  0.4667 - mse:  0.4667 - val_mse:  0.4856
Epoch 5/20
17s - loss:  0.4626 - mse:  0.4625 - val_mse:  0.4863
Epoch 6/20
16s - loss:  0.4546 - mse:  0.4545 - val_mse:  0.4761
Epoch 7/20
16s - loss:  0.4440 - mse:  0.4440 - val_mse:  0.4777
Epoch 8/20
16s - loss:  0.4346 - mse:  0.4345 - val_mse:  0.4792
Epoch 9/20
16s - loss:  0.4274 - mse:  0.4272 - val_mse:  0.4838
Epoch 10/20
16s - loss:  0.4216 - mse:  0.4215 - val_mse:  0.4860
Epoch 11/20
16s - loss:  0.4167 - mse:  0.4166 - val_mse:  0.4878
Epoch 12/20
16s - loss:  0.4121 - mse:  0.4119 - val_mse:  0.4905
Epoch 13/20
16s - loss:  0.4056 - mse:  0.4054 - val_mse:  0.4946
Epoch 14/20
16s - loss:  0.3964 - mse:  0.3961 - val_mse:  0.5019
Epoch 15/20
15s - loss:  0.3875 - mse:  0.3872 - val_mse:  0.5072
Epoch 16/20
15s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
15s - loss:  3.8899 - mse:  3.8895 - val_mse:  0.7778
Epoch 2/20
15s - loss:  0.6480 - mse:  0.6479 - val_mse:  0.5932
Epoch 3/20
15s - loss:  0.5452 - mse:  0.5451 - val_mse:  0.5415
Epoch 4/20
16s - loss:  0.5038 - mse:  0.5036 - val_mse:  0.5180
Epoch 5/20
16s - loss:  0.4780 - mse:  0.4778 - val_mse:  0.5068
Epoch 6/20
15s - loss:  0.4603 - mse:  0.4601 - val_mse:  0.5039
Epoch 7/20
17s - loss:  0.4477 - mse:  0.4475 - val_mse:  0.5038
Epoch 8/20
15s - loss:  0.4382 - mse:  0.4380 - val_mse:  0.5050
Epoch 9/20
16s - loss:  0.4305 - mse:  0.4302 - val_mse:  0.5084
Epoch 10/20
15s - loss:  0.4237 - mse:  0.4234 - val_mse:  0.5098
Epoch 11/20
15s - loss:  0.4179 - mse:  0.4175 - val_mse:  0.5127
Epoch 12/20
15s - loss:  0.4125 - mse:  0.4121 - val_mse:  0.5154
Epoch 13/20
17s - loss:  0.4080 - mse:  0.4076 - val_mse:  0.5182
Epoch 14/20
15s - loss:  0.4038 - mse:  0.4034 - val_mse:  0.5217
Epoch 15/20
16s - loss:  0.3999 - mse:  0.3995 - val_mse:  0.5242
Epoch 16/20
16s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
19s - loss:  0.6155 - mse:  0.6155 - val_mse:  0.4929
Epoch 2/20
18s - loss:  0.4799 - mse:  0.4799 - val_mse:  0.4910
Epoch 3/20
20s - loss:  0.4692 - mse:  0.4692 - val_mse:  0.4834
Epoch 4/20
20s - loss:  0.4621 - mse:  0.4621 - val_mse:  0.4801
Epoch 5/20
20s - loss:  0.4573 - mse:  0.4572 - val_mse:  0.4794
Epoch 6/20
19s - loss:  0.4521 - mse:  0.4520 - val_mse:  0.4780
Epoch 7/20
19s - loss:  0.4456 - mse:  0.4455 - val_mse:  0.4792
Epoch 8/20
21s - loss:  0.4354 - mse:  0.4353 - val_mse:  0.4809
Epoch 9/20
19s - loss:  0.4218 - mse:  0.4217 - val_mse:  0.4843
Epoch 10/20
20s - loss:  0.4085 - mse:  0.4084 - val_mse:  0.4901
Epoch 11/20
20s - loss:  0.3962 - mse:  0.3961 - val_mse:  0.5030
Epoch 12/20
19s - loss:  0.3856 - mse:  0.3855 - val_mse:  0.5100
Epoch 13/20
20s - loss:  0.3764 - mse:  0.3762 - val_mse:  0.5176
Epoch 14/20
19s - loss:  0.3681 - mse:  0.3679 - val_mse:  0.5323
Epoch 15/20
20s - loss:  0.3609 - mse:  0.3607 - val_mse:  0.5368
Epoch 16/20
20s - l

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
1s - loss:  2.5045 - mse:  2.5009 - val_mse:  0.8911
Epoch 2/20
1s - loss:  0.6984 - mse:  0.6985 - val_mse:  0.9405
Epoch 3/20
1s - loss:  0.5047 - mse:  0.5047 - val_mse:  1.0078
Epoch 4/20
1s - loss:  0.4378 - mse:  0.4380 - val_mse:  1.0486
Epoch 5/20
1s - loss:  0.4074 - mse:  0.4069 - val_mse:  1.0673
Epoch 6/20
1s - loss:  0.3893 - mse:  0.3895 - val_mse:  1.0672
Epoch 7/20
1s - loss:  0.3760 - mse:  0.3760 - val_mse:  1.0850
Epoch 8/20
1s - loss:  0.3626 - mse:  0.3625 - val_mse:  1.0826
Epoch 9/20
1s - loss:  0.3475 - mse:  0.3476 - val_mse:  1.0969
Epoch 10/20
1s - loss:  0.3296 - mse:  0.3299 - val_mse:  1.1071
Epoch 11/20
1s - loss:  0.3083 - mse:  0.3084 - val_mse:  1.1170
Epoch 12/20
1s - loss:  0.2847 - mse:  0.2848 - val_mse:  1.1213
Epoch 13/20
1s - loss:  0.2593 - mse:  0.2596 - val_mse:  1.1452
Epoch 14/20
1s - loss:  0.2301 - mse:  0.2301 - val_mse:  1.1736
Epoch 15/20
1s - loss:  0.1987 - mse:  0.1988 - val_mse:  1.2037
Epoch 16/20
1s - loss:  0.1704 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
1s - loss:  2.6535 - mse:  2.6486 - val_mse:  0.8918
Epoch 2/20
1s - loss:  0.7043 - mse:  0.7046 - val_mse:  0.9259
Epoch 3/20
1s - loss:  0.5048 - mse:  0.5046 - val_mse:  1.0003
Epoch 4/20
2s - loss:  0.4379 - mse:  0.4378 - val_mse:  1.0416
Epoch 5/20
1s - loss:  0.4076 - mse:  0.4081 - val_mse:  1.0618
Epoch 6/20
2s - loss:  0.3908 - mse:  0.3910 - val_mse:  1.0809
Epoch 7/20
1s - loss:  0.3786 - mse:  0.3786 - val_mse:  1.0797
Epoch 8/20
1s - loss:  0.3699 - mse:  0.3701 - val_mse:  1.0907
Epoch 9/20
2s - loss:  0.3637 - mse:  0.3637 - val_mse:  1.0926
Epoch 10/20
2s - loss:  0.3586 - mse:  0.3590 - val_mse:  1.1032
Epoch 11/20
1s - loss:  0.3545 - mse:  0.3549 - val_mse:  1.1024
Epoch 12/20
1s - loss:  0.3500 - mse:  0.3502 - val_mse:  1.1043
Epoch 13/20
1s - loss:  0.3473 - mse:  0.3475 - val_mse:  1.0889
Epoch 14/20
1s - loss:  0.3419 - mse:  0.3419 - val_mse:  1.1151
Epoch 15/20
2s - loss:  0.3364 - mse:  0.3369 - val_mse:  1.1006
Epoch 16/20
1s - loss:  0.3300 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  2.4583 - mse:  2.4539 - val_mse:  0.8923
Epoch 2/20
2s - loss:  0.6955 - mse:  0.6953 - val_mse:  0.9304
Epoch 3/20
2s - loss:  0.5006 - mse:  0.5017 - val_mse:  0.9929
Epoch 4/20
2s - loss:  0.4319 - mse:  0.4320 - val_mse:  1.0255
Epoch 5/20
2s - loss:  0.3955 - mse:  0.3955 - val_mse:  1.0468
Epoch 6/20
2s - loss:  0.3690 - mse:  0.3688 - val_mse:  1.0565
Epoch 7/20
2s - loss:  0.3437 - mse:  0.3438 - val_mse:  1.0786
Epoch 8/20
1s - loss:  0.3165 - mse:  0.3167 - val_mse:  1.1005
Epoch 9/20
1s - loss:  0.2839 - mse:  0.2839 - val_mse:  1.1228
Epoch 10/20
2s - loss:  0.2500 - mse:  0.2498 - val_mse:  1.1566
Epoch 11/20
2s - loss:  0.2152 - mse:  0.2153 - val_mse:  1.1780
Epoch 12/20
2s - loss:  0.1837 - mse:  0.1838 - val_mse:  1.2138
Epoch 13/20
2s - loss:  0.1576 - mse:  0.1578 - val_mse:  1.2205
Epoch 14/20
2s - loss:  0.1347 - mse:  0.1349 - val_mse:  1.2448
Epoch 15/20
2s - loss:  0.1155 - mse:  0.1157 - val_mse:  1.2602
Epoch 16/20
2s - loss:  0.0994 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  2.1454 - mse:  2.1424 - val_mse:  0.9769
Epoch 2/20
2s - loss:  0.8221 - mse:  0.8219 - val_mse:  0.8645
Epoch 3/20
2s - loss:  0.6047 - mse:  0.6047 - val_mse:  0.9544
Epoch 4/20
2s - loss:  0.4697 - mse:  0.4698 - val_mse:  1.0089
Epoch 5/20
2s - loss:  0.4109 - mse:  0.4110 - val_mse:  1.0444
Epoch 6/20
2s - loss:  0.3836 - mse:  0.3838 - val_mse:  1.0667
Epoch 7/20
3s - loss:  0.3690 - mse:  0.3691 - val_mse:  1.0779
Epoch 8/20
3s - loss:  0.3598 - mse:  0.3597 - val_mse:  1.0935
Epoch 9/20
2s - loss:  0.3529 - mse:  0.3529 - val_mse:  1.0902
Epoch 10/20
2s - loss:  0.3486 - mse:  0.3484 - val_mse:  1.0986
Epoch 11/20
2s - loss:  0.3442 - mse:  0.3442 - val_mse:  1.1030
Epoch 12/20
2s - loss:  0.3411 - mse:  0.3411 - val_mse:  1.1098
Epoch 13/20
2s - loss:  0.3372 - mse:  0.3369 - val_mse:  1.1145
Epoch 14/20
2s - loss:  0.3334 - mse:  0.3333 - val_mse:  1.1302
Epoch 15/20
2s - loss:  0.3298 - mse:  0.3295 - val_mse:  1.1161
Epoch 16/20
2s - loss:  0.3271 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  2.1516 - mse:  2.1479 - val_mse:  0.8834
Epoch 2/20
2s - loss:  0.6890 - mse:  0.6891 - val_mse:  0.9343
Epoch 3/20
2s - loss:  0.5015 - mse:  0.5014 - val_mse:  0.9966
Epoch 4/20
2s - loss:  0.4380 - mse:  0.4382 - val_mse:  1.0489
Epoch 5/20
2s - loss:  0.4084 - mse:  0.4083 - val_mse:  1.0574
Epoch 6/20
2s - loss:  0.3921 - mse:  0.3923 - val_mse:  1.0719
Epoch 7/20
2s - loss:  0.3809 - mse:  0.3810 - val_mse:  1.0776
Epoch 8/20
2s - loss:  0.3728 - mse:  0.3730 - val_mse:  1.0816
Epoch 9/20
2s - loss:  0.3670 - mse:  0.3671 - val_mse:  1.0973
Epoch 10/20
2s - loss:  0.3628 - mse:  0.3627 - val_mse:  1.1065
Epoch 11/20
2s - loss:  0.3588 - mse:  0.3590 - val_mse:  1.1093
Epoch 12/20
2s - loss:  0.3559 - mse:  0.3563 - val_mse:  1.0977
Epoch 13/20
2s - loss:  0.3529 - mse:  0.3528 - val_mse:  1.1205
Epoch 14/20
2s - loss:  0.3500 - mse:  0.3502 - val_mse:  1.1116
Epoch 15/20
2s - loss:  0.3480 - mse:  0.3478 - val_mse:  1.1268
Epoch 16/20
2s - loss:  0.3459 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  2.2263 - mse:  2.2223 - val_mse:  0.8846
Epoch 2/20
2s - loss:  0.6946 - mse:  0.6949 - val_mse:  0.9402
Epoch 3/20
2s - loss:  0.5029 - mse:  0.5032 - val_mse:  1.0050
Epoch 4/20
3s - loss:  0.4378 - mse:  0.4383 - val_mse:  1.0399
Epoch 5/20
2s - loss:  0.4081 - mse:  0.4082 - val_mse:  1.0598
Epoch 6/20
2s - loss:  0.3908 - mse:  0.3908 - val_mse:  1.0683
Epoch 7/20
2s - loss:  0.3795 - mse:  0.3797 - val_mse:  1.0817
Epoch 8/20
2s - loss:  0.3720 - mse:  0.3722 - val_mse:  1.0954
Epoch 9/20
2s - loss:  0.3661 - mse:  0.3666 - val_mse:  1.0920
Epoch 10/20
2s - loss:  0.3617 - mse:  0.3618 - val_mse:  1.0970
Epoch 11/20
2s - loss:  0.3570 - mse:  0.3570 - val_mse:  1.1136
Epoch 12/20
2s - loss:  0.3538 - mse:  0.3537 - val_mse:  1.1207
Epoch 13/20
2s - loss:  0.3513 - mse:  0.3511 - val_mse:  1.1267
Epoch 14/20
2s - loss:  0.3488 - mse:  0.3490 - val_mse:  1.1143
Epoch 15/20
2s - loss:  0.3458 - mse:  0.3458 - val_mse:  1.1087
Epoch 16/20
2s - loss:  0.3434 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  2.2459 - mse:  2.2423 - val_mse:  0.9247
Epoch 2/20
2s - loss:  0.8006 - mse:  0.8003 - val_mse:  0.8650
Epoch 3/20
2s - loss:  0.6278 - mse:  0.6280 - val_mse:  0.8993
Epoch 4/20
2s - loss:  0.5065 - mse:  0.5063 - val_mse:  0.9639
Epoch 5/20
2s - loss:  0.4354 - mse:  0.4359 - val_mse:  0.9999
Epoch 6/20
3s - loss:  0.3956 - mse:  0.3956 - val_mse:  1.0494
Epoch 7/20
2s - loss:  0.3717 - mse:  0.3721 - val_mse:  1.0692
Epoch 8/20
2s - loss:  0.3559 - mse:  0.3561 - val_mse:  1.0779
Epoch 9/20
2s - loss:  0.3445 - mse:  0.3444 - val_mse:  1.0851
Epoch 10/20
2s - loss:  0.3349 - mse:  0.3354 - val_mse:  1.0961
Epoch 11/20
2s - loss:  0.3265 - mse:  0.3262 - val_mse:  1.0975
Epoch 12/20
2s - loss:  0.3181 - mse:  0.3181 - val_mse:  1.1007
Epoch 13/20
2s - loss:  0.3095 - mse:  0.3093 - val_mse:  1.0969
Epoch 14/20
2s - loss:  0.3007 - mse:  0.3005 - val_mse:  1.1175
Epoch 15/20
2s - loss:  0.2901 - mse:  0.2902 - val_mse:  1.1352
Epoch 16/20
2s - loss:  0.2791 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  2.1367 - mse:  2.1326 - val_mse:  0.8819
Epoch 2/20
3s - loss:  0.6918 - mse:  0.6922 - val_mse:  0.9389
Epoch 3/20
2s - loss:  0.5020 - mse:  0.5021 - val_mse:  1.0092
Epoch 4/20
2s - loss:  0.4385 - mse:  0.4389 - val_mse:  1.0473
Epoch 5/20
2s - loss:  0.4095 - mse:  0.4096 - val_mse:  1.0595
Epoch 6/20
2s - loss:  0.3929 - mse:  0.3935 - val_mse:  1.0762
Epoch 7/20
2s - loss:  0.3820 - mse:  0.3824 - val_mse:  1.0813
Epoch 8/20
2s - loss:  0.3739 - mse:  0.3739 - val_mse:  1.0890
Epoch 9/20
2s - loss:  0.3678 - mse:  0.3677 - val_mse:  1.1047
Epoch 10/20
2s - loss:  0.3639 - mse:  0.3641 - val_mse:  1.0915
Epoch 11/20
2s - loss:  0.3596 - mse:  0.3598 - val_mse:  1.1119
Epoch 12/20
2s - loss:  0.3565 - mse:  0.3565 - val_mse:  1.1098
Epoch 13/20
2s - loss:  0.3538 - mse:  0.3540 - val_mse:  1.1201
Epoch 14/20
2s - loss:  0.3510 - mse:  0.3511 - val_mse:  1.1074
Epoch 15/20
2s - loss:  0.3489 - mse:  0.3492 - val_mse:  1.1240
Epoch 16/20
2s - loss:  0.3469 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  9.6252 - mse:  9.6083 - val_mse:  3.1043
Epoch 2/20
2s - loss:  1.4176 - mse:  1.4163 - val_mse:  1.1728
Epoch 3/20
2s - loss:  1.1266 - mse:  1.1260 - val_mse:  1.1131
Epoch 4/20
2s - loss:  1.0537 - mse:  1.0537 - val_mse:  1.0570
Epoch 5/20
2s - loss:  0.9835 - mse:  0.9835 - val_mse:  1.0067
Epoch 6/20
2s - loss:  0.9160 - mse:  0.9158 - val_mse:  0.9621
Epoch 7/20
2s - loss:  0.8480 - mse:  0.8481 - val_mse:  0.9249
Epoch 8/20
2s - loss:  0.7716 - mse:  0.7725 - val_mse:  0.8967
Epoch 9/20
2s - loss:  0.6814 - mse:  0.6817 - val_mse:  0.8926
Epoch 10/20
2s - loss:  0.5943 - mse:  0.5943 - val_mse:  0.9204
Epoch 11/20
2s - loss:  0.5248 - mse:  0.5244 - val_mse:  0.9651
Epoch 12/20
2s - loss:  0.4730 - mse:  0.4732 - val_mse:  1.0157
Epoch 13/20
2s - loss:  0.4339 - mse:  0.4337 - val_mse:  1.0515
Epoch 14/20
2s - loss:  0.4037 - mse:  0.4034 - val_mse:  1.0777
Epoch 15/20
2s - loss:  0.3801 - mse:  0.3798 - val_mse:  1.1076
Epoch 16/20
2s - loss:  0.3606 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  1.4959 - mse:  1.4946 - val_mse:  0.8763
Epoch 2/20
2s - loss:  0.6703 - mse:  0.6698 - val_mse:  0.9252
Epoch 3/20
3s - loss:  0.4947 - mse:  0.4948 - val_mse:  0.9823
Epoch 4/20
2s - loss:  0.4333 - mse:  0.4335 - val_mse:  1.0480
Epoch 5/20
2s - loss:  0.4013 - mse:  0.4016 - val_mse:  1.0490
Epoch 6/20
2s - loss:  0.3830 - mse:  0.3834 - val_mse:  1.0761
Epoch 7/20
2s - loss:  0.3711 - mse:  0.3711 - val_mse:  1.0941
Epoch 8/20
2s - loss:  0.3626 - mse:  0.3627 - val_mse:  1.1100
Epoch 9/20
2s - loss:  0.3574 - mse:  0.3575 - val_mse:  1.0740
Epoch 10/20
3s - loss:  0.3528 - mse:  0.3531 - val_mse:  1.1170
Epoch 11/20
2s - loss:  0.3476 - mse:  0.3481 - val_mse:  1.1026
Epoch 12/20
2s - loss:  0.3435 - mse:  0.3438 - val_mse:  1.1099
Epoch 13/20
2s - loss:  0.3394 - mse:  0.3400 - val_mse:  1.1283
Epoch 14/20
2s - loss:  0.3363 - mse:  0.3365 - val_mse:  1.1253
Epoch 15/20
2s - loss:  0.3330 - mse:  0.3330 - val_mse:  1.1397
Epoch 16/20
2s - loss:  0.3292 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  1.9429 - mse:  1.9426 - val_mse:  1.0694
Epoch 2/20
2s - loss:  0.9648 - mse:  0.9647 - val_mse:  1.0500
Epoch 3/20
2s - loss:  0.9048 - mse:  0.9048 - val_mse:  1.0557
Epoch 4/20
2s - loss:  0.8772 - mse:  0.8773 - val_mse:  1.0723
Epoch 5/20
3s - loss:  0.8576 - mse:  0.8577 - val_mse:  1.0680
Epoch 6/20
2s - loss:  0.8366 - mse:  0.8366 - val_mse:  1.0744
Epoch 7/20
2s - loss:  0.8097 - mse:  0.8098 - val_mse:  1.1018
Epoch 8/20
2s - loss:  0.7842 - mse:  0.7842 - val_mse:  1.1256
Epoch 9/20
3s - loss:  0.7627 - mse:  0.7627 - val_mse:  1.1437
Epoch 10/20
2s - loss:  0.7458 - mse:  0.7457 - val_mse:  1.1599
Epoch 11/20
2s - loss:  0.7320 - mse:  0.7320 - val_mse:  1.1860
Epoch 12/20
2s - loss:  0.7200 - mse:  0.7200 - val_mse:  1.1910
Epoch 13/20
2s - loss:  0.7054 - mse:  0.7054 - val_mse:  1.2233
Epoch 14/20
3s - loss:  0.6822 - mse:  0.6822 - val_mse:  1.2515
Epoch 15/20
2s - loss:  0.6516 - mse:  0.6516 - val_mse:  1.2784
Epoch 16/20
2s - loss:  0.6169 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  1.9744 - mse:  1.9742 - val_mse:  1.0709
Epoch 2/20
3s - loss:  0.9649 - mse:  0.9649 - val_mse:  1.0560
Epoch 3/20
2s - loss:  0.9037 - mse:  0.9037 - val_mse:  1.0577
Epoch 4/20
2s - loss:  0.8763 - mse:  0.8763 - val_mse:  1.0643
Epoch 5/20
2s - loss:  0.8597 - mse:  0.8597 - val_mse:  1.0738
Epoch 6/20
2s - loss:  0.8479 - mse:  0.8479 - val_mse:  1.0819
Epoch 7/20
3s - loss:  0.8371 - mse:  0.8371 - val_mse:  1.0916
Epoch 8/20
2s - loss:  0.8185 - mse:  0.8184 - val_mse:  1.0938
Epoch 9/20
2s - loss:  0.7926 - mse:  0.7926 - val_mse:  1.1203
Epoch 10/20
2s - loss:  0.7680 - mse:  0.7680 - val_mse:  1.1327
Epoch 11/20
2s - loss:  0.7484 - mse:  0.7484 - val_mse:  1.1514
Epoch 12/20
3s - loss:  0.7315 - mse:  0.7315 - val_mse:  1.1761
Epoch 13/20
2s - loss:  0.7153 - mse:  0.7153 - val_mse:  1.1998
Epoch 14/20
2s - loss:  0.6966 - mse:  0.6966 - val_mse:  1.2087
Epoch 15/20
2s - loss:  0.6749 - mse:  0.6749 - val_mse:  1.2446
Epoch 16/20
3s - loss:  0.6528 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
2s - loss:  2.0191 - mse:  2.0187 - val_mse:  1.0710
Epoch 2/20
3s - loss:  0.9667 - mse:  0.9668 - val_mse:  1.0511
Epoch 3/20
3s - loss:  0.9033 - mse:  0.9032 - val_mse:  1.0585
Epoch 4/20
3s - loss:  0.8756 - mse:  0.8756 - val_mse:  1.0640
Epoch 5/20
3s - loss:  0.8589 - mse:  0.8589 - val_mse:  1.0761
Epoch 6/20
3s - loss:  0.8476 - mse:  0.8476 - val_mse:  1.0884
Epoch 7/20
3s - loss:  0.8389 - mse:  0.8389 - val_mse:  1.0912
Epoch 8/20
3s - loss:  0.8234 - mse:  0.8234 - val_mse:  1.1005
Epoch 9/20
3s - loss:  0.7982 - mse:  0.7981 - val_mse:  1.1141
Epoch 10/20
3s - loss:  0.7726 - mse:  0.7726 - val_mse:  1.1361
Epoch 11/20
3s - loss:  0.7506 - mse:  0.7505 - val_mse:  1.1527
Epoch 12/20
2s - loss:  0.7337 - mse:  0.7337 - val_mse:  1.1727
Epoch 13/20
2s - loss:  0.7188 - mse:  0.7187 - val_mse:  1.1886
Epoch 14/20
3s - loss:  0.7021 - mse:  0.7021 - val_mse:  1.2036
Epoch 15/20
3s - loss:  0.6843 - mse:  0.6842 - val_mse:  1.2210
Epoch 16/20
3s - loss:  0.6668 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
3s - loss:  1.9903 - mse:  1.9901 - val_mse:  1.1211
Epoch 2/20
3s - loss:  1.0072 - mse:  1.0072 - val_mse:  1.0542
Epoch 3/20
3s - loss:  0.9158 - mse:  0.9158 - val_mse:  1.0550
Epoch 4/20
3s - loss:  0.8788 - mse:  0.8788 - val_mse:  1.0609
Epoch 5/20
3s - loss:  0.8595 - mse:  0.8595 - val_mse:  1.0663
Epoch 6/20
3s - loss:  0.8456 - mse:  0.8456 - val_mse:  1.0733
Epoch 7/20
4s - loss:  0.8371 - mse:  0.8370 - val_mse:  1.0790
Epoch 8/20
4s - loss:  0.8304 - mse:  0.8304 - val_mse:  1.0879
Epoch 9/20
3s - loss:  0.8250 - mse:  0.8250 - val_mse:  1.0923
Epoch 10/20
3s - loss:  0.8208 - mse:  0.8208 - val_mse:  1.0969
Epoch 11/20
3s - loss:  0.8158 - mse:  0.8157 - val_mse:  1.0995
Epoch 12/20
3s - loss:  0.8130 - mse:  0.8130 - val_mse:  1.1033
Epoch 13/20
4s - loss:  0.8083 - mse:  0.8083 - val_mse:  1.1136
Epoch 14/20
3s - loss:  0.8035 - mse:  0.8034 - val_mse:  1.1101
Epoch 15/20
3s - loss:  0.7983 - mse:  0.7983 - val_mse:  1.1135
Epoch 16/20
3s - loss:  0.7924 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
3s - loss:  1.6221 - mse:  1.6219 - val_mse:  1.0644
Epoch 2/20
3s - loss:  0.9615 - mse:  0.9615 - val_mse:  1.0535
Epoch 3/20
3s - loss:  0.9051 - mse:  0.9051 - val_mse:  1.0592
Epoch 4/20
3s - loss:  0.8800 - mse:  0.8800 - val_mse:  1.0689
Epoch 5/20
3s - loss:  0.8664 - mse:  0.8664 - val_mse:  1.0718
Epoch 6/20
3s - loss:  0.8569 - mse:  0.8569 - val_mse:  1.0708
Epoch 7/20
3s - loss:  0.8498 - mse:  0.8498 - val_mse:  1.0805
Epoch 8/20
3s - loss:  0.8451 - mse:  0.8451 - val_mse:  1.0893
Epoch 9/20
3s - loss:  0.8406 - mse:  0.8406 - val_mse:  1.0862
Epoch 10/20
3s - loss:  0.8371 - mse:  0.8370 - val_mse:  1.0873
Epoch 11/20
2s - loss:  0.8355 - mse:  0.8355 - val_mse:  1.0936
Epoch 12/20
3s - loss:  0.8328 - mse:  0.8328 - val_mse:  1.0904
Epoch 13/20
3s - loss:  0.8307 - mse:  0.8307 - val_mse:  1.0983
Epoch 14/20
3s - loss:  0.8284 - mse:  0.8283 - val_mse:  1.1004
Epoch 15/20
3s - loss:  0.8280 - mse:  0.8280 - val_mse:  1.0982
Epoch 16/20
3s - loss:  0.8257 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
3s - loss:  1.8983 - mse:  1.8981 - val_mse:  1.0654
Epoch 2/20
3s - loss:  0.9649 - mse:  0.9650 - val_mse:  1.0548
Epoch 3/20
4s - loss:  0.9046 - mse:  0.9046 - val_mse:  1.0605
Epoch 4/20
3s - loss:  0.8789 - mse:  0.8790 - val_mse:  1.0648
Epoch 5/20
3s - loss:  0.8643 - mse:  0.8643 - val_mse:  1.0801
Epoch 6/20
3s - loss:  0.8536 - mse:  0.8536 - val_mse:  1.0765
Epoch 7/20
3s - loss:  0.8460 - mse:  0.8460 - val_mse:  1.0834
Epoch 8/20
4s - loss:  0.8400 - mse:  0.8400 - val_mse:  1.0967
Epoch 9/20
3s - loss:  0.8348 - mse:  0.8348 - val_mse:  1.0860
Epoch 10/20
3s - loss:  0.8290 - mse:  0.8290 - val_mse:  1.1009
Epoch 11/20
3s - loss:  0.8250 - mse:  0.8249 - val_mse:  1.0953
Epoch 12/20
4s - loss:  0.8210 - mse:  0.8210 - val_mse:  1.0982
Epoch 13/20
4s - loss:  0.8170 - mse:  0.8170 - val_mse:  1.0995
Epoch 14/20
3s - loss:  0.8102 - mse:  0.8102 - val_mse:  1.1014
Epoch 15/20
3s - loss:  0.8027 - mse:  0.8026 - val_mse:  1.1140
Epoch 16/20
3s - loss:  0.7922 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
3s - loss:  2.1032 - mse:  2.1029 - val_mse:  1.2203
Epoch 2/20
3s - loss:  1.1535 - mse:  1.1534 - val_mse:  1.1671
Epoch 3/20
3s - loss:  0.9650 - mse:  0.9650 - val_mse:  1.2130
Epoch 4/20
3s - loss:  0.7822 - mse:  0.7822 - val_mse:  1.2852
Epoch 5/20
3s - loss:  0.6712 - mse:  0.6712 - val_mse:  1.3434
Epoch 6/20
3s - loss:  0.6051 - mse:  0.6051 - val_mse:  1.3810
Epoch 7/20
3s - loss:  0.5621 - mse:  0.5621 - val_mse:  1.4182
Epoch 8/20
3s - loss:  0.5319 - mse:  0.5319 - val_mse:  1.4350
Epoch 9/20
3s - loss:  0.5090 - mse:  0.5090 - val_mse:  1.4703
Epoch 10/20
3s - loss:  0.4908 - mse:  0.4907 - val_mse:  1.4917
Epoch 11/20
3s - loss:  0.4776 - mse:  0.4776 - val_mse:  1.5048
Epoch 12/20
3s - loss:  0.4648 - mse:  0.4647 - val_mse:  1.5458
Epoch 13/20
3s - loss:  0.4545 - mse:  0.4544 - val_mse:  1.5487
Epoch 14/20
3s - loss:  0.4451 - mse:  0.4450 - val_mse:  1.5812
Epoch 15/20
3s - loss:  0.4374 - mse:  0.4373 - val_mse:  1.5989
Epoch 16/20
3s - loss:  0.4297 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
3s - loss:  1.6230 - mse:  1.6228 - val_mse:  1.0636
Epoch 2/20
3s - loss:  0.9607 - mse:  0.9607 - val_mse:  1.0540
Epoch 3/20
3s - loss:  0.9046 - mse:  0.9046 - val_mse:  1.0592
Epoch 4/20
3s - loss:  0.8799 - mse:  0.8799 - val_mse:  1.0649
Epoch 5/20
3s - loss:  0.8655 - mse:  0.8655 - val_mse:  1.0681
Epoch 6/20
3s - loss:  0.8560 - mse:  0.8559 - val_mse:  1.0829
Epoch 7/20
3s - loss:  0.8503 - mse:  0.8502 - val_mse:  1.0805
Epoch 8/20
3s - loss:  0.8455 - mse:  0.8455 - val_mse:  1.0910
Epoch 9/20
3s - loss:  0.8406 - mse:  0.8406 - val_mse:  1.0956
Epoch 10/20
3s - loss:  0.8368 - mse:  0.8368 - val_mse:  1.0911
Epoch 11/20
3s - loss:  0.8359 - mse:  0.8359 - val_mse:  1.0916
Epoch 12/20
3s - loss:  0.8320 - mse:  0.8320 - val_mse:  1.0978
Epoch 13/20
3s - loss:  0.8291 - mse:  0.8291 - val_mse:  1.0982
Epoch 14/20
3s - loss:  0.8276 - mse:  0.8276 - val_mse:  1.0955
Epoch 15/20
3s - loss:  0.8255 - mse:  0.8254 - val_mse:  1.0989
Epoch 16/20
3s - loss:  0.8240 - m

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
3s - loss:  12.6600 - mse:  12.6591 - val_mse:  9.6370
Epoch 2/20
2s - loss:  6.5687 - mse:  6.5680 - val_mse:  4.2555
Epoch 3/20
2s - loss:  3.0539 - mse:  3.0536 - val_mse:  2.4993
Epoch 4/20
3s - loss:  1.9290 - mse:  1.9286 - val_mse:  1.8597
Epoch 5/20
3s - loss:  1.4631 - mse:  1.4628 - val_mse:  1.5635
Epoch 6/20
3s - loss:  1.2286 - mse:  1.2283 - val_mse:  1.4080
Epoch 7/20
3s - loss:  1.0955 - mse:  1.0953 - val_mse:  1.3180
Epoch 8/20
3s - loss:  1.0132 - mse:  1.0129 - val_mse:  1.2646
Epoch 9/20
3s - loss:  0.9590 - mse:  0.9587 - val_mse:  1.2311
Epoch 10/20
3s - loss:  0.9217 - mse:  0.9214 - val_mse:  1.2097
Epoch 11/20
3s - loss:  0.8951 - mse:  0.8948 - val_mse:  1.1957
Epoch 12/20
3s - loss:  0.8753 - mse:  0.8750 - val_mse:  1.1885
Epoch 13/20
3s - loss:  0.8603 - mse:  0.8599 - val_mse:  1.1842
Epoch 14/20
3s - loss:  0.8484 - mse:  0.8481 - val_mse:  1.1810
Epoch 15/20
3s - loss:  0.8388 - mse:  0.8384 - val_mse:  1.1807
Epoch 16/20
3s - loss:  0.8308 -

  return [None if x is None else x[start:stop] for x in arrays]


Epoch 1/20
4s - loss:  1.4309 - mse:  1.4308 - val_mse:  1.0604
Epoch 2/20
4s - loss:  0.9577 - mse:  0.9577 - val_mse:  1.0626
Epoch 3/20
4s - loss:  0.9020 - mse:  0.9020 - val_mse:  1.0642
Epoch 4/20
4s - loss:  0.8775 - mse:  0.8775 - val_mse:  1.0625
Epoch 5/20
4s - loss:  0.8608 - mse:  0.8608 - val_mse:  1.0768
Epoch 6/20
4s - loss:  0.8477 - mse:  0.8477 - val_mse:  1.0932
Epoch 7/20
4s - loss:  0.8374 - mse:  0.8374 - val_mse:  1.0996
Epoch 8/20
4s - loss:  0.8279 - mse:  0.8280 - val_mse:  1.0878
Epoch 9/20
4s - loss:  0.8182 - mse:  0.8182 - val_mse:  1.1025
Epoch 10/20
4s - loss:  0.8093 - mse:  0.8093 - val_mse:  1.1122
Epoch 11/20
4s - loss:  0.8009 - mse:  0.8009 - val_mse:  1.1117
Epoch 12/20
4s - loss:  0.7888 - mse:  0.7888 - val_mse:  1.1254
Epoch 13/20
4s - loss:  0.7757 - mse:  0.7757 - val_mse:  1.1460
Epoch 14/20
4s - loss:  0.7555 - mse:  0.7555 - val_mse:  1.1619
Epoch 15/20
4s - loss:  0.7290 - mse:  0.7290 - val_mse:  1.1926
Epoch 16/20
4s - loss:  0.6971 - m

##  3 Recent NN-based Methods
* **Attention Factorization Machines(AFM)**
* Collaborative Memory Networks (CMN)
* **xDeepFM**
* **Deep Interest Network (DIN)** *done for movielens*
* DeepGBM

In [167]:
#recent nn-based approach
from deepctr_torch.models import AFM
from deepctr_torch.models import xDeepFM
from deepctr_torch.models import DIN

# Din
import torch_rechub
from torch_rechub.utils.data import create_seq_features, df_to_dict, DataGenerator
from torch_rechub.basic.features import DenseFeature, SparseFeature, SequenceFeature
from torch_rechub.models.ranking import DIN
from torch_rechub.trainers import CTRTrainer


In [181]:

# Din
# need to have 'time' variable, so only movielens can be used
# https://www.cnblogs.com/junwei-kuang/p/DIN-DeepFM.html
# feature engeering
# 8 mins
# 7 epochs
data = Movielens['user_movie'].copy(deep = True)
data.columns = ['user_id', 'item_id', 'cate_id', 'time']


train, val, test = create_seq_features(data, seq_feature_col=['item_id', 'cate_id'], drop_short=3)
# get size of category data
n_users, n_items, n_cates = data["user_id"].max(), data["item_id"].max(), data["cate_id"].max()

features = [SparseFeature("target_item", vocab_size=n_items + 2, embed_dim=8), 
            SparseFeature("target_cate", vocab_size=n_cates + 2, embed_dim=8),
            SparseFeature("user_id", vocab_size=n_users + 2, embed_dim=8)]
target_features = features  
history_features = [
    SequenceFeature("history_item", vocab_size=n_items + 2, embed_dim=8, pooling="concat", shared_with="target_item"),
    SequenceFeature("history_cate", vocab_size=n_cates + 2, embed_dim=8, pooling="concat", shared_with="target_cate")]


train = df_to_dict(train)
val = df_to_dict(val)
test = df_to_dict(test)

train_y, val_y, test_y = train["label"], val["label"], test["label"]

# 删除 label 列
del train["label"]
del val["label"]
del test["label"]
train_x, val_x, test_x = train, val, test

# 构建dataloader，指定模型读取数据的方式，和区分验证集测试集、指定batch大小
dg = DataGenerator(train_x, train_y)
train_dataloader, val_dataloader, test_dataloader = dg.generate_dataloader(x_val=val_x, y_val=val_y, x_test=test_x, y_test=test_y, batch_size=16)



# 定义模型，模型的参数需要我们之前的feature类，用于构建模型的输入层，mlp指定模型后续DNN的结构，attention_mlp指定attention层的结构
model = DIN(features=features, history_features=history_features, target_features=target_features, mlp_params={"dims": [256, 128]}, attention_mlp_params={"dims": [256, 128]})  # 这里的 features = target_features，理论上是不合理的，是为了特征太少来凑数？

# 模型训练，需要学习率、设备等一般的参数，此外我们还支持earlystoping策略，及时发现过拟合
ctr_trainer = CTRTrainer(model, optimizer_params={"lr": 1e-3, "weight_decay": 1e-3}, n_epoch=config['n_epoch'], earlystop_patience=4, device='cuda', model_path='./') 
ctr_trainer.fit(train_dataloader, val_dataloader)

# 查看在测试集上的性能
auc = ctr_trainer.evaluate(ctr_trainer.model, test_dataloader)
pred_y = np.array(ctr_trainer.predict(model, test_dataloader))
r = sqrt(mean_squared_error(test_y, pred_y))

test_y = np.where(test_y > 3, 1, 0)
pred_y = np.where(pred_y > 3, 1, 0)


n = ndcg_score(test_y.reshape(1, -1), pred_y.reshape(1, -1), k=10)
re = recall_score(test_y.reshape(1, -1), pred_y.reshape(1, -1))
name = 'movieDIN' 
print(f'test auc: {auc}')


    
with open("./metrics/rmse.txt", "r") as fp:
    rmse = json.load(fp)
with open("./metrics/ndcg.txt", "r") as fp:
    ndcg_10 = json.load(fp)
with open("./metrics/recall.txt", "r") as fp:
    recall_10 = json.load(fp)
    
rmse[name] = r
ndcg_10[name] = n
recall_10[name] = re

with open("./metrics/rmse.txt", "w") as fp:
    json.dump(rmse, fp)
with open("./metrics/ndcg.txt", "w") as fp:
    json.dump(ndcg_10, fp)
with open("./metrics/recall.txt", "w") as fp:
    json.dump(recall_10, fp)

epoch: 0


train: 100%|██████████| 4756/4756 [00:57<00:00, 83.22it/s, loss=0.432]
validation: 100%|██████████| 118/118 [00:01<00:00, 98.00it/s]


epoch: 0 validation: auc: 0.8392081408019576
epoch: 1


train: 100%|██████████| 4756/4756 [00:54<00:00, 87.47it/s, loss=0.342]
validation: 100%|██████████| 118/118 [00:01<00:00, 93.55it/s]


epoch: 1 validation: auc: 0.8839740050312116
epoch: 2


train: 100%|██████████| 4756/4756 [00:56<00:00, 84.79it/s, loss=0.348]
validation: 100%|██████████| 118/118 [00:01<00:00, 90.19it/s]


epoch: 2 validation: auc: 0.8917772187542523
epoch: 3


train: 100%|██████████| 4756/4756 [00:53<00:00, 89.22it/s, loss=0.347]
validation: 100%|██████████| 118/118 [00:01<00:00, 92.70it/s]


epoch: 3 validation: auc: 0.8921696847564632
epoch: 4


train: 100%|██████████| 4756/4756 [00:54<00:00, 87.35it/s, loss=0.415]
validation: 100%|██████████| 118/118 [00:01<00:00, 97.49it/s]


epoch: 4 validation: auc: 0.908653256849319
epoch: 5


train: 100%|██████████| 4756/4756 [00:55<00:00, 85.09it/s, loss=0.262]
validation: 100%|██████████| 118/118 [00:01<00:00, 92.80it/s]


epoch: 5 validation: auc: 0.9384446875959377
epoch: 6


train: 100%|██████████| 4756/4756 [00:55<00:00, 85.58it/s, loss=0.315]
validation: 100%|██████████| 118/118 [00:01<00:00, 95.06it/s]


epoch: 6 validation: auc: 0.9558492615679074
epoch: 7


train: 100%|██████████| 4756/4756 [00:55<00:00, 85.02it/s, loss=0.228]
validation: 100%|██████████| 118/118 [00:01<00:00, 93.32it/s]


epoch: 7 validation: auc: 0.9538666897573121
epoch: 8


train: 100%|██████████| 4756/4756 [00:55<00:00, 85.09it/s, loss=0.222]
validation: 100%|██████████| 118/118 [00:01<00:00, 88.25it/s]


epoch: 8 validation: auc: 0.9602079957357275
epoch: 9


train: 100%|██████████| 4756/4756 [00:56<00:00, 84.23it/s, loss=0.26] 
validation: 100%|██████████| 118/118 [00:01<00:00, 92.81it/s]


epoch: 9 validation: auc: 0.9618745705646
epoch: 10


train: 100%|██████████| 4756/4756 [00:56<00:00, 83.64it/s, loss=0.286]
validation: 100%|██████████| 118/118 [00:01<00:00, 96.67it/s]


epoch: 10 validation: auc: 0.9638661387305468
epoch: 11


train: 100%|██████████| 4756/4756 [00:56<00:00, 84.71it/s, loss=0.303]
validation: 100%|██████████| 118/118 [00:01<00:00, 97.03it/s]


epoch: 11 validation: auc: 0.9602091202801465
epoch: 12


train: 100%|██████████| 4756/4756 [00:54<00:00, 86.89it/s, loss=0.254]
validation: 100%|██████████| 118/118 [00:01<00:00, 101.61it/s]


epoch: 12 validation: auc: 0.9627202279676446
epoch: 13


train: 100%|██████████| 4756/4756 [00:55<00:00, 84.94it/s, loss=0.224]
validation: 100%|██████████| 118/118 [00:01<00:00, 95.08it/s]


epoch: 13 validation: auc: 0.9669361449942592
epoch: 14


train: 100%|██████████| 4756/4756 [00:55<00:00, 86.09it/s, loss=0.264]
validation: 100%|██████████| 118/118 [00:01<00:00, 93.03it/s]


epoch: 14 validation: auc: 0.9661871984112436
epoch: 15


train: 100%|██████████| 4756/4756 [00:55<00:00, 85.17it/s, loss=0.298]
validation: 100%|██████████| 118/118 [00:01<00:00, 88.36it/s]


epoch: 15 validation: auc: 0.9657857360536811
epoch: 16


train: 100%|██████████| 4756/4756 [00:59<00:00, 80.50it/s, loss=0.182]
validation: 100%|██████████| 118/118 [00:01<00:00, 94.70it/s]


epoch: 16 validation: auc: 0.9690367939688433
epoch: 17


train: 100%|██████████| 4756/4756 [00:54<00:00, 87.32it/s, loss=0.239]
validation: 100%|██████████| 118/118 [00:01<00:00, 97.55it/s]


epoch: 17 validation: auc: 0.9671025775682626
epoch: 18


train: 100%|██████████| 4756/4756 [00:54<00:00, 86.86it/s, loss=0.215]
validation: 100%|██████████| 118/118 [00:01<00:00, 97.09it/s]


epoch: 18 validation: auc: 0.9676603516000581
epoch: 19


train: 100%|██████████| 4756/4756 [00:58<00:00, 81.73it/s, loss=0.218]
validation: 100%|██████████| 118/118 [00:01<00:00, 98.42it/s]


epoch: 19 validation: auc: 0.9652650719877109


validation: 100%|██████████| 118/118 [00:01<00:00, 93.93it/s]
predict: 100%|██████████| 118/118 [00:01<00:00, 96.21it/s]

test auc: 0.9763204681703326





## Results

In [None]:
with open("./metrics/rmse.txt", "w") as fp:
    json.dump(rmse, fp)
with open("./metrics/ndcg.txt", "w") as fp:
    json.dump(ndcg_10, fp)
with open("./metrics/recall.txt", "w") as fp:
    json.dump(recall_10, fp)


In [182]:
with open("./metrics/rmse.txt", "r") as fp:
    rmse = json.load(fp)
with open("./metrics/ndcg.txt", "r") as fp:
    ndcg_10 = json.load(fp)
with open("./metrics/recall.txt", "r") as fp:
    recall_10 = json.load(fp)


<class 'dict'>
49
{'bookIPNN': 0.7630446736162781, 'bookOPNN': 0.7596840523834419, 'bookPNN': 0.7518341524053948, 'bookCCPM': 0.7424491460611815, 'bookWDL': 0.7093089364684898, 'bookDCN': 0.7302810220618285, 'bookNFM': 0.7568314381827344, 'bookDeepFM': 0.7386060319249964, 'bookAFM': 0.7353113174115252, 'bookxDeepFM': 0.7524432051769194, 'movieIPNN': 1.1257715623148956, 'movieOPNN': 1.0723128454116133, 'moviePNN': 1.1457441626815474, 'movieCCPM': 1.053634406962656, 'movieWDL': 1.0492728323684728, 'movieDCN': 1.0530302980259754, 'movieNFM': 1.0942265873231904, 'movieDeepFM': 1.0515179004479969, 'movieAFM': 1.0710224032118236, 'moviexDeepFM': 1.0744275480292602, 'businessIPNN': 1.2039563606524462, 'businessOPNN': 1.1574377340755466, 'businessPNN': 1.1446729801067166, 'businessCCPM': 1.0781101968351026, 'businessWDL': 1.0570848609478811, 'businessDCN': 1.0804563918023302, 'businessNFM': 1.293179055678124, 'businessDeepFM': 1.0519467034898193, 'businessAFM': 1.083461280218211, 'businessxDee

In [4]:
iterables = [['book', 'movie', 'business'], ['rmse', 'recall', 'ndcg_10']]
# temp = pd.DataFrame(columns = ['book', 'movie', 'business'], index = model)
model = ['UCF-s', 'UCF-p', 'ICF-s', 'ICF-p', 'MF', 'FM', 'IPNN', 'OPNN', 'PNN', 'CCPM', 'WDL', 'DCN', 'NFM', 'DeepFM', 'AFM', 'DIN']
a = pd.MultiIndex.from_product(iterables, names = ['data', 'metrics'])
temp = pd.DataFrame(columns = a, index = model)
for i in temp.columns :
    for j in temp.index :
        for k in rmse.keys() :   
            if (i[0] in k) and (j in k) :
                t = sum([a.isupper() for a in k])
                if (t == 2) and (j == 'FM'): 
                    temp.at['FM', (i[0], 'rmse')] = round(rmse[k], 4)
                elif j != 'FM': 
                    # print(j)
                    temp.at[j, (i[0], 'rmse')] = round(rmse[k], 4)

for i in temp.columns :
    for j in temp.index :
        for k in ndcg_10.keys() :   
            if (i[0] in k) and (j in k) :
                t = sum([a.isupper() for a in k])
                if (t == 2) and (j == 'FM'): 
                    temp.at['FM', (i[0], 'ndcg_10')] = round(ndcg_10[k], 4)
                elif j != 'FM': 
                    # print(j)
                    temp.at[j, (i[0], 'ndcg_10')] = round(ndcg_10[k], 4)

print(temp)
# temp.to_csv('results.csv')
temp.to_csv('results_round4.csv', float_format='%.4f')

data       book                  movie                business               
metrics    rmse recall ndcg_10    rmse recall ndcg_10     rmse recall ndcg_10
UCF-s    0.7565    NaN     NaN  1.0173    NaN     NaN   1.1039    NaN     NaN
UCF-p    0.7739    NaN     NaN   1.012    NaN     NaN   1.1652    NaN     NaN
ICF-s    0.7563    NaN     NaN  1.0172    NaN     NaN   1.1049    NaN     NaN
ICF-p    0.7736    NaN     NaN   1.011    NaN     NaN    1.165    NaN     NaN
MF       0.7195    NaN   0.953  0.9534    NaN  0.8523   1.1082    NaN  0.8574
FM       0.7032    NaN  0.9786  0.9461    NaN  0.9739   1.0335    NaN  0.9584
IPNN      0.763    NaN   0.956  1.1258    NaN  0.8057    1.204    NaN    0.69
OPNN     0.7597    NaN  0.9086  1.0723    NaN   0.872   1.1574    NaN  0.6932
PNN      0.7518    NaN  0.8901  1.1457    NaN  0.8566   1.1447    NaN  0.8155
CCPM     0.7424    NaN  0.9575  1.0536    NaN  0.8787   1.0781    NaN  0.9271
WDL      0.7093    NaN  0.8775  1.0493    NaN  0.7686   1.0571  

In [5]:
temp = pd.read_csv('results_round4.csv', header=[0,1], index_col = [0])
print(temp)

data       book                  movie                business               
metrics    rmse recall ndcg_10    rmse recall ndcg_10     rmse recall ndcg_10
UCF-s    0.7565    NaN     NaN  1.0173    NaN     NaN   1.1039    NaN     NaN
UCF-p    0.7739    NaN     NaN  1.0120    NaN     NaN   1.1652    NaN     NaN
ICF-s    0.7563    NaN     NaN  1.0172    NaN     NaN   1.1049    NaN     NaN
ICF-p    0.7736    NaN     NaN  1.0110    NaN     NaN   1.1650    NaN     NaN
MF       0.7195    NaN  0.9530  0.9534    NaN  0.8523   1.1082    NaN  0.8574
FM       0.7032    NaN  0.9786  0.9461    NaN  0.9739   1.0335    NaN  0.9584
IPNN     0.7630    NaN  0.9560  1.1258    NaN  0.8057   1.2040    NaN  0.6900
OPNN     0.7597    NaN  0.9086  1.0723    NaN  0.8720   1.1574    NaN  0.6932
PNN      0.7518    NaN  0.8901  1.1457    NaN  0.8566   1.1447    NaN  0.8155
CCPM     0.7424    NaN  0.9575  1.0536    NaN  0.8787   1.0781    NaN  0.9271
WDL      0.7093    NaN  0.8775  1.0493    NaN  0.7686   1.0571  